<!DOCTYPE html>
<!-- The doctype has to be the first thing in the file: any text placed before it
     switches browsers to quirks mode and is rendered as page content. -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- Social-media preview metadata (Open Graph / Twitter Card). This is the page's
       "business card", so keep title, description and URL in sync with the project. -->
  <meta name="description" content="GenTuber Project Page">
  <meta property="og:title" content="GenTuber Project Page">
  <meta property="og:description" content="Official project page for GenTuber">
  <meta property="og:url" content="https://longnhatne.github.io/GenTuber/">
  <!-- Banner image (optimal dimensions 1200x630). Link-preview crawlers do not resolve
       relative paths, so the image is given as an absolute URL under og:url. -->
  <meta property="og:image" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta property="og:image:secure_url" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta property="og:image:width" content="1200">
  <meta property="og:image:height" content="630">

  <meta name="twitter:title" content="GenTuber Project Page">
  <meta name="twitter:description" content="Official project page for GenTuber">
  <!-- Banner image for the Twitter card (optimal dimensions 1200x600); absolute URL for the same reason as og:image. -->
  <meta name="twitter:image" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta name="twitter:card" content="summary_large_image">
  <!-- Keywords for the paper to be indexed by -->
  <meta name="keywords" content="mbzuai, tubingen, pinscreen, realtime, gentuber, editing, portrait, stylegan, headreenactment, computervision, neuralradiancefield, avatar, singleview, siggraph, digitalhuman, virtualhuman">

  <title>GenTuber</title>
  <!-- The favicon is a PNG file, so it is declared with the matching MIME type. -->
  <link rel="icon" type="image/png" href="static/images/favicon.png">
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
  rel="stylesheet">

  <!-- Stylesheets: Bulma and its plugins first, the page-specific index.css last -->
  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <!-- Scripts keep their original order and loading mode; index.js is loaded after
       the libraries (presumably because it depends on them - confirm before reordering). -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>


  <!-- Page header: paper title, authors, affiliations and resource buttons -->
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">GenTuber: Real-Time Generative Video Portrait Editing</h1>
            <!-- <h1 class="title is-size-3 publication-title">CVPR 2024</h1> -->

            <!-- Paper authors; the superscripts refer to the affiliation list below -->
            <div class="is-size-4 publication-authors">
              <span class="author-block">
                <a href="https://scholar.google.com/citations?user=w3vrRqsAAAAJ&amp;hl=en" target="_blank" rel="noopener">Long-Nhat Ho<sup>1</sup></a>,</span>
              <span class="author-block">
                <a href="https://axelsauer.com/" target="_blank" rel="noopener">Axel Sauer<sup>2</sup></a>,</span>
              <span class="author-block">
                <a href="https://p0lyfish.github.io/" target="_blank" rel="noopener">Phong Tran<sup>1</sup></a>,</span>
              <span class="author-block">
                <a href="https://www.cvlibs.net/" target="_blank" rel="noopener">Andreas Geiger<sup>3</sup></a>,</span>
              <span class="author-block">
                <a href="https://www.hao-li.com/" target="_blank" rel="noopener">Hao Li<sup>1,4</sup></a>
              </span>
            </div>

            <!-- Affiliations. target must be the keyword "_blank"; the previous value
                 " blank" was treated as a window name rather than "open a new tab". -->
            <div class="is-size-5 publication-authors">
              <span class="author-block">
                <sup>1</sup><a href="https://mbzuai.ac.ae/" target="_blank" rel="noopener">MBZUAI</a>,
                <sup>2</sup><a href="https://blackforestlabs.ai/" target="_blank" rel="noopener">Black Forest Labs</a>,
                <sup>3</sup><a href="https://uni-tuebingen.de/en/" target="_blank" rel="noopener">University of Tübingen</a>,
                <sup>4</sup><a href="https://www.pinscreen.com/" target="_blank" rel="noopener">Pinscreen</a>
              </span>
            </div>

            <div class="column has-text-centered">
              <div class="publication-links">
                <!-- Abstract / arXiv PDF link. No href until the paper is public: an empty
                     href with target="_blank" would just reopen this page in a new tab. -->
                <span class="link-block">
                  <a class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span style="display: block;">
                      <span style="font-size: 14px;">Abstract</span><br>
                      <span style="font-size: 14px; margin-top: -5px; display: block;">(coming soon)</span>
                    </span>
                  </a>
                </span>

                <!-- GitHub link. Placeholder without href until the code is released (same reason as above). -->
                <span class="link-block">
                  <a class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-github"></i>
                    </span>
                    <span style="display: block;">
                      <span style="font-size: 14px;">Code</span><br>
                      <span style="font-size: 14px; margin-top: -5px; display: block;">(coming soon)</span>
                    </span>
                  </a>
                </span>

                <!-- YouTube link -->
                <span class="link-block">
                  <a href="https://youtu.be/jC6StOWvnLo" target="_blank" rel="noopener"
                     class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-youtube"></i>
                    </span>
                    <span style="display: block;">
                      <span style="font-size: 14px;">Youtube</span>
                    </span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>


<!-- Teaser: one-paragraph summary of the project (this section holds text only, no video) -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- Body text, not a heading: it is marked up as a paragraph and keeps the Bulma
           "subtitle" class for its appearance. -->
      <p class="subtitle has-text-centered">
        We present <b>GenTuber</b>, a real-time generative AI system for editing a live stream of a video portrait using text prompts and/or an input image. The input face is altered on-the-fly using spatial features extracted from the input to guide a generator based on StyleGAN-T, which is trained using diffusion model-based images.
      </p>
    </div>
  </div>
</section>
<!-- End teaser -->

<!-- Paper abstract. The text is a single paragraph; it is written one sentence per line
     purely for readable diffs (line breaks collapse to spaces when rendered). -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Trends in diffusion-based models can be attributed to their ability to generate high quality and extremely diverse images from any text prompt.
            Similarly, impressive text-based editing capabilities for videos have been shown recently.
            While diffusion-based techniques are known to be slow, real-time approaches have been introduced (e.g., one-step approaches, scheduling/parallelization, etc.).
            These acceleration methods come with limitations such as temporal inconsistency and lack of expressiveness for generated faces.
            We introduce GenTuber, a StyleGAN-T and transformer-based neural network, that can modify video portraits on-the-fly and produce highly convincing and complex video edits, such as becoming another person/creature, changing hair color/clothing, adding glasses/beards, using either text prompts and/or an image reference as input.
            Our method runs at 30 fps while producing highly expressive and faithful facial expressions.
            Our method adopts an architecture for diverse content generation based on a StyleGAN-T decoder, where each input frame is first passed into a CNN-based encoder.
            The spatial features are then mapped to transformer blocks of our decoder for image generation.
            Style codes obtained by a text/image-based CLIP encoder are then connected to image editing layers of our decoder.
            While similar real-time architectures have been introduced, only domain-specific image generations (cartoons) or low-level stylization effects have been demonstrated.
            To enable highly diverse facial appearance generation, we use a large training dataset of 250K face samples based on synthesized faces using SDXL (image-to-image translation), as well as the LAION-face dataset.
            Furthermore, highly expressive face reenactment is possible using a novel loss function based on a pre-trained expression encoder that uses implicit keypoints and training data from the Nersemble face dataset.
            Our method can produce high-fidelity video portrait reenactments and edits in real-time.
            Compared to existing head reenactment techniques, our approach processes the entire video frame including the upper body, face, and hair, instead of only the head.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End paper abstract -->


<!-- Demo video: embedded YouTube player for the project video -->
<section class="hero is-small">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <h2 class="title is-3">Demo Video</h2>
      <div class="publication-video">
        <!-- The title gives the embedded player an accessible name for screen readers. -->
        <iframe src="https://www.youtube.com/embed/jC6StOWvnLo" title="GenTuber demo video" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
      </div>
    </div>
  </div>
</section>
<!-- End demo video -->


<!-- BibTeX citation -->
  <!-- <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX</h2>
      <pre><code>
        (coming soon)
      </code></pre>
    </div>
</section> -->
<!-- End BibTeX citation -->


<!-- Page footer: template attribution and content license -->
<footer class="footer">
  <div class="container">
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">

          <p>
            This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank" rel="noopener">Academic Project Page Template</a> which was adopted from the <a href="https://nerfies.github.io" target="_blank" rel="noopener">Nerfies</a> project page.
            <!-- The license link uses https so the page never links out over an insecure scheme. -->
            <br> This website is licensed under a <a rel="license noopener" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>

        </div>
      </div>
    </div>
  </div>
</footer>

<!-- Statcounter page-visit tracking for https://longnhatne.github.io/GenTuber/
     (a tracker like this is set up by creating an account at statcounter.com). -->
<script>
  // Project settings declared as globals before counter.js is loaded
  // (presumably counter.js reads them - confirm against the Statcounter docs).
  var sc_project = 12948107;
  var sc_invisible = 1;
  var sc_security = "8b4e6576";
</script>
<script src="https://www.statcounter.com/counter/counter.js" async></script>
<!-- Image fallback used when JavaScript is disabled -->
<noscript>
  <div class="statcounter">
    <a href="https://statcounter.com/" title="web stats" target="_blank"><img class="statcounter" src="https://c.statcounter.com/12948107/0/8b4e6576/1/" alt="web stats" referrerpolicy="no-referrer-when-downgrade"></a>
  </div>
</noscript>
<!-- End of Statcounter code -->


  </body>
  </html>