-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
228 lines (199 loc) · 11.7 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
<!DOCTYPE html>
<!-- lang declares the page language for assistive technology and search engines. -->
<html lang="en">
<head>
  <meta charset="utf-8">

  <!-- Social-media preview metadata (Open Graph and Twitter Card). -->
  <meta name="description" content="GenTuber Project Page">
  <meta property="og:title" content="GenTuber Project Page">
  <meta property="og:description" content="Official project page for GenTuber">
  <meta property="og:url" content="https://longnhatne.github.io/GenTuber/">
  <!-- Banner image for link previews. Use absolute URLs here: preview crawlers
       generally do not resolve paths relative to this page. Declared size is 1200x630. -->
  <meta property="og:image" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta property="og:image:secure_url" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta property="og:image:width" content="1200">
  <meta property="og:image:height" content="630">
  <meta name="twitter:title" content="GenTuber Project Page">
  <meta name="twitter:description" content="Official project page for GenTuber">
  <!-- Same banner image, reused for the Twitter large-image card. -->
  <meta name="twitter:image" content="https://longnhatne.github.io/GenTuber/static/images/teaser.jpg">
  <meta name="twitter:card" content="summary_large_image">

  <!-- Keywords for search indexing. -->
  <meta name="keywords" content="mbzuai, tubingen, pinscreen, realtime, gentuber, editing, portrait, stylegan, headreenactment, computervision, neuralradiancefield, avatar, singleview, siggraph, digitalhuman, virtualhuman">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>GenTuber</title>

  <!-- The favicon file is a PNG, so the declared MIME type is image/png. -->
  <link rel="icon" type="image/png" href="static/images/favicon.png">

  <!-- Stylesheets: web fonts, Bulma and its plugins, icon fonts, then page-specific rules. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">
  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <!-- Scripts: load order is kept as-is; index.js is loaded last, after the libraries
       (presumably because it depends on them - confirm before reordering or deferring). -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>
<!-- Hero: paper title, author list, affiliations, and resource buttons. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">GenTuber: Real-Time Generative Video Portrait Editing</h1>
          <!-- <h1 class="title is-size-3 publication-title">CVPR 2024</h1> -->
          <div class="is-size-4 publication-authors">
            <!-- Paper authors; the superscripts index the affiliations listed below. -->
            <span class="author-block">
              <a href="https://scholar.google.com/citations?user=w3vrRqsAAAAJ&amp;hl=en" target="_blank" rel="noopener">Long-Nhat Ho<sup>1</sup></a>,</span>
            <span class="author-block">
              <a href="https://axelsauer.com/" target="_blank" rel="noopener">Axel Sauer<sup>2</sup></a>,</span>
            <span class="author-block">
              <a href="https://p0lyfish.github.io/" target="_blank" rel="noopener">Phong Tran<sup>1</sup></a>,</span>
            <span class="author-block">
              <a href="https://www.cvlibs.net/" target="_blank" rel="noopener">Andreas Geiger<sup>3</sup></a>,</span>
            <span class="author-block">
              <a href="https://www.hao-li.com/" target="_blank" rel="noopener">Hao Li<sup>1,4</sup></a>
            </span>
          </div>
          <div class="is-size-5 publication-authors">
            <!-- Affiliations. target must be "_blank" (with the underscore) to open a new tab;
                 any other value is treated as the name of a browsing context. -->
            <span class="author-block"><sup>1</sup><a href="https://mbzuai.ac.ae/" target="_blank" rel="noopener">MBZUAI</a>, <sup>2</sup><a href="https://blackforestlabs.ai/" target="_blank" rel="noopener">Black Forest Labs</a>, <sup>3</sup> <a href="https://uni-tuebingen.de/en/" target="_blank" rel="noopener">University of Tübingen</a>, <sup>4</sup><a href="https://www.pinscreen.com/" target="_blank" rel="noopener">Pinscreen</a></span>
          </div>
          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- Paper/abstract button. The URL is not available yet, so the anchor carries no
                   href: it is an inert placeholder instead of a link that reopens this page
                   in a new tab. Add href and target="_blank" once the paper is published. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span style="display: block;">
                    <span style="font-size: 14px;">Abstract</span><br>
                    <span style="font-size: 14px; margin-top: -5px; display: block;">(coming soon)</span>
                  </span>
                </a>
              </span>
              <!-- Code button. Same placeholder treatment until the repository is public. -->
              <span class="link-block">
                <a class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span style="display: block;">
                    <span style="font-size: 14px; margin-top: -5px;">Code</span><br>
                    <span style="font-size: 14px; margin-top: -5px; display: block;">(coming soon)</span>
                  </span>
                </a>
              </span>
              <!-- YouTube demo video link. -->
              <span class="link-block">
                <a href="https://youtu.be/jC6StOWvnLo" target="_blank" rel="noopener"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-youtube"></i>
                  </span>
                  <span style="display: block;">
                    <span style="font-size: 14px;">Youtube</span>
                  </span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Teaser: one-paragraph summary of the system (text only, no media in this section). -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <h2 class="subtitle has-text-centered">
        We present <b>GenTuber</b>, a real-time generative AI system for editing a live stream of a video portrait using text prompts and/or an input image. The input face is altered on-the-fly using spatial features extracted from the input to guide a generator based on StyleGAN-T, which is trained using diffusion model-based images.
      </h2>
    </div>
  </div>
</section>
<!-- End teaser -->
<!-- Paper abstract: centered heading with the justified abstract paragraph. -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            Trends in diffusion-based models can be attributed to their ability to generate high-quality and extremely diverse images from any text prompt. Similarly, impressive text-based editing capabilities for videos have been shown recently. While diffusion-based techniques are known to be slow, real-time approaches have been introduced (e.g., one-step approaches, scheduling/parallelization, etc.). These acceleration methods come with limitations such as temporal inconsistency and lack of expressiveness for generated faces. We introduce GenTuber, a StyleGAN-T and transformer-based neural network, that can modify video portraits on-the-fly and produce highly convincing and complex video edits, such as becoming another person/creature, changing hair color/clothing, adding glasses/beards, using text prompts and/or an image reference as input. Our method runs at 30 fps while producing highly expressive and faithful facial expressions. Our method adopts an architecture for diverse content generation based on a StyleGAN-T decoder, where each input frame is first passed into a CNN-based encoder. The spatial features are then mapped to transformer blocks of our decoder for image generation. Style codes obtained by a text/image-based CLIP encoder are then connected to image editing layers of our decoder. While similar real-time architectures have been introduced, only domain-specific image generations (cartoons) or low-level stylization effects have been demonstrated. To enable highly diverse facial appearance generation, we use a large training dataset of 250K face samples based on synthesized faces using SDXL (image-to-image translation), as well as the LAION-Face dataset. Furthermore, highly expressive face reenactment is possible using a novel loss function based on a pre-trained expression encoder that uses implicit keypoints and training data from the NeRSemble face dataset. Our method can produce high-fidelity video portrait reenactments and edits in real time.
            Compared to existing head reenactment techniques, our approach processes the entire video frame including the upper body, face, and hair, instead of only the head.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End paper abstract -->
<!-- Demo video: embedded YouTube player. -->
<section class="hero is-small">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <h2 class="title is-3">Demo Video</h2>
      <div class="publication-video">
        <!-- title gives the frame an accessible name so screen-reader users know what it contains. -->
        <iframe src="https://www.youtube.com/embed/jC6StOWvnLo" title="GenTuber demo video" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
      </div>
    </div>
  </div>
</section>
<!-- End demo video -->
<!-- BibTeX citation (commented out below until the entry is available) -->
<!-- <section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<pre><code>
(coming soon)
</code></pre>
</div>
</section> -->
<!-- End BibTeX citation -->
<!-- Footer: template attribution and site license. -->
<footer class="footer">
  <div class="container">
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank" rel="noopener">Academic Project Page Template</a> which was adapted from the <a href="https://nerfies.github.io" target="_blank" rel="noopener">Nerfies</a> project page.
            <!-- The license link uses https like every other external link on the page. -->
            <br> This website is licensed under a <a rel="license noopener" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>
<!-- Statcounter visit tracking for https://longnhatne.github.io/GenTuber/ -->
<!-- Project settings are declared as globals with var before counter.js is requested
     (presumably counter.js reads them from the global scope - keep them as var). -->
<script>
  var sc_project = 12948107,
      sc_invisible = 1,
      sc_security = "8b4e6576";
</script>
<script src="https://www.statcounter.com/counter/counter.js" async></script>
<!-- Fallback when JavaScript is disabled: an image request to Statcounter wrapped in a link. -->
<noscript>
  <div class="statcounter">
    <a title="web stats" href="https://statcounter.com/" target="_blank"><img class="statcounter" src="https://c.statcounter.com/12948107/0/8b4e6576/1/" alt="web stats" referrerpolicy="no-referrer-when-downgrade"></a>
  </div>
</noscript>
<!-- End of Statcounter code -->
</body>
</html>