Add example videos
- .gitattributes +2 -0
- .gitignore +0 -1
- app.py +50 -23
- data/CkWOpyrAXdw_210.0_360.0.mp4 +3 -0
- data/HkLfNhgP0TM_660.0_810.0.mp4 +3 -0
- data/gTAvxnQtjXM_60.0_210.0.mp4 +3 -0
- data/ocLUzCNodj4_360.0_510.0.mp4 +3 -0
- data/pA6Z-qYhSNg_210.0_360.0.mp4 +3 -0
- setup.cfg +1 -1
.gitattributes CHANGED

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+data filter=lfs diff=lfs merge=lfs -text
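The two new patterns route this commit's example videos through Git LFS: any *.mp4 file, and anything under the data directory, is stored as an LFS pointer rather than as raw bytes. A minimal Python sketch (not part of the repo, and only an approximation of git's own attribute matching) of which added files the new rules cover:

from pathlib import Path

# Files added in this commit (see the data/*.mp4 entries below).
ADDED = [
    'data/CkWOpyrAXdw_210.0_360.0.mp4',
    'data/HkLfNhgP0TM_660.0_810.0.mp4',
    'data/gTAvxnQtjXM_60.0_210.0.mp4',
    'data/ocLUzCNodj4_360.0_510.0.mp4',
    'data/pA6Z-qYhSNg_210.0_360.0.mp4',
]

def covered_by_new_rules(path):
    # Approximates the two new patterns: '*.mp4' matches by extension,
    # 'data' matches everything inside the data/ directory.
    p = Path(path)
    return p.suffix == '.mp4' or p.parts[0] == 'data'

print(all(covered_by_new_rules(f) for f in ADDED))  # True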
.gitignore CHANGED

@@ -5,6 +5,5 @@ __pycache__/
 
 # Temporary data
 /checkpoints
-/flagged
 .DS_Store
 ._*
app.py CHANGED

@@ -1,24 +1,39 @@
 # Copyright (c) Ye Liu. Licensed under the BSD 3-Clause License.
 
+import random
 from functools import partial
 
 import clip
 import decord
-import nncore
-import torch
 import gradio as gr
+import nncore
 import numpy as np
-import
+import torch
 import torchvision.transforms.functional as F
 from decord import VideoReader
 from nncore.engine import load_checkpoint
 from nncore.nn import build_model
 
-
-
+import pandas as pd
+
+TITLE = '๐R2-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding'
+
+TITLE_MD = '<h1 align="center">๐R<sup>2</sup>-Tuning: Efficient Image-to-Video Transfer Learning for Video Temporal Grounding</h1>'
+DESCRIPTION_MD = 'R<sup>2</sup>-Tuning is a parameter- and memory-efficient transfer learning method for video temporal grounding. Please find more details in our <a href="https://arxiv.org/abs/2404.00801" target="_blank">Tech Report</a> and <a href="https://github.com/yeliudev/R2-Tuning" target="_blank">GitHub Repo</a>.'
+GUIDE_MD = '### User Guide:\n1. Upload a video or click "random" to sample one.\n2. Input a text query. A good practice is to write a sentence with 5~15 words.\n3. Click "submit" and you\'ll see the moment retrieval and highlight detection results on the right.'
 
 CONFIG = 'configs/qvhighlights/r2_tuning_qvhighlights.py'
-WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
+WEIGHT = 'https://huggingface.co/yeliudev/R2-Tuning/resolve/main/checkpoints/r2_tuning_qvhighlights-ed516355.pth'
+
+# yapf:disable
+EXAMPLES = [
+    ('data/gTAvxnQtjXM_60.0_210.0.mp4', 'A man in a white t shirt wearing a backpack is showing a nearby cathedral.'),
+    ('data/pA6Z-qYhSNg_210.0_360.0.mp4', 'Different Facebook posts on transgender bathrooms are shown.'),
+    ('data/CkWOpyrAXdw_210.0_360.0.mp4', 'Indian girl cleaning her kitchen before cooking.'),
+    ('data/ocLUzCNodj4_360.0_510.0.mp4', 'A woman stands in her bedroom in front of a mirror and talks.'),
+    ('data/HkLfNhgP0TM_660.0_810.0.mp4', 'Woman lays down on the couch while talking to the camera.')
+]
+# yapf:enable
 
 
 def convert_time(seconds):

@@ -88,22 +103,34 @@ def main(video, query, model, cfg):
 
 model, cfg = init_model(CONFIG, WEIGHT)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+fn = partial(main, model=model, cfg=cfg)
+
+with gr.Blocks(title=TITLE) as demo:
+    gr.Markdown(TITLE_MD)
+    gr.Markdown(DESCRIPTION_MD)
+    gr.Markdown(GUIDE_MD)
+
+    with gr.Row():
+        with gr.Column():
+            video = gr.Video(label='Video')
+            query = gr.Textbox(label='Text Query')
+
+            with gr.Row():
+                random_btn = gr.Button(value='๐ฎ Random')
+                gr.ClearButton([video, query], value='๐๏ธ Reset')
+                submit_btn = gr.Button(value='๐ Submit')
+
+        with gr.Column():
+            mr = gr.DataFrame(
+                headers=['Start Time', 'End Time', 'Score'], label='Moment Retrieval')
+            hd = gr.LinePlot(
+                x='x',
+                y='y',
+                x_title='Time (seconds)',
+                y_title='Saliency Score',
+                label='Highlight Detection')
+
+    random_btn.click(lambda: random.sample(EXAMPLES, 1)[0], None, [video, query])
+    submit_btn.click(fn, [video, query], [mr, hd])
 
 demo.launch()
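The rebuilt interface wires two callbacks: the random button samples one (video_path, query) pair from EXAMPLES into the two input components, and submit passes them to main(), which is defined earlier in app.py and untouched by this diff. Judging from the output components, main() is expected to return a table of ['Start Time', 'End Time', 'Score'] rows for gr.DataFrame and a pandas DataFrame with 'x'/'y' columns for gr.LinePlot, which is presumably why pandas is now imported. A placeholder sketch of those shapes (illustrative values only, not the model's predictions):

import random

import pandas as pd

EXAMPLES = [('data/gTAvxnQtjXM_60.0_210.0.mp4',
             'A man in a white t shirt wearing a backpack is showing a nearby cathedral.')]

def placeholder_main(video, query):
    # Moment retrieval rows for gr.DataFrame(headers=['Start Time', 'End Time', 'Score']).
    mr = [['00:00:05', '00:00:12', 0.91],
          ['00:01:30', '00:01:47', 0.78]]
    # Highlight detection curve for gr.LinePlot(x='x', y='y').
    hd = pd.DataFrame({'x': list(range(0, 150, 2)),
                       'y': [0.5] * 75})
    return mr, hd

# What the random button's lambda produces: one (video_path, query) tuple,
# which Gradio unpacks into the [video, query] components.
video_path, query = random.sample(EXAMPLES, 1)[0]
mr, hd = placeholder_main(video_path, query)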
data/CkWOpyrAXdw_210.0_360.0.mp4 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8a433f49ddeabe2ac5eae143e5129de8f2b6ae3838d286b94c838b0b01f9365
+size 6004497
data/HkLfNhgP0TM_660.0_810.0.mp4 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f9ec60b9b5c2f0d235465610f3680216c42c87ce777a6698a78f263711bde36
+size 5166216
data/gTAvxnQtjXM_60.0_210.0.mp4 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beedc2fd64f1c9da322a32b5246b2219726784abf92f0b0236bc8bb16ba5497b
+size 7422854
data/ocLUzCNodj4_360.0_510.0.mp4 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:093de4b36ca46d8b410f01b0cebc1f36c05669f6cb3cb4b5514f7de0329fdceb
+size 9791456
data/pA6Z-qYhSNg_210.0_360.0.mp4 ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a34d5e47ebdb868ea24fac0d38d3cb063c16cf947a91eb77056cc389fc224421
+size 6419206
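Each of the five data/*.mp4 entries above is a Git LFS pointer, not the video itself: only the spec version, a SHA-256 of the real content (oid), and the size in bytes are committed. As an illustration (git-lfs performs this check itself), a short Python sketch that verifies a locally fetched video against its pointer values, using the pA6Z-qYhSNg example above; it assumes the actual video has been downloaded to that path:

import hashlib
import os

def matches_pointer(path, expected_oid, expected_size):
    # Hash the local file in 1 MiB chunks and compare with the pointer's
    # sha256 oid and byte size.
    sha = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

print(matches_pointer(
    'data/pA6Z-qYhSNg_210.0_360.0.mp4',
    'a34d5e47ebdb868ea24fac0d38d3cb063c16cf947a91eb77056cc389fc224421',
    6419206))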
setup.cfg CHANGED

@@ -12,4 +12,4 @@ no_lines_before = STDLIB,LOCALFOLDER
 default_section = FIRSTPARTY
 
 [flake8]
-max-line-length =
+max-line-length = 500