This repository contains the source code for the JavisVerse project page. JavisVerse comprises a series of joint audio-video intelligence sub-projects, such as JavisDiT and JavisGPT.
If you use JavisDiT in your project, please kindly cite:
@misc{liu2025javisdit,
  author       = {Liu, Kai and Li, Wei and Chen, Lai and Wu, Shengqiong and Zheng, Yanhao and Ji, Jiayi and Zhou, Fan and Jiang, Rongxin and Luo, Jiebo and Fei, Hao and Chua, Tat-Seng},
  title        = {{JavisDiT}: Joint Audio-Video Diffusion Transformer with Hierarchical Spatio-Temporal Prior Synchronization},
  howpublished = {arXiv preprint},
  year         = {2025},
}
If you use JavisGPT in your project, please kindly cite:
@inproceedings{liu2025javisgpt,
  author    = {Liu, Kai and Li, Jungang and Sun, Yuchong and Wu, Shengqiong and Gao, Jianzhang and Zhang, Daoan and Zhang, Wei and Jin, Sheng and Yu, Sicheng and Zhan, Geng and Ji, Jiayi and Zhou, Fan and Zheng, Liang and Yan, Shuicheng and Fei, Hao and Chua, Tat-Seng},
  title     = {{JavisGPT}: A Unified Multi-modal {LLM} for Sounding-Video Comprehension and Generation},
  booktitle = {The Thirty-ninth Annual Conference on Neural Information Processing Systems},
  year      = {2025},
}
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
