reference. DAPO: An open-source LLM reinforcement learning system at scale [yu2025dapo]
reference. DAPO: An open-source LLM reinforcement learning system at scale [yu2025dapo]
% arXiv preprint: the identifier lives in eprint/archiveprefix rather than in a journal field.
@misc{yu2025dapo,
  author        = {Yu, Qiying and Zhang, Zheng and Zhu, Ruofei and Yuan, Yufeng and Zuo, Xiaochen and Yue, Yu and Fan, Tiantian and Liu, Gaohong and Liu, Lingjun and Liu, Xin and others},
  title         = {{DAPO}: An Open-Source {LLM} Reinforcement Learning System at Scale},
  year          = {2025},
  eprint        = {2503.14476},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2503.14476},
}