reference. DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning [guo2025deepseek]
reference. DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning [guo2025deepseek]
% arXiv preprint (identifier 2501.12948). Stored as @misc with eprint fields
% rather than as an @article with the arXiv number in `journal`.
% Braces in the title protect names that must keep their capitalisation
% when a style applies sentence casing.
@misc{guo2025deepseek,
  author        = {Guo, Daya and Yang, Dejian and Zhang, Haowei and Song, Junxiao and Zhang, Ruoyu and Xu, Runxin and Zhu, Qihao and Ma, Shirong and Wang, Peiyi and Bi, Xiao and others},
  title         = {{DeepSeek-R1}: Incentivizing Reasoning Capability in {LLMs} via Reinforcement Learning},
  year          = {2025},
  eprint        = {2501.12948},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.12948},
}