NOTE: This site has just been upgraded to Forester 5.x and still has some style and functionality issues; we will fix them ASAP.

reference. Why transformers need Adam: A Hessian perspective [zhang2024transformers]

% Title is stored in Title Case; the braces around Adam (optimizer name) and
% Hessian (proper adjective) keep their capitals when a bibliography style
% converts the title to sentence case.
@article{zhang2024transformers,
  author  = {Zhang, Yushun and Chen, Congliang and Ding, Tian and Li, Ziniu and Sun, Ruoyu and Luo, Zhiquan},
  title   = {Why Transformers Need {Adam}: A {Hessian} Perspective},
  journal = {Advances in Neural Information Processing Systems},
  year    = {2024},
  volume  = {37},
  pages   = {131786--131823},
}