<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:x="urn:schemas-microsoft-com:office:excel" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv=Content-Type content="text/html; charset=us-ascii"><meta name=Generator content="Microsoft Word 15 (filtered medium)"><!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:Aptos;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Aptos",sans-serif;
        mso-ligatures:standardcontextual;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#467886;
        text-decoration:underline;}
span.EmailStyle17
        {mso-style-type:personal-compose;
        font-family:"Aptos",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:11.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1027" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-US link="#467886" vlink="#96607D" style='word-wrap:break-word'><div class=WordSection1><p class=MsoNormal style='background:white'><b><u><span style='font-size:22.0pt;font-family:"Arial",sans-serif;color:#286DC0;mso-ligatures:none'>FDS Colloquium<o:p></o:p></span></u></b></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Eran Malach, Harvard University<o:p></o:p></span></p><p class=MsoNormal><!--[if gte vml 1]><v:shapetype id="_x0000_t75" coordsize="21600,21600" o:spt="75" o:preferrelative="t" path="m@4@5l@4@11@9@11@9@5xe" filled="f" stroked="f">
<v:stroke joinstyle="miter" />
<v:formulas>
<v:f eqn="if lineDrawn pixelLineWidth 0" />
<v:f eqn="sum @0 1 0" />
<v:f eqn="sum 0 0 @1" />
<v:f eqn="prod @2 1 2" />
<v:f eqn="prod @3 21600 pixelWidth" />
<v:f eqn="prod @3 21600 pixelHeight" />
<v:f eqn="sum @0 0 1" />
<v:f eqn="prod @6 1 2" />
<v:f eqn="prod @7 21600 pixelWidth" />
<v:f eqn="sum @8 21600 0" />
<v:f eqn="prod @7 21600 pixelHeight" />
<v:f eqn="sum @10 21600 0" />
</v:formulas>
<v:path o:extrusionok="f" gradientshapeok="t" o:connecttype="rect" />
<o:lock v:ext="edit" aspectratio="t" />
</v:shapetype><v:shape id="Picture_x0020_2" o:spid="_x0000_s1026" type="#_x0000_t75" style='position:absolute;margin-left:0;margin-top:.3pt;width:108.75pt;height:130.5pt;z-index:251658240;visibility:visible;mso-wrap-style:square;mso-width-percent:0;mso-height-percent:0;mso-wrap-distance-left:9pt;mso-wrap-distance-top:0;mso-wrap-distance-right:9pt;mso-wrap-distance-bottom:0;mso-position-horizontal:absolute;mso-position-horizontal-relative:text;mso-position-vertical:absolute;mso-position-vertical-relative:text;mso-width-percent:0;mso-height-percent:0;mso-width-relative:page;mso-height-relative:page'>
<v:imagedata src="cid:image002.jpg@01DB65B2.76267EE0" o:title="" />
<w:wrap type="square"/>
</v:shape><![endif]--><![if !vml]><img width=145 height=174 style='width:1.5104in;height:1.8125in' src="cid:image003.jpg@01DB65C0.1CB98C90" alt="" align=left hspace=12 v:shapes="Picture_x0020_2"><![endif]><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Date: Wednesday, January 15, 2025<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Time: 12:00PM to 1:00PM<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Location: Kline Tower, 13th Floor, Rm. 1327 <a href="https://maps.google.com/?q=219+Prospect+Street%2C+New+Haven%2C+CT%2C+06511%2C+us">See map</a> <o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>219 Prospect Street<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>New Haven, CT 06511<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><a href="https://www.eranmalach.com/">Website</a><o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Webcast: <a href="https://yale.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=6f3e7e6c-c1ef-4066-b4f2-b2590138c39">https://yale.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=6f3e7e6c-c1ef-4066-b4f2-b2590138c39</a><o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span 
style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Learning Hard Problems with Neural Networks and Language Models<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Information and Abstract:  Modern machine learning models, and in particular large language models, can now solve surprisingly complex mathematical reasoning problems. In this talk I will explore how neural networks and autoregressive language models can learn to solve computationally hard reasoning tasks. I will begin by discussing the sparse parity problem, a theoretical proxy for studying the challenges of learning complex functions with Stochastic Gradient Descent (SGD). I will show that the computational resources required for learning sparse parities with SGD scale exponentially with the &ldquo;sparsity&rdquo; of the problem, making it computationally hard to learn. Next, I will demonstrate how introducing step-by-step supervision through auto-regressive language models overcomes these barriers, enabling simple models trained on next-token prediction to efficiently learn any Turing-computable function. These results serve as a basis for studying machine learning with language models, with implications for data structure, architecture design and training paradigms.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Bio: Eran Malach is a postdoc Research Fellow in the Kempner Institute at Harvard University. Previously, he did his PhD at the School of Computer Science and Engineering in the Hebrew University of Jerusalem, advised by Prof. Shai Shalev-Shwartz. 
His research focus is Machine Learning and Theoretical Foundations of Deep Learning and Language Models. He is mainly interested in computational aspects of learning and optimization. He also worked at Mobileye, where he developed machine learning and computer vision algorithms for driver-assistance systems and self-driving cars. His research is supported by the Rothschild Fellowship, the William F. Milton Fund and the OpenAI Superalignment Fast Grant.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><b><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Lunch at 11:30am in room 1307<br>Talk at 12:00-1:00pm in room 1327A<o:p></o:p></span></b></p><p class=MsoNormal><b><i><span style='font-size:12.0pt;font-family:"Calibri",sans-serif;mso-ligatures:none'><o:p> </o:p></span></i></b></p><p class=MsoNormal><span style='font-size:12.0pt;font-family:"Arial",sans-serif;color:black;mso-ligatures:none'>For more details and upcoming events visit our website at </span><span style='font-size:12.0pt'><a href="https://statistics.yale.edu/calendar"><span style='font-family:"Arial",sans-serif;color:#467886;mso-ligatures:none'>https://statistics.yale.edu/calendar</span></a></span><span style='font-size:12.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>. 
</span><span style='font-size:12.0pt;mso-ligatures:none'><o:p></o:p></span></p><p class=MsoNormal><span style='font-family:"Arial",sans-serif;mso-ligatures:none'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:18.0pt;font-family:"Arial",sans-serif;mso-ligatures:none'>Department of Statistics and Data Science<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:9.0pt;font-family:"Arial",sans-serif;color:black;mso-ligatures:none'>Yale University<br>Kline Tower<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:9.0pt;font-family:"Arial",sans-serif;color:black;mso-ligatures:none'>219 Prospect Street<br>New Haven, CT 06511<o:p></o:p></span></p><p class=MsoNormal><a href="https://statistics.yale.edu/"><span style='color:#467886'>https://statistics.yale.edu/</span></a><o:p></o:p></p><p class=MsoNormal><o:p> </o:p></p></div></body></html>