
Directory structure:
└── end-to-end-use-cases/
    ├── README.md
    ├── agents/
    │   ├── README.md
    │   ├── Agents_Tutorial/
    │   ├── calendar_assistant/
    │   │   ├── README.md
    │   │   └── assets/
    │   └── DeepLearningai_Course_Notebooks/
    │       └── README.md
    ├── ArticleSummarizer/
    │   ├── README.md
    │   ├── gradle.properties
    │   ├── gradlew
    │   ├── .gitignore
    │   ├── app/
    │   │   ├── .gitignore
    │   │   └── src/
    │   │       └── main/
    │   │           ├── java/
    │   │           │   └── com/
    │   │           │       └── example/
    │   │           │           └── llamaandroiddemo/
    │   │           └── res/
    │   │               ├── drawable/
    │   │               ├── layout/
    │   │               ├── mipmap-anydpi-v26/
    │   │               ├── mipmap-hdpi/
    │   │               ├── mipmap-mdpi/
    │   │               ├── mipmap-xhdpi/
    │   │               ├── mipmap-xxhdpi/
    │   │               ├── mipmap-xxxhdpi/
    │   │               ├── values/
    │   │               └── xml/
    │   └── gradle/
    │       └── wrapper/
    │           └── gradle-wrapper.properties
    ├── benchmarks/
    │   ├── README.md
    │   ├── inference/
    │   │   ├── README.md
    │   │   ├── requirements.txt
    │   │   ├── cloud/
    │   │   │   ├── README.md
    │   │   │   ├── aws/
    │   │   │   │   └── fmbench/
    │   │   │   │       ├── README.md
    │   │   │   │       └── img/
    │   │   │   └── azure/
    │   │   └── on_prem/
    │   │       ├── README.md
    │   │       └── vllm/
    │   └── llm_eval_harness/
    │       ├── README.md
    │       └── meta_eval/
    │           ├── README.md
    │           └── meta_template/
    │               ├── bbh/
    │               ├── gpqa/
    │               ├── gpqa_cot/
    │               ├── ifeval/
    │               ├── math_hard/
    │               ├── mmlu/
    │               └── mmlu_pro/
    ├── book-character-mindmap/
    │   ├── README.md
    │   ├── .gitignore
    │   ├── public/
    │   │   └── robots.txt
    │   ├── server/
    │   │   └── requirements.txt
    │   └── src/
    │       └── pages/
    │           ├── bookPage/
    │           │   └── components/
    │           └── homePage/
    │               └── components/
    ├── browser_use/
    │   ├── README.md
    │   └── agent/
    ├── coding/
    │   └── text2sql/
    │       ├── README.md
    │       ├── nba.txt
    │       ├── nba_roster.db
    │       └── requirements.txt
    ├── Contextual-Chunking-RAG/
    │   ├── README.md
    │   └── data/
    │       └── llama_article.txt
    ├── customerservice_chatbots/
    │   ├── README.md
    │   ├── messenger_chatbot/
    │   │   └── messenger_llama3.md
    │   ├── RAG_chatbot/
    │   │   ├── requirements.txt
    │   │   ├── data/
    │   │   └── vectorstore/
    │   │       ├── db_faiss/
    │   │       └── mongodb/
    │   └── whatsapp_chatbot/
    │       └── whatsapp_llama3.md
    ├── email_agent/
    │   ├── README.md
    │   └── requirements.txt
    ├── github_triage/
    │   ├── README.md
    │   ├── requirements.txt
    │   └── output/
    │       └── pytorch/
    │           └── pytorch/
    │               └── 2024-08-28_2024-08-28/
    │                   └── plots/
    ├── long_context/
    │   └── H2O/
    │       ├── README.md
    │       ├── requirements.txt
    │       ├── data/
    │       │   └── summarization/
    │       ├── src/
    │       └── utils/
    ├── Multi-Modal-RAG/
    │   ├── README.md
    │   ├── notebooks/
    │   └── scripts/
    ├── multilingual/
    │   ├── README.md
    │   └── img/
    ├── NotebookLlama/
    │   ├── README.md
    │   ├── requirements.txt
    │   ├── TTS_Notes.md
    │   └── resources/
    │       └── clean_extracted_text.txt
    ├── RAFT-Chatbot/
    │   ├── README.md
    │   └── images/
    ├── research_paper_analyzer/
    │   ├── README.md
    │   └── requirements.txt
    └── whatsapp_llama_4_bot/
        ├── README.md
        ├── requirements.txt
        └── .env
================================================
FILE: end-to-end-use-cases/README.md
================================================
<h1 align="center"> End-to-End Use Case Applications using various Llama Models </h1>
<p align="center">
<a href="https://llama.developer.meta.com/join_waitlist?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Join_Waitlist-brightgreen?logo=meta" /></a>
<a href="https://llama.developer.meta.com/docs?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Documentation-4BA9FE?logo=meta" /></a>
</p>
<p align="center">
<a href="https://github.com/meta-llama/llama-models/blob/main/models/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Model cards" src="https://img.shields.io/badge/Llama_OSS-Model_cards-green?logo=meta" /></a>
<a href="https://www.llama.com/docs/overview/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Documentation" src="https://img.shields.io/badge/Llama_OSS-Documentation-4BA9FE?logo=meta" /></a>
<a href="https://huggingface.co/meta-llama"><img alt="Hugging Face meta-llama" src="https://img.shields.io/badge/Hugging_Face-meta--llama-yellow?logo=huggingface" /></a>
</p>
<p align="center">
<a href="https://github.com/meta-llama/synthetic-data-kit"><img alt="Llama Tools synthetic-data-kit" src="https://img.shields.io/badge/Llama_Tools-synthetic--data--kit-orange?logo=meta" /></a>
<a href="https://github.com/meta-llama/llama-prompt-ops"><img alt="Llama Tools llama-prompt-ops" src="https://img.shields.io/badge/Llama_Tools-llama--prompt--ops-orange?logo=meta" /></a>
</p>
## [Building an Intelligent WhatsApp Bot with Llama 4 APIs](./whatsapp_llama_4_bot/README.md)
### A Step-by-Step Guide
Create a WhatsApp bot that leverages the power of Llama 4 APIs to provide intelligent and interactive responses. This guide walks you through building a bot that supports text, image, and audio interactions, making it versatile for various use cases.
- **Text Interaction**: Respond to text messages with accurate and contextually relevant answers.
- **Image Reasoning**: Analyze images to provide insights, descriptions, or answers related to the content.
- **Audio-to-Audio Interaction**: Transcribe audio messages to text, process them, and convert the response back to audio for seamless voice-based interaction.

Get started with building your own WhatsApp bot using Llama 4 APIs today!

## [Research Paper Analyzer with Llama 4 Maverick](./research_paper_analyzer/README.md)
### Analyze Research Papers with Ease
Leverage Llama 4 Maverick to retrieve the references of an arXiv paper and ingest all their content for question answering.
- **Long Context Length**: Process entire papers at once.
- **Comprehensive Analysis**: Get insights, descriptions, or answers related to the content.

Get started with analyzing research papers using Llama 4 Maverick today!

## [Book Character Mind Map with Llama 4 Maverick](./book-character-mindmap/README.md)
### Explore Book Characters and Storylines
Use Llama 4 Maverick to process entire books at once and visualize character relationships and storylines.
- **Interactive Mind Maps**: Visualize relationships between characters and plot elements.
- **Book Summaries**: Get concise overviews of plots and themes.

Discover new insights into your favorite books!

## [Agentic Tutorial](./agents/)
### 101 and 201 tutorials on tool calling and building agentic workflows with Llama models
The 101 notebooks show how to apply Llama models and enable tool-calling functionality; the 201 notebook walks you through an end-to-end workflow for building an agent that can search two papers, fetch their details, and find their differences.

## [Benchmarks](./benchmarks/)
### A folder containing benchmark scripts
The scripts provide a throughput analysis and an introduction to `lm-evaluation-harness`, a tool for evaluating Llama models, including quantized models, with a focus on quality.

## [Browser Usage](./browser_use/)
### Demo of how to apply Llama models and use them for browsing the internet and completing tasks

## [Automatic Triaging of GitHub Repositories](./github_triage/walkthrough.ipynb)
### Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience
This tool utilizes an off-the-shelf Llama model to analyze issues, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.

## [NBA2023-24](./coding/text2sql/quickstart.ipynb)
### Ask Llama 3 about Structured Data
This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season entered the playoffs, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players.

## [NotebookLlama](./NotebookLlama/)
### PDF to Podcast using Llama Models
A workflow showcasing how to use multiple Llama models to go from any PDF to a podcast, using open models to generate multi-speaker audio.

## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md)
### Building a Llama 3 Enabled WhatsApp Chatbot
This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot.

## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md)
### Building a Llama 3 Enabled Messenger Chatbot
This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot.

### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb))
A complete example of how to build a Llama 3 chatbot hosted in your browser that can answer questions based on your own data using retrieval augmented generation (RAG).
================================================
FILE: end-to-end-use-cases/agents/README.md
================================================
## Agents and Tool Calling
Structure:
- Agents_Tutorial: 101 and 201 notebooks with guidance on using tool calling with Llama models
- DeepLearningai_Course_Notebooks: Notebooks from the DeepLearning.ai courses on agents
================================================
FILE: end-to-end-use-cases/agents/calendar_assistant/README.md
================================================
# Google Calendar Assistant with Llama 3.2 3B Tool Calling
This notebook showcases how to build a digital assistant that schedules meetings with the Llama 3.2 3B model. The core concepts used to implement this are prompt engineering and tool calling. The demo shows how Llama can be used to interact with 3rd party apps like Google Contacts & Google Calendar and schedule a meeting requested by the user. Even though we are using prompt engineering to achieve this, the approach described doesn't degrade the model's ability to answer general queries, and it can be extended to perform other tasks in a similar manner without affecting the quality of those tasks.
## Approach
Instead of using a complex system prompt with multiple conditions and expecting Llama to perform various tasks accurately out of the box, the approach here is to treat this as a 2-step process:
- Determine user intent - task classification
- Take action for the specific task using tool calling
In the diagram shown below:
- System prompt 1 determines the classification of the query.
- In steps 2 & 3, we classify the task being requested.
- System prompt 2 is chosen based on the classification result.
- Steps 4 & 5 implement the classified task.
- For the sake of the demo, we show 2 classes: General & Meeting.
![Tool Calling Flow Diagram](./assets/flow_diagram.png)
Each of these tasks has a specific prompt: we use the same model with different system prompts depending on the classification result. A minimal sketch of this two-step flow follows the example below.
Additionally, this demo showcases how Llama can do double tool calling with one prompt. In the case of Meeting, Llama returns 2 function calls in step 5:
```
<function=google_contact>{{"name": "John Constantine"}}</function>
<function=google_calendar>{{"date": "Mar 31", "time": "5:30 pm", "attendees": "John Constantine"}}</function>
```
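To make the two-step flow concrete, here is a minimal sketch assuming an OpenAI-compatible endpoint serving the model; the endpoint URL and the prompt texts (`CLASSIFY_PROMPT`, `TASK_PROMPTS`) are illustrative placeholders, not taken from the notebook:
```python
# Sketch of the classify-then-act flow: the same model is called twice,
# first with a classification system prompt, then with a task-specific one.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="-")  # hypothetical endpoint
MODEL = "meta-llama/Llama-3.2-3B-Instruct"

CLASSIFY_PROMPT = "Classify the user query as 'general' or 'meeting'. Reply with one word."
TASK_PROMPTS = {
    "general": "You are a helpful assistant. Answer the user's question.",
    "meeting": "Return <function=...>{...}</function> calls to schedule the meeting.",
}

def chat(system_prompt: str, user_query: str) -> str:
    resp = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "system", "content": system_prompt},
                  {"role": "user", "content": user_query}],
    )
    return resp.choices[0].message.content

def answer(user_query: str) -> str:
    # Step 1: classify intent with system prompt 1.
    label = chat(CLASSIFY_PROMPT, user_query).strip().lower()
    # Step 2: re-run the same model with the task-specific system prompt.
    return chat(TASK_PROMPTS.get(label, TASK_PROMPTS["general"]), user_query)
```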
## Examples
- General query
**User Prompt**: `Tell me about Paris`
**Prompt Classification**: `general`
**Answer**:
```
Paris, the City of Light, is the capital of France. Known for its stunning architecture, art museums, and romantic atmosphere, Paris is a popular tourist destination. The city is famous for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, which houses the Mona Lisa. Paris is also renowned for its fashion, cuisine, and wine. The city's charming streets and historic neighborhoods, like Montmartre and Le Marais, offer a glimpse
```
- Set up a calendar invite
**User Prompt**: `Schedule a meeting with John Constantine on Mar 31 at 5:30 pm`
**Prompt Classification**: `meeting`
**Answer**:
```
<function=google_contact>{{"name": "John Constantine"}}</function>
<function=google_calendar>{{"date": "Mar 31", "time": "5:30 pm", "attendees": "John Constantine"}}</function>
```
### Actions based on tool calling output
- The google_contact function call returned by the model is used to call the [People API](https://developers.google.com/people) to look up the email address of the person of interest.
- The email address from the previous step is used to call the [Calendar API](https://developers.google.com/calendar), along with the other information in the google_calendar tool calling output returned by the model.
The end result is that a Google Calendar meeting is scheduled with the person of interest at the specified date & time, as sketched below.
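A minimal sketch of acting on these tool calls, assuming the `<function=name>{...}</function>` output format shown above; `lookup_contact_email` and `create_calendar_event` are hypothetical stubs standing in for the actual People API and Calendar API calls:
```python
# Parse <function=name>{...}</function> calls emitted by the model and
# dispatch them; the Google API helpers below are hypothetical stubs.
import json
import re

CALL_RE = re.compile(r"<function=(\w+)>(\{.*?\})</function>")

def lookup_contact_email(name: str) -> str:
    """Stub for a People API lookup; returns the contact's email address."""
    raise NotImplementedError

def create_calendar_event(date: str, time: str, attendee_email: str) -> None:
    """Stub for a Calendar API event-creation call."""
    raise NotImplementedError

def parse_tool_calls(output: str) -> list[tuple[str, dict]]:
    # The demo output doubles the braces ({{...}}); normalize before parsing.
    normalized = output.replace("{{", "{").replace("}}", "}")
    return [(name, json.loads(args)) for name, args in CALL_RE.findall(normalized)]

def act(model_output: str) -> None:
    calls = dict(parse_tool_calls(model_output))
    contact = calls.get("google_contact", {})
    event = calls.get("google_calendar", {})
    email = lookup_contact_email(contact["name"])  # People API wrapper (stub)
    create_calendar_event(date=event["date"], time=event["time"], attendee_email=email)
```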
================================================
FILE: end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md
================================================
# Quickstart Llama 3 Powered Agent Examples Ported from DeepLearning.ai Short Courses
The notebooks in this folder are ported from the 4 recent agent short courses on [DeepLearning.ai](https://www.deeplearning.ai) to use Llama 3 to build agent apps from scratch or with open source frameworks (LangChain, LlamaIndex, AutoGen).
1. [Functions Tools and Agents with LangChain L1 Function Calling](Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb)
2. [AI Agents in LangGraph L1 Build an Agent from Scratch](AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb)
3. [Building Agentic RAG with Llamaindex L1 Router Engine](Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb)
4. [AI Agentic Design Patterns with AutoGen L4 Tool Use and Conversational Chess](AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb)
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/README.md
================================================
# Android Document Summarizer App
<img src="./screenshot.png">

This is a sample Android app that demonstrates Llama 4 multimodal and multilingual capabilities. The app allows the user to take a picture or screenshot of a document, then summarize it and translate it into any of the supported languages.
## Quick Start
1. Open the DocumentSummarizer folder in Android Studio
2. Update the `API_KEY` in `AppUtils.java`
3. Build the Android project
4. Inside the app, tap the settings icon on the top right
5. Configure the Remote URL endpoint (any supported provider that serves Llama 4 models, for example: https://api.together.xyz)
6. Select the desired model from the drop-down list. If you need to add more models, modify `ModelUtils.java`
7. Go back to the main chat window
8. Press the '+' button on the bottom left and select an image document (or take a picture of one!)
9. Select the 'globe' button on the bottom left and select your languages
10. Enter a prompt like "summarize this" and press Enter!
> **_NOTE:_** This is an example project to demonstrate the E2E flow. You should NOT use or store an API key directly on the client. Exposing your API key in client-side environments allows malicious users to take that key and make requests on your behalf. Requests should always be routed through your own backend server, where you can keep your API key secure.
## Reporting Issues
If you encounter any bugs or issues following this tutorial, please file a bug/issue on [GitHub](https://github.com/meta-llama/llama-cookbook/issues).
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/.gitignore
================================================
# Gradle files
.gradle/
build/

# Local configuration file (sdk path, etc)
local.properties

# Log/OS Files
*.log

# Android Studio generated files and folders
captures/
.externalNativeBuild/
.cxx/
*.apk
output.json

# IntelliJ
*.iml
.idea/
misc.xml
deploymentTargetDropDown.xml
render.experimental.xml

# Keystore files
*.jks
*.keystore

# Google Services (e.g. APIs or Firebase)
google-services.json

# Android Profiling
*.hprof

.DS_Store
*.jar
*.aar
*.so
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/app/.gitignore
================================================
/build
================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradle/wrapper/gradle-wrapper.properties
================================================
#Mon Sep 25 11:23:11 PDT 2023
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
================================================
FILE: end-to-end-use-cases/benchmarks/README.md
================================================
# Benchmarks
* inference - a folder containing benchmark scripts that apply a throughput analysis for Llama model inference on various backends, including on-prem, cloud, and on-device.
* llm_eval_harness - a folder that introduces `lm-evaluation-harness`, a tool for evaluating Llama models, including quantized models, with a focus on quality. We also include a recipe that calculates Llama 3.1 evaluation metrics using `lm-evaluation-harness`, and instructions for calculating Hugging Face Open LLM Leaderboard v2 metrics.
================================================
FILE: end-to-end-use-cases/benchmarks/inference/README.md
================================================
# Inference Throughput Benchmarks
In this folder we provide a series of benchmark scripts that apply a throughput analysis for Llama model inference on various backends:
* On-prem - Popular serving frameworks and containers (e.g. vLLM)
* Cloud API - Popular API services (e.g. Azure Model-as-a-Service or Serverless API)
* [**WIP**] On-device - Popular on-device inference solutions on mobile and desktop (e.g. ExecuTorch, MLC-LLM, Ollama)
* [**WIP**] Optimization - Popular optimization solutions for faster inference and quantization (e.g. AutoAWQ)
# Why
There are three major reasons we want to run these benchmarks and share them with our Llama community:
* Provide inference throughput analysis based on real-world situations to help you select the best service or deployment for your scenario
* Provide a baseline measurement for validating various optimization solutions on different backends, so we can provide guidance on which solutions work best for your scenario
* Encourage the community to develop benchmarks on top of our work, so we can better quantify the latest proposed solutions combined with current popular frameworks, especially in this crazy fast-moving area
# Parameters
Here are the parameters (if applicable) that you can configure for running the benchmark:
* **PROMPT** - Prompt sent in for inference (configure the length of the prompt: choose from 5, 25, 50, 100, 500, 1k and 2k)
* **MAX_NEW_TOKENS** - Max number of tokens generated
* **CONCURRENT_LEVELS** - Max number of concurrent requests
* **MODEL_PATH** - Model source from Hugging Face
* **MODEL_HEADERS** - Request headers
* **SAFE_CHECK** - Content safety check (either Azure service or simulated latency)
* **THRESHOLD_TPS** - Threshold TPS (threshold for tokens per second below which we deem the query to be slow)
* **TOKENIZER_PATH** - Tokenizer source
* **RANDOM_PROMPT_LENGTH** - Random prompt length (for pretrained models)
* **NUM_GPU** - Number of GPUs for request dispatch among multiple containers
* **TEMPERATURE** - Temperature for inference
* **TOP_P** - Top_p for inference
* **MODEL_ENDPOINTS** - Container endpoints
* Model parallelism or model replicas - Load one model into multiple GPUs, or run multiple model replicas on one instance. More detail in the README files for specific containers.
You can also configure other model hyperparameters as part of the request payload.
All these parameters are stored in ```parameter.json``` and real prompts are stored in ```input.jsonl```. Running the script will load these configurations, as sketched below.
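As a rough illustration (not the scripts' actual code), loading these two files might look like this, assuming `parameter.json` holds the keys listed above and `input.jsonl` holds one JSON record per line:
```python
# Sketch of loading the benchmark configuration; the key names below follow
# the parameter list above, but the exact schema comes from each script.
import json

with open("parameter.json") as f:
    params = json.load(f)

prompts = []
with open("input.jsonl") as f:
    for line in f:
        if line.strip():
            prompts.append(json.loads(line))

concurrency_levels = params["CONCURRENT_LEVELS"]
threshold_tps = params["THRESHOLD_TPS"]
print(f"{len(prompts)} prompts, concurrency levels: {concurrency_levels}, "
      f"slow-query threshold: {threshold_tps} tokens/s")
```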
# Metrics
The benchmark will report these metrics per instance:
* Number of concurrent requests
* P50 Latency (ms)
* P99 Latency (ms)
* Requests per second (RPS)
* Output tokens per second
* Output tokens per second per GPU
* Input tokens per second
* Input tokens per second per GPU
* Average tokens per second per request
We intend to add these metrics in the future:
* Time to first token (TTFT)
The benchmark result will be displayed in the terminal output and saved as a CSV file (```performance_metrics.csv```) which you can export to spreadsheets. A sketch of how such metrics are typically derived follows.
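To illustrate how these aggregate numbers are typically derived from per-request timings, here is a sketch (not the benchmark scripts' actual implementation):
```python
# Sketch: deriving P50/P99 latency and token throughput from per-request
# records; this illustrates the metric definitions, not the actual scripts.
import statistics
from dataclasses import dataclass

@dataclass
class RequestRecord:
    latency_ms: float
    input_tokens: int
    output_tokens: int

def summarize(records: list[RequestRecord], wall_time_s: float, num_gpus: int) -> dict:
    latencies = sorted(r.latency_ms for r in records)
    out_tokens = sum(r.output_tokens for r in records)
    in_tokens = sum(r.input_tokens for r in records)
    cuts = statistics.quantiles(latencies, n=100)  # 99 percentile cut points
    return {
        "p50_latency_ms": cuts[49],
        "p99_latency_ms": cuts[98],
        "rps": len(records) / wall_time_s,
        "output_tokens_per_s": out_tokens / wall_time_s,
        "output_tokens_per_s_per_gpu": out_tokens / wall_time_s / num_gpus,
        "input_tokens_per_s": in_tokens / wall_time_s,
        "avg_tokens_per_s_per_request": statistics.mean(
            r.output_tokens / (r.latency_ms / 1000) for r in records
        ),
    }
```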
# Getting Started
Please follow the ```README.md``` in each subfolder for instructions on how to set up and run these benchmarks.
================================================
FILE: end-to-end-use-cases/benchmarks/inference/requirements.txt
================================================
transformers
requests
azure-core
azure-ai-contentsafety
torch
================================================
FILE: end-to-end-use-cases/benchmarks/inference/cloud/README.md
================================================
# Llama-Cloud-API-Benchmark
This folder contains code to run an inference benchmark for Llama 2 models on cloud APIs from popular cloud service providers. The benchmark focuses on overall inference **throughput** when querying the API endpoint for output generation with different levels of concurrent requests. Remember that to send queries to the API endpoint you need a subscription with the cloud service provider, and there will be an associated fee.
Disclaimer - The purpose of the code is to provide a configurable setup to measure inference throughput. It is not representative of the performance of these API services, and we do not plan to make comparisons between different API providers.
# Azure - Getting Started
To get started, there are certain steps we need to take to deploy the models:
<!-- markdown-link-check-disable -->
* Register for a valid Azure account with a subscription [here](https://azure.microsoft.com/en-us/free/search/?ef_id=_k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&gad_source=1&gclid=CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE)
<!-- markdown-link-check-enable -->
* Take a quick look at what [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) is and navigate to the website from the link in the article
* Follow the demos in the article to create a project and [resource group](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal), or follow the guide [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio)
* Select Llama models from the Model catalog
* Click the "Deploy" button
* Select Serverless API with Azure AI Content Safety. Note that this API service is currently offered for Llama 2 pretrained and chat models and the Llama 3 instruct model
* Select the project you created in the previous step
* Choose a deployment name, then go to the deployment
Once deployed successfully, you will be assigned an API endpoint and a security key for inference.
For more information, consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio) for model deployment and inference.
Now, replace the endpoint url and API key in ```azure/parameters.json```. For the parameter `MODEL_ENDPOINTS`, the suffix should be `v1/chat/completions` for chat models and `v1/completions` for pretrained models.
Note that the API endpoint might implement a rate limit for token generation within a certain amount of time. If you encounter a rate-limit error, try reducing `MAX_NEW_TOKENS` or starting with smaller `CONCURRENT_LEVELS`.
For `MODEL_PATH`, copy the model path from Hugging Face under the meta-llama organization. For Llama 2, make sure you copy the path of the model in HF format. This model path is used to retrieve the corresponding tokenizer for your model of choice (a sketch of this follows); note that Llama 3 uses a different tokenizer than Llama 2.
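As a small illustration of why the tokenizer path matters, counting prompt tokens with the Hugging Face `transformers` API might look like this (the model path below is just an example):
```python
# Sketch: MODEL_PATH is used to load the matching tokenizer so that
# prompt/output token counts are measured with the right vocabulary.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")  # example path
prompt = "Tell me about Paris"
num_tokens = len(tokenizer.encode(prompt))
print(f"{num_tokens} input tokens")
```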
Once everything is configured, run the chat model benchmark with:
```python chat_azure_api_benchmark.py```
To run the pretrained model benchmark:
```python pretrained_azure_api_benchmark.py```
Once finished, the result will be written into a CSV file in the same directory, which can later be imported into a dashboard of your choice.
  612. ================================================
  613. FILE: end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md
  614. ================================================
  615. # Benchmark Llama models on AWS
  616. The [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) tool provides a quick and easy way to benchmark the Llama family of models for price and performance on any AWS service including [`Amazon SagMaker`](https://aws.amazon.com/solutions/guidance/generative-ai-deployments-using-amazon-sagemaker-jumpstart/), [`Amazon Bedrock`](https://aws.amazon.com/bedrock/) or `Amazon EKS` or `Amazon EC2` as `Bring your own endpoint`.
  617. ## The need for benchmarking
  618. <!-- markdown-link-check-disable -->
  619. Customers often wonder what is the best AWS service to run Llama models for _my specific use-case_ and _my specific price performance requirements_. While model evaluation metrics are available on several leaderboards ([`HELM`](https://crfm.stanford.edu/helm/lite/latest/#/leaderboard), [`LMSys`](https://chat.lmsys.org/?leaderboard)), but the price performance comparison can be notoriously hard to find and even more harder to trust. In such a scenario, we think it is best to be able to run performance benchmarking yourself on either on your own dataset or on a similar (task wise, prompt size wise) open-source datasets such as ([`LongBench`](https://huggingface.co/datasets/THUDM/LongBench), [`QMSum`](https://paperswithcode.com/dataset/qmsum)). This is the problem that [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) solves.
  620. <!-- markdown-link-check-enable -->
## [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main): an open-source Python package for FM benchmarking on AWS
`FMBench` runs inference requests against endpoints that are either deployed through `FMBench` itself (as in the case of SageMaker), available as a fully-managed endpoint (as in the case of Bedrock), or available as a bring-your-own endpoint. Metrics such as inference latency, transactions per minute, error rates, and cost per transaction are captured and presented in the form of a Markdown report containing explanatory text, tables, and figures. The figures and tables in the report provide insights into what might be the best serving stack (instance type, inference container, and configuration parameters) for a given Llama model for a given use-case.
The following figure gives an example of the price performance numbers, including inference latency, transactions per minute, and concurrency level, for running the `Llama2-13b` model on different instance types available on SageMaker, using prompts for a Q&A task created from the [`LongBench`](https://huggingface.co/datasets/THUDM/LongBench) dataset; these prompts are between 3000 and 3840 tokens in length. **_Note that the numbers are hidden in this figure but you would be able to see them when you run `FMBench` yourself_**.
![`Llama2-13b` on different instance types ](./img/business_summary.png)
The following table (also included in the report) provides information about the best available instance type for that experiment<sup>1</sup>.
|Information |Value |
|--- |--- |
|experiment_name |llama2-13b-inf2.24xlarge |
|payload_file |payload_en_3000-3840.jsonl |
|instance_type |ml.inf2.24xlarge |
|concurrency |** |
|error_rate |** |
|prompt_token_count_mean |3394 |
|prompt_token_throughput |2400 |
|completion_token_count_mean |31 |
|completion_token_throughput |15 |
|latency_mean |** |
|latency_p50 |** |
|latency_p95 |** |
|latency_p99 |** |
|transactions_per_minute |** |
|price_per_txn |** |
<sup>1</sup> ** represents values hidden on purpose; these are available when you run the tool yourself.
The report also includes latency vs. prompt size charts for different concurrency levels. As expected, inference latency increases as prompt size increases, but what is interesting to note is that the increase is much larger at higher concurrency levels (and this behavior varies with instance types).
![Effect of prompt size on inference latency for different concurrency levels](./img/latency_vs_tokens.png)
### How to get started with `FMBench`
The following steps provide a [Quick start guide for `FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool#quickstart). For a more detailed DIY version, please see the [`FMBench Readme`](https://github.com/aws-samples/foundation-model-benchmarking-tool?tab=readme-ov-file#the-diy-version-with-gory-details).
1. Each `FMBench` run works with a configuration file that contains the information about the model, the deployment steps, and the tests to run. A typical `FMBench` workflow involves either directly using an already provided config file from the [`configs`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main/fmbench/configs) folder in the `FMBench` GitHub repo or editing an already provided config file as per your own requirements (say you want to try benchmarking on a different instance type, or a different inference container, etc.).
>A simple config file with key parameters annotated is included in this repo, see [`config.yml`](./config.yml). This file benchmarks performance of Llama2-7b on an `ml.g5.xlarge` instance and an `ml.g5.2xlarge` instance. You can use this provided config file as-is for this Quickstart.
1. Launch the AWS CloudFormation template included in this repository using one of the buttons from the table below. The CloudFormation template creates the following resources within your AWS account: Amazon S3 buckets, an IAM role, and an Amazon SageMaker Notebook with this repository cloned. A read S3 bucket is created which contains all the files (configuration files, datasets) required to run `FMBench`, and a write S3 bucket is created which will hold the metrics and reports generated by `FMBench`. The CloudFormation stack takes about 5 minutes to create.
|AWS Region | Link |
|:------------------------:|:-----------:|
|us-east-1 (N. Virginia) | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
|us-west-2 (Oregon) | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
1. Once the CloudFormation stack is created, navigate to SageMaker Notebooks and open the `fmbench-notebook`.
1. On the `fmbench-notebook`, open a Terminal and run the following commands.
```{.bash}
conda create --name fmbench_python311 -y python=3.11 ipykernel
source activate fmbench_python311;
pip install -U fmbench
```
1. Now you are ready to run `fmbench` with the following command line. We will use a sample config file placed in the S3 bucket by the CloudFormation stack for a quick first run.
1. We benchmark performance for the `Llama2-7b` model on an `ml.g5.xlarge` and an `ml.g5.2xlarge` instance type, using the `huggingface-pytorch-tgi-inference` inference container. This test takes about 30 minutes to complete and costs about $0.20.
1. It uses a simple relationship that 750 words equals 1000 tokens; to get a more accurate representation of token counts, use the `Llama2 tokenizer` (see the token-counting sketch after this list). **_It is strongly recommended that for more accurate results on token throughput you use a tokenizer specific to the model you are testing rather than the default tokenizer. See instructions provided [here](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main?tab=readme-ov-file#the-diy-version-with-gory-details) on how to use a custom tokenizer_**.
<!-- markdown-link-check-disable -->
```{.bash}
account=`aws sts get-caller-identity | jq .Account | tr -d '"'`
region=`aws configure get region`
fmbench --config-file s3://sagemaker-fmbench-read-${region}-${account}/configs/llama2/7b/config-llama2-7b-g5-quick.yml >> fmbench.log 2>&1
```
<!-- markdown-link-check-enable -->
1. Open another terminal window and do a `tail -f` on the `fmbench.log` file to see all the traces being generated at runtime.
```{.bash}
tail -f fmbench.log
```
1. The generated reports and metrics are available in the `sagemaker-fmbench-write-<replace_w_your_aws_region>-<replace_w_your_aws_account_id>` bucket. The metrics and report files are also downloaded locally into the `results` directory (created by `FMBench`), and the benchmarking report is available as a markdown file called `report.md` in the `results` directory. You can view the rendered Markdown report in the SageMaker notebook itself or download the metrics and report files to your machine for offline analysis.
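As an aside, the words-to-tokens heuristic above is only approximate. The hedged Python sketch below shows one way to count tokens with a model-specific tokenizer via Hugging Face `transformers`; the model ID is illustrative, and gated models require Hugging Face access.
```python
from transformers import AutoTokenizer

# Illustrative model ID; substitute the tokenizer of the model you are benchmarking.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

text = "word " * 750  # 750 words of dummy text
token_count = len(tokenizer.encode(text))
print(f"750 words -> {token_count} tokens with this tokenizer")
```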
## 🚨 Benchmarking Llama3 on Amazon Bedrock 🚨
Llama3 is now available on Bedrock (read [blog post](https://aws.amazon.com/blogs/aws/metas-llama-3-models-are-now-available-in-amazon-bedrock/)), and you can now benchmark it using `FMBench`. Here is the config file for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on Bedrock.
<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock-llama3.yml) for `Llama3-8b-instruct` and `Llama3-70b-instruct`.
<!-- markdown-link-check-enable -->
## 🚨 Benchmarking Llama3 on Amazon SageMaker 🚨
Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/blogs/machine-learning/meta-llama-3-models-are-now-available-in-amazon-sagemaker-jumpstart/)), and you can now benchmark it using `FMBench`. Here are the config files for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on `ml.p4d.24xlarge`, `ml.inf2.24xlarge` and `ml.g5.12xlarge` instances.
<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-instruct-g5-p4d.yml) for `Llama3-8b-instruct` on `ml.p4d.24xlarge` and `ml.g5.12xlarge`.
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/70b/config-llama3-70b-instruct-g5-p4d.yml) for `Llama3-70b-instruct` on `ml.p4d.24xlarge` and `ml.g5.48xlarge`.
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-inf2-g5.yml) for `Llama3-8b-instruct` on `ml.inf2.24xlarge` and `ml.g5.12xlarge`.
<!-- markdown-link-check-enable -->
## Benchmarking Llama2 on Amazon SageMaker
Llama2 models are available through SageMaker JumpStart as well as directly deployable from Hugging Face to a SageMaker endpoint. You can use `FMBench` to benchmark Llama2 on SageMaker for different combinations of instance types and inference containers.
<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g5-quick.yml) for `Llama2-7b` on `ml.g5.xlarge` and `ml.g5.2xlarge` instances, using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g4dn-g5-trt.yml) for `Llama2-7b` on an `ml.g4dn.12xlarge` instance using the [Deep Java Library DeepSpeed container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-deepspeed0.12.6-cu121).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/13b/config-llama2-13b-inf2-g5-p4d.yml) for `Llama2-13b` on `ml.g5.12xlarge`, `ml.inf2.24xlarge` and `ml.p4d.24xlarge` instances using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04) and the [Deep Java Library & NeuronX container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-neuronx-sdk2.16.0).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-g5-p4d-trt.yml) for `Llama2-70b` on an `ml.p4d.24xlarge` instance using the [Deep Java Library TensorRT container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-tensorrtllm0.7.1-cu122).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-inf2-g5.yml) for `Llama2-70b` on an `ml.inf2.48xlarge` instance using the [HuggingFace TGI with Optimum NeuronX container](763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-tgi-inference:1.13.1-optimum0.0.17-neuronx-py310-ubuntu22.04).
<!-- markdown-link-check-enable -->
## Benchmarking Llama2 on Amazon Bedrock
The Llama2-13b-chat and Llama2-70b-chat models are available on [Bedrock](https://aws.amazon.com/bedrock/llama/). You can use `FMBench` to benchmark Llama2 on Bedrock for both the on-demand throughput and provisioned throughput inference options.
<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock.yml) for `Llama2-13b-chat` and `Llama2-70b-chat` on Bedrock for on-demand throughput.
<!-- markdown-link-check-enable -->
- For testing provisioned throughput, simply replace the `ep_name` parameter in the `experiments` section of the config file with the ARN of your provisioned throughput.
## More...
For bug reports, enhancement requests, and any questions, please create a [GitHub issue](https://github.com/aws-samples/foundation-model-benchmarking-tool/issues) on the `FMBench` repo.
================================================
FILE: end-to-end-use-cases/benchmarks/inference/on_prem/README.md
================================================
# Llama-On-Prem-Benchmark
This folder contains code to run inference benchmarks for Meta Llama 3 models on-prem with popular serving frameworks.
The benchmark focuses on overall inference **throughput** for running containers on one instance (single or multiple GPUs) that you can acquire from cloud service providers such as Azure and AWS. You can also run this benchmark on a local laptop or desktop.
We support benchmarking on these serving frameworks:
* [vLLM](https://github.com/vllm-project/vllm)
# vLLM - Getting Started
To get started, we first need to deploy containers on-prem as an API host. Follow the guidance [here](../../../../3p-integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem.
Note that in the common scenario where overall throughput is important, we suggest prioritizing deploying as many model replicas as possible to reach higher overall throughput and requests per second (RPS), rather than deploying one model container across multiple GPUs for model parallelism. Additionally, when deploying multiple model replicas, a higher-level wrapper is needed to handle the load balancing; this is simulated in the benchmark scripts (a minimal sketch of this routing follows the deployment commands below).
For example, say we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Meta Llama 3 70B instruct model, which is around 140GB in FP16. For deployment we can do:
* 1x70B model parallel on 8 GPUs: each GPU holds around 17.5GB of model weights.
* 2x70B models, each using 4 GPUs: each GPU holds around 35GB of model weights.
* 4x70B models, each using 2 GPUs: each GPU holds around 70GB of model weights. (Preferred configuration for maximum overall throughput. Note that you will have 4 endpoints hosted on different ports, and the benchmark script will route requests to each model equally.)
Here are examples for deploying 2x70B chat models over 8 GPUs with vLLM.
```
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8000
CUDA_VISIBLE_DEVICES=4,5,6,7 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8001
```
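For illustration, here is a minimal sketch of round-robin routing across the two endpoints started above. It is a simplified stand-in for the load balancing simulated in the benchmark scripts, not the scripts themselves; the payload uses the standard OpenAI-compatible fields served by vLLM, and `max_tokens` is an arbitrary example.
```python
import itertools
import requests

# The two vLLM endpoints deployed above; add more if you run more replicas.
endpoints = itertools.cycle([
    "http://localhost:8000/v1/chat/completions",
    "http://localhost:8001/v1/chat/completions",
])

def send_chat_request(messages):
    # Round-robin: each request goes to the next endpoint in the cycle.
    url = next(endpoints)
    payload = {
        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
        "messages": messages,
        "max_tokens": 256,
    }
    return requests.post(url, json=payload, timeout=300).json()
```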
Once you have finished deployment, you can use the command below to run benchmark scripts in a separate terminal.
```
python chat_vllm_benchmark.py
```
<!-- markdown-link-check-disable -->
If you are going to use [Azure AI content check](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety), then you should install dependencies as shown below in your terminal:
<!-- markdown-link-check-enable -->
```
pip install azure-ai-contentsafety azure-core
```
Besides chat models, we also provide benchmark scripts for running pretrained models for text completion tasks. To better simulate real traffic, we generate configurable random-token prompts as input. In this process, we select vocabulary tokens that are longer than 2 characters, so the generated words are closer to English rather than symbols; a hedged sketch of this idea follows.
However, random token prompts cannot be used for chat model benchmarks, since a chat model expects a valid question. When fed random prompts, chat models rarely provide answers that meet our `MAX_NEW_TOKEN` requirement, defeating the purpose of running throughput benchmarks. Hence, for chat models, the questions are copied over to form long inputs, such as 2k and 4k inputs.
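The sketch below illustrates the random-token idea under stated assumptions; it is not the actual benchmark script. It samples word-like entries (longer than 2 characters) from a Llama tokenizer's vocabulary to build a prompt of roughly the requested length. The model ID is illustrative.
```python
import random
from transformers import AutoTokenizer

# Illustrative model ID; use the tokenizer matching the model you benchmark.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-70B-Instruct")

# Keep only word-like vocabulary entries longer than 2 characters so the
# generated prompt looks closer to English than to symbol soup.
vocab = [tok for tok in tokenizer.get_vocab() if len(tok) > 2 and tok.isalpha()]

def random_prompt(approx_num_tokens: int) -> str:
    # One sampled word per requested token; the real token count will differ
    # somewhat once the joined string is re-tokenized.
    return " ".join(random.choices(vocab, k=approx_num_tokens))

print(random_prompt(2048)[:200])  # preview a roughly 2k-token random prompt
```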
To run the pretrained model benchmark, follow the command below.
```
python pretrained_vllm_benchmark.py
```
Refer to more vLLM benchmark details on their official GitHub repo [here](https://github.com/vllm-project/vllm/tree/main/benchmarks).
================================================
FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/README.md
================================================
# Llama Model Evaluation
Llama-Recipes makes use of `lm-evaluation-harness` for evaluating our fine-tuned Meta Llama 3 (or Llama 2) models. It can also serve as a tool to evaluate quantized models, to ensure that quality is maintained at lower precision or under other optimizations applied to the model.
`lm-evaluation-harness` provides a wide range of [features](https://github.com/EleutherAI/lm-evaluation-harness?tab=readme-ov-file#overview):
- Over 60 standard academic benchmarks for LLMs, with hundreds of subtasks and variants implemented.
- Support for models loaded via [transformers](https://github.com/huggingface/transformers/) (including quantization via [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)), [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), and [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/), with a flexible tokenization-agnostic interface.
- Support for fast and memory-efficient inference with [vLLM](https://github.com/vllm-project/vllm).
- Support for commercial APIs including OpenAI and TextSynth.
- Support for evaluation on adapters (e.g. LoRA) supported in [HuggingFace's PEFT library](https://github.com/huggingface/peft).
- Support for local models and benchmarks.
- Evaluation with publicly available prompts ensures reproducibility and comparability between papers.
- Easy support for custom prompts and evaluation metrics.
The Language Model Evaluation Harness is also the backend for 🤗 [Hugging Face's (HF) popular Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
## Setup
Before running the evaluation, ensure you have all the necessary dependencies installed.
### Dependencies
- Python 3.8+
- Your language model's dependencies
### Installation
Clone the lm-evaluation-harness repository and install it:
```bash
git clone https://github.com/EleutherAI/lm-evaluation-harness.git
cd lm-evaluation-harness
pip install -e .
```
### Quick Test
To run an evaluation of the Hugging Face `Llama 3.1 8B` model on a single GPU, please run the following:
```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B --tasks hellaswag --device cuda:0 --batch_size 8
```
Multiple tasks can be run by separating them with `,`, for example `--tasks hellaswag,arc_easy`.
To set the number of shots for few-shot evaluation, use `--num_fewshot`.
### PEFT Fine-tuned model Evaluation
In case you have fine-tuned your model using PEFT, you can set the path to the PEFT checkpoints by passing `peft` as part of `model_args`, as shown below:
```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8
```
### Limit the number of examples in benchmarks
There is a study from [IBM on efficient benchmarking of LLMs](https://arxiv.org/pdf/2308.11696.pdf), whose main takeaway is that to identify whether a model is performing poorly, benchmarking on a wider range of tasks is more important than the number of examples in each task. This means you can run the evaluation harness with fewer examples per task to reach an initial decision on whether performance has regressed from the baseline. The number of examples can be limited using the `--limit` flag with the desired number. For a full assessment, you would still need to run the full evaluation. Please read more in the paper linked above.
```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8 --limit 100
```
### Customized Llama Model
In case you have customized the Llama model, for example a quantized version of the model that loads differently from a normal HF model, you can follow [this guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage) and use `lm_eval.simple_evaluate()` to run the eval benchmarks; a hedged sketch follows.
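As a sketch of that workflow (based on the external-library-usage guide linked above; exact argument names can vary across harness versions, and the plain HF load below is just a stand-in for however you load your customized model):
```python
import lm_eval
from lm_eval.models.huggingface import HFLM
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in load; replace with your customized (e.g. quantized) loading logic.
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

# Wrap the already-loaded model in the harness's Hugging Face adapter.
lm = HFLM(pretrained=model, tokenizer=tokenizer, batch_size=8)

results = lm_eval.simple_evaluate(model=lm, tasks=["hellaswag"], num_fewshot=0)
print(results["results"]["hellaswag"])
```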
You can also find the full task list [here](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks).
### Multi-GPU Evaluation with Hugging Face `accelerate`
`lm-evaluation-harness` supports three main ways of using Hugging Face's [accelerate 🚀](https://github.com/huggingface/accelerate) library for multi-GPU evaluation.
To perform *data-parallel evaluation* (where each GPU loads a **separate full copy** of the model), `lm-evaluation-harness` leverages the `accelerate` launcher as follows:
```bash
accelerate launch -m lm_eval --model hf \
--model_args pretrained=meta-llama/Llama-3.1-8B \
--tasks lambada_openai,arc_easy \
--batch_size 16
```
(or via `accelerate launch --no-python lm_eval`).
For cases where your model can fit on a single GPU, this allows you to evaluate on K GPUs K times faster than on one.
**WARNING**: This setup does not work with FSDP model sharding, so in `accelerate config` FSDP must be disabled, or the NO_SHARD FSDP option must be used.
The second way of using `accelerate` for multi-GPU evaluation is when your model is *too large to fit on a single GPU.*
In this setting, run the library *outside the `accelerate` launcher*, but pass `parallelize=True` to `--model_args` as follows:
```
lm_eval --model hf \
--tasks lambada_openai,arc_easy \
--model_args pretrained=meta-llama/Llama-3.1-70B,parallelize=True \
--batch_size 16
```
This means that your model's weights will be split across all available GPUs.
For more advanced users or even larger models, `lm-evaluation-harness` allows for the following arguments when `parallelize=True` as well:
- `device_map_option`: How to split model weights across available GPUs. Defaults to `"auto"`.
- `max_memory_per_gpu`: The max GPU memory to use per GPU when loading the model.
- `max_cpu_memory`: The max amount of CPU memory to use when offloading the model weights to RAM.
- `offload_folder`: A folder where model weights will be offloaded to disk if needed.
There is also an option to run with tensor parallel and data parallel together. This will allow you to take advantage of both data parallelism and model sharding, and is especially useful for models that are too large to fit on a single GPU.
```
accelerate launch --multi_gpu --num_processes {nb_of_copies_of_your_model} \
-m lm_eval --model hf \
--model_args pretrained=meta-llama/Llama-3.1-70B,parallelize=True \
--tasks lambada_openai,arc_easy \
--batch_size 16
```
To learn more about model parallelism and how to use it with the `accelerate` library, see the [accelerate documentation](https://huggingface.co/docs/transformers/v4.15.0/en/parallelism).
### Tensor + Data Parallel and Optimized Inference with `vLLM`
`lm-evaluation-harness` also supports vLLM for faster inference on [supported model types](https://docs.vllm.ai/en/latest/models/supported_models.html), especially when splitting a model across multiple GPUs. For single-GPU or multi-GPU inference (tensor parallel, data parallel, or a combination of both), for example:
```bash
lm_eval --model vllm \
--model_args pretrained={model_name},tensor_parallel_size={GPUs_per_model},dtype=auto,gpu_memory_utilization=0.8,data_parallel_size={model_replicas} \
--tasks lambada_openai \
--batch_size auto
```
To use vLLM, run `pip install lm_eval[vllm]`. For a full list of supported vLLM configurations, please reference the [vLLM integration](https://github.com/EleutherAI/lm-evaluation-harness/blob/e74ec966556253fbe3d8ecba9de675c77c075bce/lm_eval/models/vllm_causallms.py) and the vLLM documentation.
vLLM occasionally differs in output from Hugging Face. `lm-evaluation-harness` treats Hugging Face as the reference implementation, and it provides a script for checking the validity of vLLM results against HF.
> [!Tip]
> For fastest performance, `lm-evaluation-harness` recommends using `--batch_size auto` for vLLM whenever possible, to leverage its continuous batching functionality!
> [!Tip]
> Passing `max_model_len=4096` or some other reasonable default to vLLM through model args may cause speedups or prevent out-of-memory errors when trying to use auto batch size, such as for Mistral-7B-v0.1, which defaults to a maximum length of 32k.
For more details about `lm-evaluation-harness`, please check out their GitHub repo [README.md](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/README.md).
## Calculating Meta 3.1 Evaluation Metrics Using LM-Evaluation-Harness
The [meta_eval](./meta_eval/) folder provides a detailed guide on how to calculate the Meta Llama 3.1 evaluation metrics reported on our [Meta Llama website](https://llama.meta.com/) using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) and our [3.1 evals Huggingface collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f). By following the steps outlined, users can replicate an evaluation process that is similar to Meta's for specific tasks and compare their results with our reported metrics. While slight variations in results are expected due to differences in implementation and model behavior, we aim to provide a transparent method for evaluating Meta Llama 3 models using a third-party library. Please check the [README.md](./meta_eval/README.md) for more details.
## Reproducing HuggingFace Open-LLM-Leaderboard v2
In the HF leaderboard v2, the [LLMs are evaluated on 6 benchmarks](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) from the Language Model Evaluation Harness, as described below:
- **IFEval**: [IFEval](https://arxiv.org/abs/2311.07911) is a dataset designed to test a model’s ability to follow explicit instructions, such as “include keyword x” or “use format y.” The focus is on the model’s adherence to formatting instructions rather than the content generated, allowing for the use of strict and rigorous metrics.
- **BBH (Big Bench Hard)**: [BBH](https://arxiv.org/abs/2210.09261) is a subset of 23 challenging tasks from the BigBench dataset to evaluate language models. The tasks use objective metrics, are highly difficult, and have sufficient sample sizes for statistical significance. They include multistep arithmetic, algorithmic reasoning (e.g., boolean expressions, SVG shapes), language understanding (e.g., sarcasm detection, name disambiguation), and world knowledge. BBH performance correlates well with human preferences, providing valuable insights into model capabilities.
- **MATH**: [MATH](https://arxiv.org/abs/2103.03874) is a compilation of high-school-level competition problems gathered from several sources, formatted consistently using LaTeX for equations and Asymptote for figures. Generations must fit a very specific output format. HuggingFace Open-LLM-Leaderboard v2 keeps only level 5 MATH questions and calls it MATH Level 5.
- **GPQA (Graduate-Level Google-Proof Q&A Benchmark)**: [GPQA](https://arxiv.org/abs/2311.12022) is a highly challenging knowledge dataset with questions crafted by PhD-level domain experts in fields like biology, physics, and chemistry. These questions are designed to be difficult for laypersons but relatively easy for experts. The dataset has undergone multiple rounds of validation to ensure both difficulty and factual accuracy. Access to GPQA is restricted through gating mechanisms to minimize the risk of data contamination. Consequently, HuggingFace Open-LLM-Leaderboard v2 does not provide plain text examples from this dataset, as requested by the authors.
- **MuSR (Multistep Soft Reasoning)**: [MuSR](https://arxiv.org/abs/2310.16049) is a new dataset consisting of algorithmically generated complex problems, each around 1,000 words in length. The problems include murder mysteries, object placement questions, and team allocation optimizations. Solving these problems requires models to integrate reasoning with long-range context parsing. Few models achieve better than random performance on this dataset.
- **MMLU-PRO (Massive Multitask Language Understanding - Professional)**: [MMLU-Pro](https://arxiv.org/abs/2406.01574) is a refined version of the MMLU dataset, which has been a standard for multiple-choice knowledge assessment. Recent research identified issues with the original MMLU, such as noisy data (some unanswerable questions) and decreasing difficulty due to advances in model capabilities and increased data contamination. MMLU-Pro addresses these issues by presenting models with 10 choices instead of 4, requiring reasoning on more questions, and undergoing expert review to reduce noise. As a result, MMLU-Pro is of higher quality and currently more challenging than the original.
In order to install the correct lm-evaluation-harness version, please check the Huggingface 🤗 Open LLM Leaderboard v2 [reproducibility section](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#reproducibility).
To run a leaderboard evaluation for `Llama-3.1-8B`, we can run the following:
```bash
accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4
```
Similarly, to run a leaderboard evaluation for `Llama-3.1-8B-Instruct`, we can run the following, adding `--apply_chat_template --fewshot_as_multiturn`:
```bash
accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4 --apply_chat_template --fewshot_as_multiturn
```
As for 70B models, tensor parallelism is required as they cannot fit on a single GPU; therefore we can run the following for `Llama-3.1-70B-Instruct`:
```bash
lm_eval --model hf --batch_size 4 --model_args pretrained=meta-llama/Llama-3.1-70B-Instruct,parallelize=True --tasks leaderboard --log_samples --output_path eval_results --apply_chat_template --fewshot_as_multiturn
```
================================================
FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md
================================================
# Calculating Meta 3.x Evaluation Metrics Using LM-Evaluation-Harness
As Llama models gain popularity, evaluating these models has become increasingly important. We have released all the evaluation details for Llama 3.x models on Hugging Face as datasets in the [3.1 evals collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f) and the [3.2 evals collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0). This recipe demonstrates how to calculate the Llama 3.x reported benchmark numbers using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) library and our prompts from the 3.x evals datasets on selected tasks.
## Disclaimer
1. **This recipe is not the official implementation** of Llama evaluation. Since our internal eval repo isn't public, we want to provide this recipe as an aid for anyone who wants to use the datasets we released. It is based on public third-party libraries and does not exactly mirror our internal Llama evaluation, which may lead to minor differences in the produced numbers.
2. **Model Compatibility**: This tutorial is specifically for Llama 3 based models, as our prompts include Llama 3 special tokens, e.g. `<|start_header_id|>user<|end_header_id|>`. It will not work with models that are not based on Llama 3.
## Insights from Our Evaluation Process
Here are our insights about the differences in eval configurations and prompting methods between this implementation and the Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard).
- **Prompts**: We use Chain-of-Thought (CoT) prompts while the Hugging Face leaderboard does not. The prompts that define the output format are also different.
- **Metric calculation**: For the MMLU-Pro, BBH, and GPQA tasks, we ask the model to generate a response and score the answer parsed from the generated response, while the Hugging Face leaderboard evaluation compares the log likelihood of all label words, such as [(A), (B), (C), (D)].
- **Parsers**: For generative tasks, where the final answer needs to be parsed before scoring, the parser functions can differ between ours and the Hugging Face leaderboard evaluation, as our prompts that define the model output format are designed differently.
- **Inference**: We use an internal LLM inference solution that does not apply padding, while the Hugging Face leaderboard uses padding on the generative tasks (MATH and IFEVAL).
- **Tasks**: We run benchmarks on BBH and MMLU-Pro only for pretrained models, and on Math-Hard, IFeval, and GPQA only for instruct models.
Given those differences, the numbers from this recipe cannot be compared to the numbers in the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard), even if the task names are the same.
## Environment setups
Please install lm-evaluation-harness and our llama-cookbook repo by running the following:
```
git clone git@github.com:meta-llama/llama-cookbook.git
cd llama-cookbook
pip install -U pip setuptools
pip install -e .
pip install -U antlr4_python3_runtime==4.11
pip install lm-eval[math,ifeval,sentencepiece,vllm]==0.4.3
cd end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval
```
To access our [3.1 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f), you must:
- Log in to the Hugging Face website, visit the 3.1 evals dataset pages, and agree to the terms.
- Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine.
The same process can be followed to access the [3.2 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0).
It is recommended to read the dataset card to understand the meaning of each column and to use the viewer feature in the Hugging Face dataset to view our data. It is important to have a basic understanding of our dataset format and content before proceeding.
### Task Selection
Given the extensive number of tasks available (12 for pretrained models and 30 for instruct models), a subset of tasks is chosen:
- **Tasks for 3.1 pretrained models**: BBH and MMLU-Pro
- **Tasks for 3.1 instruct models**: Math-Hard, IFeval, GPQA, and MMLU-Pro
- **Tasks for 3.2 pretrained models**: MMLU
- **Tasks for 3.2 instruct models**: MMLU, GPQA
These tasks are common evaluations, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard).
Here, we aim to get the benchmark numbers on the aforementioned tasks using the Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard). Please follow the instructions below to make the necessary modifications to use our eval prompts and get more eval metrics.
### Run eval tasks
1. We created [eval_config.yaml](./eval_config.yaml) to store all the arguments and hyperparameters. This is the main config file you need to change if you want to eval other models, and a part of eval_config.yaml looks like this:
```yaml
model_name: "meta-llama/Llama-3.1-8B-Instruct" # The name of the model to evaluate. This must be a valid Llama 3 based model name in the HuggingFace model hub.
evals_dataset: "meta-llama/Llama-3.1-8B-Instruct-evals" # The name of the 3.1 evals dataset to evaluate; please make sure this eval dataset corresponds to the model loaded. This must be a valid Llama 3.1 evals dataset name in the Llama 3.1 Evals collection.
# Must be one of the following ["meta-llama/Llama-3.1-8B-Instruct-evals","meta-llama/Llama-3.1-70B-Instruct-evals","meta-llama/Llama-3.1-405B-Instruct-evals","meta-llama/Llama-3.1-8B-evals","meta-llama/Llama-3.1-70B-evals","meta-llama/Llama-3.1-405B-evals","meta-llama/Llama-3.2-1B-evals","meta-llama/Llama-3.2-3B-evals", "meta-llama/Llama-3.2-1B-Instruct-evals", "meta-llama/Llama-3.2-3B-Instruct-evals"]
tasks: "meta_instruct" # Available tasks for 3.1 instruct model: "meta_math_hard", "meta_gpqa_cot", "meta_mmlu_pro_instruct", "meta_ifeval"; or just use "meta_instruct" to run all of them.
# Available tasks for 3.1 pretrain model: "meta_bbh", "meta_mmlu_pro_pretrain"; or just use "meta_pretrain" to run all of them.
# Available tasks for 3.2 instruct model: "meta_mmlu", "meta_math", "meta_gpqa"; or just use "meta_instruct" to run all of them.
# Available tasks for 3.2 pretrain model: "meta_mmlu"; or just use "meta_pretrain" to run all of them.
tensor_parallel_size: 1 # The vLLM argument that specifies the tensor parallel size for the model, i.e. how many GPUs to use per model copy.
data_parallel_size: 4 # The vLLM argument that specifies the data parallel size for the model, i.e. how many copies of the model will be used.
...
```
Change `model_name` to the model name you want to eval and change `evals_dataset` according to the model type and parameters. Remember to adjust `tensor_parallel_size` to 2 or more to load the 70B models, and change `data_parallel_size` accordingly so that `tensor_parallel_size * data_parallel_size` equals the number of GPUs you have. Please read the comments inside this yaml for detailed explanations of the other parameters.
2. We already included all the related eval task yaml and python files in the [meta_template](./meta_template/) folder, which define all the task implementations. You do not need to change those manually; we will use [prepare_meta_eval.py](./prepare_meta_eval.py) to change them automatically later.
3. Then we can run [prepare_meta_eval.py](./prepare_meta_eval.py), which reads the configuration from [eval_config.yaml](./eval_config.yaml), copies everything in the template folder to a working folder `work_dir`, makes the required modifications to those templates, prepares the dataset if needed, and prints out the CLI command to run `lm_eval`.
To run [prepare_meta_eval.py](./prepare_meta_eval.py), we can do:
```
python prepare_meta_eval.py --config_path ./eval_config.yaml
```
This script will load the default [eval_config.yaml](./eval_config.yaml) config and print out a `lm_eval` command to run `meta_instruct` group tasks, which include `meta_ifeval`, `meta_math_hard`, `meta_gpqa` and `meta_mmlu_pro_instruct`, for the `meta-llama/Llama-3.1-8B-Instruct` model using the `meta-llama/Llama-3.1-8B-Instruct-evals` dataset.
An example output from [prepare_meta_eval.py](./prepare_meta_eval.py) looks like this:
```
lm_eval --model vllm --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=4,max_model_len=8192,add_bos_token=True,seed=42 --tasks meta_instruct --batch_size auto --output_path eval_results --include_path ./work_dir --seed 42 --log_samples
```
4. Then just copy the `lm_eval` command printed by [prepare_meta_eval.py](./prepare_meta_eval.py) back to your terminal and run it to get the results, which will be saved into the `eval_results` folder by default.
**NOTE**: As for `--model vllm`, here we use vLLM inference instead of Hugging Face inference because of the padding issue. By default, for the generative tasks, the `lm-eval --model_args="{...}" --batch_size=auto` command uses the Hugging Face inference solution, which uses a static batch method with [left padding](https://github.com/EleutherAI/lm-evaluation-harness/blob/8ad598dfd305ece8c6c05062044442d207279a97/lm_eval/models/huggingface.py#L773) using the EOS_token for Llama models, while our internal evaluation loads the original Python checkpoints and handles each generation request asynchronously without any padding. To simulate this, we use the vLLM inference solution to do dynamic batching without any padding.
**NOTE**: As for `add_bos_token=True`, since our prompts in the evals dataset already include all the special tokens required by the instruct model, such as `<|start_header_id|>user<|end_header_id|>`, we do not use the `--apply_chat_template` argument for instruct models. However, we need the `add_bos_token=True` flag to add the BOS_token back during vLLM inference, as the BOS_token is removed by default in [this PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1465).
**NOTE**: For the `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which is later handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) cannot fully handle those conversions, we use the original ground truth from the original dataset [lighteval/MATH-Hard](https://www.oxen.ai/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) handles this step and produces a local parquet dataset file.
Moreover, we have modified [math_hard/utils.py](./meta_template/math_hard/utils.py) to address two issues (a hedged parsing sketch follows this list):
1. The original python script only uses [a regular expression "Final Answer: The final answer is(.*?). I hope it is correct."](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L192) to get the final answer, because this format is shown in the preceding 4-shot example prompts. However, our MATH Hard task uses 0-shot CoT prompts that ask the model to put the final answer into the string format `Therefore, the final answer is: $\\boxed{answer}$. I hope it is correct.`, which cannot be captured by the previous regular expression, so we parse the final answer from the `\\boxed{}` expression instead.
2. The [is_equiv(x1: str, x2: str)](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L144) function failed to parse some ground truth, as we noticed error logs like `[utils.py:158] couldn't parse one of [0,1) or [0,1)`, so all those questions would be marked as wrong. We raised [an issue with lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/issues/2212) about this problem and, as a temporary solution, added a string equality check before calling the is_equiv() function.
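For illustration, here is a hedged sketch of the boxed-answer parsing idea (not the exact parser in [math_hard/utils.py](./meta_template/math_hard/utils.py); the greedy regex below grabs everything up to the last closing brace and does not handle multiple `\boxed{}` spans or trailing braces):
```python
import re

def parse_boxed_answer(response: str):
    # Capture the contents of \boxed{...}; greedy match up to the last "}".
    matches = re.findall(r"\\boxed\{(.*)\}", response)
    return matches[-1] if matches else None

print(parse_boxed_answer(
    r"Therefore, the final answer is: $\boxed{6\sqrt{7}}$. I hope it is correct."
))  # -> 6\sqrt{7}
```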
**NOTE**: For the `meta_ifeval` tasks, we have to use the original configs, such as `instruction_id_list` and `kwargs`, from [wis-k/instruction-following-eval](https://huggingface.co/datasets/wis-k/instruction-following-eval) in order to use the [lm-evaluation-harness IFeval evaluation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard/ifeval). We perform a similar join-back using the `get_ifeval_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) to get a local parquet dataset file.
### Yaml Config Deep Dive
Here, we will use MMLU-Pro as an example to show the steps to create a yaml config with detailed explanations, so that people can follow this example to create other task configurations if they want. For more information, please read the lm-evaluation-harness [new task guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md).
**1. Define the config to load datasets**
We can use our 3.1 evals dataset as the source dataset with the corresponding subset and set the test split to `latest`. For example, if we want to calculate the MMLU-Pro metric for the 3.1 8B instruct model, the following configs are needed, as explained below:
```yaml
task: meta_mmlu_pro_instruct
dataset_path: meta-llama/Llama-3.1-8B-Instruct-evals
dataset_name: Llama-3.1-8B-Instruct-evals__mmlu_pro__details
test_split: latest
```
If you want to run the evaluation on 70B-Instruct, it is recommended to change the `dataset_path` and `dataset_name` from 8B to 70B. Even though 70B-Instruct and 8B-Instruct share the same prompts, the `is_correct` column, which can be used to compare the current result with the reported result for each sample, is different.
**Note**: Config files for Llama-3.1-8B-Instruct are already provided in each task subfolder under the [meta_template folder](./meta_template/). Remember to change the eval dataset name according to the model type, and DO NOT use a pretrained evals dataset on instruct models or vice versa.
**2. Configure preprocessing, prompts and ground truth**
Here is an example yaml snippet from the MMLU-Pro task that handles dataset preprocessing, prompts, and ground truth.
```yaml
process_docs: !function utils.process_docs
doc_to_text: !function utils.doc_to_text
doc_to_target: gold
```
- `process_docs`: Defines the preprocessing function for our datasets. In this case, we use the `process_docs` python function defined in [utils.py](./meta_template/mmlu_pro/utils.py). This function takes the original dataset and outputs a processed dataset where each out_doc contains `problem` (the input question) and `gold` (the ground truth). We also renamed the `is_correct` column to `previously_is_correct` to allow a detailed per-sample comparison between the previously reported score and the current score. You must use an evals dataset and a model with the same parameters and the same model type to get a valid comparison. (A hedged sketch of these functions follows this list.)
- `doc_to_text`: Defines the prompts. In the MMLU-Pro case, the `input_final_prompts` column always contains a list with a single prompt, so we just use a python function that returns `input_final_prompts[0]`.
- `doc_to_target`: Defines the ground truth, which in the MMLU-Pro case is `gold`, derived from `input_correct_responses[0]`.
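To make this concrete, here is a hedged sketch of what such preprocessing functions could look like. The actual implementations live in [utils.py](./meta_template/mmlu_pro/utils.py) and may differ; the source column name `input_question` is an assumption based on the description above.
```python
import datasets

def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    def _process_doc(doc):
        # Build the fields the task config expects, per the description above.
        # Sketch only: a true column rename would also drop `is_correct`.
        return {
            "problem": doc["input_question"],  # assumed source column name
            "gold": doc["input_correct_responses"][0],
            # Keep the previously reported correctness for per-sample comparison.
            "previously_is_correct": doc["is_correct"],
        }
    return dataset.map(_process_doc)

def doc_to_text(doc) -> str:
    # input_final_prompts always holds a list with a single prompt.
    return doc["input_final_prompts"][0]
```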
**3. Configure task type and parser**
While Open LLM Leaderboard v2 uses the [multiple choice format](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#multiple-choice-format) for the MMLU-Pro, BBH, and GPQA tasks, comparing the log likelihood of all label words, such as [(A), (B), (C), (D)], we use the generative task option: we ask the model to generate a response in sentences given our carefully designed prompts, then use parsers to grab the final answer, and score that final answer against the ground truth. Here is an example config from MMLU-Pro that enables the generative task and defines the regex parser:
```yaml
filter_list:
  - name: "strict-match"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: 'best answer is ([A-Z])'
      - function: "take_first"
```
Since the MMLU-Pro task uses 5-shot Chain-of-Thought (CoT) prompts, and the prompts explicitly instruct: "Your response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.", we use the simple and intuitive regex `best answer is ([A-Z])` to parse the model response, taking the last appearance as the final answer; this final answer is then scored against the ground truth `gold` using the exact match method. A small sketch of this parsing behavior follows.
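For intuition, here is a hedged Python illustration of what `group_select: -1` followed by `take_first` accomplishes (the harness applies its own filter pipeline; this is just a plain-regex rendering of the idea):
```python
import re

response = "Let's think step by step... The best answer is B. On reflection, the best answer is C."
matches = re.findall(r"best answer is ([A-Z])", response)
# group_select: -1 keeps the last regex match, mirroring matches[-1] here.
print(matches[-1])  # -> "C"
```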
**4. Define generation and metric config**
Then we need to define the generation and metric config, which looks like this:
```yaml
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0
  max_gen_toks: 1024
num_fewshot: 0
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
```
Here we set `num_fewshot` to 0 as our prompts have already been converted to 5-shot, and the model generation will only stop if the generated output tokens exceed 1024, as stated in the [mmlu-pro eval details](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro). We set `do_sample` to false and `temperature` to 0, as stated in the `eval_config` column in the dataset. We use the `exact_match` metric for this task and calculate the `mean` as our aggregated task number.
**NOTE**: While we tried our best to create the template files, these configs and functions are created based on public third-party libraries and are not exactly the same as our internal implementation, so there is a chance that the eval numbers are slightly different.
**NOTE**: We used the average of `inst_level_strict_acc,none` and `prompt_level_strict_acc,none` to get the final number for `IFeval`, as stated [here](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#task-evaluations-and-parameters).
**NOTE**: On the [Llama website](https://llama.com/), we reported the `macro_avg` metric, which is the average of all subtask average scores, for the `MMLU-Pro` task, but here we are calculating the `micro_avg` metric, which is the average score across all individual samples; those `micro_avg` numbers can be found in the [eval_details.md](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro).
**NOTE**: The eval numbers may be slightly different, as we observed around ±0.01 differences between evaluation runs because the latest vLLM inference is not fully deterministic even with temperature=0. This behavior may be related to [this issue](https://github.com/vllm-project/vllm/issues/5404), or it may be expected due to 16-bit inference, as stated in [this comment](https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535) and [this comment](https://github.com/vllm-project/vllm/issues/4112#issuecomment-2071115725).
## Acknowledgement
This tutorial is inspired by the [leaderboard tasks implementation on the lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard) created by the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) team.
We also extend our gratitude to the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) github repo from [EleutherAI](https://www.eleuther.ai/).
================================================
FILE: end-to-end-use-cases/book-character-mindmap/README.md
================================================
# Book Character Mind Map With Llama4 Maverick
![Book Character Mind Map](public/character_relationship.png)
Book Mind is a web application that allows users to explore character relationships and storylines in books using AI-powered visualizations.
This leverages **Llama 4 Maverick**'s impressive 1M token context window to process entire books at once, enabling comprehensive analysis of complex narratives and character relationships across lengthy texts.
## Features
### Leverage Long Context Length
| Model | Meta Llama4 Maverick | Meta Llama4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
| ----- | -------------- | -------------- | -------------- | -------------- |
| Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |
Because of the long context length, Book Mind can process entire books at once, providing a comprehensive understanding of complex narratives and character relationships.
- Interactive Mind Maps: Visualize relationships between characters and plot elements.
- Book Summaries: Get concise overviews of plots and themes.
### Step-by-Step Instructions
We implemented a step-by-step approach to ensure the reliability of the model's outputs.
1. **Character Identification**: Identify all characters in the book and summarize their roles.
```
You are a highly detailed literary analyst AI. Your sole mission is to meticulously extract comprehensive information about characters and the *nuances* of their relationships from the provided text segment. This data will be used later to build a relationship graph.
```
2. **Character Relationships**: Determine the relationships between characters.
```
You are an expert data architect AI specializing in transforming literary analysis into structured graph data. Your task is to synthesize character and relationship information into a specific JSON format containing nodes and links, including a title and summary.
```
3. **JSON Format**: Output the results in a JSON format for easy parsing and visualization.
```
You are an extremely precise and strict JSON extractor.
Extract only the complete JSON object from the input. Get the last one if there are multiple.
```
### Ask the Book with Chat Interface
We also implemented a chat interface to interact with the book. Users can ask questions about the book's characters, plot, and relationships. The model will respond with a concise answer based on the book's content and the relationships between characters.
```
You are an expert search AI designed to help users find detailed information about character relationships from a book. Your task is to assist users in querying the relationship data extracted from the book.
```
![Chat Interface](public/chat_interface.png)
## Getting Started
### Frontend Setup
To communicate with the [server/server.py](server/server.py), we use `React.js` and `axios`.
1. Install dependencies:
```
npm install
```
2. Run the application:
```
npm start
```
### Server Setup
We use `Flask` to serve the model's responses and `vllm` to run the **Llama 4 Maverick** model (a minimal sketch of such a server follows the steps below).
1. Install dependencies:
```
cd server
pip install -r requirements.txt
```
2. Run the server:
```
python server.py
```
## Get Copyright Free Books
- [Project Gutenberg](https://www.gutenberg.org/)
  - [Romeo and Juliet](https://www.gutenberg.org/ebooks/1513): 50,687 input tokens
  - [Moby-Dick; The Whale](https://www.gutenberg.org/ebooks/2701): 318,027 input tokens
  1059. ================================================
  1060. FILE: end-to-end-use-cases/book-character-mindmap/.gitignore
  1061. ================================================
  1062. # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
  1063. # dependencies
  1064. /node_modules
  1065. /.pnp
  1066. .pnp.js
  1067. # testing
  1068. /coverage
  1069. # production
  1070. /build
  1071. # misc
  1072. .DS_Store
  1073. .env.local
  1074. .env.development.local
  1075. .env.test.local
  1076. .env.production.local
  1077. npm-debug.log*
  1078. yarn-debug.log*
  1079. yarn-error.log*
  1080. ================================================
  1081. FILE: end-to-end-use-cases/book-character-mindmap/public/robots.txt
  1082. ================================================
  1083. # https://www.robotstxt.org/robotstxt.html
  1084. User-agent: *
  1085. Disallow:
  1086. ================================================
  1087. FILE: end-to-end-use-cases/book-character-mindmap/server/requirements.txt
  1088. ================================================
  1089. flask
  1090. flask-cors
werkzeug
vllm
transformers
================================================
FILE: end-to-end-use-cases/browser_use/README.md
================================================
## Building an Intelligent Browser Agent with Llama 4 Scout
This project provides a comprehensive guide to creating an AI-powered browser agent capable of autonomously navigating and interacting with websites. By leveraging the capabilities of Llama 4 Scout, Playwright, and Together AI, this agent can perform tasks seamlessly while understanding both visual and textual content.
### Features
- Visual Understanding: Utilizes screenshots for visual comprehension of web pages.
- Autonomous Navigation: Capable of navigating and interacting with web elements.
- Natural Language Instructions: Executes tasks based on natural language commands.
- Persistent Session Management: Maintains browser sessions for continuous interaction.
### Example Tasks
- Search for a product on Amazon.
- Find the cheapest flight to Tokyo.
- Purchase tickets for the next Warriors game.
### What's in this Project?
- Environment setup instructions
- Browser automation guides using Playwright
- Structured prompting techniques for guiding the LLM in task execution
- Content comprehension utilizing Llama 4 Scout
- Creating a persistent and intelligent browser agent for real-world applications (a minimal sketch of the core loop follows below)
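As a flavor of that core loop, here is a minimal sketch that screenshots a page with Playwright and asks Llama 4 Scout, via the Together AI Python SDK, to decide the next action. The model id, target URL, and prompt are illustrative assumptions; see the notebook for the actual implementation:
```
# Sketch of the perceive-then-decide loop: capture a screenshot with
# Playwright, then ask Llama 4 Scout (via Together AI) for the next action.
# The model id, target URL, and prompt are illustrative assumptions.
import base64
from playwright.sync_api import sync_playwright
from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page()
    page.goto("https://www.amazon.com")
    screenshot_b64 = base64.b64encode(page.screenshot()).decode()
    browser.close()

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text",
             "text": "You are a browser agent. Given this screenshot, "
                     "describe the next action to take to search for a laptop."},
            {"type": "image_url",
             "image_url": {"url": f"data:image/png;base64,{screenshot_b64}"}},
        ],
    }],
)
print(response.choices[0].message.content)
```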
### Demo
For a detailed explanation and demo video, visit: [Blog Post and Demo Video](https://miguelg719.github.io/browser-use-blog/)
### Prerequisite for Running the Notebook
- Before getting started, please make sure to set up a Together.ai account and get an API key from [here](https://www.together.ai/).
### Collaborators
Feel free to reach out with any questions or feedback!
- Miguel Gonzalez: [X](https://x.com/miguel_gonzf) | [LinkedIn](https://www.linkedin.com/in/gonzalezfernandezmiguel/)
- Dimitry Khorzov: [X](https://x.com/korzhov_dm) | [LinkedIn](https://www.linkedin.com/in/korzhovdm)
================================================
FILE: end-to-end-use-cases/coding/text2sql/README.md
================================================
## Text2SQL: Natural Language to SQL Interface
This project provides a set of scripts to convert natural language queries into SQL statements using Meta's Llama model. The goal is to enable users to interact with databases using natural language inputs, making it easier for non-technical users to access and analyze data.
For detailed instructions on setting up the environment, creating a database, and executing natural language queries using the Text2SQL interface, please refer to the quickstart.ipynb notebook.
### Structure:
- quickstart.ipynb: A Quick Demo of Text2SQL Using Llama 3.3. This Jupyter Notebook includes examples of how to use the interface to execute natural language queries on the sample data. It uses Llama 3.3 to answer questions about a SQLite database using LangChain and the Llama cloud provider Together.ai.
- nba.txt: A text file containing NBA roster information, which is used as sample data for demonstration purposes.
- txt2csv.py: A script that converts text data into a CSV format. This script is used to preprocess the input data before it is fed into csv2db.py.
- csv2db.py: A script that imports data from a CSV file into a SQLite database. This script is used to populate the database with sample data.
- nba_roster.db: A SQLite database file created from the nba.txt data, used to test the Text2SQL interface.
### Detailed steps on running the notebook:
- Before getting started, please make sure to set up a Together.ai account and get an API key from [here](https://www.together.ai/).
- First, install the requirements from [here](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/coding/text2sql/requirements.txt) by running:
```
git clone https://github.com/meta-llama/llama-cookbook.git
cd llama-cookbook/end-to-end-use-cases/coding/text2sql/
pip install -r requirements.txt
```
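To give a feel for what the notebook does under the hood, here is a minimal sketch of the Text2SQL flow: hand the model the schema plus a question, then execute the SQL it returns against `nba_roster.db`. The notebook itself uses LangChain; this direct Together AI call and the model id are illustrative assumptions:
```
# Sketch of the Text2SQL flow: prompt the model with the schema and a
# question, then run the returned SQL against nba_roster.db. The notebook
# uses LangChain; this direct call and the model id are illustrative.
import sqlite3
from together import Together

conn = sqlite3.connect("nba_roster.db")
schema = "\n".join(
    row[0] for row in conn.execute(
        "SELECT sql FROM sqlite_master WHERE type='table'"
    )
)

question = "Which Golden State Warriors player has the highest salary?"
prompt = (
    f"Given this SQLite schema:\n{schema}\n\n"
    f"Write one SQL query that answers: {question}\n"
    "Return only the SQL, with no explanation or formatting."
)

client = Together()  # reads TOGETHER_API_KEY from the environment
response = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
    messages=[{"role": "user", "content": prompt}],
)
sql = response.choices[0].message.content.strip()  # assumes bare SQL back
print(sql)
print(conn.execute(sql).fetchall())
```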
### Contributing
Contributions are welcome! If you'd like to add new features or improve existing ones, please submit a pull request. We encourage contributions in the following areas:
- Adding support for additional databases
- Developing new interfaces or applications that use the Text2SQL interface
================================================
FILE: end-to-end-use-cases/coding/text2sql/nba.txt
================================================
Atlanta Hawks Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4397136.png
Saddiq Bey41 SF 24 6' 7" 215 lbs Villanova $4,556,983
https://a.espncdn.com/i/headshots/nba/players/full/3037789.png
Bogdan Bogdanovic13 SG 31 6' 5" 225 lbs -- $18,700,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Kobe Bufkin G 20 6' 4" 195 lbs Michigan $4,094,280
https://a.espncdn.com/i/headshots/nba/players/full/3102529.png
Clint Capela15 C 29 6' 10" 256 lbs -- $20,616,000
https://a.espncdn.com/i/headshots/nba/players/full/4277952.png
Bruno Fernando0 PF 25 6' 10" 240 lbs Maryland $2,581,522
https://a.espncdn.com/i/headshots/nba/players/full/4065656.png
Trent Forrest2 G 25 6' 4" 210 lbs Florida State --
https://a.espncdn.com/i/headshots/nba/players/full/4432585.png
AJ Griffin14 F 20 6' 6" 222 lbs Duke $3,712,920
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Mouhamed Gueye F 20 6' 11" 210 lbs Washington State $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/4065732.png
De'Andre Hunter12 SF 25 6' 8" 221 lbs Virginia $20,089,286
https://a.espncdn.com/i/headshots/nba/players/full/4701230.png
Jalen Johnson1 SF 21 6' 8" 219 lbs Duke $2,925,360
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jarkel Joiner G 24 6' 1" 180 lbs NC State --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Seth Lundy G 23 6' 6" 220 lbs Penn State --
https://a.espncdn.com/i/headshots/nba/players/full/3913180.png
Garrison Mathews25 SG 26 6' 6" 215 lbs Lipscomb $2,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4032.png
Wesley Matthews23 SG 36 6' 4" 220 lbs Marquette $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4004.png
Patty Mills8 PG 35 6' 0" 180 lbs Saint Mary's $6,802,950
https://a.espncdn.com/i/headshots/nba/players/full/3907497.png
Dejounte Murray5 SG 27 6' 5" 180 lbs Washington $18,214,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Miles Norris F 23 6' 10" 220 lbs UC Santa Barbara --
https://a.espncdn.com/i/headshots/nba/players/full/4431680.png
Onyeka Okongwu17 PF 22 6' 9" 240 lbs USC $8,109,063
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Keaton Wallace G 24 6' 3" 185 lbs UTSA --
https://a.espncdn.com/i/headshots/nba/players/full/4277905.png
Trae Young11 PG 25 6' 1" 164 lbs Oklahoma $40,064,220
Boston Celtics Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4397885.png
Dalano Banton G 23 6' 7" 204 lbs Nebraska $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4278031.png
Oshae Brissett12 SF 25 6' 7" 210 lbs Syracuse $2,165,000
https://a.espncdn.com/i/headshots/nba/players/full/2566769.png
Malcolm Brogdon13 PG 30 6' 4" 229 lbs Virginia $22,500,000
https://a.espncdn.com/i/headshots/nba/players/full/3917376.png
Jaylen Brown7 SG 26 6' 6" 223 lbs California $31,830,357
https://a.espncdn.com/i/headshots/nba/players/full/4432907.png
Justin Champagnie99 SF 22 6' 6" 206 lbs Pittsburgh --
https://a.espncdn.com/i/headshots/nba/players/full/4576085.png
JD Davison20 SG 20 6' 1" 195 lbs Alabama --
https://a.espncdn.com/i/headshots/nba/players/full/3989.png
Blake Griffin91 PF 34 6' 9" 250 lbs Oklahoma --
https://a.espncdn.com/i/headshots/nba/players/full/4065804.png
Sam Hauser30 SF 25 6' 7" 217 lbs Virginia $1,927,896
https://a.espncdn.com/i/headshots/nba/players/full/3213.png
Al Horford42 C 37 6' 9" 240 lbs Florida $10,000,000
https://a.espncdn.com/i/headshots/nba/players/full/3064560.png
Luke Kornet40 C 28 7' 2" 250 lbs Vanderbilt $2,413,304
https://a.espncdn.com/i/headshots/nba/players/full/3133602.png
Svi Mykhailiuk SG 26 6' 7" 205 lbs Kansas $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/3102531.png
Kristaps Porzingis C 28 7' 3" 240 lbs -- $36,016,200
https://a.espncdn.com/i/headshots/nba/players/full/4066354.png
Payton Pritchard11 PG 25 6' 1" 195 lbs Oregon $4,037,278
https://a.espncdn.com/i/headshots/nba/players/full/4397424.png
Neemias Queta C 24 7' 0" 245 lbs Utah State $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4065648.png
Jayson Tatum0 SF 25 6' 8" 210 lbs Duke $32,600,060
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jordan Walsh G 19 6' 7" 205 lbs Arkansas $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/3078576.png
Derrick White9 PG 29 6' 4" 190 lbs Colorado $18,357,143
https://a.espncdn.com/i/headshots/nba/players/full/4066211.png
Robert Williams III44 C 25 6' 9" 237 lbs Texas A&M $11,571,429
Brooklyn Nets Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4423545.png
Darius Bazley PF 23 6' 9" 216 lbs -- $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/3147657.png
Mikal Bridges1 SF 27 6' 6" 209 lbs Villanova $21,700,000
https://a.espncdn.com/i/headshots/nba/players/full/4278067.png
Nic Claxton33 C 24 6' 11" 215 lbs Georgia $9,625,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Noah Clowney F 19 6' 10" 210 lbs Alabama $3,089,520
https://a.espncdn.com/i/headshots/nba/players/full/2580782.png
Spencer Dinwiddie26 PG 30 6' 6" 215 lbs Colorado $20,357,143
https://a.espncdn.com/i/headshots/nba/players/full/2578185.png
Dorian Finney-Smith28 PF 30 6' 7" 220 lbs Florida $13,932,008
https://a.espncdn.com/i/headshots/nba/players/full/3138196.png
Cameron Johnson2 SF 27 6' 8" 210 lbs North Carolina $25,679,348
https://a.espncdn.com/i/headshots/nba/players/full/2583632.png
Royce O'Neale00 PF 30 6' 6" 226 lbs Baylor $9,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4432194.png
Day'Ron Sharpe20 C 21 6' 9" 265 lbs North Carolina $2,210,040
https://a.espncdn.com/i/headshots/nba/players/full/3907387.png
Ben Simmons10 PG 27 6' 10" 240 lbs LSU $37,893,408
https://a.espncdn.com/i/headshots/nba/players/full/4065697.png
Dennis Smith Jr. PG 25 6' 2" 205 lbs NC State $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4432174.png
Cam Thomas24 SG 21 6' 3" 210 lbs LSU $2,240,160
https://a.espncdn.com/i/headshots/nba/players/full/4277890.png
Lonnie Walker IV G 24 6' 4" 204 lbs Miami $2,019,706
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Dariq Whitehead F 19 6' 7" 220 lbs Duke $2,966,040
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jalen Wilson F 22 6' 8" 225 lbs Kansas --
Charlotte Hornets Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Angelo Allegri16 F 24 6' 7" 215 lbs Eastern Washington --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Amari Bailey G 19 6' 5" 185 lbs UCLA --
https://a.espncdn.com/i/headshots/nba/players/full/4432816.png
LaMelo Ball1 PG 22 6' 7" 180 lbs -- $10,900,635
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Leaky Black F 24 6' 9" 205 lbs North Carolina --
https://a.espncdn.com/i/headshots/nba/players/full/4431712.png
James Bouknight2 SG 23 6' 4" 190 lbs UConn $4,570,080
https://a.espncdn.com/i/headshots/nba/players/full/4066383.png
Miles Bridges0 SF 25 6' 7" 225 lbs Michigan State $7,921,301
https://a.espncdn.com/i/headshots/nba/players/full/4249.png
Gordon Hayward20 SF 33 6' 7" 225 lbs Butler $31,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4431699.png
Kai Jones23 PF 22 6' 11" 221 lbs Texas $3,047,880
https://a.espncdn.com/i/headshots/nba/players/full/4683027.png
Theo Maledon9 PG 22 6' 5" 175 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/3138161.png
Cody Martin11 SF 27 6' 6" 205 lbs Nevada $7,560,000
https://a.espncdn.com/i/headshots/nba/players/full/4576086.png
Bryce McGowens7 G 20 6' 6" 175 lbs Nebraska $1,719,865
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Nathan Mensah31 C 25 6' 10" 230 lbs San Diego State --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Brandon Miller F 20 6' 9" 200 lbs Alabama $10,880,400
https://a.espncdn.com/i/headshots/nba/players/full/5144059.png
James Nnaji C 19 6' 11" 250 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4230547.png
Frank Ntilikina PG 25 6' 4" 200 lbs -- $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/3133603.png
Kelly Oubre Jr.12 SG 27 6' 6" 203 lbs Kansas --
https://a.espncdn.com/i/headshots/nba/players/full/4278076.png
Nick Richards4 C 25 7' 0" 245 lbs Kentucky $5,000,000
https://a.espncdn.com/i/headshots/nba/players/full/3074752.png
Terry Rozier3 SG 29 6' 1" 190 lbs Louisville $22,205,221
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Trevon Scott18 F 26 6' 8" 225 lbs Cincinnati --
https://a.espncdn.com/i/headshots/nba/players/full/4397570.png
Jaylen Sims30 G 24 6' 6" 210 lbs UNC Wilmington --
https://a.espncdn.com/i/headshots/nba/players/full/4683686.png
Nick Smith Jr. G 19 6' 4" 170 lbs Arkansas $2,463,960
https://a.espncdn.com/i/headshots/nba/players/full/4702233.png
JT Thor21 F 21 6' 9" 203 lbs Auburn $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4278078.png
P.J. Washington25 PF 25 6' 7" 230 lbs Kentucky $16,847,826
https://a.espncdn.com/i/headshots/nba/players/full/4701232.png
Mark Williams5 C 21 7' 0" 240 lbs Duke $3,908,160
Chicago Bulls Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4066421.png
Lonzo Ball2 PG 25 6' 6" 190 lbs UCLA $20,465,117
https://a.espncdn.com/i/headshots/nba/players/full/3133635.png
Jevon Carter PG 28 6' 1" 200 lbs West Virginia $6,190,476
https://a.espncdn.com/i/headshots/nba/players/full/2991350.png
Alex Caruso6 SG 29 6' 5" 186 lbs Texas A&M $9,460,000
https://a.espncdn.com/i/headshots/nba/players/full/2528693.png
Torrey Craig0 SF 32 6' 7" 221 lbs South Carolina Upstate $2,528,233
https://a.espncdn.com/i/headshots/nba/players/full/3978.png
DeMar DeRozan11 SF 34 6' 6" 220 lbs USC $28,600,000
https://a.espncdn.com/i/headshots/nba/players/full/4397002.png
Ayo Dosunmu12 SG 23 6' 5" 200 lbs Illinois $6,481,481
https://a.espncdn.com/i/headshots/nba/players/full/6585.png
Andre Drummond3 C 30 6' 11" 279 lbs UConn $3,360,000
https://a.espncdn.com/i/headshots/nba/players/full/2596112.png
Javonte Green24 SF 30 6' 5" 205 lbs Radford --
https://a.espncdn.com/i/headshots/nba/players/full/4067735.png
Carlik Jones22 G 25 6' 0" 174 lbs Louisville $1,922,896
https://a.espncdn.com/i/headshots/nba/players/full/3064440.png
Zach LaVine8 SG 28 6' 5" 200 lbs UCLA $40,064,220
https://a.espncdn.com/i/headshots/nba/players/full/4432239.png
Justin Lewis34 F 21 6' 7" 245 lbs Marquette --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Julian Phillips F 19 6' 8" 197 lbs Tennessee $1,600,000
https://a.espncdn.com/i/headshots/nba/players/full/4683622.png
Marko Simonovic19 C 23 7' 0" 220 lbs -- $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4279815.png
Terry Taylor32 F 23 6' 4" 230 lbs Austin Peay $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4433159.png
Dalen Terry25 F 21 6' 7" 200 lbs Arizona $3,350,760
https://a.espncdn.com/i/headshots/nba/players/full/6478.png
Nikola Vucevic9 C 32 6' 10" 260 lbs USC $18,518,519
https://a.espncdn.com/i/headshots/nba/players/full/4395651.png
Coby White0 PG 23 6' 5" 195 lbs North Carolina $11,111,111
https://a.espncdn.com/i/headshots/nba/players/full/4431687.png
Patrick Williams44 PF 22 6' 7" 215 lbs Florida State $9,835,881
Cleveland Cavaliers Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4066328.png
Jarrett Allen31 C 25 6' 9" 243 lbs Texas $20,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Emoni Bates21 F 19 6' 10" 170 lbs Eastern Michigan --
https://a.espncdn.com/i/headshots/nba/players/full/4432173.png
Sharife Cooper2 PG 22 6' 1" 176 lbs Auburn --
https://a.espncdn.com/i/headshots/nba/players/full/3947156.png
Mamadi Diakite21 F 26 6' 9" 228 lbs Virginia --
https://a.espncdn.com/i/headshots/nba/players/full/4396907.png
Darius Garland10 PG 23 6' 1" 192 lbs Vanderbilt $34,005,250
https://a.espncdn.com/i/headshots/nba/players/full/4065733.png
Ty Jerome SG 26 6' 5" 195 lbs Virginia $2,439,025
https://a.espncdn.com/i/headshots/nba/players/full/3064559.png
Damian Jones15 C 28 6' 11" 245 lbs Vanderbilt $2,586,665
https://a.espncdn.com/i/headshots/nba/players/full/2991043.png
Caris LeVert3 SG 29 6' 6" 205 lbs Michigan $15,384,616
https://a.espncdn.com/i/headshots/nba/players/full/4066757.png
Sam Merrill20 SG 27 6' 4" 205 lbs Utah State $1,997,238
https://a.espncdn.com/i/headshots/nba/players/full/3908809.png
Donovan Mitchell45 SG 27 6' 1" 215 lbs Louisville $33,162,030
https://a.espncdn.com/i/headshots/nba/players/full/4432158.png
Evan Mobley4 PF 22 6' 11" 215 lbs USC $8,882,760
https://a.espncdn.com/i/headshots/nba/players/full/4432815.png
Isaiah Mobley15 F 23 6' 8" 238 lbs USC --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Pete Nance48 F 23 6' 11" 230 lbs North Carolina --
https://a.espncdn.com/i/headshots/nba/players/full/2968361.png
Raul Neto19 PG 31 6' 2" 180 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/2990969.png
Georges Niang SF 30 6' 7" 230 lbs Iowa State $8,800,000
https://a.espncdn.com/i/headshots/nba/players/full/4432822.png
Isaac Okoro35 SF 22 6' 5" 225 lbs Auburn $8,920,795
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Craig Porter Jr.40 G 23 6' 2" 186 lbs Wichita State --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Justin Powell G -- 6' 6" 197 lbs Washington State --
https://a.espncdn.com/i/headshots/nba/players/full/4011.png
Ricky Rubio13 PG 32 6' 2" 190 lbs -- $6,146,342
https://a.espncdn.com/i/headshots/nba/players/full/4065778.png
Max Strus31 SG 27 6' 5" 215 lbs DePaul $14,487,684
https://a.espncdn.com/i/headshots/nba/players/full/6474.png
Tristan Thompson13 C 32 6' 9" 254 lbs Texas --
https://a.espncdn.com/i/headshots/nba/players/full/3912848.png
Dean Wade32 PF 26 6' 9" 228 lbs Kansas State $5,709,877
Dallas Mavericks Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4432161.png
Greg Brown III36 PF 22 6' 8" 217 lbs Texas --
https://a.espncdn.com/i/headshots/nba/players/full/2326307.png
Seth Curry30 SG 33 6' 1" 190 lbs Duke $4,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Dexter Dennis38 G 24 6' 5" 210 lbs Texas A&M --
https://a.espncdn.com/i/headshots/nba/players/full/3945274.png
Luka Doncic77 PG 24 6' 7" 230 lbs -- $40,064,220
https://a.espncdn.com/i/headshots/nba/players/full/3102528.png
Dante Exum0 G 28 6' 5" 214 lbs -- $2,019,760
https://a.espncdn.com/i/headshots/nba/players/full/4432811.png
Josh Green8 SG 22 6' 6" 210 lbs Arizona $4,765,339
https://a.espncdn.com/i/headshots/nba/players/full/2528210.png
Tim Hardaway Jr.10 SF 31 6' 5" 205 lbs Michigan $17,897,728
https://a.espncdn.com/i/headshots/nba/players/full/4868423.png
Jaden Hardy1 G 21 6' 4" 198 lbs -- $1,719,864
https://a.espncdn.com/i/headshots/nba/players/full/2993370.png
Richaun Holmes20 F 29 6' 10" 240 lbs Bowling Green $12,046,020
https://a.espncdn.com/i/headshots/nba/players/full/6442.png
Kyrie Irving11 PG 31 6' 2" 191 lbs Duke $37,037,037
https://a.espncdn.com/i/headshots/nba/players/full/3936099.png
Derrick Jones Jr.55 SF 26 6' 6" 210 lbs UNLV $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/2960236.png
Maxi Kleber42 PF 31 6' 10" 240 lbs -- $11,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
A.J. Lawson9 G 23 6' 6" 185 lbs South Carolina --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Dereck Lively II2 C 19 7' 1" 234 lbs Duke $4,775,640
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Mike Miles Jr.14 G 21 6' 2" 205 lbs TCU --
https://a.espncdn.com/i/headshots/nba/players/full/6461.png
Markieff Morris88 PF 34 6' 9" 245 lbs Kansas --
https://a.espncdn.com/i/headshots/nba/players/full/2531367.png
Dwight Powell7 C 32 6' 10" 240 lbs Stanford $4,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Olivier-Maxence Prosper18 F 21 6' 8" 215 lbs Marquette $2,733,360
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jordan Walker16 SG 24 5' 11" 170 lbs UAB --
https://a.espncdn.com/i/headshots/nba/players/full/4397033.png
Joe Wieskamp37 SF 24 6' 6" 205 lbs Iowa $1,927,896
https://a.espncdn.com/i/headshots/nba/players/full/4066218.png
Grant Williams3 PF 24 6' 6" 236 lbs Tennessee $12,325,581
Denver Nuggets Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4431767.png
Christian Braun0 G 22 6' 6" 220 lbs Kansas $2,949,120
https://a.espncdn.com/i/headshots/nba/players/full/2581018.png
Kentavious Caldwell-Pope5 SG 30 6' 5" 204 lbs Georgia $14,704,938
https://a.espncdn.com/i/headshots/nba/players/full/4230557.png
Vlatko Cancar31 SF 26 6' 8" 236 lbs -- $2,234,359
https://a.espncdn.com/i/headshots/nba/players/full/4278585.png
Collin Gillespie21 G 24 6' 3" 190 lbs Villanova --
https://a.espncdn.com/i/headshots/nba/players/full/3064290.png
Aaron Gordon50 PF 28 6' 8" 235 lbs Arizona $22,266,182
https://a.espncdn.com/i/headshots/nba/players/full/2284101.png
Justin Holiday0 SF 34 6' 6" 180 lbs Washington $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4065731.png
Jay Huff C 25 7' 1" 240 lbs Virginia --
https://a.espncdn.com/i/headshots/nba/players/full/6443.png
Reggie Jackson7 PG 33 6' 2" 208 lbs Boston College $5,000,000
https://a.espncdn.com/i/headshots/nba/players/full/3112335.png
Nikola Jokic15 C 28 6' 11" 284 lbs -- $47,607,350
https://a.espncdn.com/i/headshots/nba/players/full/3442.png
DeAndre Jordan6 C 35 6' 11" 265 lbs Texas A&M $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4066240.png
Braxton Key F 26 6' 8" 225 lbs Virginia --
https://a.espncdn.com/i/headshots/nba/players/full/3936299.png
Jamal Murray27 PG 26 6' 4" 215 lbs Kentucky $33,833,400
https://a.espncdn.com/i/headshots/nba/players/full/4431690.png
Zeke Nnaji22 PF 22 6' 9" 240 lbs Arizona $4,306,281
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jalen Pickett G 23 6' 4" 202 lbs Siena $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4278104.png
Michael Porter Jr.1 SF 25 6' 10" 218 lbs Missouri $33,386,850
https://a.espncdn.com/i/headshots/nba/players/full/4305.png
Ish Smith14 PG 35 6' 0" 175 lbs Wake Forest --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Julian Strawther G 21 6' 7" 205 lbs Gonzaga $2,431,080
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Hunter Tyson F 23 6' 8" 215 lbs Clemson $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/4576087.png
Peyton Watson8 F 21 6' 8" 200 lbs UCLA $2,303,520
Detroit Pistons Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4277848.png
Marvin Bagley III35 PF 24 6' 10" 235 lbs Duke $12,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4395676.png
Buddy Boeheim27 F 23 6' 6" 205 lbs Syracuse --
https://a.espncdn.com/i/headshots/nba/players/full/3593.png
Bojan Bogdanovic44 SF 34 6' 7" 226 lbs -- $20,000,000
https://a.espncdn.com/i/headshots/nba/players/full/6429.png
Alec Burks5 SG 32 6' 6" 214 lbs Colorado $10,489,600
https://a.espncdn.com/i/headshots/nba/players/full/4871137.png
Malcolm Cazalon20 G -- 6' 6" 185 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4432166.png
Cade Cunningham2 PG 21 6' 7" 220 lbs Oklahoma State $11,055,240
https://a.espncdn.com/i/headshots/nba/players/full/4080610.png
Hamidou Diallo6 SG 25 6' 5" 202 lbs Kentucky --
https://a.espncdn.com/i/headshots/nba/players/full/4433621.png
Jalen Duren0 C 19 6' 11" 250 lbs Memphis $4,330,680
https://a.espncdn.com/i/headshots/nba/players/full/2528794.png
Joe Harris12 SF 32 6' 6" 220 lbs Virginia $19,928,571
https://a.espncdn.com/i/headshots/nba/players/full/4683024.png
Killian Hayes7 PG 22 6' 5" 195 lbs -- $7,413,955
https://a.espncdn.com/i/headshots/nba/players/full/4433218.png
Jaden Ivey23 PG 21 6' 4" 195 lbs Purdue $7,641,480
https://a.espncdn.com/i/headshots/nba/players/full/4277957.png
Isaiah Livers12 PF 25 6' 6" 232 lbs Michigan $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/2488826.png
Rodney McGruder17 G 32 6' 4" 205 lbs Kansas State --
https://a.espncdn.com/i/headshots/nba/players/full/3059310.png
Monte Morris22 PG 28 6' 2" 183 lbs Iowa State $9,800,926
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jared Rhoden8 G 24 6' 6" 210 lbs Seton Hall --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Marcus Sasser G 23 6' 2" 195 lbs Houston $2,623,680
https://a.espncdn.com/i/headshots/nba/players/full/4432810.png
Isaiah Stewart28 C 22 6' 8" 250 lbs Washington $5,266,713
https://a.espncdn.com/i/headshots/nba/players/full/4684742.png
Ausar Thompson F 20 6' 7" 190 lbs -- $7,977,480
https://a.espncdn.com/i/headshots/nba/players/full/4432808.png
James Wiseman13 C 22 7' 0" 240 lbs Memphis $12,119,400
Golden State Warriors Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/3975.png
Stephen Curry30 PG 35 6' 2" 185 lbs Davidson $51,915,615
https://a.espncdn.com/i/headshots/nba/players/full/6589.png
Draymond Green23 PF 33 6' 6" 230 lbs Michigan State $22,321,429
https://a.espncdn.com/i/headshots/nba/players/full/2327577.png
JaMychal Green1 F 33 6' 9" 227 lbs Alabama --
https://a.espncdn.com/i/headshots/nba/players/full/2386.png
Andre Iguodala9 F 39 6' 6" 215 lbs Arizona --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Trayce Jackson-Davis F 23 6' 9" 245 lbs Indiana $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/6446.png
Cory Joseph18 PG 32 6' 3" 200 lbs Texas $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4433247.png
Jonathan Kuminga00 PF 20 6' 7" 225 lbs -- $6,012,840
https://a.espncdn.com/i/headshots/nba/players/full/4066790.png
Anthony Lamb40 F 25 6' 6" 227 lbs Vermont --
https://a.espncdn.com/i/headshots/nba/players/full/3155535.png
Kevon Looney5 F 27 6' 9" 222 lbs UCLA $7,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4432171.png
Moses Moody4 SG 21 6' 5" 211 lbs Arkansas $3,918,480
https://a.espncdn.com/i/headshots/nba/players/full/2779.png
Chris Paul PG 38 6' 0" 175 lbs Wake Forest $30,800,000
https://a.espncdn.com/i/headshots/nba/players/full/3134903.png
Gary Payton II8 SG 30 6' 2" 195 lbs Oregon State $8,715,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Brandin Podziemski G 20 6' 4" 203 lbs Santa Clara $3,352,440
https://a.espncdn.com/i/headshots/nba/players/full/4431720.png
Lester Quinones25 G 22 6' 5" 205 lbs Memphis --
https://a.espncdn.com/i/headshots/nba/players/full/3032978.png
Dario Saric9 PF 29 6' 10" 225 lbs -- $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/6475.png
Klay Thompson11 SG 33 6' 6" 220 lbs Washington State $43,219,440
https://a.espncdn.com/i/headshots/nba/players/full/3059319.png
Andrew Wiggins22 SF 28 6' 7" 197 lbs Kansas $24,330,357
Houston Rockets Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/3415.png
D.J. Augustin4 G 35 5' 11" 183 lbs Texas --
https://a.espncdn.com/i/headshots/nba/players/full/3155526.png
Dillon Brooks SF 27 6' 7" 225 lbs Oregon $22,627,671
https://a.espncdn.com/i/headshots/nba/players/full/2991282.png
Willie Cauley-Stein2 C 30 7' 0" 240 lbs Kentucky --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Darius Days5 F 23 6' 7" 245 lbs LSU --
https://a.espncdn.com/i/headshots/nba/players/full/4433192.png
Tari Eason17 F 22 6' 8" 215 lbs LSU $3,527,160
https://a.espncdn.com/i/headshots/nba/players/full/4437244.png
Jalen Green4 SG 21 6' 4" 186 lbs -- $9,891,480
https://a.espncdn.com/i/headshots/nba/players/full/3209.png
Jeff Green32 PF 37 6' 8" 235 lbs Georgetown $9,600,000
https://a.espncdn.com/i/headshots/nba/players/full/3922230.png
Aaron Holiday G 26 6' 0" 185 lbs UCLA $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/5075393.png
Trevor Hudgins12 G 24 6' 0" 180 lbs Northwest Missouri State --
https://a.espncdn.com/i/headshots/nba/players/full/2579294.png
Frank Kaminsky33 PF 30 7' 0" 240 lbs Wisconsin --
https://a.espncdn.com/i/headshots/nba/players/full/3146557.png
Jock Landale C 27 6' 11" 255 lbs Saint Mary's $8,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4376.png
Boban Marjanovic51 C 35 7' 4" 290 lbs -- $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/4397140.png
Kevin Porter Jr.3 SG 23 6' 4" 203 lbs USC $15,860,000
https://a.espncdn.com/i/headshots/nba/players/full/4871144.png
Alperen Sengun28 C 21 6' 11" 243 lbs -- $3,536,280
https://a.espncdn.com/i/headshots/nba/players/full/4432639.png
Jabari Smith Jr.1 PF 20 6' 11" 220 lbs Auburn $9,326,520
https://a.espncdn.com/i/headshots/nba/players/full/3136777.png
Jae'Sean Tate8 SF 27 6' 5" 230 lbs Ohio State $6,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4684740.png
Amen Thompson F 20 6' 7" 214 lbs -- $8,809,320
https://a.espncdn.com/i/headshots/nba/players/full/2991230.png
Fred VanVleet SG 29 6' 0" 197 lbs Wichita State $40,806,300
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Cam Whitmore F 19 6' 7" 232 lbs Villanova $3,218,160
Indiana Pacers Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4065670.png
Bruce Brown SF 27 6' 4" 202 lbs Miami $22,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4433254.png
Kendall Brown10 SG 20 6' 8" 205 lbs Baylor --
https://a.espncdn.com/i/headshots/nba/players/full/4396993.png
Tyrese Haliburton0 PG 23 6' 5" 185 lbs Iowa State $5,808,435
https://a.espncdn.com/i/headshots/nba/players/full/2990984.png
Buddy Hield24 SG 30 6' 4" 220 lbs Oklahoma $19,279,841
https://a.espncdn.com/i/headshots/nba/players/full/3438.png
George Hill7 PG 37 6' 4" 188 lbs IUPUI --
https://a.espncdn.com/i/headshots/nba/players/full/3439.png
Serge Ibaka F 34 6' 10" 235 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4432170.png
Isaiah Jackson22 SF 21 6' 10" 206 lbs Kentucky $2,696,280
https://a.espncdn.com/i/headshots/nba/players/full/3999.png
James Johnson16 PF 36 6' 7" 240 lbs Wake Forest --
https://a.espncdn.com/i/headshots/nba/players/full/4683634.png
Bennedict Mathurin00 SG 21 6' 6" 195 lbs Arizona $6,916,080
https://a.espncdn.com/i/headshots/nba/players/full/2530530.png
T.J. McConnell9 PG 31 6' 1" 190 lbs Arizona $8,700,000
https://a.espncdn.com/i/headshots/nba/players/full/4395712.png
Andrew Nembhard2 PG 23 6' 5" 193 lbs Gonzaga $2,131,905
https://a.espncdn.com/i/headshots/nba/players/full/4396909.png
Aaron Nesmith23 SF 23 6' 5" 215 lbs Vanderbilt $5,634,257
https://a.espncdn.com/i/headshots/nba/players/full/4277883.png
Jordan Nwora13 SF 25 6' 8" 225 lbs Louisville $3,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Ben Sheppard G 22 6' 6" 190 lbs Belmont $2,537,160
https://a.espncdn.com/i/headshots/nba/players/full/4397189.png
Jalen Smith25 PF 23 6' 9" 215 lbs Maryland $5,604,192
https://a.espncdn.com/i/headshots/nba/players/full/2451037.png
Daniel Theis27 C 31 6' 8" 245 lbs -- $9,108,387
https://a.espncdn.com/i/headshots/nba/players/full/4278355.png
Obi Toppin1 PF 25 6' 9" 220 lbs Dayton $6,803,012
https://a.espncdn.com/i/headshots/nba/players/full/3133628.png
Myles Turner33 C 27 6' 11" 250 lbs Texas $20,975,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jarace Walker F 20 6' 8" 240 lbs Houston $6,059,520
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Isaiah Wong G 22 6' 4" 184 lbs Miami --
https://a.espncdn.com/i/headshots/nba/players/full/2991011.png
Gabe York32 G 30 6' 3" 190 lbs Arizona --
LA Clippers Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/3416.png
Nicolas Batum33 PF 34 6' 8" 230 lbs -- $11,710,818
https://a.espncdn.com/i/headshots/nba/players/full/4432162.png
Brandon Boston Jr.4 SG 21 6' 6" 188 lbs Kentucky $1,836,096
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Kobe Brown G 23 6' 8" 250 lbs Missouri $2,413,320
https://a.espncdn.com/i/headshots/nba/players/full/4066387.png
Amir Coffey7 SG 26 6' 7" 210 lbs Minnesota $3,666,667
https://a.espncdn.com/i/headshots/nba/players/full/2490620.png
Robert Covington23 PF 32 6' 7" 209 lbs Tennessee State $11,692,308
https://a.espncdn.com/i/headshots/nba/players/full/4433249.png
Moussa Diabate25 F 21 6' 11" 210 lbs Michigan --
https://a.espncdn.com/i/headshots/nba/players/full/4251.png
Paul George13 F 33 6' 8" 220 lbs Fresno State $45,640,084
https://a.espncdn.com/i/headshots/nba/players/full/4592492.png
Bones Hyland5 PG 23 6' 2" 169 lbs VCU $2,306,400
https://a.espncdn.com/i/headshots/nba/players/full/6450.png
Kawhi Leonard2 SF 32 6' 7" 225 lbs San Diego State $45,640,084
https://a.espncdn.com/i/headshots/nba/players/full/3907823.png
Terance Mann14 SG 26 6' 5" 215 lbs Florida State $10,576,923
https://a.espncdn.com/i/headshots/nba/players/full/4702882.png
KJ Martin F 22 6' 7" 215 lbs -- $1,930,681
https://a.espncdn.com/i/headshots/nba/players/full/4431828.png
Kenyon Martin Jr. F 22 6' 7" 215 lbs -- $1,930,681
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jordan Miller G 23 6' 7" 195 lbs Miami --
https://a.espncdn.com/i/headshots/nba/players/full/3914075.png
Xavier Moon15 PG 28 6' 0" 165 lbs Morehead State --
https://a.espncdn.com/i/headshots/nba/players/full/6462.png
Marcus Morris Sr.8 SF 34 6' 8" 218 lbs Kansas $17,116,279
https://a.espncdn.com/i/headshots/nba/players/full/2488653.png
Mason Plumlee44 C 33 7' 0" 254 lbs Duke $5,000,000
https://a.espncdn.com/i/headshots/nba/players/full/2595516.png
Norman Powell24 G 30 6' 3" 215 lbs UCLA $18,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4397916.png
Jason Preston17 PG 24 6' 3" 181 lbs Ohio $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/3468.png
Russell Westbrook0 PG 34 6' 3" 200 lbs UCLA $3,835,738
https://a.espncdn.com/i/headshots/nba/players/full/4017837.png
Ivica Zubac40 C 26 7' 0" 240 lbs -- $10,933,333
Los Angeles Lakers Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Damion Baugh G 23 6' 4" 194 lbs TCU --
https://a.espncdn.com/i/headshots/nba/players/full/4432582.png
Max Christie10 G 20 6' 5" 190 lbs Michigan State $1,719,864
https://a.espncdn.com/i/headshots/nba/players/full/6583.png
Anthony Davis3 PF 30 6' 10" 253 lbs Kentucky $40,600,080
https://a.espncdn.com/i/headshots/nba/players/full/4066264.png
Wenyen Gabriel35 F 26 6' 9" 205 lbs Kentucky --
https://a.espncdn.com/i/headshots/nba/players/full/4066648.png
Rui Hachimura28 PF 25 6' 8" 230 lbs Gonzaga $15,740,741
https://a.espncdn.com/i/headshots/nba/players/full/4397077.png
Jaxson Hayes10 C 23 7' 0" 220 lbs Texas $2,165,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jalen Hood-Schifino G 20 6' 6" 215 lbs Indiana $3,695,040
https://a.espncdn.com/i/headshots/nba/players/full/1966.png
LeBron James23 SF 38 6' 9" 250 lbs -- $47,607,350
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Maxwell Lewis F 21 6' 7" 205 lbs Pepperdine $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/4431785.png
Scotty Pippen Jr.14 G 22 6' 1" 185 lbs Vanderbilt --
https://a.espncdn.com/i/headshots/nba/players/full/2990962.png
Taurean Prince PF 29 6' 6" 218 lbs Baylor $7,455,000
https://a.espncdn.com/i/headshots/nba/players/full/4066457.png
Austin Reaves15 SG 25 6' 5" 197 lbs Oklahoma $12,015,150
https://a.espncdn.com/i/headshots/nba/players/full/4395627.png
Cam Reddish SF 24 6' 8" 217 lbs Duke $2,165,000
https://a.espncdn.com/i/headshots/nba/players/full/3136776.png
D'Angelo Russell1 PG 27 6' 4" 193 lbs Ohio State $17,307,693
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Vincent Valerio-Bodon SF 22 6' 9" 200 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4278077.png
Jarred Vanderbilt2 PF 24 6' 8" 214 lbs Kentucky $4,698,000
https://a.espncdn.com/i/headshots/nba/players/full/3137259.png
Gabe Vincent PG 27 6' 3" 200 lbs UC Santa Barbara $10,500,000
https://a.espncdn.com/i/headshots/nba/players/full/3058254.png
Christian Wood F 27 6' 9" 214 lbs UNLV $2,709,849
Memphis Grizzlies Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/2991235.png
Steven Adams4 C 30 6' 11" 265 lbs Pittsburgh $12,600,000
https://a.espncdn.com/i/headshots/nba/players/full/4593125.png
Santi Aldama7 PF 22 6' 11" 215 lbs Loyola (MD) $2,194,200
https://a.espncdn.com/i/headshots/nba/players/full/4066320.png
Desmond Bane22 SG 25 6' 5" 215 lbs TCU $3,845,083
https://a.espncdn.com/i/headshots/nba/players/full/5148538.png
Tarik Biberovic F 22 6' 7" 218 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4432168.png
Josh Christopher SG 21 6' 4" 215 lbs Arizona State $2,485,200
https://a.espncdn.com/i/headshots/nba/players/full/3906665.png
Brandon Clarke15 PF 27 6' 8" 215 lbs Gonzaga $12,500,000
https://a.espncdn.com/i/headshots/nba/players/full/4278378.png
Jacob Gilyard0 G 25 5' 9" 160 lbs Richmond --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Gregory Jackson II F 18 6' 9" 215 lbs South Carolina --
https://a.espncdn.com/i/headshots/nba/players/full/4277961.png
Jaren Jackson Jr.13 PF 24 6' 11" 242 lbs Michigan State $27,102,202
https://a.espncdn.com/i/headshots/nba/players/full/3913174.png
Luke Kennard10 SG 27 6' 5" 206 lbs Duke $15,418,363
https://a.espncdn.com/i/headshots/nba/players/full/3134932.png
John Konchar46 SG 27 6' 5" 210 lbs -- $2,400,000
https://a.espncdn.com/i/headshots/nba/players/full/4592691.png
Jake LaRavia3 F 21 6' 7" 235 lbs Wake Forest $3,199,920
https://a.espncdn.com/i/headshots/nba/players/full/4585610.png
Kenneth Lofton Jr.6 F 21 6' 8" 275 lbs Louisiana Tech $1,719,864
https://a.espncdn.com/i/headshots/nba/players/full/4279888.png
Ja Morant12 PG 24 6' 3" 174 lbs Murray State $34,005,250
https://a.espncdn.com/i/headshots/nba/players/full/4593041.png
David Roddy27 F 22 6' 5" 255 lbs Colorado State $2,718,240
https://a.espncdn.com/i/headshots/nba/players/full/3456.png
Derrick Rose PG 34 6' 3" 200 lbs Memphis $3,196,448
https://a.espncdn.com/i/headshots/nba/players/full/2990992.png
Marcus Smart PG 29 6' 4" 220 lbs Oklahoma State $18,833,712
https://a.espncdn.com/i/headshots/nba/players/full/4277964.png
Xavier Tillman2 F 24 6' 8" 245 lbs Michigan State $1,930,681
https://a.espncdn.com/i/headshots/nba/players/full/4715198.png
Isaiah Todd PF 21 6' 9" 219 lbs -- $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4433137.png
Ziaire Williams8 F 22 6' 9" 185 lbs Stanford $4,810,200
https://a.espncdn.com/i/headshots/nba/players/full/4397227.png
Vince Williams Jr.5 G 23 6' 6" 205 lbs VCU --
Miami Heat Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4066261.png
Bam Adebayo13 C 26 6' 9" 255 lbs Kentucky $32,600,060
https://a.espncdn.com/i/headshots/nba/players/full/3934723.png
Thomas Bryant13 C 26 6' 10" 248 lbs Indiana $2,528,233
https://a.espncdn.com/i/headshots/nba/players/full/6430.png
Jimmy Butler22 SF 34 6' 7" 230 lbs Marquette $45,183,960
https://a.espncdn.com/i/headshots/nba/players/full/4278572.png
Jamal Cain8 F 24 6' 7" 191 lbs Oakland --
https://a.espncdn.com/i/headshots/nba/players/full/2184.png
Udonis Haslem40 PF 43 6' 8" 235 lbs Florida --
https://a.espncdn.com/i/headshots/nba/players/full/4395725.png
Tyler Herro14 PG 23 6' 5" 195 lbs Kentucky $27,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4291678.png
Haywood Highsmith24 F 26 6' 4" 220 lbs Wheeling Jesuit $1,902,137
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jaime Jaquez Jr. G 22 6' 7" 225 lbs UCLA $3,510,600
https://a.espncdn.com/i/headshots/nba/players/full/4997528.png
Nikola Jovic5 F 20 6' 10" 205 lbs -- $2,352,000
https://a.espncdn.com/i/headshots/nba/players/full/3449.png
Kevin Love42 PF 35 6' 8" 251 lbs UCLA $3,835,738
https://a.espncdn.com/i/headshots/nba/players/full/3012.png
Kyle Lowry7 PG 37 6' 0" 196 lbs Villanova $29,682,540
https://a.espncdn.com/i/headshots/nba/players/full/3138160.png
Caleb Martin16 SF 27 6' 5" 205 lbs Nevada $6,802,950
https://a.espncdn.com/i/headshots/nba/players/full/2581190.png
Josh Richardson SG 30 6' 6" 200 lbs Tennessee $2,891,467
https://a.espncdn.com/i/headshots/nba/players/full/3157465.png
Duncan Robinson55 F 29 6' 7" 215 lbs Michigan $18,154,000
https://a.espncdn.com/i/headshots/nba/players/full/4593049.png
Orlando Robinson25 C 23 7' 0" 235 lbs Fresno State $1,801,769
https://a.espncdn.com/i/headshots/nba/players/full/4066993.png
Dru Smith G 25 6' 3" 203 lbs Missouri --
Milwaukee Bucks Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/3135045.png
Grayson Allen12 SG 27 6' 4" 198 lbs Duke $8,925,000
https://a.espncdn.com/i/headshots/nba/players/full/3032977.png
Giannis Antetokounmpo34 PF 28 6' 11" 243 lbs -- $45,640,084
https://a.espncdn.com/i/headshots/nba/players/full/3102533.png
Thanasis Antetokounmpo43 SF 31 6' 7" 219 lbs -- $2,019,706
https://a.espncdn.com/i/headshots/nba/players/full/3907822.png
Malik Beasley5 SG 26 6' 4" 187 lbs Florida State $2,019,760
https://a.espncdn.com/i/headshots/nba/players/full/4432179.png
MarJon Beauchamp0 F 22 6' 7" 202 lbs -- $2,609,400
https://a.espncdn.com/i/headshots/nba/players/full/2578239.png
Pat Connaughton24 SG 30 6' 5" 209 lbs Notre Dame $9,423,869
https://a.espncdn.com/i/headshots/nba/players/full/6581.png
Jae Crowder99 PF 33 6' 6" 235 lbs Marquette $2,019,760
https://a.espncdn.com/i/headshots/nba/players/full/3423.png
Goran Dragic31 PG 37 6' 3" 190 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4397475.png
AJ Green20 G 23 6' 5" 207 lbs Northern Iowa $1,801,769
https://a.espncdn.com/i/headshots/nba/players/full/3995.png
Jrue Holiday21 PG 33 6' 4" 205 lbs UCLA $36,861,707
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Andre Jackson Jr. G 21 6' 6" 200 lbs UConn $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/6605.png
Meyers Leonard3 C 31 7' 0" 260 lbs Illinois --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Chris Livingston F 19 6' 6" 220 lbs Kentucky $1,119,563
https://a.espncdn.com/i/headshots/nba/players/full/3448.png
Brook Lopez11 C 35 7' 0" 282 lbs Stanford $25,000,000
https://a.espncdn.com/i/headshots/nba/players/full/3447.png
Robin Lopez C 35 7' 0" 281 lbs Stanford --
https://a.espncdn.com/i/headshots/nba/players/full/6609.png
Khris Middleton22 SF 32 6' 7" 222 lbs Texas A&M $29,320,988
https://a.espncdn.com/i/headshots/nba/players/full/3064482.png
Bobby Portis9 F 28 6' 10" 250 lbs Arkansas $11,710,818
https://a.espncdn.com/i/headshots/nba/players/full/4277887.png
Lindell Wigginton28 G 25 6' 1" 189 lbs Iowa State --
Minnesota Timberwolves Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4278039.png
Nickeil Alexander-Walker9 SG 25 6' 5" 205 lbs Virginia Tech $4,687,500
https://a.espncdn.com/i/headshots/nba/players/full/2993874.png
Kyle Anderson5 SF 30 6' 9" 230 lbs UCLA $9,219,512
https://a.espncdn.com/i/headshots/nba/players/full/4278508.png
Troy Brown Jr. SF 24 6' 6" 215 lbs Oregon $4,000,000
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jaylen Clark G 21 6' 5" 205 lbs UCLA --
https://a.espncdn.com/i/headshots/nba/players/full/3195.png
Mike Conley10 PG 35 6' 1" 175 lbs Ohio State $24,360,000
https://a.espncdn.com/i/headshots/nba/players/full/4594268.png
Anthony Edwards5 SG 22 6' 4" 225 lbs Georgia $9,219,512
https://a.espncdn.com/i/headshots/nba/players/full/4277951.png
Luka Garza55 C 24 6' 10" 243 lbs Iowa --
https://a.espncdn.com/i/headshots/nba/players/full/3032976.png
Rudy Gobert27 C 31 7' 1" 258 lbs -- $41,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4578893.png
Vit Krejci F 23 6' 7" 195 lbs -- $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4431671.png
Jaden McDaniels3 PF 22 6' 9" 185 lbs Washington $3,901,399
https://a.espncdn.com/i/headshots/nba/players/full/3134916.png
Jordan McLaughlin6 PG 27 6' 0" 185 lbs USC $2,320,000
https://a.espncdn.com/i/headshots/nba/players/full/3915195.png
Shake Milton SG 26 6' 5" 205 lbs SMU $5,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4687718.png
Josh Minott8 F 21 6' 8" 205 lbs Memphis $1,719,864
https://a.espncdn.com/i/headshots/nba/players/full/4592187.png
Wendell Moore Jr.7 G 22 6' 5" 215 lbs Duke $2,421,720
https://a.espncdn.com/i/headshots/nba/players/full/4432176.png
Daishen Nix15 SG 21 6' 4" 226 lbs -- $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4278541.png
Jaylen Nowell4 SG 24 6' 4" 201 lbs Washington --
https://a.espncdn.com/i/headshots/nba/players/full/4396971.png
Naz Reid11 C 24 6' 9" 264 lbs LSU $12,950,400
https://a.espncdn.com/i/headshots/nba/players/full/6617.png
Austin Rivers25 SG 31 6' 4" 200 lbs Duke --
https://a.espncdn.com/i/headshots/nba/players/full/3908336.png
Matt Ryan37 F 26 6' 6" 215 lbs Chattanooga --
https://a.espncdn.com/i/headshots/nba/players/full/3136195.png
Karl-Anthony Towns32 C 27 7' 0" 248 lbs Kentucky $36,016,200
New Orleans Pelicans Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4277869.png
Jose Alvarado15 PG 25 6' 0" 179 lbs Georgia Tech $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4869342.png
Dyson Daniels11 G 20 6' 8" 200 lbs -- $5,784,120
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Jordan Hawkins24 G 21 6' 5" 195 lbs UConn $4,310,160
https://a.espncdn.com/i/headshots/nba/players/full/2999409.png
Willy Hernangomez9 C 29 6' 11" 250 lbs -- $2,559,942
https://a.espncdn.com/i/headshots/nba/players/full/3913176.png
Brandon Ingram14 SF 26 6' 8" 190 lbs Duke $33,833,400
https://a.espncdn.com/i/headshots/nba/players/full/4277813.png
Herbert Jones5 SF 24 6' 7" 206 lbs Alabama $12,015,150
https://a.espncdn.com/i/headshots/nba/players/full/4395698.png
Kira Lewis Jr.13 PG 22 6' 1" 170 lbs Alabama $5,722,116
https://a.espncdn.com/i/headshots/nba/players/full/4432821.png
E.J. Liddell32 F 22 6' 6" 240 lbs Ohio State $1,801,769
https://a.espncdn.com/i/headshots/nba/players/full/4278594.png
Naji Marshall8 SF 25 6' 7" 220 lbs Xavier $1,930,681
https://a.espncdn.com/i/headshots/nba/players/full/2490149.png
CJ McCollum3 SG 32 6' 3" 190 lbs Lehigh $35,802,469
https://a.espncdn.com/i/headshots/nba/players/full/4397688.png
Trey Murphy III25 SG 23 6' 8" 206 lbs Virginia $3,359,280
https://a.espncdn.com/i/headshots/nba/players/full/2580365.png
Larry Nance Jr.22 PF 30 6' 8" 245 lbs Wyoming $10,375,000
https://a.espncdn.com/i/headshots/nba/players/full/4431811.png
Dereon Seabron0 G 23 6' 5" 183 lbs NC State --
https://a.espncdn.com/i/headshots/nba/players/full/6477.png
Jonas Valanciunas17 C 31 6' 11" 265 lbs -- $15,435,000
https://a.espncdn.com/i/headshots/nba/players/full/4395628.png
Zion Williamson1 PF 23 6' 6" 284 lbs Duke $34,005,250
https://a.espncdn.com/i/headshots/nba/players/full/2579258.png
Cody Zeller44 C 30 6' 11" 240 lbs Indiana $2,019,706
New York Knicks Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/4395625.png
RJ Barrett9 SG 23 6' 6" 214 lbs Duke $23,883,929
https://a.espncdn.com/i/headshots/nba/players/full/3934672.png
Jalen Brunson11 PG 27 6' 1" 190 lbs Villanova $26,346,666
https://a.espncdn.com/i/headshots/nba/players/full/3934673.png
Donte DiVincenzo SG 26 6' 4" 203 lbs Villanova $10,960,000
https://a.espncdn.com/i/headshots/nba/players/full/6588.png
Evan Fournier13 SG 30 6' 6" 205 lbs -- $18,857,143
https://a.espncdn.com/i/headshots/nba/players/full/4397014.png
Quentin Grimes6 SG 23 6' 4" 210 lbs Houston $2,385,720
https://a.espncdn.com/i/headshots/nba/players/full/3062679.png
Josh Hart3 SG 28 6' 4" 215 lbs Villanova $12,960,000
https://a.espncdn.com/i/headshots/nba/players/full/4222252.png
Isaiah Hartenstein55 C 25 7' 0" 250 lbs -- $9,245,121
https://a.espncdn.com/i/headshots/nba/players/full/3913220.png
DaQuan Jeffries8 SG 26 6' 5" 225 lbs Western Texas Coll. (J.C.) $2,065,585
https://a.espncdn.com/i/headshots/nba/players/full/4432645.png
Trevor Keels3 G 20 6' 4" 221 lbs Duke --
https://a.espncdn.com/i/headshots/nba/players/full/4067325.png
Nathan Knight F 26 6' 8" 253 lbs William & Mary --
https://a.espncdn.com/i/headshots/nba/players/full/5149711.png
Jaylen Martin G 19 6' 6" 196 lbs -- --
https://a.espncdn.com/i/headshots/nba/players/full/4431823.png
Miles McBride2 PG 23 6' 2" 195 lbs West Virginia $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4395724.png
Immanuel Quickley5 SG 24 6' 3" 190 lbs Kentucky $4,171,548
https://a.espncdn.com/i/headshots/nba/players/full/3064514.png
Julius Randle30 PF 28 6' 8" 250 lbs Kentucky $28,226,880
https://a.espncdn.com/i/headshots/nba/players/full/4351852.png
Mitchell Robinson23 C 25 7' 0" 240 lbs -- $15,681,818
https://a.espncdn.com/i/headshots/nba/players/full/4066392.png
Isaiah Roby18 F 25 6' 8" 230 lbs Nebraska $2,066,585
https://a.espncdn.com/i/headshots/nba/players/full/4277922.png
Jericho Sims45 C 24 6' 9" 250 lbs Texas $1,927,896
https://a.espncdn.com/i/headshots/nba/players/full/4397235.png
Duane Washington Jr.4 PG 23 6' 2" 197 lbs Ohio State --
https://a.espncdn.com/i/headshots/nba/players/full/3906786.png
Dylan Windler SG 26 6' 7" 196 lbs Belmont --
Oklahoma City Thunder Roster 2023-24
NAME POS AGE HT WT COLLEGE SALARY
https://a.espncdn.com/i/headshots/nba/players/full/6426.png
Davis Bertans SF 30 6' 10" 225 lbs -- $17,000,000
https://a.espncdn.com/i/headshots/nba/players/full/4997526.png
Ousmane Dieng13 F 20 6' 10" 220 lbs -- $4,798,440
https://a.espncdn.com/i/headshots/nba/players/full/4397020.png
Luguentz Dort5 G 24 6' 4" 220 lbs Arizona State $15,277,778
https://a.espncdn.com/i/headshots/nba/players/full/4871145.png
Josh Giddey3 SG 20 6' 8" 216 lbs -- $6,587,040
https://a.espncdn.com/i/headshots/nba/players/full/4278073.png
Shai Gilgeous-Alexander2 PG 25 6' 6" 195 lbs Kentucky $33,386,850
https://a.espncdn.com/i/headshots/nba/players/full/4433255.png
Chet Holmgren7 PF 21 7' 1" 208 lbs Gonzaga $10,386,000
https://a.espncdn.com/i/headshots/nba/players/full/4395702.png
Isaiah Joe11 SG 24 6' 3" 165 lbs Arkansas $1,997,238
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Keyontae Johnson18 F 23 6' 6" 235 lbs Kansas State --
https://a.espncdn.com/i/headshots/nba/players/full/4432819.png
Tre Mann23 PG 22 6' 3" 184 lbs Florida $3,191,280
https://a.espncdn.com/i/headshots/nba/players/full/3102532.png
Vasilije Micic98 G 29 6' 5" 203 lbs -- $7,723,000
https://a.espncdn.com/i/headshots/nba/players/full/2527963.png
Victor Oladipo4 SG 31 6' 4" 213 lbs Indiana $9,450,000
https://a.espncdn.com/i/headshots/nba/players/full/4683018.png
Aleksej Pokusevski17 F 21 7' 0" 190 lbs -- $5,009,633
https://a.espncdn.com/i/headshots/nba/players/full/4432813.png
Jeremiah Robinson-Earl50 PF 22 6' 9" 240 lbs Villanova $1,900,000
https://a.espncdn.com/i/headshots/nba/players/full/4278046.png
Olivier Sarr30 C 24 7' 0" 240 lbs Kentucky --
https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
Cason Wallace22 G 19 6' 4" 193 lbs Kentucky $5,291,000
https://a.espncdn.com/i/headshots/nba/players/full/4683749.png
TyTy Washington Jr. G 21 6' 3" 195 lbs Kentucky $2,320,440
https://a.espncdn.com/i/headshots/nba/players/full/4066317.png
Lindy Waters III12 F 26 6' 6" 210 lbs Oklahoma State $1,927,986
https://a.espncdn.com/i/headshots/nba/players/full/4065653.png
Jack White F 26 6' 7" 225 lbs Duke $1,801,769
https://a.espncdn.com/i/headshots/nba/players/full/4397183.png
Aaron Wiggins21 SG 24 6' 5" 190 lbs Maryland $1,836,096
https://a.espncdn.com/i/headshots/nba/players/full/4593803.png
Jalen Williams8 F 22 6' 6" 218 lbs Santa Clara $4,558,680
https://a.espncdn.com/i/headshots/nba/players/full/4432823.png
Jaylin Williams6 F 21 6' 10" 245 lbs Arkansas $2,000,000
  1996. https://a.espncdn.com/i/headshots/nba/players/full/3133626.png
  1997. Kenrich Williams34 SF 28 6' 6" 210 lbs TCU $6,175,000
  1998. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  1999. KJ Williams12 F 24 6' 10" 250 lbs LSU --
  2000. Orlando Magic Roster 2023-24
  2001. NAME POS AGE HT WT COLLEGE SALARY
  2002. https://a.espncdn.com/i/headshots/nba/players/full/4432809.png
  2003. Cole Anthony50 PG 23 6' 3" 185 lbs North Carolina $5,539,771
  2004. https://a.espncdn.com/i/headshots/nba/players/full/4432573.png
  2005. Paolo Banchero5 PF 20 6' 10" 250 lbs Duke $11,608,080
  2006. https://a.espncdn.com/i/headshots/nba/players/full/4348700.png
  2007. Goga Bitadze88 C 24 6' 11" 250 lbs -- $2,062,585
  2008. https://a.espncdn.com/i/headshots/nba/players/full/4712849.png
  2009. Anthony Black G 19 6' 7" 200 lbs Arkansas $7,245,480
  2010. https://a.espncdn.com/i/headshots/nba/players/full/4277847.png
  2011. Wendell Carter Jr.34 C 24 6' 10" 270 lbs Duke $13,050,000
  2012. https://a.espncdn.com/i/headshots/nba/players/full/2596108.png
  2013. Michael Carter-Williams11 G 31 6' 5" 190 lbs Syracuse --
  2014. https://a.espncdn.com/i/headshots/nba/players/full/4066636.png
  2015. Markelle Fultz20 PG 25 6' 4" 209 lbs Washington $17,000,000
  2016. https://a.espncdn.com/i/headshots/nba/players/full/2999547.png
  2017. Gary Harris14 SG 29 6' 4" 210 lbs Michigan State $13,000,000
  2018. https://a.espncdn.com/i/headshots/nba/players/full/4067858.png
  2019. Kevon Harris7 G 26 6' 6" 220 lbs Stephen F. Austin --
  2020. https://a.espncdn.com/i/headshots/nba/players/full/4433623.png
  2021. Caleb Houstan2 G 20 6' 8" 210 lbs Michigan $2,000,000
  2022. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2023. Jett Howard G 20 6' 8" 215 lbs Michigan $5,026,800
  2024. https://a.espncdn.com/i/headshots/nba/players/full/2968436.png
  2025. Joe Ingles SG 35 6' 8" 220 lbs -- $11,000,000
  2026. https://a.espncdn.com/i/headshots/nba/players/full/4065654.png
  2027. Jonathan Isaac1 PF 25 6' 10" 230 lbs Florida State $17,400,000
  2028. https://a.espncdn.com/i/headshots/nba/players/full/4397071.png
  2029. Mac McClung9 SG 24 6' 2" 185 lbs Texas Tech --
  2030. https://a.espncdn.com/i/headshots/nba/players/full/4278052.png
  2031. Chuma Okeke3 PF 25 6' 8" 229 lbs Auburn $5,266,713
  2032. https://a.espncdn.com/i/headshots/nba/players/full/4066851.png
  2033. Miye Oni SG 26 6' 5" 206 lbs Yale --
  2034. https://a.espncdn.com/i/headshots/nba/players/full/4409222.png
  2035. Trevelin Queen12 SG 26 6' 6" 190 lbs New Mexico State --
  2036. https://a.espncdn.com/i/headshots/nba/players/full/3918423.png
  2037. Admiral Schofield25 SF 26 6' 5" 241 lbs Tennessee --
  2038. https://a.espncdn.com/i/headshots/nba/players/full/4432165.png
  2039. Jalen Suggs4 SG 22 6' 5" 205 lbs Gonzaga $7,252,080
  2040. https://a.espncdn.com/i/headshots/nba/players/full/4566434.png
  2041. Franz Wagner22 SF 22 6' 10" 220 lbs Michigan $5,508,720
  2042. https://a.espncdn.com/i/headshots/nba/players/full/3150844.png
  2043. Moritz Wagner21 C 26 6' 11" 245 lbs Michigan $8,000,000
  2044. https://a.espncdn.com/i/headshots/nba/players/full/4397040.png
  2045. Brandon Williams11 G 23 6' 2" 190 lbs Arizona --
  2046. Philadelphia 76ers Roster 2023-24
  2047. NAME POS AGE HT WT COLLEGE SALARY
  2048. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2049. Marcus Bagley38 F 21 6' 8" 215 lbs Arizona State --
  2050. https://a.espncdn.com/i/headshots/nba/players/full/4277919.png
  2051. Mo Bamba C 25 7' 0" 231 lbs Texas $2,019,706
  2052. https://a.espncdn.com/i/headshots/nba/players/full/3964.png
  2053. Patrick Beverley21 PG 35 6' 2" 180 lbs Arkansas $2,019,706
  2054. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2055. Ricky Council IV37 G 22 6' 6" 205 lbs Arkansas --
  2056. https://a.espncdn.com/i/headshots/nba/players/full/2580913.png
  2057. Dewayne Dedmon14 C 34 6' 10" 245 lbs USC --
  2058. https://a.espncdn.com/i/headshots/nba/players/full/4397107.png
  2059. David Duke Jr. G 23 6' 4" 207 lbs Providence --
  2060. https://a.espncdn.com/i/headshots/nba/players/full/3059318.png
  2061. Joel Embiid21 C 29 7' 0" 280 lbs Kansas $46,900,000
  2062. https://a.espncdn.com/i/headshots/nba/players/full/3988.png
  2063. Danny Green F 36 6' 6" 215 lbs North Carolina --
  2064. https://a.espncdn.com/i/headshots/nba/players/full/3992.png
  2065. James Harden1 SG 34 6' 5" 220 lbs Arizona State $35,640,000
  2066. https://a.espncdn.com/i/headshots/nba/players/full/2991055.png
  2067. Montrezl Harrell5 C 29 6' 7" 240 lbs Louisville $2,019,760
  2068. https://a.espncdn.com/i/headshots/nba/players/full/6440.png
  2069. Tobias Harris12 PF 31 6' 7" 226 lbs Tennessee $39,270,150
  2070. https://a.espncdn.com/i/headshots/nba/players/full/2991155.png
  2071. Danuel House Jr.25 SF 30 6' 6" 220 lbs Texas A&M $4,310,250
  2072. https://a.espncdn.com/i/headshots/nba/players/full/4397103.png
  2073. Louis King23 SF 24 6' 7" 205 lbs Oregon --
  2074. https://a.espncdn.com/i/headshots/nba/players/full/3929325.png
  2075. Furkan Korkmaz30 SG 26 6' 7" 202 lbs -- $5,370,370
  2076. https://a.espncdn.com/i/headshots/nba/players/full/4431678.png
  2077. Tyrese Maxey0 PG 22 6' 2" 200 lbs Kentucky $4,343,920
  2078. https://a.espncdn.com/i/headshots/nba/players/full/4066436.png
  2079. De'Anthony Melton8 SG 25 6' 2" 200 lbs USC $8,000,000
  2080. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2081. Filip Petrusev PF 23 6' 11" 234 lbs Gonzaga $1,119,563
  2082. https://a.espncdn.com/i/headshots/nba/players/full/4278562.png
  2083. Paul Reed44 SF 24 6' 9" 210 lbs DePaul $7,723,000
  2084. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2085. Terquavion Smith26 G 20 6' 4" 165 lbs NC State --
  2086. https://a.espncdn.com/i/headshots/nba/players/full/4432164.png
  2087. Jaden Springer11 G 20 6' 4" 205 lbs Tennessee $2,226,240
  2088. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2089. Azuolas Tubelis29 F 21 6' 11" 245 lbs Arizona --
  2090. https://a.espncdn.com/i/headshots/nba/players/full/3033.png
  2091. P.J. Tucker17 PF 38 6' 5" 245 lbs Texas $11,014,500
  2092. Phoenix Suns Roster 2023-24
  2093. NAME POS AGE HT WT COLLEGE SALARY
  2094. https://a.espncdn.com/i/headshots/nba/players/full/4278129.png
  2095. Deandre Ayton22 C 25 7' 0" 250 lbs Arizona $32,459,438
  2096. https://a.espncdn.com/i/headshots/nba/players/full/4066299.png
  2097. Udoka Azubuike C 24 6' 10" 280 lbs Kansas --
  2098. https://a.espncdn.com/i/headshots/nba/players/full/3136779.png
  2099. Keita Bates-Diop SF 27 6' 8" 229 lbs Ohio State $2,364,614
  2100. https://a.espncdn.com/i/headshots/nba/players/full/6580.png
  2101. Bradley Beal SG 30 6' 4" 207 lbs Florida $46,741,590
  2102. https://a.espncdn.com/i/headshots/nba/players/full/6427.png
  2103. Bismack Biyombo18 C 31 6' 8" 255 lbs -- --
  2104. https://a.espncdn.com/i/headshots/nba/players/full/4397102.png
  2105. Bol Bol C 23 7' 2" 220 lbs Oregon $2,019,706
  2106. https://a.espncdn.com/i/headshots/nba/players/full/3136193.png
  2107. Devin Booker1 SG 26 6' 5" 206 lbs Kentucky $36,016,200
  2108. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2109. Toumani Camara F 23 6' 8" 220 lbs Dayton $1,119,563
  2110. https://a.espncdn.com/i/headshots/nba/players/full/3202.png
  2111. Kevin Durant35 PF 34 6' 10" 240 lbs Texas $47,649,433
  2112. https://a.espncdn.com/i/headshots/nba/players/full/3914285.png
  2113. Drew Eubanks PF 26 6' 10" 245 lbs Oregon State $2,346,614
  2114. https://a.espncdn.com/i/headshots/nba/players/full/4278402.png
  2115. Jordan Goodwin G 24 6' 3" 200 lbs Saint Louis $1,927,896
  2116. https://a.espncdn.com/i/headshots/nba/players/full/2595209.png
  2117. Damion Lee10 SG 30 6' 5" 210 lbs Louisville $2,528,233
  2118. https://a.espncdn.com/i/headshots/nba/players/full/4278124.png
  2119. Saben Lee38 G 24 6' 2" 183 lbs Vanderbilt --
  2120. https://a.espncdn.com/i/headshots/nba/players/full/3914283.png
  2121. Chimezie Metu7 PF 26 6' 9" 225 lbs USC $2,019,706
  2122. https://a.espncdn.com/i/headshots/nba/players/full/4065663.png
  2123. Josh Okogie2 SF 25 6' 4" 213 lbs Georgia Tech $2,815,937
  2124. https://a.espncdn.com/i/headshots/nba/players/full/6619.png
  2125. Terrence Ross8 G 32 6' 7" 206 lbs Washington --
  2126. https://a.espncdn.com/i/headshots/nba/players/full/3059307.png
  2127. Ish Wainright12 F 29 6' 5" 250 lbs Baylor $1,927,896
  2128. https://a.espncdn.com/i/headshots/nba/players/full/2982334.png
  2129. T.J. Warren21 SF 30 6' 8" 220 lbs NC State --
  2130. https://a.espncdn.com/i/headshots/nba/players/full/3133838.png
  2131. Yuta Watanabe SF 28 6' 8" 215 lbs George Washington $2,346,614
  2132. Portland Trail Blazers Roster 2023-24
  2133. NAME POS AGE HT WT COLLEGE SALARY
  2134. https://a.espncdn.com/i/headshots/nba/players/full/4871140.png
  2135. Ibou Badji41 C 20 7' 1" 240 lbs -- --
  2136. https://a.espncdn.com/i/headshots/nba/players/full/4397126.png
  2137. Moses Brown C 23 7' 2" 245 lbs UCLA --
  2138. https://a.espncdn.com/i/headshots/nba/players/full/4432588.png
  2139. John Butler Jr.21 F 20 7' 0" 190 lbs Florida State --
  2140. https://a.espncdn.com/i/headshots/nba/players/full/2991070.png
  2141. Jerami Grant9 SF 29 6' 8" 210 lbs Syracuse $27,586,207
  2142. https://a.espncdn.com/i/headshots/nba/players/full/4683678.png
  2143. Scoot Henderson0 G 19 6' 2" 196 lbs -- $9,770,880
  2144. https://a.espncdn.com/i/headshots/nba/players/full/4432192.png
  2145. Keon Johnson6 SG 21 6' 3" 185 lbs Tennessee $2,808,720
  2146. https://a.espncdn.com/i/headshots/nba/players/full/4278075.png
  2147. Kevin Knox II11 SF 24 6' 7" 215 lbs Kentucky --
  2148. https://a.espncdn.com/i/headshots/nba/players/full/6606.png
  2149. Damian Lillard0 PG 33 6' 2" 195 lbs Weber State $45,640,084
  2150. https://a.espncdn.com/i/headshots/nba/players/full/4395652.png
  2151. Nassir Little10 SF 23 6' 5" 220 lbs North Carolina $6,250,000
  2152. https://a.espncdn.com/i/headshots/nba/players/full/4066269.png
  2153. Skylar Mays8 SG 26 6' 4" 205 lbs LSU --
  2154. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2155. Justin Minaya15 F 24 6' 7" 210 lbs Providence --
  2156. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2157. Kris Murray8 F 23 6' 8" 215 lbs Iowa $2,847,480
  2158. https://a.espncdn.com/i/headshots/nba/players/full/3102530.png
  2159. Jusuf Nurkic27 C 29 7' 0" 290 lbs -- $16,875,000
  2160. https://a.espncdn.com/i/headshots/nba/players/full/4914336.png
  2161. Shaedon Sharpe17 SG 20 6' 5" 200 lbs Kentucky $6,313,800
  2162. https://a.espncdn.com/i/headshots/nba/players/full/4351851.png
  2163. Anfernee Simons1 SG 24 6' 3" 181 lbs -- $24,107,143
  2164. https://a.espncdn.com/i/headshots/nba/players/full/3907498.png
  2165. Matisse Thybulle4 SG 26 6' 5" 201 lbs Washington $10,500,000
  2166. https://a.espncdn.com/i/headshots/nba/players/full/4432446.png
  2167. Jabari Walker34 F 21 6' 7" 200 lbs Colorado $1,719,864
  2168. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2169. Jeenathan Williams49 G 24 6' 5" 205 lbs Buffalo $1,719,864
  2170. https://a.espncdn.com/i/headshots/nba/players/full/3135047.png
  2171. Justise Winslow26 PF 27 6' 6" 222 lbs Duke --
  2172. Sacramento Kings Roster 2023-24
  2173. NAME POS AGE HT WT COLLEGE SALARY
  2174. https://a.espncdn.com/i/headshots/nba/players/full/6578.png
  2175. Harrison Barnes40 SF 31 6' 8" 225 lbs North Carolina $17,000,000
  2176. https://a.espncdn.com/i/headshots/nba/players/full/3919454.png
  2177. Terence Davis3 SG 26 6' 4" 201 lbs Ole Miss --
  2178. https://a.espncdn.com/i/headshots/nba/players/full/2489716.png
  2179. Matthew Dellavedova8 PG 33 6' 3" 200 lbs Saint Mary's --
  2180. https://a.espncdn.com/i/headshots/nba/players/full/4592402.png
  2181. Chris Duarte SG 26 6' 5" 190 lbs Oregon $4,124,400
  2182. https://a.espncdn.com/i/headshots/nba/players/full/4397693.png
  2183. Kessler Edwards17 SF 23 6' 8" 215 lbs Pepperdine $1,927,896
  2184. https://a.espncdn.com/i/headshots/nba/players/full/4702177.png
  2185. Keon Ellis23 G 23 6' 5" 175 lbs Alabama --
  2186. https://a.espncdn.com/i/headshots/nba/players/full/4066736.png
  2187. Jordan Ford1 G 25 6' 1" 175 lbs Saint Mary's --
  2188. https://a.espncdn.com/i/headshots/nba/players/full/4066259.png
  2189. De'Aaron Fox5 PG 25 6' 3" 185 lbs Kentucky $32,600,060
  2190. https://a.espncdn.com/i/headshots/nba/players/full/4066372.png
  2191. Kevin Huerter9 SG 25 6' 7" 198 lbs Maryland $15,669,643
  2192. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2193. Colby Jones G 21 6' 6" 205 lbs Xavier $2,019,706
  2194. https://a.espncdn.com/i/headshots/nba/players/full/2596107.png
  2195. Alex Len25 C 30 7' 1" 250 lbs Maryland $2,019,760
  2196. https://a.espncdn.com/i/headshots/nba/players/full/3136196.png
  2197. Trey Lyles41 PF 27 6' 9" 234 lbs Kentucky $8,000,000
  2198. https://a.espncdn.com/i/headshots/nba/players/full/3452.png
  2199. JaVale McGee00 C 35 7' 0" 270 lbs Nevada $5,734,280
  2200. https://a.espncdn.com/i/headshots/nba/players/full/4278053.png
  2201. Davion Mitchell15 PG 25 6' 0" 202 lbs Baylor $5,063,760
  2202. https://a.espncdn.com/i/headshots/nba/players/full/4066262.png
  2203. Malik Monk0 SG 25 6' 3" 200 lbs Kentucky $9,945,830
  2204. https://a.espncdn.com/i/headshots/nba/players/full/4594327.png
  2205. Keegan Murray13 SF 23 6' 8" 215 lbs Iowa $8,409,000
  2206. https://a.espncdn.com/i/headshots/nba/players/full/3155942.png
  2207. Domantas Sabonis10 PF 27 7' 0" 240 lbs Gonzaga $30,600,000
  2208. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2209. Jalen Slawson F 23 6' 7" 218 lbs Furman --
  2210. https://a.espncdn.com/i/headshots/nba/players/full/3893014.png
  2211. Sasha Vezenkov F 28 6' 9" 225 lbs -- $6,341,464
  2212. San Antonio Spurs Roster 2023-24
  2213. NAME POS AGE HT WT COLLEGE SALARY
  2214. https://a.espncdn.com/i/headshots/nba/players/full/4870562.png
  2215. Dominick Barlow26 F 20 6' 10" 220 lbs -- --
  2216. https://a.espncdn.com/i/headshots/nba/players/full/4397886.png
  2217. Charles Bassey28 C 22 6' 9" 230 lbs Western Kentucky $2,600,000
  2218. https://a.espncdn.com/i/headshots/nba/players/full/2578240.png
  2219. Khem Birch99 C 30 6' 8" 233 lbs UNLV $6,985,000
  2220. https://a.espncdn.com/i/headshots/nba/players/full/4565201.png
  2221. Malaki Branham22 G 20 6' 4" 180 lbs Ohio State $3,071,880
  2222. https://a.espncdn.com/i/headshots/nba/players/full/2528779.png
  2223. Reggie Bullock25 SF 32 6' 6" 205 lbs North Carolina $11,014,080
  2224. https://a.espncdn.com/i/headshots/nba/players/full/4592479.png
  2225. Julian Champagnie30 F 22 6' 9" 210 lbs St. John's $3,000,000
  2226. https://a.espncdn.com/i/headshots/nba/players/full/5081727.png
  2227. Sidy Cissoko G 19 6' 7" 225 lbs -- $1,119,563
  2228. https://a.espncdn.com/i/headshots/nba/players/full/4066650.png
  2229. Zach Collins23 PF 25 6' 11" 250 lbs Gonzaga $7,700,000
  2230. https://a.espncdn.com/i/headshots/nba/players/full/2534781.png
  2231. Gorgui Dieng41 C 33 6' 10" 265 lbs Louisville --
  2232. https://a.espncdn.com/i/headshots/nba/players/full/3133601.png
  2233. Devonte' Graham4 PG 28 6' 1" 195 lbs Kansas $12,100,000
  2234. https://a.espncdn.com/i/headshots/nba/players/full/4395723.png
  2235. Keldon Johnson3 SF 23 6' 5" 220 lbs Kentucky $20,000,000
  2236. https://a.espncdn.com/i/headshots/nba/players/full/4395626.png
  2237. Tre Jones33 PG 23 6' 1" 185 lbs Duke $9,895,833
  2238. https://a.espncdn.com/i/headshots/nba/players/full/4397008.png
  2239. Romeo Langford35 SG 23 6' 5" 216 lbs Indiana --
  2240. https://a.espncdn.com/i/headshots/nba/players/full/4278580.png
  2241. Sandro Mamukelashvili54 PF 24 6' 10" 240 lbs Seton Hall $2,019,760
  2242. https://a.espncdn.com/i/headshots/nba/players/full/2528588.png
  2243. Doug McDermott17 SF 31 6' 6" 225 lbs Creighton $13,750,000
  2244. https://a.espncdn.com/i/headshots/nba/players/full/3893016.png
  2245. Cedi Osman16 SF 28 6' 7" 230 lbs -- $6,718,842
  2246. https://a.espncdn.com/i/headshots/nba/players/full/4610139.png
  2247. Jeremy Sochan10 F 20 6' 8" 230 lbs Baylor $5,316,960
  2248. https://a.espncdn.com/i/headshots/nba/players/full/4395630.png
  2249. Devin Vassell24 SG 23 6' 5" 200 lbs Florida State $5,887,899
  2250. https://a.espncdn.com/i/headshots/nba/players/full/5104157.png
  2251. Victor Wembanyama C 19 7' 3" 225 lbs -- $12,160,680
  2252. https://a.espncdn.com/i/headshots/nba/players/full/4683935.png
  2253. Blake Wesley14 G 20 6' 4" 190 lbs Notre Dame $2,504,640
  2254. Toronto Raptors Roster 2023-24
  2255. NAME POS AGE HT WT COLLEGE SALARY
  2256. https://a.espncdn.com/i/headshots/nba/players/full/4431679.png
  2257. Precious Achiuwa5 PF 24 6' 8" 225 lbs Memphis $4,379,527
  2258. https://a.espncdn.com/i/headshots/nba/players/full/3934719.png
  2259. O.G. Anunoby3 SF 26 6' 7" 232 lbs Indiana $18,642,857
  2260. https://a.espncdn.com/i/headshots/nba/players/full/4433134.png
  2261. Scottie Barnes4 SF 22 6' 8" 225 lbs Florida State $8,008,560
  2262. https://a.espncdn.com/i/headshots/nba/players/full/6579.png
  2263. Will Barton1 G 32 6' 5" 181 lbs Memphis --
  2264. https://a.espncdn.com/i/headshots/nba/players/full/3948153.png
  2265. Chris Boucher25 PF 30 6' 9" 200 lbs Oregon $11,750,000
  2266. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2267. Gradey Dick G 19 6' 8" 205 lbs Kansas $4,536,720
  2268. https://a.espncdn.com/i/headshots/nba/players/full/4066786.png
  2269. Jeff Dowtin Jr.20 PG 26 6' 3" 177 lbs Rhode Island $2,109,706
  2270. https://a.espncdn.com/i/headshots/nba/players/full/4066668.png
  2271. Malachi Flynn22 PG 25 6' 1" 175 lbs San Diego State $3,873,025
  2272. https://a.espncdn.com/i/headshots/nba/players/full/4397251.png
  2273. Ron Harper Jr.8 F 23 6' 5" 245 lbs Rutgers --
  2274. https://a.espncdn.com/i/headshots/nba/players/full/4431778.png
  2275. Christian Koloko35 C 23 7' 0" 230 lbs Arizona $1,719,864
  2276. https://a.espncdn.com/i/headshots/nba/players/full/4066731.png
  2277. Jalen McDaniels7 PF 25 6' 9" 205 lbs San Diego State $4,516,000
  2278. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2279. Markquis Nowell G 23 5' 8" 160 lbs Kansas State --
  2280. https://a.espncdn.com/i/headshots/nba/players/full/3134908.png
  2281. Jakob Poeltl19 C 27 7' 1" 245 lbs Utah $19,500,000
  2282. https://a.espncdn.com/i/headshots/nba/players/full/2594922.png
  2283. Otto Porter Jr.32 SF 30 6' 8" 198 lbs Georgetown $6,300,000
  2284. https://a.espncdn.com/i/headshots/nba/players/full/3032979.png
  2285. Dennis Schroder PG 30 6' 1" 172 lbs -- $12,405,000
  2286. https://a.espncdn.com/i/headshots/nba/players/full/3149673.png
  2287. Pascal Siakam43 PF 29 6' 8" 230 lbs New Mexico State $37,893,408
  2288. https://a.espncdn.com/i/headshots/nba/players/full/4277843.png
  2289. Gary Trent Jr.33 SG 24 6' 5" 209 lbs Duke $18,560,000
  2290. https://a.espncdn.com/i/headshots/nba/players/full/3244.png
  2291. Thaddeus Young21 PF 35 6' 8" 235 lbs Georgia Tech $8,000,000
  2292. Utah Jazz Roster 2023-24
  2293. NAME POS AGE HT WT COLLEGE SALARY
  2294. https://a.espncdn.com/i/headshots/nba/players/full/4397018.png
  2295. Ochai Agbaji30 G 23 6' 5" 215 lbs Kansas $4,114,200
  2296. https://a.espncdn.com/i/headshots/nba/players/full/3912292.png
  2297. Jarrell Brantley5 PF 27 6' 5" 250 lbs Charleston --
  2298. https://a.espncdn.com/i/headshots/nba/players/full/2528426.png
  2299. Jordan Clarkson00 PG 31 6' 5" 194 lbs Missouri $23,487,629
  2300. https://a.espncdn.com/i/headshots/nba/players/full/3908845.png
  2301. John Collins20 PF 25 6' 9" 226 lbs Wake Forest $25,340,000
  2302. https://a.espncdn.com/i/headshots/nba/players/full/2991139.png
  2303. Kris Dunn11 G 29 6' 3" 205 lbs Providence $2,586,665
  2304. https://a.espncdn.com/i/headshots/nba/players/full/3899664.png
  2305. Simone Fontecchio16 F 27 6' 7" 209 lbs -- $3,044,872
  2306. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2307. Keyonte George G 19 6' 4" 185 lbs Baylor $3,889,800
  2308. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2309. Taylor Hendricks F 19 6' 9" 210 lbs UCF $5,569,920
  2310. https://a.espncdn.com/i/headshots/nba/players/full/4396991.png
  2311. Talen Horton-Tucker0 SG 22 6' 4" 234 lbs Iowa State $11,020,000
  2312. https://a.espncdn.com/i/headshots/nba/players/full/4432817.png
  2313. Johnny Juzang33 G 22 6' 7" 215 lbs UCLA --
  2314. https://a.espncdn.com/i/headshots/nba/players/full/4433136.png
  2315. Walker Kessler24 C 22 7' 0" 245 lbs Auburn $2,831,160
  2316. https://a.espncdn.com/i/headshots/nba/players/full/4066336.png
  2317. Lauri Markkanen23 PF 26 7' 0" 240 lbs Arizona $17,259,999
  2318. https://a.espncdn.com/i/headshots/nba/players/full/2489663.png
  2319. Kelly Olynyk41 PF 32 6' 11" 240 lbs Gonzaga $12,195,122
  2320. https://a.espncdn.com/i/headshots/nba/players/full/4423309.png
  2321. Luka Samanic91 F 23 6' 10" 227 lbs -- $2,066,585
  2322. https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
  2323. Brice Sensabaugh F 19 6' 6" 235 lbs Ohio State $2,448,600
  2324. https://a.espncdn.com/i/headshots/nba/players/full/4277811.png
  2325. Collin Sexton2 PG 24 6' 2" 190 lbs Alabama $17,325,000
  2326. https://a.espncdn.com/i/headshots/nba/players/full/4401416.png
  2327. Juan Toscano-Anderson95 SF 30 6' 6" 209 lbs Marquette --
  2328. https://a.espncdn.com/i/headshots/nba/players/full/3074213.png
  2329. Omer Yurtseven77 C 25 6' 11" 275 lbs Georgetown $2,800,000
  2368. Washington Wizards Roster 2023-24
  2369. NAME POS AGE HT WT COLLEGE SALARY
  2370. https://a.espncdn.com/i/headshots/nba/players/full/4683021.png
  2371. Deni Avdija9 SF 22 6' 9" 210 lbs -- $6,263,188
  2372. https://a.espncdn.com/i/headshots/nba/players/full/4433246.png
  2373. Patrick Baldwin Jr.7 F 20 6' 9" 220 lbs Milwaukee $2,337,720
  2374. https://a.espncdn.com/i/headshots/nba/players/full/4395695.png
  2375. Jared Butler SG 23 6' 3" 193 lbs Baylor --
  2376. https://a.espncdn.com/i/headshots/nba/players/full/3129932.png
  2377. Xavier Cooks12 G 28 6' 8" 185 lbs Winthrop $1,719,864
  2378. https://a.espncdn.com/i/headshots/nba/players/full/5104155.png
  2379. Bilal Coulibaly SG 19 6' 6" 195 lbs -- $6,614,280
  2380. https://a.espncdn.com/i/headshots/nba/players/full/4432356.png
  2381. Johnny Davis1 G 21 6' 4" 195 lbs Wisconsin $5,050,800
  2382. https://a.espncdn.com/i/headshots/nba/players/full/4278049.png
  2383. Daniel Gafford21 C 24 6' 10" 234 lbs Arkansas $12,402,000
  2384. https://a.espncdn.com/i/headshots/nba/players/full/3428.png
  2385. Danilo Gallinari PF 35 6' 10" 236 lbs -- $6,802,950
  2386. https://a.espncdn.com/i/headshots/nba/players/full/3986.png
  2387. Taj Gibson67 F 38 6' 9" 232 lbs USC --
  2388. https://a.espncdn.com/i/headshots/nba/players/full/2581184.png
  2389. Anthony Gill16 PF 30 6' 8" 230 lbs Virginia $1,997,239
  2390. https://a.espncdn.com/i/headshots/nba/players/full/3135046.png
  2391. Tyus Jones PG 27 6' 0" 196 lbs Duke $14,000,000
  2392. https://a.espncdn.com/i/headshots/nba/players/full/4280151.png
  2393. Corey Kispert24 SF 24 6' 6" 224 lbs Gonzaga $3,722,040
  2394. https://a.espncdn.com/i/headshots/nba/players/full/3134907.png
  2395. Kyle Kuzma33 SF 28 6' 9" 221 lbs Utah $25,568,182
  2396. https://a.espncdn.com/i/headshots/nba/players/full/2490089.png
  2397. Mike Muscala C 32 6' 11" 240 lbs Bucknell $3,500,000
  2398. https://a.espncdn.com/i/headshots/nba/players/full/3056247.png
  2399. Kendrick Nunn20 G 28 6' 3" 190 lbs Oakland --
  2400. https://a.espncdn.com/i/headshots/nba/players/full/4066410.png
  2401. Eugene Omoruyi F 26 6' 6" 235 lbs Oregon $1,927,896
  2402. https://a.espncdn.com/i/headshots/nba/players/full/4277956.png
  2403. Jordan Poole3 SG 24 6' 4" 194 lbs Michigan $27,955,357
  2404. https://a.espncdn.com/i/headshots/nba/players/full/4591725.png
  2405. Ryan Rollins2 G 21 6' 3" 180 lbs Toledo $1,719,864
  2406. https://a.espncdn.com/i/headshots/nba/players/full/3914044.png
  2407. Landry Shamet SG 26 6' 4" 190 lbs Wichita State $10,250,000
  2408. https://a.espncdn.com/i/headshots/nba/players/full/4997537.png
  2409. Tristan Vukcevic PF 20 7' 0" 225 lbs -- --
  2410. https://a.espncdn.com/i/headshots/nba/players/full/3064447.png
  2411. Delon Wright55 SG 31 6' 5" 185 lbs Utah $8,195,122
  2412. ================================================
  2413. FILE: end-to-end-use-cases/coding/text2sql/nba_roster.db
  2414. ================================================
  2415. [Non-text file]
  2416. ================================================
  2417. FILE: end-to-end-use-cases/coding/text2sql/requirements.txt
  2418. ================================================
  2419. langchain
  2420. langchain-community
  2421. langchain-together
  2422. ================================================
  2423. FILE: end-to-end-use-cases/Contextual-Chunking-RAG/README.md
  2424. ================================================
2425. # Contextual keyword generation for RAG using Llama-3.1
  2426. **Problem**: Independent chunking in traditional RAG systems leads to the loss of contextual information between chunks. This makes it difficult for LLMs to retrieve relevant data when context (e.g., the subject or entity being discussed) is not explicitly repeated within individual chunks.
2427. **Solution**: Generate keywords for each chunk to fill in the missing contextual information. These keywords (e.g., "BMW, X5, pricing") enrich the chunk with the necessary context, ensuring better retrieval accuracy. By embedding this enriched metadata, the system bridges gaps between related chunks, enabling effective query matching and accurate answer generation.
2428. [This article](https://medium.com/@ailabs/overcoming-independent-chunking-in-rag-systems-a-hybrid-approach-5d2c205b3732) explains the benefits of contextual chunking.
2429. **Note**: This method does not require calling the LLM for each chunk separately, which makes it efficient.
  2430. **Getting started**
  2431. In this cookbook, we’ll use DeepInfra for Llama inference services, so be sure to obtain an API key from https://deepinfra.com/.
  2432. You'll also need a LlamaParse API key to parse PDF files, which can be obtained from https://www.llamaindex.ai/.
  2433. Additionally, we will use the "jinaai/jina-embeddings-v2-base-en" model from HuggingFace to generate text embeddings locally.
2434. Before getting started, update the <code>config.py</code> file as follows:
2435. `DEEPINFRA_API_KEY = "<your_api_key>"`
2436. `LLAMAPARSE_API_KEY = "<your_api_key>"`
  2437. ================================================
  2438. FILE: end-to-end-use-cases/Contextual-Chunking-RAG/data/llama_article.txt
  2439. ================================================
  2440. Understanding the LLaMA Model: A Breakthrough in Large Language Models
  2441. In recent years, large language models (LLMs) have revolutionized the field of natural language processing (NLP). Among them, Meta’s LLaMA (Large Language Model Meta AI) has emerged as a powerful, efficient, and open-weight model that provides high-quality text generation capabilities while being more accessible than proprietary alternatives. This article explores the architecture, capabilities, and applications of LLaMA, along with its significance in the AI landscape.
  2442. 1. Introduction to LLaMA
  2443. LLaMA is a family of autoregressive transformer-based models designed by Meta AI. Unlike massive models like OpenAI’s GPT-4, which require extensive computational resources and are primarily closed-source, LLaMA aims to provide powerful language modeling in a more efficient and open format. The original LLaMA release included models ranging from 7 billion to 65 billion parameters, offering different levels of computational demand and performance.
2444. The second iteration, LLaMA 2, introduced in 2023, further improved efficiency, accuracy, and usability. LLaMA 2 models are available in 7B, 13B, and 70B parameter variants, with optimized training methodologies and increased alignment with human preferences.
  2445. 2. Architecture and Training
  2446. LLaMA follows the transformer architecture, the foundation of most modern language models. Key architectural improvements and training strategies include:
  2447. Tokenization: LLaMA uses Byte Pair Encoding (BPE) for tokenization, ensuring better handling of various languages and token efficiency.
  2448. Efficient Training: Trained on a diverse dataset containing publicly available and licensed data, LLaMA reduces reliance on proprietary sources. The training process leverages a causal decoder-only transformer, meaning it predicts tokens autoregressively while attending to previous context.
  2449. Scaled Attention Mechanism: LLaMA incorporates Rotary Position Embeddings (RoPE) for efficient long-context understanding. This improves its ability to handle longer sequences compared to earlier models.
  2450. Memory Optimization: Unlike some larger models requiring thousands of GPUs for inference, LLaMA’s optimized weight distribution and efficient parameter scaling allow it to run on fewer computational resources while maintaining high performance.
  2451. The training data includes code, technical documents, research papers, and general text, making LLaMA well-suited for various NLP tasks, from answering questions to generating detailed content.
  2452. 3. Performance and Benchmarks
  2453. LLaMA models have demonstrated impressive performance across multiple benchmarks. The 65B variant outperforms GPT-3 (175B) on several standard NLP tasks while using significantly fewer parameters. Key benchmarking results include:
2454. MMLU (Massive Multitask Language Understanding): LLaMA 2 70B achieves results comparable to GPT-4 in general knowledge and reasoning tasks.
  2455. ARC (AI2 Reasoning Challenge): LLaMA models show strong problem-solving capabilities, particularly in logic-based questions.
  2456. HellaSwag & PIQA: LLaMA performs well in commonsense reasoning, approaching human-level accuracy.
  2457. Code Generation: Though not primarily designed for coding, LLaMA exhibits notable competence in generating and completing programming code snippets.
  2458. Despite being smaller than some competing models, LLaMA's efficiency enables it to achieve state-of-the-art performance per parameter count, making it a highly cost-effective solution.
  2459. 4. Applications of LLaMA
  2460. The versatility of LLaMA enables a wide range of applications across industries, including:
  2461. Chatbots and Virtual Assistants: LLaMA powers intelligent conversational AI systems, providing human-like responses with improved contextual understanding.
  2462. Content Generation: From summarizing long documents to creating articles and reports, LLaMA is widely used for generating high-quality text.
  2463. Programming Assistance: Developers use LLaMA to generate code snippets, debug errors, and improve software development efficiency.
  2464. Scientific Research: The model helps researchers analyze papers, generate summaries, and assist in hypothesis generation.
  2465. Education and Tutoring: LLaMA aids in personalized learning, answering students’ queries and explaining complex topics interactively.
  2466. Its open-weight availability also allows organizations to fine-tune the model on proprietary data, making it adaptable for specialized use cases such as medical AI, legal document analysis, and multilingual NLP tasks.
  2467. 5. Challenges and Limitations
  2468. Despite its advantages, LLaMA faces several challenges:
  2469. Ethical Concerns: Like all LLMs, LLaMA can generate biased or misleading information. Efforts are ongoing to align the model with ethical AI principles.
  2470. Computational Costs: Although LLaMA is optimized for efficiency, larger variants still require significant GPU resources for fine-tuning and inference.
  2471. Context Length Limitations: While improved, LLaMA still has constraints on long-context reasoning compared to specialized extended-context models.
  2472. Security Risks: Open-weight models pose potential risks for misuse, such as generating harmful or deceptive content. Responsible deployment and monitoring are necessary.
  2473. 6. The Future of LLaMA
  2474. Meta continues to refine the LLaMA model family, with research focused on improving alignment, reducing biases, and extending context understanding. Future iterations may include:
  2475. LLaMA 3 and Beyond: Expected advancements in parameter efficiency and multimodal capabilities.
  2476. Better Fine-Tuning Techniques: Enhancing adaptability for domain-specific applications.
  2477. Integration with Retrieval-Augmented Generation (RAG): Combining LLaMA with external knowledge sources for more accurate responses.
  2478. Edge Deployment: Efforts to make LLaMA smaller and faster for local AI applications without cloud dependence.
  2479. As open-source AI research progresses, LLaMA remains a key player in democratizing access to powerful language models, enabling innovation across academia, business, and technology sectors.
  2480. 7. Conclusion
  2481. LLaMA represents a significant step forward in making high-quality language models more accessible. By balancing efficiency, openness, and performance, it provides a compelling alternative to closed-source models like GPT-4. Whether for research, business applications, or general AI development, LLaMA offers a robust platform for advancing NLP capabilities while promoting transparency and innovation in AI.
  2482. ================================================
  2483. FILE: end-to-end-use-cases/customerservice_chatbots/README.md
  2484. ================================================
2485. This repository contains various end-to-end use cases for building customer service chatbots using Meta's Llama 3. Below is an outline of the subfolders and their contents.
  2486. ## Outline
  2487. - **[RAG_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot)**
2488. contains resources for building a Meta Llama 3 chatbot with Retrieval Augmented Generation (RAG), including a [notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) that shows a complete example of how to build a Meta Llama 3 chatbot, hosted in your browser, that can answer questions based on your own data. It covers:
  2489. - The deployment process of Meta Llama 3 8B with the [Text-generation-inference](https://github.com/huggingface/text-generation-inference) framework as an API server.
  2490. - A chatbot example built with [Gradio](https://github.com/gradio-app/gradio) and wired to the server.
  2491. - Adding RAG capability with Meta Llama 3 specific knowledge based on our Getting Started [guide](https://ai.meta.com/llama/get-started/).
  2492. - **[ai_agent_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot)** contains a
  2493. [Sales Bot with Llama3 - A Summarization and RAG Use Case notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb) that demonstrates building a sales chatbot using Llama3 for targeted product recommendations. The workflow involves:
  2494. * Generating product review summaries using Llama3
  2495. * Storing summaries in a vector database (Weaviate)
  2496. * Leveraging Retrieval Augmented Generation (RAG) for intelligent sales interactions
2497. - **[messenger_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot)** provides a step-by-step guide to building a Llama-enabled Messenger chatbot. It includes integration details with the Messenger Platform and a [demo video](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).
2498. - **[whatsapp_chatbot](https://github.com/Monireh2/llama-recipes/tree/main/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot)** contains a tutorial for creating a Llama 3 enabled WhatsApp chatbot, similar to the Messenger chatbot guide. A demo video showing WhatsApp on iOS being used to send a question to a test phone number and receive a response generated by Llama 3 can be found [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).
  2499. ## Additional Information
2500. - **RAG Architecture:** The RAG method enhances LLMs by retrieving relevant data and augmenting the prompt with it, allowing for more relevant and context-aware responses (see the sketch after this list).
  2501. - **Development Tools:** The repository utilizes frameworks like LangChain and LlamaIndex for building LLM applications, and Gradio for creating chatbot UI.
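The retrieve-then-augment flow, in a rough sketch (hypothetical values and helper names, not the notebook's code; it uses the `sentence-transformers` and `faiss-cpu` packages listed in the RAG chatbot's requirements):
```
import faiss
from sentence_transformers import SentenceTransformer

# Embed a toy document set and index it with FAISS.
embedder = SentenceTransformer("all-MiniLM-L6-v2")  # any sentence-transformers model works
docs = ["Llama 3 comes in 8B and 70B sizes.", "RAG retrieves context at query time."]
index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
index.add(embedder.encode(docs))

# Retrieve the closest document and prepend it to the prompt.
query = "What sizes does Llama 3 come in?"
_, ids = index.search(embedder.encode([query]), 1)
prompt = f"Context: {docs[ids[0][0]]}\n\nQuestion: {query}\nAnswer:"
# `prompt` is then sent to the Llama 3 server (e.g., the TGI deployment above).
```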
2502. For more detailed information, please refer to the individual subdirectory documentation and examples.
  2503. ================================================
  2504. FILE: end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md
  2505. ================================================
  2506. # Building a Llama 3 Enabled Messenger Chatbot
  2507. This step-by-step tutorial shows the complete process of building a Llama-enabled Messenger chatbot. A demo video of using the iOS Messenger app to send a question to a Facebook business page and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).
  2508. If you're interested in a Llama 3 enabled WhatsApp chatbot, see [here](../whatsapp_chatbot/whatsapp_llama3.md) for a tutorial.
  2509. ## Overview
2510. Messenger from Meta is a messaging service that allows a Facebook business page to respond to people who are interested in their business using the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview). The benefits of an intelligent and knowledgeable Llama 3 powered chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this tutorial, we'll cover the details of integrating Llama 3 with the Messenger Platform to build a basic Llama 3 enabled chatbot - for more Llama 3 application development and deployment demos, such as how to integrate your own data with Llama 3, see the recipes.
  2511. The diagram below shows the components and overall data flow of the Llama 3 enabled Messenger chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.
  2512. ![](../../../src/docs/img/messenger_llama_arch.jpg)
  2513. ## Getting Started with Messenger Platform
  2514. 1. A Facebook Page is required to send and receive messages using the Messenger Platform - see [here](https://www.facebook.com/business/help/461775097570076?id=939256796236247) for details about Facebook Pages and how to create a new page.
  2515. 2. If you have followed the [Llama WhatsApp chatbot tutorial](../whatsapp_chatbot/whatsapp_llama3.md), or if you already have a Meta developer account and a business app, then you can skip this step. Otherwise, you need to first [create a Meta developer account](https://developers.facebook.com/) and then [create a business app](https://developers.facebook.com/docs/development/create-an-app/).
2516. 3. Add the Messenger product to your business app: go to your business app's Dashboard, click "Add Product", and then select "Messenger".
2517. 4. Configure a webhook to receive real time HTTP notifications: follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com and get your webhook's callback URL. You can skip the WHATSAPP_TOKEN part in the guide - the rest of the webhook setup is the same for WhatsApp and Messenger.
2518. 5. Open Messenger's API Settings, as shown in the screenshot below. In "1. Configure webhooks", enter the Callback URL and Verify Token you set up in the previous step, and subscribe to all message related fields under "Webhook Fields". Finally, in "2. Generate access tokens", connect your Facebook page (see step 1) and copy your page access token for later use.
  2519. ![](../../../src/docs/img/messenger_api_settings.png)
  2520. ## Writing Llama 3 Enabled Web App
2521. The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud, to receive the user query sent by the webhook (covered in the next section), pass the query to Llama 3, and send the Llama 3 answer back to the webhook. For more information on using LangChain or LlamaIndex (another LLM app building framework), and on other Llama 3 cloud providers or on-premise deployment options, see the recipes.
  2522. First, let's create a new conda (or you can use venv if you like) environment and install all the required packages:
```
conda create -n messenger-llama python=3.8
conda activate messenger-llama
pip install langchain replicate flask requests uvicorn gunicorn
```
  2528. Then, create a Python file named [llama_messenger.py](llama_messenger.py) that creates a Llama 3 instance and defines an HTTP method `msgrcvd_page` to:
  2529. 1. receive the user message forwarded by the webhook;
  2530. 2. ask Llama 3 for the answer;
  2531. 3. send the answer back to the sender using the Facebook graph API.
```
import langchain
from langchain.llms import Replicate
from flask import Flask
from flask import request
import os
import requests
import json

os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
llama3_8b_chat = "meta/meta-llama-3-8b-instruct"

llm = Replicate(
    model=llama3_8b_chat,
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
)

app = Flask(__name__)

@app.route('/msgrcvd_page', methods=['POST', 'GET'])
def msgrcvd_page():
    message = request.args.get('message')
    sender = request.args.get('sender')
    recipient = request.args.get('recipient')

    # Ask Llama 3 for the answer.
    answer = llm(message)

    # Send the answer back to the sender via the Facebook Graph API.
    url = f"https://graph.facebook.com/v18.0/{recipient}/messages"
    params = {
        'recipient': '{"id": ' + sender + '}',
        'message': json.dumps({'text': answer}),
        'messaging_type': 'RESPONSE',
        'access_token': '<page_access_token>'
    }
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.post(url, params=params, headers=headers)

    return message + "<p/>" + answer
```
2566. Replace `<page_access_token>` with the page access token you copied in step 5 ("Open Messenger's API Settings") of the previous section. Now it's time to modify the webhook to complete the whole app.
  2567. ## Modifying the Webhook
  2568. Open your glitch.com webhook URL created earlier, and change your `app.js` to simply forward the user message and the user and page ids sent by the Messenger Platform to the Llama 3 enabled web app `llama_messenger.py` described in the previous section:
```
"use strict";

// Imports dependencies and sets up the http server
const request = require("request"),
  express = require("express"),
  body_parser = require("body-parser"),
  axios = require("axios").default,
  app = express().use(body_parser.json()); // creates express http server

// Sets server port and logs message on success
app.listen(process.env.PORT || 1337, () => console.log("webhook is listening"));

// Accepts POST requests at /webhook endpoint
app.post("/webhook", (req, res) => {
  // Parse the request body from the POST
  let body = req.body;

  let sender = req.body["entry"][0]["messaging"][0]["sender"]["id"];
  let recipient = req.body["entry"][0]["messaging"][0]["recipient"]["id"];
  let message = req.body["entry"][0]["messaging"][0]["message"]["text"];

  // Check if this is an event from a page subscription
  if (body.object === "page") {
    // Returns a '200 OK' response to all requests
    res.status(200).send("EVENT_RECEIVED");

    let url = "http://<web server public IP>:5000/msgrcvd_page?sender=" + sender + "&recipient=" + recipient + "&message=" + encodeURIComponent(message);
    console.log(url);

    axios.get(url)
      .then(response => {
        // Handle the response data
        console.log(response.data);
      })
      .catch(error => {
        // Handle errors
        console.error('Axios error:', error);
      });
  } else {
    // Returns a '404 Not Found' if the event is not from a page subscription
    res.sendStatus(404);
  }
});

// Accepts GET requests at the /webhook endpoint. You need this URL to set up the webhook initially.
// Info on the verification request payload: https://developers.facebook.com/docs/graph-api/webhooks/getting-started#verification-requests
app.get("/webhook", (req, res) => {
  /**
   * UPDATE YOUR VERIFY TOKEN
   * This will be the Verify Token value when you set up the webhook
   **/
  const verify_token = process.env.VERIFY_TOKEN;

  // Parse params from the webhook verification request
  let mode = req.query["hub.mode"];
  let token = req.query["hub.verify_token"];
  let challenge = req.query["hub.challenge"];

  // Check if a token and mode were sent
  if (mode && token) {
    // Check that the mode and token sent are correct
    if (mode === "subscribe" && token === verify_token) {
      // Respond with 200 OK and the challenge token from the request
      console.log("WEBHOOK_VERIFIED: " + token);
      res.status(200).send(challenge);
    } else {
      // Respond with '403 Forbidden' if verify tokens do not match
      res.sendStatus(403);
    }
  }
});
```
2632. Remember to change `<web server public IP>`, which needs to be publicly reachable, to the IP of the server where the Llama 3 enabled web app from the previous section runs.
2633. Note: It's possible, and even recommended, to implement the webhook itself in Python and call Llama directly inside it, instead of having the JavaScript webhook above make an HTTP request to a separate Python app that calls Llama and sends the answer to Messenger. A sketch of that approach follows.
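For example, the verification handshake and the message handling could live in the same Flask app (a hypothetical sketch that reuses the `app`, `llm`, and Graph API code from `llama_messenger.py` above; it assumes a `VERIFY_TOKEN` environment variable):
```
@app.route("/webhook", methods=["GET", "POST"])
def webhook():
    if request.method == "GET":
        # Webhook verification handshake, as in the JavaScript version above.
        if request.args.get("hub.verify_token") == os.environ["VERIFY_TOKEN"]:
            return request.args.get("hub.challenge")
        return "Forbidden", 403
    # POST: answer the incoming message directly - no second HTTP hop needed.
    event = request.json["entry"][0]["messaging"][0]
    answer = llm(event["message"]["text"])
    # ...send `answer` back via the Graph API, exactly as in msgrcvd_page...
    return "EVENT_RECEIVED"
```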
  2634. ## Running the Chatbot
  2635. On your web server, run the following command on a Terminal (see [here](https://flask.palletsprojects.com/en/3.0.x/deploying/gunicorn/) for more info on Gunicorn):
```
gunicorn -b 0.0.0.0:5000 llama_messenger:app
```
  2639. If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules.
2640. Now you can open your Messenger app, select the Facebook page you connected in Messenger's API Settings, enter a message, and shortly receive Llama 3's answer, as shown in the demo video at the beginning of this post.
  2641. To debug any possible issues, go to your glitch.com app log and copy the URL generated there when a Messenger message is sent, which looks like this:
```
http://<web server public IP>:5000/msgrcvd_page?sender=<user id>&recipient=<page id>&message=<msg>
```
  2645. Then open the URL in a browser to verify your web server can receive the message and the two ids, and generate a Llama answer before sending the answer back to Messenger.
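You can also script the same check with Python's requests (a hypothetical smoke test; keep the placeholders until you substitute your own values):
```
import requests

# Hit the endpoint the same way the webhook does.
resp = requests.get(
    "http://<web server public IP>:5000/msgrcvd_page",  # your server's public IP
    params={"sender": "<user id>", "recipient": "<page id>", "message": "Hello"},
)
print(resp.status_code, resp.text)
```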
  2646. ================================================
  2647. FILE: end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt
  2648. ================================================
  2649. gradio
  2650. pypdf
  2651. langchain
  2652. sentence-transformers
  2653. faiss-cpu
  2654. text-generation
  2655. ================================================
  2656. FILE: end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md
  2657. ================================================
  2658. # Building a Llama 3 Enabled WhatsApp Chatbot
2659. This step-by-step tutorial shows the complete process of building a Llama 3 enabled WhatsApp chatbot. A demo video of using WhatsApp on iOS to send a question to a test phone number and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).
  2660. If you're interested in a Llama 3 enabled Messenger chatbot, see [here](../messenger_chatbot/messenger_llama3.md) for a tutorial.
  2661. ## Overview
2662. Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgeable chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
2663. The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.
  2664. ![](../../../src/docs/img/whatsapp_llama_arch.jpg)
  2665. ## Getting Started with WhatsApp Business Cloud API
  2666. First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:
  2667. 1. Add the WhatsApp product to your business app;
  2668. 2. Add a recipient number;
  2669. 3. Send a test message;
  2670. 4. Configure a webhook to receive real time HTTP notifications.
  2671. For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.
2672. Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page, select your WhatsApp business app, and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run it in a Terminal to send a test message to your WhatsApp.
  2673. ![](../../../src/docs/img/whatsapp_dashboard.jpg)
  2674. Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later.
  2675. ## Writing Llama 3 Enabled Web App
The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud. It receives the user query sent by the webhook (covered in the next section), passes the query to Llama 3, and sends the Llama 3 answer back to the webhook. For more information on how to use LangChain and Replicate to develop Llama 3 apps, see the recipes - you may also replace Replicate with other Llama deployment options shown in the demo apps.
  2677. First, let's create a new conda (or you can use venv if you like) environment and install all the required packages:
  2678. ```
  2679. conda create -n whatsapp-llama python=3.8
  2680. conda activate whatsapp-llama
  2681. pip install langchain replicate flask requests uvicorn gunicorn
  2682. ```
  2683. Then, create a Python file named llama_chatbot.py with the following code, which defines a class `WhatsAppClient` and a method `send_text_message` to post a message (the answer generated by Llama 3 on a user query) to the WhatsApp Cloud API, which then sends the answer back to the WhatsApp user. Remember to set `WHATSAPP_API_TOKEN` and `WHATSAPP_CLOUD_NUMBER_ID` to the values you saved in the previous section.
```
import langchain
from langchain.llms import Replicate
from flask import Flask
from flask import request
import os
import requests
import json

class WhatsAppClient:
    API_URL = "https://graph.facebook.com/v17.0/"
    WHATSAPP_API_TOKEN = "<Temporary access token from your WhatsApp API Setup>"
    WHATSAPP_CLOUD_NUMBER_ID = "<Phone number ID from your WhatsApp API Setup>"

    def __init__(self):
        self.headers = {
            "Authorization": f"Bearer {self.WHATSAPP_API_TOKEN}",
            "Content-Type": "application/json",
        }
        self.API_URL = self.API_URL + self.WHATSAPP_CLOUD_NUMBER_ID

    def send_text_message(self, message, phone_number):
        payload = {
            "messaging_product": 'whatsapp',
            "to": phone_number,
            "type": "text",
            "text": {
                "preview_url": False,
                "body": message
            }
        }
        response = requests.post(f"{self.API_URL}/messages", json=payload, headers=self.headers)
        return response.status_code
```
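As a quick sanity check, you could exercise the client directly before wiring up the webhook; the recipient number below is a placeholder for the one from your API Setup:
```
# Sketch: send a one-off test message through the Cloud API
client = WhatsAppClient()
status = client.send_text_message("Hello from Llama 3!", "<a recipient phone number>")
print(status)  # 200 means the Cloud API accepted the message
```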
  2715. Finally, add the code below to llama_chatbot.py, which creates a Llama 3 instance and defines an HTTP method `msgrcvd` to:
  2716. 1. receive the user message forwarded by the webhook;
  2717. 2. ask Llama 3 for the answer;
3. call the `WhatsAppClient`'s `send_text_message` with a recipient's phone number.
```
os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
llama3_8b_chat = "meta/meta-llama-3-8b-instruct"

llm = Replicate(
    model=llama3_8b_chat,
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
)
client = WhatsAppClient()
app = Flask(__name__)

@app.route("/")
def hello_llama():
    return "<p>Hello Llama 3</p>"

@app.route('/msgrcvd', methods=['POST', 'GET'])
def msgrcvd():
    message = request.args.get('message')
    answer = llm(message)
    client.send_text_message(answer, "<a recipient phone number from your WhatsApp API Setup>")
    return message + "<p/>" + answer
```
  2738. The complete script of llama_chatbot.py is [here](llama_chatbot.py).
  2739. Now it's time to modify the webhook to complete the whole app.
  2740. ## Modifying the Webhook
  2741. Open your glitch.com webhook URL created earlier, and after the code snippet in app.js:
  2742. ```
  2743. // message received!
  2744. console.log(req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]);
  2745. ```
add the code below - remember to change `<web server public IP>`, which needs to be publicly visible, to the IP of the server where your Llama 3 enabled web app from the previous section runs:
```
let url = "http://<web server public IP>:5000/msgrcvd?message=" +
    req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]

axios.get(url)
    .then(response => {
        console.log(response.data);
    })
    .catch(error => {
        console.error('Axios error:', error);
    });
```
The code simply forwards the user message received by the WhatsApp Cloud Platform to the Llama 3 enabled web app llama_chatbot.py described in the previous section. Because the functionality of calling the WhatsApp Cloud API to send a message is already implemented in the `send_text_message` method in Python above, you can comment out the whole following code snippet in the original app.js:
  2759. ```
// info on WhatsApp text message payload: https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
  2761. if (req.body.object) {
  2762. ...
  2763. }
  2764. ```
Note: It's possible, and even recommended, to implement the webhook in Python and call Llama 3 directly inside it, instead of making an HTTP request (as the JavaScript code above does) to a Python app which calls Llama 3 and sends the answer to WhatsApp.
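For reference, a minimal sketch of what such a Python-only webhook might look like, reusing the `llm` and `client` objects from llama_chatbot.py above; the verify token and payload paths follow Meta's webhook conventions, and this is a starting point, not the tutorial's actual implementation:
```
from flask import Flask, request

app = Flask(__name__)
VERIFY_TOKEN = "<your webhook verify token>"  # whatever you set in the App Dashboard

@app.route("/webhook", methods=["GET"])
def verify():
    # Meta sends a one-time GET request to verify the callback URL
    if request.args.get("hub.verify_token") == VERIFY_TOKEN:
        return request.args.get("hub.challenge"), 200
    return "Verification failed", 403

@app.route("/webhook", methods=["POST"])
def handle_message():
    data = request.get_json()
    try:
        # same payload path as the JavaScript snippet above
        msg = data["entry"][0]["changes"][0]["value"]["messages"][0]
        question = msg["text"]["body"]
        sender = msg["from"]  # the user's phone number
    except (KeyError, IndexError):
        return "OK", 200  # ignore status updates and other non-message notifications
    answer = llm(question)
    client.send_text_message(answer, sender)
    return "OK", 200
```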
  2766. ## Running the Chatbot
  2767. On your web server, run the following command on a Terminal:
  2768. ```
  2769. gunicorn -b 0.0.0.0:5000 llama_chatbot:app
  2770. ```
If you use Amazon EC2 as your web server, make sure port 5000 is added to your EC2 instance's security group's inbound rules. Write down your web server's public IP, update the URL below with it, then open the URL in a browser to verify you can see the answer sent to your WhatsApp app, as well as shown in the browser:
  2772. ```
  2773. http://<web server public IP>:5000/msgrcvd?message=who%20wrote%20the%20book%20godfather
  2774. ```
Now you can open your WhatsApp app, enter a question, and receive Llama 3's answer shortly, as shown in the demo video at the beginning of this post.
  2776. ================================================
  2777. FILE: end-to-end-use-cases/email_agent/README.md
  2778. ================================================
  2779. # Building A Llama Powered Email Agent
This app shows how to build an email agent powered by Llama 3.1 8B running locally via Ollama. We'll start by building, from scratch, a basic agent with the custom tool calling natively supported in Llama 3.1. The end goal is to cover all components of a production-ready agent app, acting as an assistant to your email, with a great user experience: intuitive, engaging, efficient and reliable. We'll use Gmail as an example, but any email client API can be used instead.
  2781. Currently implemented features include:
  2782. * search for emails and attachments
  2783. * get email detail
  2784. * reply to a specific email
  2785. * forward an email
  2786. * get summary of a PDF attachment
  2787. * draft and send an email
  2788. ![](email_agent.png)
  2789. # Overview
Email remains essential and is one of the top killer apps people use every day. A recent [State of AI Agents](https://www.langchain.com/stateofaiagents) survey by LangChain finds that "The top use cases for agents include performing research and summarization (58%), followed by streamlining tasks for personal productivity or assistance (53.5%)."
  2791. Andrew Ng wrote a 5-part [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) in March 2024 predicting "AI agent workflows will drive massive AI progress this year".
  2792. Deloitte published in November 2024 a report on [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) stating that "Through their ability to reason, plan, remember and act, AI agents address key limitations of typical language models." and "Executive leaders should make moves now to prepare for and embrace this next era of intelligent organizational transformation."
During Thanksgiving week, a new startup, [/dev/agents](https://sdsa.ai/), building the next-gen OS for AI agents, was in the spotlight.
In December, Sequoia posted [here](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) saying 2024 has been the year of agents (an agent is an AI that can complete tasks: it not only tells you how to do something but does it for you directly), and 2025 will be the year of networks of AI agents.
So what exactly is an AI agent, and how do you start building an agent app?
  2796. ## What is an agent?
The concept of an agent is not new - in the 2010 3rd edition of Russell and Norvig's classic book Artificial Intelligence: A Modern Approach ("Modern" by 2010, two years before the deep learning revolution that started the truly modern AI), an agent is defined as "anything that can be viewed as perceiving its environment through sensors and acting upon that environment through actuators". These days, "AI agent" basically means LLM-powered agent - well, if we treat natural language understanding as a type of sensor, an LLM agent is still a sub-category of the traditional agent.
  2798. Lilian Weng in her popular June 2023 blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) defines LLM-powered agent system to have four key components:
* Planning and Reflection: can break down large tasks into smaller ones; can do self-reflection over past actions and self-improve;
  2800. * Memory: can use contextual info and recall info over extended periods (for other components to use);
  2801. * Tool Use: can understand what external APIs to use for info or action not built into LLMs;
  2802. * Action: can actually run the tools.
  2803. Andrew Ng describes four [agentic design patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) as:
  2804. * Reflection
  2805. * Planning
  2806. * Tool calling
  2807. * Multi-agent collaboration, where "memory" is mentioned: Each agent implements its own workflow, has its own memory (itself a rapidly evolving area in agentic technology: how can an agent remember enough of its past interactions to perform better on upcoming ones?)
  2808. In Deloitte's [report](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf), AI agents are reasoning engines that can understand context, plan workflows, connect to external tools and data, and execute actions to achieve a defined goal.
In a November 2024 blog by Letta, [The AI agents stack](https://www.letta.com/blog/ai-agents-stack), an LLM-powered agent is described as the combination of tool use, autonomous execution, and memory.
  2810. In addition, Harrison Chase defines agent in the blog [What is an AI agent](https://blog.langchain.dev/what-is-an-agent/) as "a system that uses an LLM to decide the control flow of an application."
  2811. Yet another simple [summary](https://www.felicis.com/insight/the-agentic-web) by Felicis of what an agent does is that an agent expands LLMs to go from chat to act: an agent can pair LLMs with external data, multi-step reasoning and planning, and act on the user's behalf.
  2812. All in all (see [Resources](#resources) for even more info), agents are systems that take a high-level task, use an LLM as a reasoning and planning engine, with the help of contextual info and long-term memory if needed, to decide what actions to take, reflect and improve on the actions, and eventually execute those actions to accomplish the task.
  2813. It's time to see an agent app in action and enjoy some coding. Below is a preview of the questions or requests one may ask the agent:
  2814. # Example Asks to Email Agent
  2815. * do i have any emails with attachments?
  2816. * what's the content of the email from LangSmith
  2817. * do i have emails with attachment larger than 1mb?
  2818. * what kind of attachments for the email with subject papers to read?
  2819. * give me a summary of the pdf thinking_llm.pdf
  2820. * Draft an email to xxx@gmail.com saying working on it and will keep you updated. thanks for your patience.
  2821. * send the draft
  2822. * do i have any emails with attachment larger than 10mb?
  2823. * how about 5mb
  2824. * reply to the email saying thanks for sharing!
  2825. * forward the email to xxx@gmail.com
  2826. * how many emails do i have from xxx@gmail.com?
  2827. * how about from yyy@gmail.com?
  2828. Below are screenshots of some example interactions with the agent:
  2829. ![](1.png)
  2830. ![](2.png)
  2831. ![](3.png)
  2832. # Setup and Installation
If you feel intimidated by the steps of the following Enable Gmail API section, you may want to check again the example asks (to see what you can ask the agent) and the example log (to see the whole conversation with the agent) - the devil is in the details, and all the glorious descriptions of a powerful, trendy agent may not mention the little details one has to deal with to build it.
  2834. ## Enable Gmail API
  2835. 1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
  2836. 2. Create a new project by clicking the dropdown on the top left then click NEW PROJECT.
  2837. 3. Enter a Project name then click CREATE.
  2838. 4. Under "APIs & Services" > "Enabled APIs & services", search for "gmail" and then Enable the "Gmail API" for your project.
  2839. 5. Under "APIs & Services" > "OAuth consent screen", click "GO TO NEW EXPERIENCE", then click "GET STARTED", enter App name, select your gmail as User support email, choose External under Audience, enter your gmail again as Contact Information, and finally check the I agree to the Google API Services under Finish and click Continue - Create.
6. Again under "APIs & Services", go to Credentials. Click on + CREATE CREDENTIALS, then choose OAuth client ID (NOT API key).
  2841. Select Desktop App (NOT Web application, because you're assumed to want to start your Gmail agent locally first) as the application type and name it. Click Create to generate your client ID and client secret.
7. Click Download JSON and rename the downloaded file as credentials.json. This file will be used in your Python script for authentication.
  2843. ## Install Ollama with Llama 3.1 8B
  2844. Download Ollama (available for macOS, Linux, and Windows) [here](https://ollama.com/). Then download and test run the Llama 3.1 8B model by running on a Terminal:
  2845. ```
  2846. ollama run llama3.1
  2847. ```
  2848. This will download a quantized version of Llama 3.1 of the size 4.7GB.
  2849. ## Install required packages
  2850. First, create a Conda or virtual env:
  2851. ```
  2852. conda create -n emailagent python=3.10
  2853. conda activate emailagent
  2854. ```
  2855. or
  2856. ```
  2857. python -m venv emailagent
source emailagent/bin/activate  # on Linux, macOS
emailagent\Scripts\activate     # on Windows
  2860. ```
  2861. Then install the required Python libraries:
  2862. ```
  2863. git clone https://github.com/meta-llama/llama-cookbook
  2864. cd llama-cookbook/end-to-end-use-cases/email_agent
  2865. pip install -r requirements.txt
  2866. ```
  2867. # Run Email Agent
To run the agent, you need to first copy the `credentials.json` file downloaded and renamed above in Step 7 of Enable Gmail API to the email_agent folder, then run:
  2869. ```
  2870. python main.py --email <your_gmail_address>
  2871. ```
The first time you run it, you'll get a prompt like this:
  2873. ```
  2874. Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=xxxx
  2875. Enter the authorization code:
  2876. ```
You need to copy the URL above and open it in a browser. If you sign in with Google using the same Gmail you enabled for the Gmail API, you'll see "You’ve been given access to an app that’s currently being tested. You should only continue if you know the developer that invited you."; if you sign in with another Gmail, you'll see "Gmail Agent App has not completed the Google verification process. The app is currently being tested, and can only be accessed by developer-approved testers. If you think you should have access, contact the developer."
In the latter case, go to APIs & Services > OAuth consent screen > Test users, click the + ADD USERS button, and you'll see this message: While publishing status is set to "Testing", only test users are able to access the app. Allowed user cap prior to app verification is 100, and is counted over the entire lifetime of the app.
  2879. After clicking Continue, check the Select all checkbox to enable both settings required for running the agent:
  2880. ```
  2881. View your email messages and settings.
  2882. Manage drafts and send emails.
  2883. ```
Finally, copy the authorization code and paste it into the Terminal, hit Enter, and you'll see the agent's initial greeting (which will likely differ between runs because Ollama's default temperature of 0.8 is used - see [Ollama's model file](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values) for details), such as:
  2885. ```
  2886. Hello! I'm Email Agent, here to help you manage your email account with ease.
  2887. What would you like to do today? Do you want me to:
  2888. Check and respond to new emails
  2889. Compose a new email
  2890. Organize your inbox with filters or labels
  2891. Delete unwanted emails
  2892. Something else?
  2893. Let me know how I can assist you!
  2894. Your ask:
  2895. ```
If you cancel here and run the command `python main.py --email <your_gmail_address>` again, you should see the agent greeting right away, without needing to enter an authorization code - unless you enter a different Gmail address for the first time. In fact, for each authorized (added as a test user) Gmail address, a file `token_xxxx@gmail.com.pickle` is created which contains the authorized token.
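For reference, here is a minimal sketch of how such token caching is typically implemented with `google-auth-oauthlib` (the actual code in main.py may differ in details; the two scopes are assumptions matching the settings listed above):
```
import os
import pickle

from google_auth_oauthlib.flow import InstalledAppFlow

SCOPES = ["https://www.googleapis.com/auth/gmail.readonly",
          "https://www.googleapis.com/auth/gmail.compose"]

def get_credentials(email):
    token_file = f"token_{email}.pickle"
    if os.path.exists(token_file):  # reuse the cached token if present
        with open(token_file, "rb") as f:
            return pickle.load(f)
    flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
    creds = flow.run_console()  # prints the URL and asks for the authorization code
    with open(token_file, "wb") as f:  # cache for subsequent runs
        pickle.dump(creds, f)
    return creds
```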
  2897. See the example asks and interaction log above for the types of asks you may enter.
  2898. # Implementation Notes
  2899. Notes here mainly cover how custom functions are defined, how Gmail API based functions are implemented, and how an Agent class is defined to handle memory for contextual chat and perform pre- and post-processing on the tool calling.
  2900. ## Available Custom Functions (Tools) Definition
  2901. The `functions_prompt.py` defines the following six custom functions, as part of the system prompt (along with examples for each function call spec that Llama should return):
  2902. * list_emails_function
  2903. * get_email_function
  2904. * send_email_function (new, reply, forward)
  2905. * get_pdf_summary_function
  2906. * create_draft_function
  2907. * send_draft_function
  2908. Below is an example function call spec in JSON format, for the user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb":
  2909. ```
  2910. {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
  2911. ```
Before LLMs, it'd be a REAL pain to cover ALL the possible natural language user inputs that can be and should be translated into the same semantic representation (if you've done Amazon Alexa Skills or Google Assistant development, or any pre-LLM NLU work before, you'd know that the JSON format above is the same as the intent-slots representation). Now LLMs such as Llama do most of the heavy lifting in translating an open natural language input into its semantic representation.
  2913. But still, if you look at how the `list_emails_function` (which is used to search for emails based on a user query) is defined below, you'd see a lot of work would be needed to convert the user's asks to the filter values the Gmail API can accept:
```
list_emails_function = """
{
    "type": "function",
    "function": {
        "name": "list_emails",
        "description": "Return a list of emails matching an optionally specified query.",
        "parameters": {
            "type": "dict",
            "properties": [
                {
                    "maxResults": {
                        "type": "integer",
                        "description": "The default maximum number of emails to return is 100; the maximum allowed value for this field is 500."
                    }
                },
                {
                    "query": {
                        "type": "string",
                        "description": "One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than; 3) Status Filters: read, unread, starred, important; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not)."
                    }
                }
            ],
            "required": []
        }
    }
}
"""
```
In fact, even though many hours of pre-processing work have been done to cover some test examples, not all of the examples in `functions_prompt.py` have been covered and tested.
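For context, here is a minimal sketch of how such a spec might be fed to Llama 3.1 via the `ollama` package; the actual prompt assembly lives in `functions_prompt.py` and `main.py`, so the system prompt below is a placeholder:
```
import ollama

system_prompt = "<the six function specs and examples from functions_prompt.py>"
response = ollama.chat(
    model="llama3.1",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "do i have emails with attachments larger than 5mb?"},
    ],
)
print(response["message"]["content"])
# if Llama picks a tool, the content should be a spec such as:
# {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
```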
  2944. ## Actual Function Call Implementation
For each defined custom function call, its implementation using the Gmail API lives in `email_agent.py`. For example, `list_emails` is defined as follows:
```
def list_emails(query='', max_results=100):
    emails = []
    next_page_token = None
    while True:
        response = service.users().messages().list(
            userId=user_id,
            maxResults=max_results,
            pageToken=next_page_token,
            q=query
        ).execute()
        if 'messages' in response:
            for msg in response['messages']:
                sender, subject, received_time = get_email_info(msg['id'])
                emails.append(
                    {
                        "message_id": msg['id'],
                        "sender": sender,
                        "subject": subject,
                        "received_time": received_time
                    }
                )
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    return emails
```
  2973. The function will be called by our agent after a user ask such as "do i have emails with attachments larger than 5mb" gets Llama's response below:
  2974. ```
  2975. {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
  2976. ```
  2977. ## The Agent class
Implemented also in `email_agent.py`, the Agent class uses 3 instance members to allow for context-aware asks to the agent, giving it short-term memory:
1. `messages`: this list holds all the previous user asks and the function call results based on Llama's response to the user asks, making Llama able to answer follow-up questions such as "how about 5mb" (after the initial ask "attachments larger than 10mb") or "how about from yyy@gmail.com" (after the ask "any emails from xxx@gmail.com").
  2980. 2. `emails`: this list holds a list of emails that matches the user query, so follow-up questions such as "what kind of attachments for the email with subject xxx" can be answered.
  2981. 3. `draft_id`: this is used to handle the ask "send the draft" after an initial ask such as "draft an email to xxx".
The `__call__` method of `Agent` includes the call to Llama with the `messages` and parses the Llama response: if it's a tool calling spec in JSON, the function name and parameters are returned; if Llama doesn't return a tool calling spec, it means no custom tool was found for the user ask, so the Llama response is returned directly:
```
try:
    res = json.loads(result.split("<|python_tag|>")[-1])
    function_name = res['name']
    parameters = res['parameters']
    return {"function_name": function_name,
            "parameters": parameters}
except:
    return result
```
Also implemented there are both the pre-processing logic, mainly to convert some parameter values in Llama's responses to what the Gmail API accepts, and the post-processing logic to convert function call results to user-friendly natural language.
  2994. ```
  2995. function_name = result["function_name"]
  2996. func = globals()[function_name]
  2997. parameters = result["parameters"]
  2998. ... <pre-processing>
  2999. result = func(**parameters)
  3000. ... <post-processing>
  3001. ```
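The exact conversions are app-specific; below is a hypothetical example (not the actual email_agent.py code) of what this kind of pre-processing can look like:
```
def preprocess(function_name, parameters):
    # Hypothetical sketch: coerce types and normalize values before the API call
    if function_name == "list_emails":
        if "maxResults" in parameters:
            # Llama may return the number as a string; the Gmail API expects an integer
            parameters["maxResults"] = int(parameters["maxResults"])
        # e.g. map "larger than 5mb" to the "larger:5mb" filter Gmail accepts
        query = parameters.get("query", "")
        parameters["query"] = query.replace("larger than ", "larger:")
    return parameters
```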
When you try out the app, you'll likely find that further pre- and post-processing is still needed to make it production ready. In a great video on [Vertical LLM Agents](https://www.youtube.com/watch?v=eBVi_sLaYsc), Jake Heller said "after passes frankly even like 100 tests the odds that it will do on any random distribution of user inputs of the next 100,000, 100% accurately is very high" and "by the time you've dealt with like all the edge cases... there might be dozens of things you build into your application to actually make it work well and then you get to the prompting piece and writing out tests and very specific prompts and the strategy for how you break down a big problem into step by step by step thinking and how you feed in the information how you format that information the right way". That's what all the business logic is about. We'll cover decomposing a complicated ask and multi-step reasoning in a future version of the app, and continue to explore the best possible way to streamline the pre- and post-processing.
  3003. ## Debugging output
When running the app, the detailed Llama responses, pre-processed tool call specs and the actual tool calling results are shown inside a `-------------------------` block, e.g.:
  3005. -------------------------
  3006. Calling Llama...
  3007. Llama returned: {'function_name': 'list_emails', 'parameters': {'query': 'subject:papers to read has:attachment'}}.
  3008. Calling tool to access Gmail API: list_emails, {'query': 'subject:papers to read has:attachment'}...
  3009. Tool calling returned: [{'message_id': '1936ef72ad3f30e8', 'sender': 'xxx@gmail.com', 'subject': 'Fwd: papers to read', 'received_time': '2024-11-27 10:51:51 PST'}, {'message_id': '1936b819706a4923', 'sender': 'Jeff Tang <xxx@gmail.com>', 'subject': 'papers to read', 'received_time': '2024-11-26 18:44:19 PST'}]
  3010. -------------------------
  3011. # TODOs
  3012. 1. Port the app to using [Llama Stack](https://github.com/meta-llama/llama-stack) Agents API.
  3013. 2. Improve the search, reply, forward, create email draft, and query about types of attachments.
  3014. 3. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails.
  3015. 4. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete.
  3016. 5. Implement the async behavior of the agent - schedule an email to be sent later.
  3017. 6. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods.
7. Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta).
  3019. 8. Implement reflection - on the tool calling spec and results.
  3020. 9. Introduce multiple-agent collaboration.
  3021. 10. Implement the agent observability.
  3022. 11. Compare different agent frameworks using the app as the case study.
  3023. 12. Add and implement a test plan and productionize the app.
  3024. # Resources
  3025. 1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/)
  3026. 2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns).
  3027. 3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents)
  3028. 4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf)
  3029. 5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack)
  3030. 6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks)
  3031. 7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/)
  3032. 8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks).
  3033. 9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web)
10. A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), though it does not include [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents.
  3035. 11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents.
  3036. ================================================
  3037. FILE: end-to-end-use-cases/email_agent/requirements.txt
  3038. ================================================
  3039. google-auth==2.27.0
  3040. google-auth-oauthlib==0.4.6
  3041. google-auth-httplib2==0.1.0
  3042. google-api-python-client==2.34.0
  3043. pytz
  3044. beautifulsoup4
  3045. ollama
  3046. pypdf
  3047. ================================================
  3048. FILE: end-to-end-use-cases/github_triage/README.md
  3049. ================================================
  3050. # Automatic Issues Triaging with Llama
This tool utilizes an off-the-shelf Llama model to analyze issues, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.
  3052. ## Features
  3053. The tool performs the following tasks:
  3054. * Fetches issue threads from a specified repository
  3055. * Analyzes issue discussions and generates annotations such as category, severity, component affected, etc.
  3056. * Categorizes all issues by theme
  3057. * Synthesizes key challenges faced by users, along with probable causes and remediations
  3058. * Generates a high-level executive summary providing insights on diagnosing and improving the developer experience
  3059. For a step-by-step look, check out the [walkthrough notebook](walkthrough.ipynb).
  3060. ## Getting Started
  3061. ### Installation
  3062. ```bash
  3063. pip install -r requirements.txt
  3064. ```
  3065. ### Setup
  3066. 1. **API Keys and Model Service**: Set your GitHub token for API calls. Some privileged information may not be available if you don't have push-access to the target repository.
2. **Model Configuration**: Set the appropriate values in the `model` section of [config.yaml](config.yaml) for using Llama via vLLM or Groq.
3. **JSON Schemas**: Edit the output JSON schemas in [config.yaml](config.yaml) to ensure consistency in outputs. vLLM supports JSON-decoding via the `guided_json` generation argument, while Groq requires passing the schema in the system prompt, as sketched below.
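For illustration, a minimal sketch of passing a JSON schema to a vLLM OpenAI-compatible server via `guided_json`; the endpoint, model name, and schema below are placeholders, and your real values come from config.yaml:
```
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")  # placeholder endpoint
schema = {"type": "object", "properties": {"category": {"type": "string"}}}
resp = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model
    messages=[{"role": "user", "content": "Categorize this issue: ..."}],
    extra_body={"guided_json": schema},  # vLLM's guided decoding argument
)
print(resp.choices[0].message.content)
```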
  3069. ### Running the Tool
  3070. ```bash
  3071. python triage.py --repo_name='meta-llama/llama-cookbook' --start_date='2024-08-14' --end_date='2024-08-27'
  3072. ```
  3073. ### Output
  3074. The tool generates:
  3075. * CSV files with `annotations`, `challenges`, and `overview` data, which can be persisted in SQL tables for downstream analyses and reporting.
  3076. * Graphical matplotlib plots of repository traffic, maintenance activity, and issue attributes.
  3077. * A PDF report for easier reading and sharing.
  3078. ## Config
  3079. The tool's configuration is stored in [config.yaml](config.yaml). The following sections can be edited:
* **GitHub Token**: Use a token that has push-access on the target repo.
  3081. * **model**: Specify the model service (`vllm` or `groq`) and set the endpoints and API keys as applicable.
  3082. * **prompts**: For each of the 3 tasks Llama does in this tool, we specify a prompt and an output JSON schema:
  3083. * `parse_issue`: Parsing and generating annotations for the issues
  3084. * `assign_category`: Assigns each issue to a category specified in an enum in the corresponding JSON schema
  3085. * `get_overview`: Generates a high-level executive summary and analysis of all the parsed and generated data
  3086. ## Troubleshooting
  3087. * If you encounter issues with API calls, ensure that your GitHub token is set correctly and that you have the necessary permissions.
  3088. * If you encounter issues with the model service, check the configuration values in [config.yaml](config.yaml).
  3089. ================================================
  3090. FILE: end-to-end-use-cases/github_triage/requirements.txt
  3091. ================================================
  3092. kaleido
  3093. plotly
  3094. openai
  3095. groq
  3096. fpdf
  3098. ================================================
  3099. FILE: end-to-end-use-cases/long_context/H2O/README.md
  3100. ================================================
  3101. ## Run Llama with H2O for long context inference
  3102. ### Overview:
Heavy-Hitter Oracle (H2O) is an efficient inference framework for LLMs. During the generative inference of transformers, the size of the KV cache grows linearly with the sequence length (prompt length + generation length). The KV cache is usually significantly larger than the model parameters, which constrains the inference throughput. H2O identifies the critical KV pairs and evicts the unnecessary ones, maintaining a small cache size and thus improving throughput.
Besides, LLMs usually generalize poorly to long sequences during inference. H2O handles this issue by maintaining only the heavy-hitter tokens and the most recent tokens. Incorporated with the positional rolling strategy (reassigning each KV's position to its position in the KV cache instead of in the original sequence), H2O can process sequences much longer than the pretrained context window. Unlike other approaches, such as [Positional Interpolation](https://arxiv.org/abs/2306.15595), H2O is a KV cache policy and does not involve any training for long context processing.
The current implementation supports Llama-1/2/3, from 7B to 70B. Since H2O only maintains the most important KV pairs, it might miss some important information in the middle of the content for some knowledge-intensive tasks.
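To make the eviction policy concrete, here is a toy sketch of the selection rule (the real implementation operates on per-head attention tensors, not Python lists):
```
def h2o_keep_indices(acc_attn_scores, num_heavy, num_recent):
    # Keep the most recent KVs plus the "heavy hitters" with the largest
    # accumulated attention scores; everything else gets evicted.
    seq_len = len(acc_attn_scores)
    recent = set(range(max(0, seq_len - num_recent), seq_len))
    candidates = [i for i in range(seq_len) if i not in recent]
    heavy = sorted(candidates, key=lambda i: acc_attn_scores[i], reverse=True)[:num_heavy]
    return sorted(recent | set(heavy))

# e.g. a window of 8 (4 heavy + 4 recent) over 12 cached KVs:
scores = [0.9, 0.1, 0.8, 0.2, 0.7, 0.1, 0.1, 0.6, 0.1, 0.1, 0.1, 0.1]
print(h2o_keep_indices(scores, num_heavy=4, num_recent=4))  # [0, 2, 4, 7, 8, 9, 10, 11]
```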
For more details, please refer to the paper: **https://arxiv.org/pdf/2306.14048**
  3107. **Note: this implementation is tested with transformers == 4.39.0**
  3108. ### Evaluation on Summarization Tasks
The following example runs inference of Llama-2-7b and Meta-Llama-3-8B on the XSUM summarization task. We use `--enable_h2o_generation` to enable the H2O algorithm, which only keeps the heavy-hitter and local KV pairs. Use `--num_window_length` to decide the KV cache size; the number of local and heavy-hitter KV pairs each equals half of `--num_window_length` (optionally, the number of heavy-hitters can also be set directly with `--num_heavy_hitter_tokens`). Also, use `--enable_position_rolling` to enable position rolling in the KV cache, which assigns each KV the position it occupies in the KV cache instead of its position in the original sequence. Enabling positional rolling is important when the sequence length exceeds the pretrained context window, e.g., 8K in Llama-3.
  3110. ```
  3111. python run_summarization.py \
  3112. --input-path data/summarization/xsum.jsonl \
  3113. --output-path summarization_output/xsum_h2o.jsonl \
  3114. --model-name meta-llama/Meta-Llama-3-8B \
  3115. --enable_h2o_generation
  3116. ```
  3117. ##### **Results**
Expected results on XSUM (Rouge-2 score, the higher the better) from the above scripts on Llama-2/3 models. The sequence length of the inputs is ~2k. Here we constrain the size of the KV cache, allowing only n KVs to be written/read after the prefilling stage, where n ranges from **64** to **full** (maintaining all the KV pairs). With 128 KVs, performance matches the full baseline (~2k KVs), while degradation is observed with 64 KVs. Also, maintaining a smaller KV cache reduces the I/O cost of KVs, so we can achieve better throughput.
  3119. | KV Cache Size | 64 | 128 | 256 | 512 | 1024 | Full |
  3120. | ------------- | ------ | ------ | ------ | ------ | ------ | ------ |
  3121. | Llama-2-7B | 0.0439 | 0.1127 | 0.1148 | 0.1182 | 0.1170 | 0.1164 |
  3122. | Llama-2-13B | 0.1180 | 0.1217 | 0.1243 | 0.1291 | 0.1302 | 0.1332 |
  3123. | Llama-3-8B | 0.1107 | 0.1189 | 0.1200 | 0.1347 | 0.1290 | 0.1311 |
  3124. ### One Demo on Streaming to "Infinite" Context Length
The following example demonstrates the generation process at "infinite" sequence length. We use MT-Bench data and generate the context sample-by-sample. The KV cache keeps the KV pairs from the previous samples while maintaining a fixed size.
  3126. ```
  3127. # run with full cache
# expected results: 1) normal generation at the early stage; 2) performance collapse and generation slowdown at the middle stage, because the sequence length exceeds the context window and the I/O cost of the KV cache constrains the throughput; 3) OOM errors, then stop.
  3129. bash src/streaming.sh full
  3130. # run with h2o
# expected results: normal generation at all stages.
  3132. # adjust the number of heavy-hitter tokens with --num_heavy_hitter_tokens and size of KV cache with --num_window_length in src/streaming.sh
  3133. bash src/streaming.sh h2o
  3134. ```
  3135. ================================================
  3136. FILE: end-to-end-use-cases/long_context/H2O/requirements.txt
  3137. ================================================
  3138. transformers
  3139. rouge
  3140. xopen
  3141. needlehaystack
  3142. ================================================
  3143. FILE: end-to-end-use-cases/Multi-Modal-RAG/README.md
  3144. ================================================
  3145. # End to End Tutorial on using Llama models for Multi-Modal RAG
  3146. ## Recipe Overview: Multi-Modal RAG using `Llama-3.2-11B` model:
This is a complete workshop on labeling images using the new Llama 3.2-Vision models and performing RAG using the image-captioning capabilities of the model.
  3148. - **Data Labeling and Preparation:** We start by downloading 5000 images of clothing items and labeling them using `Llama-3.2-11B-Vision-Instruct` model
  3149. - **Cleaning Labels:** With the labels based on the notebook above, we will then clean the dataset and prepare it for RAG
  3150. - **Building Vector DB and RAG Pipeline:** With the final clean dataset, we can use descriptions and 11B model to generate recommendations
  3151. ## Requirements:
  3152. Before we start:
  3153. 1. Please grab your HF CLI Token from [here](https://huggingface.co/settings/tokens)
2. Git clone [this dataset](https://huggingface.co/datasets/Sanyam/MM-Demo) inside the Multi-Modal-RAG folder: `git clone https://huggingface.co/datasets/Sanyam/MM-Demo` (Remember to thank the original author by upvoting the [Kaggle Dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full))
  3155. 3. Make sure you grab a together.ai token [here](https://www.together.ai)
  3156. ## Detailed Outline for running:
The notebooks establish the method of approaching the problem; once the method is established, we use the scripts to run it end to end. Order of running the files:
  3158. - Notebook 1: `Part_1_Data_Preparation.ipynb`
  3159. - Script: `label_script.py`
  3160. - Notebook 2: `Part_2_Cleaning_Data_and_DB.ipynb`
  3161. - Notebook 3: `Part_3_RAG_Setup_and_Validation.ipynb`
  3162. - Script: `final_demo.py`
  3163. Here's the detailed outline:
  3164. ### Step 1: Data Prep and Synthetic Labeling:
  3165. In this step we start with an unlabeled dataset and use the image captioning capability of the model to write a description of the image and categorize it.
  3166. [Notebook for Step 1](./notebooks/Part_1_Data_Preparation.ipynb) and [Script for Step 1](./scripts/label_script.py)
To run the script (remember to set `N` to the number of GPUs to use):
  3168. ```
  3169. python scripts/label_script.py --hf_token "your_huggingface_token_here" \
  3170. --input_path "../MM-Demo/images_compressed" \
  3171. --output_path "../MM-Demo/output/" \
  3172. --num_gpus N
  3173. ```
  3174. The dataset consists of 5000 images with some meta-data.
  3175. The first half is preparing the dataset for labeling:
  3176. - Clean/Remove corrupt images
  3177. - Some exploratory analysis to understand existing distribution
  3178. - Merging up categories of clothes to reduce complexity
  3179. - Balancing dataset by randomly sampling images to have an equal distribution for retrieval
The second half consists of labeling the dataset. The Llama 3.2 11B model can only process one image at a time:
  3181. - We load a few images and test captioning
  3182. - We run this pipeline on random images and iterate on the prompt till we feel the model is giving good outputs
  3183. - Finally, we can create a script to label all 5000 images on multi-GPU
  3184. After running the script on the entire dataset, we have more data cleaning to perform.
  3185. ### Step 2: Cleaning up Synthetic Labels and preparing the dataset:
  3186. [Notebook for Step 2](./notebooks/Part_2_Cleaning_Data_and_DB.ipynb)
We notice that even after some fun prompt engineering, the model still hallucinates: there are some issues with the JSON formatting, and it sometimes hallucinates the label categories. Here is how we address this:
  3188. - Re-balance the dataset by mapping correct categories. This is useful to make sure we have an equal distribution in our dataset for retrieval
  3189. - Fix Descriptions so that we can create a CSV
  3190. Now, we are ready to try our vector db pipeline:
  3191. ### Step 3: Notebook 3: MM-RAG using lance-db to validate idea
[Notebook for Step 3](./notebooks/Part_3_RAG_Setup_and_Validation.ipynb) and [Final Demo Script](./scripts/final_demo.py)
With the cleaned descriptions and dataset, we can now store these in a vector DB. Here are the steps:
  3194. - We create embeddings using the text description of our clothes
  3195. - Use 11-B model to describe the uploaded image
  3196. - Ask the model to suggest complementary items to the upload
  3197. - Try to find similar or complementary images based on the upload
  3198. We try the approach with different retrieval methods.
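To sketch the core retrieval idea with lance-db and the same embedding model as the demo script (the two item descriptions below are made up; the notebook builds the real table from the cleaned CSV):
```
import lancedb
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("BAAI/bge-large-en-v1.5")
items = ["blue denim jacket with metal buttons", "white cotton summer dress"]
data = [{"vector": model.encode(t).tolist(), "description": t} for t in items]

db = lancedb.connect("~/.lancedb")
table = db.create_table("clothes_demo", data=data)

query = model.encode("something to wear with a blue jacket").tolist()
print(table.search(query).limit(1).to_list()[0]["description"])
```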
  3199. Finally, we can bring this all together in a Gradio App.
  3200. For running the script:
  3201. ```
  3202. python scripts/final_demo.py \
  3203. --images_folder "../MM-Demo/compressed_images" \
  3204. --csv_path "../MM-Demo/final_balanced_sample_dataset.csv" \
  3205. --table_path "~/.lancedb" \
  3206. --api_key "your_together_api_key" \
  3207. --default_model "BAAI/bge-large-en-v1.5" \
  3208. --use_existing_table
  3209. ```
Note: We can further improve the description prompt. You will notice that sometimes the description starts with the title of the clothing item, which results in retrieval of "similar" clothes instead of "complementary" items. The app flow is:
  3211. - Upload an image
  3212. - 11B model describes the image
  3213. - We retrieve complementary clothes to wear based on the description
  3214. - You can keep the loop going by chatting with the model
  3215. ## Resources used:
Credit and thanks to the models and resources used in this showcase:
Firstly, thanks to the author for providing [this dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full) on which we base our exercise.
  3218. - [Llama-3.2-11B-Vision-Instruct Model](https://www.llama.com/docs/how-to-guides/vision-capabilities/)
  3219. - [Lance-db for vector database](https://lancedb.com)
  3220. - [This Kaggle dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full)
  3221. - [HF Dataset](https://huggingface.co/datasets/Sanyam/MM-Demo) Since output of the model can be non-deterministic every time we run, we will use the uploaded dataset to give a universal experience
  3222. - [Together API for demo](https://www.together.ai)
  3223. ================================================
  3224. FILE: end-to-end-use-cases/multilingual/README.md
  3225. ================================================
  3226. # Extending Llama to a new language
Authored by: Sarvam team
  3228. In this recipe, we will see how to add a new language to the Llama family of models. The steps are quite general and can be easily adapted to other models as well. Using this recipe, you should be able to replicate the findings of [OpenHathi](https://huggingface.co/sarvamai/OpenHathi-7B-Hi-v0.1-Base).
  3229. Please read more about OpenHathi [here](https://x.com/SarvamAI/status/1734645628288831557)
  3230. ## Data
  3231. The original OpenHathi model uses a combination of [Sangraha](https://huggingface.co/datasets/ai4bharat/sangraha) and Wikipedia as its primary data sources. If the reader is interested in using these sources, they would also have to preprocess the data: clean, filter, and deduplicate. See [Setu](https://github.com/AI4Bharat/setu) for an easy way to do this at scale.
  3232. In this tutorial, we will use the [Varta](https://huggingface.co/datasets/rahular/varta) dataset which contains 40M+ news articles taken from [DailyHunt](https://m.dailyhunt.in/). Since this data is already high-quality, we can skip the pre-processing step mentioned above. We will use the Hindi subset here, but you can add any other language present in the dataset by only passing the right language code (advanced users can also tweak the code to add multiple languages at once).
  3233. ## Tokenizer
  3234. Our first step towards augmenting a new language to an LLM is creating a better tokenizer. We define 'better' in terms of fertility score or the number of in-language tokens present in the tokenizer. Note that we should add new tokens without disturbing the original vocabulary, and therefore creating a better tokenizer usually involves 2 steps: (i) building a new, in-language only tokenizer, and (ii) merging this new tokenizer with the original.
  3235. ### Building the in-language tokenizer
  3236. For this, we will first download and prepare the data for training the tokenizer:
  3237. ```
  3238. python prepare_data.py --split=validation --lang=hi --docs_to_sample=10000 --save_path=./data
  3239. ```
  3240. Here we sample 10,000 Hindi documents from the validation split (we should ideally sample from the training split, but this is much faster) and save it as a text file inside `./data`. Next, we use this text to train a Hindi-only [sentencepiece](https://github.com/google/sentencepiece) tokenizer with a vocabulary size of 16,000.
  3241. ```
  3242. python train_tokenizer.py --data_file=./data/hi.txt --save_path=./hi_tokenizer --vocab_size=16000
  3243. ```
  3244. This creates a new sentencepiece Hindi tokenizer and saves it in `./hi_tokenizer`.
  3245. ### Merging the tokenizers
  3246. This process can again be divided into 2 steps:
  3247. - add new tokens to the original Llama2 tokenizer without disturbing its original vocabulary in any way
  3248. - expand the input and output embedding matrices of Llama2 to be equal to the new vocabulary size
  3249. We can do the first step by (i) downloading Llama2's `tokenizer.model` file, (ii) loading our Hindi `tokenizer.model` file, (iii) appending the Hindi tokens to Llama2 tokenizer's vocabulary if they are not already present, and (iv) save the extended tokenizer for future use. All this can be done by running
  3250. ```
  3251. python extend_tokenizer.py --new_tokenizer_path=./hi_tokenizer --extended_tokenizer_save_path=./extended_tokenizer
  3252. ```
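Under the hood, the merge can be done with sentencepiece's protobuf API; here is a minimal sketch of the core logic (the actual extend_tokenizer.py handles more details, and the paths below are illustrative):
```
from sentencepiece import sentencepiece_model_pb2 as sp_pb2

# Load both tokenizer protos
llama_spm = sp_pb2.ModelProto()
llama_spm.ParseFromString(open("llama2_tokenizer/tokenizer.model", "rb").read())
hi_spm = sp_pb2.ModelProto()
hi_spm.ParseFromString(open("hi_tokenizer/tokenizer.model", "rb").read())

# Append Hindi pieces that Llama2's vocabulary doesn't already contain
existing = {p.piece for p in llama_spm.pieces}
for p in hi_spm.pieces:
    if p.piece not in existing:
        new_piece = sp_pb2.ModelProto.SentencePiece()
        new_piece.piece = p.piece
        new_piece.score = 0
        llama_spm.pieces.append(new_piece)

with open("extended_tokenizer/tokenizer.model", "wb") as f:
    f.write(llama_spm.SerializeToString())
```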
  3253. Now, you have a new Llama2 tokenizer which works the same way on English text but can efficiently tokenize Hindi words as well. You can also test to see if it works as intended:
  3254. ```
  3255. >>> from transformers import LlamaTokenizer
  3256. >>> llama_tokenizer = LlamaTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat-hf')
  3257. >>> our_tokenizer = LlamaTokenizer.from_pretrained('./extended_tokenizer')
>>> for i in range(len(llama_tokenizer)):
...     assert llama_tokenizer.convert_ids_to_tokens(i) == our_tokenizer.convert_ids_to_tokens(i), f"Token mismatch at index {i}."
  3260. ...
  3261. >>> text = "मैं एक अच्छा हाथी हूँ"
  3262. >>> llama_tokenizer.tokenize(text)
  3263. ['▁', 'म', 'ै', 'ं', '▁', '<0xE0>', '<0xA4>', '<0x8F>', 'क', '▁', 'अ', 'च', '्', '<0xE0>', '<0xA4>', '<0x9B>', 'ा', '▁', 'ह', 'ा', 'थ', 'ी', '▁', 'ह', 'ू', '<0xE0>', '<0xA4>', '<0x81>']
  3264. >>> our_tokenizer.tokenize(text)
  3265. ['▁मैं', '▁एक', '▁अच', '्', 'छा', '▁हाथी', '▁हूँ']
  3266. ```
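The second step, expanding the embedding matrices, can be done with Transformers' `resize_token_embeddings`; a minimal sketch (the new rows are randomly initialized and then learned during continual pre-training):
```
from transformers import LlamaForCausalLM, LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("./extended_tokenizer")
model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
# Grow the input and output embeddings to the new vocabulary size
model.resize_token_embeddings(len(tokenizer))
```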
  3267. ## Continual pre-training
  3268. OpenHathi uses a two-stage pre-training process:
  3269. - Phase 1: learn to translate paragraphs of text (use translated text as context and generate the original text, ~15B tokens)
  3270. - Phase 2: bilingual next token prediction (train on text where the language changes after every sentence, ~15B tokens)
  3271. Note: OpenHathi's final data mixture also contains monolingual data and romanized transliterations.
  3272. We can easily create data for both phases using any translation model. OpenHathi uses [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2). We provide sample code for both phases below.
  3273. ### Phase 1
  3274. With the assumption that we don't have source-native data, let us first get some English data to translate.
  3275. ```
from datasets import load_dataset

ds = load_dataset("rahular/varta", split="train", streaming=True)
english_paragraphs = []
for d in ds:
    if d["langCode"] != "en": continue
    english_paragraphs.append(" ".join(d["text"].split("\n")))
  3282. ```
Now, our goal is to create data in the format `{translated_paragraph}\n\n{english_paragraph}`. We can use the `translate_paragraph` function ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L150)) from the IndicTrans2 codebase to do this easily.
  3284. ```
quantization = ""
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
ip = IndicProcessor(inference=True)

phase1_data = []
for para in english_paragraphs:
    trans_para = translate_paragraph(para, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
    phase1_data.append({"text": f"{trans_para}\n\n{para}"})

# if you want to save it for future, you can do so easily with HF datasets
from datasets import Dataset
phase1_ds = Dataset.from_list(phase1_data)
phase1_ds.save_to_disk("data/phase1")
  3297. ```
  3298. ### Phase 2
This is almost the same as phase 1, except that we have to replace the original sentences in an alternating manner to get the data in the required format. We can use the `split_sentences` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L60)) and `batch_translate` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L109)) functions to do this.
  3300. ```
quantization = ""
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
ip = IndicProcessor(inference=True)

phase2_data = []
for para in english_paragraphs:
    en_sents = split_sentences(para, "eng_Latn")
    trans_sents = batch_translate(en_sents, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
    final_para = []
    for idx, (en_sent, trans_sent) in enumerate(zip(en_sents, trans_sents)):
        sent_to_append = en_sent if idx % 2 == 0 else trans_sent
        final_para.append(sent_to_append)
    phase2_data.append({"text": " ".join(final_para)})

# if you want to save it for future, you can do so easily with HF datasets
from datasets import Dataset
phase2_ds = Dataset.from_list(phase2_data)
phase2_ds.save_to_disk("data/phase2")
  3318. ```
  3319. ### Train
  3320. Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`.
  3321. OpenHathi was trained on 64 A100 80GB GPUs. Here are the hyperparameters used and other training details:
  3322. - maximum learning rate: 2e-4
  3323. - minimum learning rate: 2e-6
  3324. - optimizer: AdamW (weight decay = 0.1)
  3325. - beta1: 0.9
  3326. - beta2: 0.95
  3327. - lora rank: 128
  3328. - lora alpha: 64
  3329. - lora trainable: q_proj, v_proj, k_proj, o_proj, gate_proj, down_proj, up_proj
  3330. - lora dropout: 0.05
  3331. - block size: 4096
  3332. - global batch size: 4M tokens
  3333. - input and output embeddings are trainable
  3334. - lr schedule: cosine decay with warmup (warmup ratio = 0.1, number of cycles = 3)
  3335. - deepspeed stage 2
  3336. - dtype: bfloat16
  3337. The resulting (partial) loss plots from the OpenHathi training are shown below:
  3338. Phase 1: train loss
  3339. ![Phase 1: train loss](img/phase1_train_loss.png)
  3340. Phase 1: eval loss
  3341. ![Phase 1: eval loss](img/phase1_eval_loss.png)
  3342. Phase 2: train loss
  3343. ![Phase 2: train loss](img/phase2_train_loss.png)
  3344. Phase 2: eval loss
  3345. ![Phase 2: eval loss](img/phase2_eval_loss.png)
  3346. ================================================
  3347. FILE: end-to-end-use-cases/NotebookLlama/README.md
  3348. ================================================
  3349. ## NotebookLlama: An Open Source version of NotebookLM
  3350. ![NotebookLlama](./resources/Outline.jpg)
  3351. [Listen to audio from the example here](./resources/_podcast.mp3)
  3352. This is a guided series of tutorials/notebooks that can be taken as a reference or course to build a PDF to Podcast workflow.
  3353. You will also learn from the experiments of using Text to Speech Models.
  3354. It assumes zero knowledge of LLMs, prompting and audio models, everything is covered in their respective notebooks.
  3355. ### Outline:
  3356. Here is step by step thought (pun intended) for the task:
  3357. - Step 1: Pre-process PDF: Use `Llama-3.2-1B-Instruct` to pre-process the PDF and save it in a `.txt` file.
  3358. - Step 2: Transcript Writer: Use `Llama-3.1-70B-Instruct` model to write a podcast transcript from the text
  3359. - Step 3: Dramatic Re-Writer: Use `Llama-3.1-8B-Instruct` model to make the transcript more dramatic
  3360. - Step 4: Text-To-Speech Workflow: Use `parler-tts/parler-tts-mini-v1` and `bark/suno` to generate a conversational podcast
Note 1: In Step 1, we prompt the 1B model to not modify or summarize the text, but strictly to clean up extra characters or garbage characters that might get picked up due to encoding from the PDF. Please see the prompt in Notebook 1 for more details.
  3362. Note 2: For Step 2, you can also use `Llama-3.1-8B-Instruct` model, we recommend experimenting and trying if you see any differences. The 70B model was used here because it gave slightly more creative podcast transcripts for the tested examples.
  3363. Note 3: For Step 4, please try to extend the approach with other models. These models were chosen based on a sample prompt and worked best, newer models might sound better. Please see [Notes](./TTS_Notes.md) for some of the sample tests.
### Detailed steps on running the notebook:
Requirements: a GPU server or an API provider for using the 70B, 8B, and 1B Llama models.
For running the 70B model, you will need GPUs with an aggregated memory of around 140GB to run inference in bfloat16 precision (roughly 70B parameters x 2 bytes per bfloat16 weight, plus overhead).
Note: For our GPU-poor friends, you can also use the 8B and smaller models for the entire pipeline. There is no strong recommendation; the pipeline below is simply what worked best in the first few tests. You should try it and see what works best for you!
- Before getting started, please make sure to log in using the `huggingface-cli` and then launch your Jupyter notebook server, so that you are able to download the Llama models.
You'll need your Hugging Face access token, which you can get from your Settings page [here](https://huggingface.co/settings/tokens). Then run `huggingface-cli login` and paste your access token to complete the login, so that the scripts can download Hugging Face models if needed.
- First, please install the requirements by running the following inside the folder:
```
git clone https://github.com/meta-llama/llama-recipes
cd llama-recipes/end-to-end-use-cases/NotebookLlama/
pip install -r requirements.txt
```
- Notebook 1:
This notebook processes the PDF and, using the `Llama-3.2-1B-Instruct` model, cleans it into a `.txt` file.
Update the first cell with a link to the PDF you would like to use; it can be any link, but please remember to point the first cell of the notebook at the right one.
Please try changing the prompts for the `Llama-3.2-1B-Instruct` model and see if you can improve the results; a rough sketch of this step follows below.
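For orientation, here is what this step roughly amounts to; the PDF path, prompt, and truncation below are simplified stand-ins for what the notebook actually does:
```python
from PyPDF2 import PdfReader
from transformers import pipeline

# Hypothetical input path; the notebook takes a PDF link in its first cell.
reader = PdfReader("input.pdf")
raw_text = "\n".join(page.extract_text() or "" for page in reader.pages)

cleaner = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B-Instruct",
    device_map="auto",
)
# Simplified stand-in for the notebook's prompt: clean, don't summarize.
prompt = (
    "Clean up the following PDF text. Do not modify or summarize it; "
    "only remove garbage characters and encoding artifacts:\n\n" + raw_text[:4000]
)
cleaned = cleaner(prompt, max_new_tokens=2048, return_full_text=False)[0]["generated_text"]

with open("clean_extracted_text.txt", "w") as f:
    f.write(cleaned)
```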
- Notebook 2:
This notebook takes the processed output from Notebook 1 and creatively converts it into a podcast transcript using the `Llama-3.1-70B-Instruct` model. If you are GPU-rich, please feel free to test with the 405B model!
Please try experimenting with the system prompts for the model and see if you can improve the results; also try the 8B model here to see if there is a big difference!
- Notebook 3:
This notebook takes the transcript from earlier and prompts `Llama-3.1-8B-Instruct` to add more dramatization and interruptions in the conversation.
There is also a key detail here: we return the conversation as a tuple, which makes our lives easier later (see the sketch after this section). Yes, studying Data Structures 101 was actually useful for once!
For our TTS logic, we use two different models that behave differently with certain prompts, so we prompt the model to add specifics for each speaker accordingly.
Please again try changing the system prompt and see if you can improve the results. We also encourage testing the lightweight 3B and 1B models at this stage.
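To make that concrete, the returned structure is conceptually a list of (speaker, line) tuples like the illustrative sketch below; the exact speaker names and effect tags come from the notebook's prompts, not from this example:
```python
# Illustrative only: one tuple per conversational turn, so the TTS step can
# route Speaker 1 to Parler and Speaker 2 to Bark with per-speaker prompts.
conversation = [
    ("Speaker 1", "Welcome back! Today we are unpacking knowledge distillation."),
    ("Speaker 2", "Umm, hold on [laughs] - what does distillation even mean here?"),
]
for speaker, line in conversation:
    print(f"{speaker}: {line}")
```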
- Notebook 4:
Finally, we take the results from the last notebook and convert them into a podcast. We use the `parler-tts/parler-tts-mini-v1` and `suno/bark` models for the conversation.
The speakers and the prompt for the Parler model were decided based on experimentation and suggestions from the model authors. Please try experimenting; you can find more details in the resources section.
#### Note: Right now there is one known issue: Parler requires transformers 4.43.3 or earlier, while steps 1 to 3 of the pipeline need the latest version, so we simply switch versions (e.g. `pip install "transformers<=4.43.3"`) in the last notebook.
### Next-Improvements/Further ideas:
- Speech model experimentation: The TTS model is the limiting factor in how natural the result sounds. This can probably be improved with a better pipeline and with the help of someone more knowledgeable; PRs are welcome! :)
- LLM vs. LLM debate: Another approach to writing the podcast would be to have two agents debate the topic of interest and write the podcast outline. Right now we use a single LLM (70B) to write the podcast outline.
- Testing 405B for writing the transcripts
- Better prompting
- Support for ingesting a website, audio file, YouTube links, and more. Again, we welcome community PRs!
### Resources for further learning:
- https://betterprogramming.pub/text-to-audio-generation-with-bark-clearly-explained-4ee300a3713a
- https://colab.research.google.com/drive/1dWWkZzvu7L9Bunq9zvD-W02RFUXoW-Pd?usp=sharing
- https://colab.research.google.com/drive/1eJfA2XUa-mXwdMy7DoYKVYHI1iTd9Vkt?usp=sharing#scrollTo=NyYQ--3YksJY
- https://replicate.com/suno-ai/bark?prediction=zh8j6yddxxrge0cjp9asgzd534
- https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c
  3404. ================================================
  3405. FILE: end-to-end-use-cases/NotebookLlama/requirements.txt
  3406. ================================================
# Core dependencies
PyPDF2>=3.0.0
torch>=2.0.0
transformers>=4.46.0
accelerate>=0.27.0
rich>=13.0.0
ipywidgets>=8.0.0
tqdm>=4.66.0
# Optional but recommended
jupyter>=1.0.0
ipykernel>=6.0.0
  3420. ================================================
  3421. FILE: end-to-end-use-cases/NotebookLlama/TTS_Notes.md
  3422. ================================================
### Notes from TTS Experimentation
For the TTS pipeline, *all* of the top models from HuggingFace and Reddit were tried.
The goal was to use models that were easy to set up and sounded less robotic, with the ability to include sound effects like laughter, etc.
#### Parler-TTS
Minimal code to run their models:
```
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer
import IPython.display as ipd

device = "cuda" if torch.cuda.is_available() else "cpu"

model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
# Define text and description
text_prompt = "This is where the actual words to be spoken go"
description = """
Laura's voice is expressive and dramatic in delivery, speaking at a fast pace with a very close recording that almost has no background noise.
"""
input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(text_prompt, return_tensors="pt").input_ids.to(device)
generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()
ipd.Audio(audio_arr, rate=model.config.sampling_rate)
```
The really cool aspect of these models is the ability to prompt the `description`, which can change the speaker profile and the pacing of the outputs.
Surprisingly, Parler's mini model sounded more natural.
In their [repo](https://github.com/huggingface/parler-tts/blob/main/INFERENCE.md#speaker-consistency) they share the names of speakers that we can use in the prompt.
#### Suno/Bark
Minimal code to run Bark:
```
import torch
from transformers import AutoProcessor, BarkModel
from IPython.display import Audio

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark").to(device)

voice_preset = "v2/en_speaker_6"
sampling_rate = 24000
text_prompt = """
Exactly! [sigh] And the distillation part is where you take a LARGE-model,and compress-it down into a smaller, more efficient model that can run on devices with limited resources.
"""
inputs = processor(text_prompt, voice_preset=voice_preset).to(device)
speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
```
Similar to the Parler models, Suno has a [library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c) of speakers.
v9 from their library sounded robotic, so we use Parler for our first speaker and the best voice from Bark for the second.
The incredible thing about the Bark model is the ability to add sound effects: `[Laugh]`, `[Gasps]`, `[Sigh]`, `[clears throat]`. Making words capital causes the model to emphasize them.
Adding `-` gives a break in the text. We utilize this knowledge when we rewrite the transcript using the 8B model to add effects to our transcript.
Note: The authors suggest using `...`. However, this didn't work as effectively as adding a hyphen during trials.
#### Hyper-parameters:
Bark models have two parameters we can tweak: `temperature` and `semantic_temperature`.
Below are the notes from a sweep; the prompt and speaker were fixed, and this was a vibe test to see which combination gives the best results. The values below are `temperature` and `semantic_temperature`, respectively.
First, fix `temperature` and sweep `semantic_temperature`:
- `0.7`, `0.2`: Quite bland and boring
- `0.7`, `0.3`: An improvement over the previous one
- `0.7`, `0.4`: Further improvement
- `0.7`, `0.5`: This one didn't work
- `0.7`, `0.6`: So-so, didn't stand out
- `0.7`, `0.7`: The best so far
- `0.7`, `0.8`: Further improvement
- `0.7`, `0.9`: Mixed feelings on this one
Now, sweeping the `temperature`:
- `0.1`, `0.9`: Very robotic
- `0.2`, `0.9`: Less robotic but not convincing
- `0.3`, `0.9`: Slight improvement, still not fun
- `0.4`, `0.9`: Still has a robotic tinge
- `0.5`, `0.9`: The laugh was weird on this one, but the voice modulates so much that it feels like the speaker is changing
- `0.6`, `0.9`: Most consistent voice, but has a robotic aftertaste
- `0.7`, `0.9`: Very robotic and the laugh was weird
- `0.8`, `0.9`: Completely ignored the laughter, but it was more natural
- `0.9`, `0.9`: We have a winner, probably
After this, about 30 more sweeps were done with the promising combinations. The best results were at:
```
speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
```
### Notes from other models that were tested:
Promising directions to explore in the future:
- [MeloTTS](https://huggingface.co/myshell-ai/MeloTTS-English): the most popular (ever) on HuggingFace
- [WhisperSpeech](https://huggingface.co/WhisperSpeech/WhisperSpeech): sounded quite natural as well
- [F5-TTS](https://github.com/SWivid/F5-TTS): was the latest release at the time; however, it felt a bit robotic
- E2-TTS: r/LocalLLaMA claims it to be a little better; however, it didn't pass the vibe test
- [xTTS](https://coqui.ai/blog/tts/open_xtts): has great documentation and also seems promising
#### Some more models that weren't tested:
In other words, we leave this as an exercise to readers :D
- [Fish-Speech](https://huggingface.co/fishaudio/fish-speech-1.4)
- [MMS-TTS-Eng](https://huggingface.co/facebook/mms-tts-eng)
- [Metavoice](https://huggingface.co/metavoiceio/metavoice-1B-v0.1)
- [Hifigan](https://huggingface.co/nvidia/tts_hifigan)
- [TTS-Tacotron2](https://huggingface.co/speechbrain/tts-tacotron2-ljspeech)
- [VALL-E X](https://github.com/Plachtaa/VALL-E-X)
  3503. ================================================
  3504. FILE: end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt
  3505. ================================================
  3506. ===============
  3507. Knowledge Distillation is a methodology that transfers advanced capabilities from leading proprietary Large Language Models (LLMs) to their open-source counterparts, such as LLaMA and Mistral. This paper presents a comprehensive survey of KD's role in imparting advanced knowledge.
  3508. Abstract —In the era of Large Language Models, Knowledge Distillation emerges as a pivotal methodology for transferring advanced capabilities from proprietary LLMs to open-source counterparts, facilitating their self-improvement by employing themselves as teachers.
  3509. xamined through a meticulous survey that delves into the foundational pillars of algorithm, skill, and verticalization, which form the backbone of knowledge distillation and deep learning models. The survey provides a comprehensive examination of key mechanisms within the knowledge distillation framework, specifically focusing on the enhancement of cognitive abilities and their practical implications across various fields, with a particular emphasis on the interplay between data augmentation (DA) and knowledge distillation.
  3510. en-source LLMs, this survey highlights the potential for more accessible, efficient, and powerful AI solutions.
  3511. Most importantly, we advocate for compliance with legal terms that regulate the use of LLMs, ensuring ethical and lawful application of knowledge distillation.
  3512. An associated Github repository is available at https://github.com/Tebmer/Awesome-Knowledge-Distillation-of-LLMs. Index Terms - Large language models, knowledge distillation, data augmentation, skill distillation, supervised fine-tuning
  3513. sophisticated problem-solving capabilities, the core significance of these large language models (LLMs) lies in their emergent abilities, enabling them to tackle a diverse array of tasks with remarkable proficiency.
  3514. their remarkable capabilities, have some notable limitations, particularly when considering the advantages offered by open-source models, such as GPT-4 and Gemini. These models are often expensive, with substantial usage fees and restricted access, making them inaccessible to individuals and smaller organizations.
  3515. ng restrictions and costs. In contrast, open-source LLMs like LLaMA and Mistral bring several advantages. Accessibility and adaptability are key benefits, as they are more readily available to a broader range of users, including researchers and organizations.
  3516. ts. One of the most significant limitations is the smaller model scale, resulting in lower performance on real-world tasks with multiple instructions (Zheng et al., 2023a). Models with fewer parameters struggle to capture the depth and breadth of knowledge embodied in larger models like GPT-4. Additionally, the pre-training investment in these open-source models is typically less substantial. This reduced investment can lead to a narrower range of pre-training data, potentially limiting their understanding and handling of diverse or specialized topics (Liang et al., 2022; Sun et al., 2024a). Fine-tuning steps are often fewer due to resource constraints, hindering model optimization for specific tasks or industries.
  3517. ary models becomes apparent when compared to highly fine-tuned proprietary LLMs. Primarily, the disparity between proprietary and open-source LLMs becomes evident, with proprietary models excelling in complex scenarios, while open-source models excel in a wide range of scenarios. Knowledge distillation, a technique that leverages the advanced capabilities of proprietary models, is used to enhance the competencies of open-source models. This process is similar to transferring the performance of a skilled teacher to a student.
  3518. tillation of LLMs, where a small seed of knowledge is used to prompt the LLM to generate more data with respect to a specific skill or domain (Taori et al., 2023). Furthermore, KD retains its fundamental role in compressing LLMs, making them more efficient without significant loss in performance.
  3519. advanced context following and instruction following**
  3520. **key aspects of knowledge distillation**
  3521. * **contextual understanding**: in-context learning and instruction following
  3522. * **alignment with user intents**: human values/principles and thinking patterns like chain-of-thought
  3523. * **NLP task specialization**: semantic understanding and code generation
  3524. **critical skills for various applications**
  3525. * **healthcare**: accuracy and contextual knowledge
  3526. * **law**: contextual knowledge and precision
  3527. * **science**: contextual knowledge and precision
  3528. ned in the era of LLMs, the benefits of knowledge distillation in the era of LLMs are multifaceted and transformative. Through a suite of distillation techniques, the gap between proprietary and open-source models narrows and is filled. This process streamlines computational requirements and enhances environmental sustainability of AI operations, as open-source models become more proficient with lower overhead.
  3529. ch domains. The escalating need for a comprehensive survey on the knowledge distillation of LLMs stems from the rapidly evolving landscape of AI and the increasing complexity of these models. The ability to efficiently and effectively distill knowledge from proprietary LLMs to open-source ones becomes a practical necessity. This is driven by the need to bridge the knowledge gap between the proprietary and open-source LLMs.
  3530. This need is driven by the 3 models mentioned, including Student, Vicuna, Opt, GPT, and others. These models are being used in various sectors such as law, healthcare, finance, and science, and the ability to distill knowledge from them is becoming increasingly important.
  3531. synthesizefeedbackFeedback input outputSelf-Knowledge outputinputinput YlabelLabelingExpansion X,Y demonstrationsexpandFeature featureinput,outputextractSec.4Sec.5 Sec.3.1Sec.3.2 Fig. 2: An overview of this survey on knowledge distillation of large language models
  3532. es emerging, but there is still much to be learned from the era of Large Language Models (LLMs). In this section, we provide a foundational overview of knowledge distillation, highlighting the role of data augmentation (DA) in this context.
  3533. Traditional techniques, such as supervised fine-tuning, have shown promise in distilling knowledge from LLMs. However, the increasing complexity of these models requires careful consideration of the trade-offs between accuracy and computational resources. To further explore the possibilities of knowledge distillation, we examine methods involving supervised fine-tuning, such as incremental learning and transfer learning.
  3534. Supervised fine-tuning involves training a model on a smaller dataset with the goal of adapting to a specific task or domain. This approach has shown significant improvement in various NLP tasks, but may not be scalable to large-scale applications. In contrast, transfer learning offers a more flexible approach, where a model is trained on a smaller dataset and then fine-tuned on a larger dataset. This can lead to improved performance on a variety of tasks, but requires careful selection of the target dataset.
  3535. Another approach is divergence and similarity, which involve exploring the differences and similarities between the knowledge distillation process and traditional machine learning. Reinforcement learning and ranking optimization are also gaining attention, particularly in the context of knowledge distillation, where the goal is to optimize the distillation process itself. These methods can improve the efficiency and effectiveness of knowledge distillation, but require careful consideration of the trade-offs between exploration and exploitation.
  3536. Skill distillation focuses on enhancing student models to improve their understanding of the task and their ability to perform well on NLP tasks. This can be achieved through various methods, including data augmentation, feature learning, and attention mechanisms. By incorporating these techniques, student models can better understand the context and intentions of the user, leading to improved performance across a variety of tasks.
  3537. We propose several strategies for skill distillation, including:
  3538. mmendation systems, and the evaluation of text generation. In §5, we delve into domain-specific vertical distillation, demonstrating how knowledge distillation techniques are applied in specialized fields such as law, healthcare, finance, and science, highlighting their practical implications and transformative impact. The survey reveals open problems in §6, highlighting current challenges and gaps in knowledge distillation research that present opportunities for future work.
  3539. large, complex model to a smaller, more efficient model, mitigating the challenges of computational demands and resource constraints in deploying large-scale models in practical applications. This process, prior to the era of Large Language Models (LLMs), focused on compacting complex neural networks for deployment in resource-constrained environments, such as mobile devices or edge computing platforms, where computational efficiency was paramount.
  3540. al., 2022a), Alpaca (Taori et al., 2023), Code Alpaca (Chaudhary, 2023) Self-Align (Sun et al., 2024b), WizardLM (Xu et al., 2023a), WizardCoder (Luo et al., 2023a), WizardMath (Luo et al., 2023b), AugGPT (Dai et al., 2023a), TDG (He et al., 2023b), CurationUltraChat (Ding et al., 2023b), Phi-1 (Gunasekar et al., 2023), Phi-1.5 (Li et al., 2023a), Phi-2 (Mar, 2023), Magicoder (Wei et al., 2023), WaveCoder (Yu et al., 2024), ZeroGen (Ye et al., 2022), InPars (Bonifacio et al., 2022)
  3541. Self-Align (Sun et al., 2024b), RLCD (Yang et al., 2024a), ImpDistill (Jung et al., 2023), LMSI (Huang et al., 2023a), ReST (Gulcehre et al., 2023), Self-Rewarding (Yuan et al., 2024a), Baize (Xu et al., 2023b), STaR (Zelikman et al., 2022) DistillationSupervised Fine-TuningAlpaca (Taori et al., 2023), Vicuna (Chiang et al., 2023), WizardLM (Xu et al., 2023a), Self-Instruct (Wang et al., 2022a), Baize (Xu et al., 2023b), STaR (Zelikman et al., 2022), Divergence and SimilarityDistilGPT (Sanh et al., 2019), f-Distill (Wen et al., 2023), MiniLLM (Gu et al., 2024) TED (Liang et al., 2023a), GKD (Agarwal et al., 2024), BabyLlama (Timiryasov and Tastet, 2023) Reinforcement LearningCAI (Bai et al., 2022a), UltraFeedback (Cui et al., 2023a), WizardMath (Luo et al., 2023b), MiniLLM (Gu et al., 2024), GKD (Agarwal et al., 2024), GPT3 Reward (Kwon et al., 2023) Rank Optimization
  3542. ollowingInstruction FollowingSelf-Instruct Wang et al., 2022a, Alpaca Taori et al., 2023, Vicuna Chiang et al., 2023, WizardLM Xu et al., 2023a, Orca Mukherjee et al., 2023, Orca2 Mitra et al., 2023, WizardMath Luo et al., 2023b, Llama-GPT4 Peng et al., 2023a, Multi-turn Dialogue Chiang et al., 2023, Baize Xu et al., 2023b, UltraLLaMA Ding et al., 2023b, CAMEL Li et al., 2023b, OpenChat Wang et al., 2023c, Zephyr Tunstall et al., 2023, RAG Kang et al., 2023a, SAIL Luo et al., 2023c, Self-RAG Asai et al., 2023, AlignmentThinking PatternYe et al., 2023, Orca Mukherjee et al., 2023, Orca2 Wang et al., 2023d, AFT Cheng et al., 2023, KnowPAT Zhang et al., 2023a, PreferenceCAI Bai et al., 2022a, GPT-3 Reward Kwon et al., 2023, ILF Scheurer et al., 2023, ALMoST Kim et al., 2023a, RLEF Roit et al., 2023
  3543. i et al., 2022a), Align Honesty (Yang et al., 2023a), SANDBOX (Liu et al., 2023b), Self-Align (Sun et al., 2024b), UltraFeedback (Cui et al., 2023a), RLCD (Yang et al., 2024a), AgentToolformer (Schick et al., 2023), Graph-ToolFormer (Zhang, 2023), Gorilla (Patil et al., 2023), ToolAlpaca (Tang et al., 2023a), ToolLLM (Qin et al., 2023a), CRAFT (Yuan et al., 2023a), Confucius (Gao et al., 2023b), MLLM-Tool (Wang et al., 2024), α-UMi (Shen et al., 2024), PlanningFireAct (Chen et al., 2023b), AgentTuning (Zeng et al., 2023a), Lumos (Yin et al., 2023a), AUTOACT (Qiao et al., 2024), TPTU-v2 (Kong et al., 2023), NLP Task SpecializationNLUAugGPT (Dai et al., 2023a), GPT Annotation (Gilardi et al., 2023), (Ding et al., 2023a), TDG (He et al., 2023b), SunGen (Gao et al., 2023a), Mix Distill (Chenglin et al., 2023), Annollm (He et al., 2023a), UDG (Wang et al., 2021a), ZeroGen (Ye et al., 2024)
  3544. al., 2023 GPT-3 Labeling Wang et al., 2021b BioGPT Guo et al., 2023a ChatGPT NMT Yang and Nicolai, 2023 Information RetrievalQUILL Srinivasan et al., 2022 Promptgator Dai et al., 2023b InPars Bonifacio et al., 2022 AugTriever Meng et al., 2023 Sun et al., 2023a RankVicuna Pradeep et al., 2023a RankZephyr Pradeep et al., 2023b ExaRanker Ferraretto et al., 2023 Recommendation NDR Mysore et al., 2023 InstrcutRec Zhang et al., 2023b ONCE Liu et al., 2023c Text Generation Evaluation PandaLM Wang et al., 2023b Prometheus Kim et al., 2024 InstructScore Xu et al., 2023d TigerScore Jiang et al., 2023c Auto-J Li et al., 2024a CodeCodeAlpaca Chaudhary, 2023 CodeLlama Rozi `ere et al., 2023 Magicoder Wei et al., 2023 Phi-1 Gunasekar et al., 2023 PERsD Chen et al., 2023 MFTCoder Liu et al., 2023d WaveCoder Yu et al., 2023
  3545. et al., 2023e), SVIT (Zhao et al., 2023b), LVIS-Instruct4V (Wang et al., 2023e), Shikra (Chen et al., 2023c), LSKD (Park et al., 2023), DetGPT (Pi et al., 2023; Zhao et al., 2023c), LRV (Liu et al., 2023f), NExT-GPT (Wu et al., 2023b), Valley (Luo et al., 2023d), ILuvUI (Jiang et al., 2023d), StableLLaVA (Li et al., 2023c), PointLLM (Xu et al., 2023e), Verticalization DistillationLaw (Huang et al., 2023b; Cui et al., 2023b); Medical & Healthcare (Zhang et al., 2023c; Chen et al., 2023d); Finance (Zhang and Yang, 2023); Science (Xie et al., 2023a; Zhang et al., 2024) and Misc. (Dan et al., 2023; Guo et al., 2023b) Fig. 3: Taxonomy of Knowledge Distillation of Large Language Models"
  3546. r network, often through techniques like soft target training, where the student learns from the softened softmax output of the teacher.
  3547. The distillation of knowledge from larger models to smaller ones is a technique used to improve the performance of AI models. In this context, distillation refers to the process of distilling the knowledge from a larger model into a smaller model, allowing it to learn from the teacher model's output.
  3548. The current era of knowledge distillation in large language models (LLMs) has shifted the focus from mere architecture compression to a more nuanced process of knowledge elicitation and transfer. This paradigm change is largely due to the immense knowledge that LLMs like GPT-4 and Gemini possess. The parameters of LLMs make it challenging to compress them using pruning or quantization techniques.
  3549. size, the current focus in llm-based knowledge distillation is to extract and transfer the rich, nuanced understanding that these models have developed the key to this modern approach lies in carefully designed prompts that elicit specific knowledge or capabilities from the llms, tapping into their understanding and capabilities in various domains ranging from natural language understanding to more complex cognitive tasks like reasoning and problem-solving
  3550. explicit training objectives. This era of knowledge distillation also emphasizes the transfer of abstract qualities such as reasoning patterns and preference alignment. This is in stark contrast to the earlier focus on output replication, indicating a shift towards a more holistic and comprehensive transfer of cognitive capabilities. The current techniques involve not just the replication of outputs, but also the emulation of thought processes and decision-making patterns of the teacher model. This involves complex strategies like chain-of-thought prompting, where the student model learns the reasoning process of the teacher, enhancing its problem-solving and decision-making capabilities. 2.2 Relation to Data Augmentation (DA)
  3551. llation, Unlike traditional techniques such as paraphrasing, or back-translation, which primarily aim at expanding the training dataset in a somewhat mechanical manner. DA within the context of LLMs focuses on the generation of novel, context-rich training data tailored to specific domains and skills. This innovation is driven by the unique capabilities of LLMs to generate coherent, diverse, and intricate data samples that closely mimic the nuanced understanding and cognitive abilities of human experts in various fields.
  3552. ource models, through Deep Learning Models (LLMs) are prompted to create targeted, high-quality datasets that are not merely larger in volume but also rich in diversity and specificity. This approach enables the distillation process to be more effective, ensuring that the distilled models replicate the teacher model's output behavior and embody its deep-seated understanding and cognitive strategies. The significance and necessity of Data Augmentation (DA) for achieving Knowledge Domains (KD) in the LLM era cannot be overstated. DA acts as a force multiplier, enabling the distilled models to acquire and refine capabilities that would otherwise require exponentially larger datasets and computational resources. It facilitates a more nuanced and effective transfer of knowledge, focusing on the qualitative aspects of learning rather than quantitative expansion.
  3553. er of LLMs empowers open-source models with the ability to approximate the contextual adeptness, ethical alignment, and deep semantic insights characteristic of their proprietary counterparts thereby democratizing access to advanced AI capabilities and fostering innovation across a broader spectrum of applications and users 2 3 Survey Scope Building on the discussions introduced earlier this survey aims to comprehensively explore the landscape of knowledge distillation within the context of LLMs following a meticulously structured taxonomy as in Figure 3 the survey’s scope is delineated through three primary facets each encapsulating a range of subtopics and methodologies
  3554. undations and methodologies of knowledge distillation. It includes an in-depth exploration of processes involved in constructing knowledge from teacher models (e.g., proprietary LLMs) and integrating this knowledge into student models (e.g., open-source LLMs). Under the umbrella of 'knowledge', we delve into strategies such as labeling, expansion, curation, feature understanding, and feedback mechanisms. The exploration seeks to uncover the various ways in which knowledge can be identified, expanded, and curated for effective distillation. This subsection examines learning approaches like supervised fine-tuning, divergence minimization, and reinforcement learning techniques.
  3555. ow algorithms enable knowledge transfer, allowing open-source models to replicate and sometimes surpass proprietary capabilities. Skill Distillation examines specific competencies and capabilities enhanced through Knowledge Distillation. Contextual discussions follow (Taori et al., 2023; Luo et al., 2023c), including instruction following and retrieval-augmented generation (RAG) capabilities. Alignment research investigates thinking patterns, persona/preference modeling, and value alignment. The 'agent' category focuses on skills like tool usage and planning. NLP task specialization (Dai et al., 2023a; Jung et al., 2023; Chaudhary, 2023) is examined through lenses like natural language understanding (NLU), natural language processing (NLP).
  3556. tion, and Code Generation**
  3557. Finally, the survey explores how Knowledge Distillation (KD) enhances Large Language Models (LLMs) in interpreting and integrating multiple forms of input, enriching their utility and applicability across various contexts. Verticalization Distillation
  3558. This section examines the application of KD across diverse domains, providing insights into how distilled LLMs can be tailored for specialized fields such as Law, Medical & Healthcare (Wang et al., 2023a), Finance (Zhang and Yang, 2023), Science (Zhang et al., 2024), among others. This exploration showcases the practical implications of KD techniques and highlights their transformative impact on domain-specific AI solutions. Through detailed analysis and examples, this part aims to demonstrate the versatility and efficacy of KD in adapting LLMs to diverse domains.
  3559. stem. by navigating through these facets, this survey endeavors to provide an extensive and nuanced analysis of knowledge distillation in the era of LLMs. it serves as a guide for researchers, practitioners, and enthusiasts in the field, shedding light on current methodologies, challenges, and opportunities for innovation in this rapidly evolving domain.
  3560. across a range of applications.
  3561. Distillation Pipeline in LLM Era
  3562. ================================================
  3563. FILE: end-to-end-use-cases/RAFT-Chatbot/README.md
  3564. ================================================
## Chatbot Recipe:
As the popularity of our Meta Llama 3 models grows, we've seen a surge in demand to adapt them to specific domains, enabling businesses to better serve their customers. For example, a company might have a vast collection of plain text documents related to their custom domain and want to create a chatbot that can answer client questions.
In response to this demand, we're exploring the possibility of building a Llama chatbot that can answer Llama-related questions using our Meta Llama 3 models. In this tutorial, we'll demonstrate how to do just that. While our Meta Llama 3 70B Instruct model is an excellent candidate, its production costs are relatively high. To reduce these costs, we'll focus on creating a Llama chatbot based on the Meta Llama 3 8B Instruct model, aiming to achieve similar accuracy while minimizing inference costs.
One common approach to produce a model based on new domain data is **fine-tuning**. The idea is to start from a pre-trained model that already has some knowledge of language from its pre-training and adapt it to a new domain. However, a [recent paper](https://arxiv.org/pdf/2405.05904) highlights the risk of using supervised fine-tuning to update LLMs' knowledge, presenting empirical evidence that acquiring new knowledge through fine-tuning is correlated with hallucinations with respect to preexisting knowledge. Fine-tuning can also be costly if the domain knowledge has to be updated frequently.
Another solution is to use **RAG (Retrieval-Augmented Generation)**, which combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). RAG operates by first retrieving relevant information from a database using a query generated by the LLM. This retrieved information is then integrated into the LLM's query input, enabling it to generate more accurate and contextually relevant text. This helps to reduce LLM hallucination, as the related documents are provided to the LLM, and has a lower cost for updating the domain knowledge.
In this tutorial, we'll use **Retrieval Augmented Fine Tuning (RAFT)**, a technique that combines fine-tuning with RAG to better utilize custom domain text data. RAFT is a general recipe for fine-tuning a pre-trained Large Language Model (LLM) for a domain-specific RAG setting. It helps the LLM to better utilize custom domain text data by ignoring documents that don't help in answering the question. This approach can create a more factual model and reduce LLM hallucinations during inference.
The process involves preparing training data with each data point containing:
* A question (Q)
* A set of documents (D)
* A corresponding Chain-of-thought style answer (A*) generated from one of the documents (D*)
RAFT tries to teach the models to differentiate between two types of documents:
* Oracle documents (D*): documents from which the answer to the question can be deduced
* Distractor documents (Di): documents that do not contain answer-relevant information
The following graph illustrates the main RAFT concepts:
![RAFT images](images/RAFT.png)
For more information on RAFT, please refer to their [blog post](https://gorilla.cs.berkeley.edu/blogs/9_raft.html).
## Fine-tuning Llama
To build a Llama bot, we need to collect relevant text data. Ideally, we would include a vast range of Llama-related web documents, but for demo purposes, we'll focus on official documents. For example, we can use the raw text from official web pages listed in [Getting started with Meta Llama](https://llama.meta.com/get-started/), excluding the FAQ page since some evaluation questions will come from there.
We have two options to obtain the text data: using a local folder or web crawling. For the local folder option, we can download the desired documents in PDF, Text, or Markdown format to the "data" folder specified in the [raft.yaml](./raft.yaml) file. Langchain's DirectoryLoader will load the files in that folder, but it may also ask us to install more package dependencies if the file formats are not supported natively.
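As a minimal sketch of the local-folder path, assuming a recent Langchain with the community loaders installed:
```python
from langchain_community.document_loaders import DirectoryLoader

# "data" is the folder configured in raft.yaml; unsupported file formats may
# prompt Langchain to ask for extra dependencies.
loader = DirectoryLoader("data")
docs = loader.load()
print(f"Loaded {len(docs)} documents")
```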
Alternatively, for the web-crawling option, we can create a sitemap XML file similar to the example below and put the file path in the [raft.yaml](./raft.yaml) file, so that a Langchain SitemapLoader can retrieve all the text from the web pages.
```xml
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://llama.meta.com/responsible-use-guide/</loc>
</url>
<!-- more URLs -->
</urlset>
```
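And a corresponding sketch of the web-crawling path with SitemapLoader; the URL below is a placeholder for the sitemap path configured in raft.yaml:
```python
from langchain_community.document_loaders.sitemap import SitemapLoader

# Placeholder URL; point this at the sitemap XML referenced in raft.yaml.
loader = SitemapLoader(web_path="https://example.com/sitemap.xml")
docs = loader.load()
print(f"Retrieved text from {len(docs)} web pages")
```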
## Create RAFT Dataset
To create a RAFT dataset from the prepared documents, we can use the Meta Llama 3 70B Instruct model either through APIs from LLM cloud providers or by hosting a local VLLM server.
For this example, we'll demonstrate how to create a VLLM OpenAI-compatible server that hosts Meta Llama 3 70B Instruct locally and generates the RAFT dataset.
**Local Server Setup**
First, ensure VLLM is installed. Then, run the following command to start the VLLM server:
```bash
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
```
**Note**: Make sure the port is available. The server requires at least 135GB of GPU memory, so we need to use multiple GPUs with tensor parallelism.
**Querying the Server**
Once the server is ready, query it using the following command in another terminal:
```bash
python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 4
```
If you prefer to use a cloud API, replace the endpoint URL with the cloud provider's URL and set the API key using the `-k` flag or environment variables.
**RAFT Dataset Generation**
The [raft.py](raft.py) script reads all documents from local or web sources, depending on the settings, and splits the data into text chunks of 1000 characters using RecursiveCharacterTextSplitter.
Then it applies the `question_prompt_template` defined in [raft.yaml](raft.yaml) to each chunk to generate queries to the Meta Llama 3 70B Instruct model, which generates a question list (by default, 4 questions) for each text chunk. For each question and corresponding text chunk, we generate a Chain-of-Thought (COT) style answer using the Meta Llama 3 70B Instruct APIs.
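A hedged sketch of this chunk-and-ask loop; the client setup mirrors the raft.py command above, and the one-line prompt is a stand-in for the `question_prompt_template` in raft.yaml:
```python
from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

# `documents` stands in for the text loaded from the folder or sitemap.
documents = ["...full text of one crawled page..."]
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = [c for doc in documents for c in splitter.split_text(doc)]

for chunk in chunks:
    # Stand-in for question_prompt_template from raft.yaml.
    resp = client.chat.completions.create(
        model="meta-Llama/Meta-Llama-3-70B-Instruct",
        messages=[{"role": "user", "content": f"Write 4 questions answerable from this text:\n{chunk}"}],
    )
    questions = resp.choices[0].message.content
```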
Once we have the COT answers, we can create a dataset where each sample contains an "instruction" section. This section includes some unrelated chunks called distractors (by default, we add 4 distractors). In the original RAFT method, there is an oracle probability P (by default, 80%) that a related document will be included. This means there is a 1-P (by default, 20%) chance that no related document is provided, and the RAFT model should still try to predict the COT answer label; as stated in the blog, "By removing the oracle documents in some instances of the training data, we are compelling the model to memorize domain-knowledge."
**Modification to Add Refusal Examples**
In this tutorial, we made an important modification by adding additional refusal examples (by default, the refusal probability is 5%). When the related documents are not present, we set the COT answer label to "Sorry, I don't know the answer to this question because related documents are not found. Please try again." Our hypothesis is that this will increase answer precision and reduce chatbot hallucination. In real-world production scenarios, we prefer that the chatbot refuses to answer when not enough context is provided, so that we can detect this refusal signal and mitigate the risk of producing wrong or misleading answers (e.g., we can ask a human agent to take over the conversation to better serve customers).
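Putting the oracle and refusal probabilities together, the per-sample logic is roughly the following; the function and variable names are illustrative, not the actual raft.py API:
```python
import random

ORACLE_P = 0.8     # default oracle probability
REFUSAL_P = 0.05   # default refusal probability (our modification)
NUM_DISTRACTORS = 4
REFUSAL_ANSWER = ("Sorry, I don't know the answer to this question because "
                  "related documents are not found. Please try again.")

def build_sample(question, cot_answer, oracle_doc, distractor_pool):
    docs = random.sample(distractor_pool, NUM_DISTRACTORS)
    answer = cot_answer
    if random.random() < REFUSAL_P:
        answer = REFUSAL_ANSWER  # refusal example: no oracle doc provided
    elif random.random() < ORACLE_P:
        docs.insert(random.randrange(len(docs) + 1), oracle_doc)
    # else: no oracle doc, but the COT label is kept to force memorization
    context = "".join(f"<DOCUMENT> {d} </DOCUMENT>" for d in docs)
    return {"instruction": context + "\n" + question, "cot_answer": answer}
```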
**RAFT Format JSON Example**
Here is a RAFT format JSON example from our saved `raft.jsonl` file:
```json
{
"id":"seed_task_228",
"type":"general",
"question":"What is the context length supported by Llama 3 models?",
"context":{
"sentences":[
[
"DISTRACT_DOCS 1",
"DISTRACT_DOCS 2",
"We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
"DISTRACT_DOCS 3",
"DISTRACT_DOCS 4"
]
],
"title":[
[
"placeholder_title",
"placeholder_title",
"placeholder_title",
"placeholder_title",
"placeholder_title"
]
]
},
"oracle_context":"We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
"cot_answer":"Here's the step-by-step reasoning to answer the question:\n\n1. The question asks about the context length supported by Llama 3 models.\n2. In the context, we need to find the relevant information about Llama 3 models and their context length.\n3. The relevant sentence is: \"This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2.\"\n##begin_quote## This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. ##end_quote##\n4. From this sentence, we can see that Llama 3 models support a context length of 8K.\n\n<ANSWER>: 8K",
"instruction":"<DOCUMENT> DISTRACT_DOCS 1 <\/DOCUMENT>...<DOCUMENT> DISTRACT_DOCS 4 <\/DOCUMENT>\nWhat is the context length supported by Llama 3 models?"
}
```
As shown in the example above, we have a "question" section for the generated question, a "cot_answer" section for the generated COT answer (where the final answer is added after the "<ANSWER>:" token), and an "instruction" section that has all the documents included (each document wrapped in `<DOCUMENT>` and `</DOCUMENT>` tags) with the generated question appended at the end. This "instruction" section will be the input during fine-tuning, and the "cot_answer" will be the output label that the loss is calculated on.
## Creating an Evaluation Set
To create a reliable evaluation set, it's ideal to use human-annotated question and answer pairs. This ensures that the questions are relevant and the answers are accurate. However, human annotation is time-consuming and costly. For demonstration purposes, we'll use a subset of the validation set, which will never be used in fine-tuning. We only need to keep the "question" section and the final answer, marked by the `<ANSWER>` tag in "cot_answer". We'll manually check each example and keep only the good ones. We want to ensure that the questions are general enough to be used as web search engine queries and are related to Llama. We'll also use some QA pairs from our FAQ page, with modifications. This results in 72 question and answer pairs as our evaluation set, saved as `eval_llama.json`.
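A small sketch of pulling those two fields out of the RAFT JSONL; the field names match the example above, and the manual filtering step is omitted:
```python
import json

eval_pairs = []
with open("raft.jsonl") as f:
    for line in f:
        sample = json.loads(line)
        # Keep the question plus the text after the <ANSWER>: tag.
        final_answer = sample["cot_answer"].split("<ANSWER>:")[-1].strip()
        eval_pairs.append({"question": sample["question"], "answer": final_answer})

with open("eval_llama.json", "w") as f:
    json.dump(eval_pairs, f, indent=2)
```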
## Fine-Tuning Steps
Once the RAFT dataset is ready in JSON format, we can start fine-tuning. Unfortunately, the LoRA method didn't produce good results, so we'll use the full fine-tuning method. We can use the following commands as an example in the llama-cookbook main folder:
```bash
export PATH_TO_ROOT_FOLDER=./raft-8b
export PATH_TO_RAFT_JSON=recipes/use_cases/end2end-recipes/raft/output/raft.jsonl
torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py --enable_fsdp --lr 1e-5 --context_length 8192 --num_epochs 1 --batch_size_training 1 --model_name meta-Llama/Meta-Llama-3-8B-Instruct --dist_checkpoint_root_folder $PATH_TO_ROOT_FOLDER --dist_checkpoint_folder fine-tuned --use_fast_kernels --dataset "custom_dataset" --custom_dataset.test_split "test" --custom_dataset.file "recipes/finetuning/datasets/raft_dataset.py" --use-wandb --run_validation True --custom_dataset.data_path $PATH_TO_RAFT_JSON
```
For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../getting-started/finetuning/multigpu_finetuning.md) in the finetuning recipe.
Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using the following command:
```bash
python src/llama_cookbook/inference/checkpoint_converter_fsdp_hf.py --fsdp_checkpoint_path "$PATH_TO_ROOT_FOLDER/fine-tuned-meta-Llama/Meta-Llama-3-8B-Instruct" --consolidated_model_path "$PATH_TO_ROOT_FOLDER"
```
For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../getting-started/finetuning/multigpu_finetuning.md) in the inference/local_inference recipe.
## Evaluation Steps
Once we have the RAFT model, we need to evaluate its performance. In this tutorial, we'll not only use traditional evaluation methods (e.g., exact match rate or ROUGE score) but also use an LLM as a judge to score model-generated answers.
We'll launch a VLLM server to host our converted model from `PATH_TO_ROOT_FOLDER`. To make things easier, we can rename the model folder to `raft-8b`.
```bash
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model raft-8b --port 8000 --disable-log-requests
```
Similarly, if we want to get the 8B Instruct baseline, we can launch an 8B model VLLM server instead:
```bash
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-8B-Instruct --port 8000 --disable-log-requests
```
In another terminal, we can use a second Meta Llama 3 70B Instruct model as a judge to compare the answers from the RAFT 8B model with the ground truth and produce a score. To do this, we need to host another Meta Llama 3 70B Instruct VLLM server locally with the following command, making sure the port is not in use:
```bash
CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
```
Then, we can pass the ports to the eval script to evaluate our RAFT model once our `raft-8b` VLLM server is running:
```bash
CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m raft-8b -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
```
To evaluate the 8B baseline, we can use the following command once our 8B VLLM server is running:
```bash
CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m meta-Llama/Meta-Llama-3-8B-Instruct -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
```
**NOTE**: Please ensure that the `--model` in the VLLM server creation matches the `-m` in raft_eval.py; otherwise, VLLM will raise a `model not found` error. By default, the RAFT model is called "raft-8b". Here, `-u` specifies the RAFT model endpoint URL, `-j` specifies the judge model endpoint URL, and `-r` defines how many top-k documents the RAG step should retrieve.
This [raft_eval.py](./raft_eval.py) script will load questions from the evaluation set, generate answers from the models and models+RAG, and compare the generated answers with the ground truth to produce evaluation metrics, such as the ROUGE score or the LLM-as-judge score. It will then save those metrics and the evaluation details to the eval logs.
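For intuition, the LLM-as-judge step reduces to a call like the sketch below against the judge endpoint; the prompt here is a simplified stand-in for the one raft_eval.py actually uses:
```python
from openai import OpenAI

judge = OpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

def judge_answer(question, ground_truth, model_answer):
    prompt = (
        f"Question: {question}\nGround truth: {ground_truth}\n"
        f"Model answer: {model_answer}\n"
        "Reply YES if the model answer matches the ground truth, otherwise NO."
    )
    resp = judge.chat.completions.create(
        model="meta-Llama/Meta-Llama-3-70B-Instruct",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
    )
    return resp.choices[0].message.content.strip().upper().startswith("YES")
```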
## Experiment Results
**Overview**
During our experiments, we encountered issues using only the Llama website data, which consisted of 1980+ RAFT examples generated from 327K characters of text. We believed this initial data was insufficient, so we created an additional PyTorch RAFT dataset using text from the official [PyTorch blogs](https://pytorch.org/blog/) and [PyTorch tutorials](https://pytorch.org/tutorials/). This new dataset contains 20K+ RAFT examples generated from 4.7 million characters. We combined both datasets to create an `all_data` dataset. We then fine-tuned the 8B model on each dataset separately for 1 epoch with a learning rate of 1e-5, resulting in three RAFT models: `llama_only`, `pytorch_only`, and `all_data`.
**Evaluation on non-RAG baseline**
First, we ran a non-RAG baseline, using just the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct models, to see whether our models could already answer some questions without any fine-tuning or external knowledge base. The LLM score, the percentage of answers marked correct by the LLM-as-judge, is 47.9% for 8B and 59.2% for 70B. Clearly, some of this information has been pretrained into our Meta Llama 3 models.
**Evaluation on RAG baseline**
Then we tested these 3 RAFT models with Langchain RAG, along with the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct RAG baselines, using RAG document top-k retrieval parameters of 3, 5, and 7. We deployed a Meta Llama 3 70B Instruct model as the judge to score our model-generated answers against the ground truth in our evaluation set. The LLM scores are shown below:
![RAFT LLM_score comparison](images/LLM_score_comparison.png)
Our results showed that the RAFT models performed similarly to the 8B RAG baseline, but noticeably worse than the 70B RAG baseline, when context documents were limited (top_k <= 5). However, when top_k = 7, the RAFT models' performance increases sharply, with the `all_data` 8B model achieving a score of 76.06%, beating the 70B baseline's 74.65%.
**Refusal Examples**
We also analyzed the number of refusal examples, where the model responded with "Sorry, I do not know." The `all_data` model was more cautious and tended to refuse to answer, whereas the `llama_only` RAFT model did not learn to refuse at all, likely due to the limited dataset size.
![Num of refusal comparison](images/Num_of_refusal_comparison.png)
**Precision Analysis**
We calculated the precision of our model answers, which represents the likelihood of producing correct answers when the model decides to respond. The formula used was $\frac{LLMScore}{1-\frac{numRefusal}{totalQA}}$.
![Answers Precision](images/Answers_Precision.png)
Note that the 8B and 70B RAG baselines never refused to answer, so their precision was equivalent to their LLM_score. Our `all_data` and `pytorch_only` models tended to refuse to answer when provided documents were limited (top_k < 5), but when they did generate an answer, the likelihood of it being correct was higher. Specifically, when top_k = 7, the `all_data` RAFT model had an 82.97% likelihood of producing a correct answer when it decided to respond, outperforming the 70B baseline.
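To make the precision formula concrete, here is a tiny worked example with made-up numbers (not our actual counts): a model that scores LLM_score = 60% while refusing 100 of 400 questions answered 300 questions, so its precision is 0.6 / (1 - 100/400) = 80%.
```python
def precision(llm_score: float, num_refusal: int, total_qa: int) -> float:
    # precision = LLMScore / (1 - numRefusal / totalQA):
    # the fraction of correct answers among the questions the model chose to answer.
    answered_fraction = 1 - num_refusal / total_qa
    return llm_score / answered_fraction

print(precision(0.60, 100, 400))  # 0.8 with these illustrative numbers
```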
**Example Comparisons**
Here are some examples where our `all_data` RAFT model correctly answered questions that the 70B baseline failed to answer:
```
Comparing interested question: What tokenizer is used as the basis for the special tokens in Meta Llama
ground_truth: tiktoken
True all_data_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is tiktoken.
False 70B_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is SentencePiece.
```
```
Comparing interested question: What is the license under which the Llama Guard model and its weights are released?
ground_truth: The license is the same as Llama 3, which can be found in the LICENSE file and is accompanied by the Acceptable Use Policy.
True all_data_RAG_answers: <ANSWER>: The license under which the Llama Guard model and its weights are released is the same as Llama 3, and the [LICENSE](../LICENSE) file contains more information about the license.
False 70B_RAG_answers: <ANSWER>: The Llama Guard model and its weights are licensed under the Llama 2 Community license.
```
**Key Takeaways**
From our experiments, we learned:
1. A few thousand RAFT examples are insufficient; at least 10K examples are recommended.
2. The LLM_as_judge is not always reliable; we noticed that some answers were scored incorrectly.
3. The chunk_size for RAFT documents and RAG documents should be the same.
4. The RAFT method appears to help the LLM differentiate related documents from distractors, rather than forcing it to memorize the training data, since we used PyTorch data as additional training data to help our Llama chatbot answer Llama questions. More experiments will be needed to understand this better.
## Local Inference Steps
Once we have evaluated and refined our RAFT model, we can deploy it locally and interact with it by asking questions manually. To do this, run the following command:
```bash
python recipes/inference/local_inference/inference.py --model_name raft-8b
```
For more details, please check the [local_inference recipe](../../getting-started/inference/local_inference/README.md)
## Acknowledgement
Finally, we would like to extend special thanks to Tianjun Zhang, the first author of the [RAFT paper](https://arxiv.org/pdf/2403.10131), for collaborating with us on this tutorial and providing valuable guidance throughout our experiments. Our code is also partially inspired by the [RAFT section in the Gorilla GitHub repository](https://github.com/ShishirPatil/gorilla/tree/main/raft).
================================================
FILE: end-to-end-use-cases/research_paper_analyzer/README.md
================================================
# Research Paper Analyzer with Llama 4 Maverick
This tool leverages the **Llama 4 Maverick** model to retrieve the references of an arXiv paper and ingest all of their content for question-answering, without using any RAG to store this information.
## Features
### Leverage Long Context Length
| Model | Meta Llama 4 Maverick | Meta Llama 4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
| ----- | -------------- | -------------- | -------------- | -------------- |
| Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |

Because of the long context length, the analyzer can process all the reference paper content at once, so you can ask questions about the paper without worrying about the context length.
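As a rough illustration of this ingestion step, here is a minimal sketch of downloading an arXiv PDF, extracting its text with PyPDF2, and passing the whole text to Llama 4 Maverick in a single prompt via the Together API. The model ID and the URL handling are assumptions for illustration; see research_analyzer.py for the actual implementation:
```python
import io
import requests
from PyPDF2 import PdfReader
from together import Together  # reads TOGETHER_API_KEY from the environment

# Download the paper PDF (assumes an arxiv.org/pdf/... URL).
pdf_bytes = requests.get("https://arxiv.org/pdf/2305.11135", timeout=60).content
reader = PdfReader(io.BytesIO(pdf_bytes))
full_text = "\n".join(page.extract_text() or "" for page in reader.pages)

# With a ~1M-token context window, the whole paper (and its references,
# extracted the same way) can go into one prompt, with no RAG index needed.
client = Together()
response = client.chat.completions.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",  # assumed model ID
    messages=[{"role": "user",
               "content": f"{full_text}\n\nQuestion: What problem does this paper solve?"}],
)
print(response.choices[0].message.content)
```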
## Getting Started
1. Install dependencies:
```
pip install -r requirements.txt
```
2. Run the application:
```
python research_analyzer.py
```
3. Open the gradio interface on localhost in the browser.
4. Provide a paper url such as https://arxiv.org/abs/2305.11135
5. Press "Ingest", wait for the paper to be processed, and ask questions about it
================================================
FILE: end-to-end-use-cases/research_paper_analyzer/requirements.txt
================================================
gradio
together
requests
pypdf2
================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/README.md
================================================
# WhatsApp and Llama 4 APIs: Build your own multi-modal chatbot
Welcome to the WhatsApp Llama 4 Bot! This bot leverages the power of the Llama 4 APIs to provide intelligent and interactive responses to users via WhatsApp. It supports text, image, and audio interactions, making it a versatile tool for various use cases.
## Key Features
- **Text Interaction**: Users can send text messages to the bot, which are processed using the Llama 4 APIs to generate accurate and contextually relevant responses.
- **Image Reasoning**: The bot can analyze images sent by users, providing insights, descriptions, or answers related to the image content.
- **Audio-to-Audio Interaction**: Users can send audio messages, which are transcribed to text, processed by Llama 4, and converted back to audio for a seamless voice-based interaction.
## Technical Overview
### Architecture
- **FastAPI**: The bot is built using FastAPI, a modern web framework for building APIs with Python.
- **Asynchronous Processing**: Utilizes `httpx` for making asynchronous HTTP requests to external APIs, ensuring efficient handling of media files.
- **Environment Configuration**: Uses `dotenv` to manage environment variables, keeping sensitive information like API keys secure.
Please refer to the high-level architecture diagram below, which explains the integrations:
![WhatsApp Llama4 Integration Diagram](src/docs/img/WhatApp_Llama4_integration.jpeg)
### Important Integrations
- **WhatsApp API**: Facilitates sending and receiving messages, images, and audio files.
- **Llama 4 Model**: Provides advanced natural language processing capabilities for generating responses.
- **Groq API**: Handles speech-to-text (STT) and text-to-speech (TTS) conversions, enabling the audio-to-audio feature.
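To make this flow concrete, here is a minimal sketch of the webhook pattern these pieces imply: FastAPI receives the WhatsApp notification, and `httpx` posts the generated reply back through the WhatsApp API. The payload parsing and the reply logic are simplified assumptions; see the actual bot code for the full handling of images and audio via Llama 4 and Groq:
```python
import os
import httpx
from dotenv import load_dotenv
from fastapi import FastAPI, Request

load_dotenv()  # pulls ACCESS_TOKEN, WHATSAPP_API_URL, etc. from .env
app = FastAPI()

@app.post("/webhook")
async def webhook(request: Request):
    body = await request.json()
    # Simplified: real WhatsApp payloads include media types and metadata;
    # here we assume a plain text message for illustration.
    message = body["entry"][0]["changes"][0]["value"]["messages"][0]
    sender, text = message["from"], message["text"]["body"]

    reply = f"You said: {text}"  # placeholder for the Llama 4 call

    async with httpx.AsyncClient() as client:
        await client.post(
            os.environ["WHATSAPP_API_URL"],
            headers={"Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}"},
            json={"messaging_product": "whatsapp", "to": sender,
                  "type": "text", "text": {"body": reply}},
        )
    return {"status": "ok"}
```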
## Here are the steps to set up with the WhatsApp Business Cloud API
First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:
1. Add the WhatsApp product to your business app;
2. Add a recipient number;
3. Send a test message;
4. Configure a webhook to receive real time HTTP notifications.
For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.
Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page, select your WhatsApp business app, and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run it in a Terminal to send a test message to your WhatsApp.
![](../../src/docs/img/whatsapp_dashboard.jpg)
Note down the "Temporary access token", "Phone number ID", and the recipient phone number shown on the API Setup page above; they will be used later.
## Setup and Installation
### Step 1: Clone the Repository
```bash
git clone https://github.com/meta-llama/llama-cookbook.git
cd llama-cookbook/end-to-end-use-cases/whatsapp-llama4-bot
```
### Step 2: Install Dependencies
Ensure you have Python installed, then run the following command to install the required packages:
```bash
pip install -r requirements.txt
```
### Step 3: Configure Environment Variables
Create a `.env` file in the project directory and add your API keys and other configuration details as follows:
```plaintext
ACCESS_TOKEN=your_whatsapp_access_token
WHATSAPP_API_URL=your_whatsapp_api_url
TOGETHER_API_KEY=your_llama4_api_key
GROQ_API_KEY=your_groq_api_key
PHONE_NUMBER_ID=your_phone_number_id
```
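Before starting the server, it can help to confirm the configuration actually loads. Here is a small sketch (not part of the bot code) that checks the variables listed above using `dotenv`:
```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file in the current directory

# The variable names below match the .env template in Step 3.
REQUIRED = ["ACCESS_TOKEN", "WHATSAPP_API_URL", "TOGETHER_API_KEY",
            "GROQ_API_KEY", "PHONE_NUMBER_ID"]
missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
print("All required environment variables are set.")
```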
### Step 4: Run the Application
On your EC2 instance, run the following command in a Terminal to start the FastAPI server:
```bash
uvicorn ec2_endpoints:app --host 0.0.0.0 --port 5000
```
Note: If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules.
## License
This project is licensed under the MIT License.
## Contributing
We welcome contributions to enhance the capabilities of this bot. Please feel free to submit issues or pull requests.
================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/requirements.txt
================================================
aiohappyeyeballs==2.6.1
aiohttp==3.11.16
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
async-timeout==5.0.1
attrs==25.3.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
distro==1.9.0
dotenv==0.9.9
eval_type_backport==0.2.2
exceptiongroup==1.2.2
fastapi==0.115.12
filelock==3.18.0
frozenlist==1.5.0
groq==0.22.0
h11==0.14.0
httpcore==1.0.8
httpx==0.28.1
idna==3.10
markdown-it-py==3.0.0
mdurl==0.1.2
multidict==6.4.3
numpy==2.2.4
pillow==11.2.1
propcache==0.3.1
pyarrow==19.0.1
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
python-dotenv==1.1.0
requests==2.32.3
rich==13.9.4
shellingham==1.5.4
sniffio==1.3.1
starlette==0.46.2
tabulate==0.9.0
together==1.5.5
tqdm==4.67.1
typer==0.15.2
typing-inspection==0.4.0
typing_extensions==4.13.2
urllib3==2.4.0
uvicorn==0.34.1
yarl==1.19.0
================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/.env
================================================
# WhatsApp Business Phone Number ID (NOT the phone number itself)
PHONE_NUMBER_ID="place your whatsapp phone number id"
# Full URL to send WhatsApp messages (use correct version and phone number ID)
WHATSAPP_API_URL="place the Graph API request URL, i.e. https://graph.facebook.com/v{version}/{phone_number_id}/messages"
# Your custom backend/agent endpoint (e.g., for LLM-based processing)
AGENT_URL=https://your-agent-url.com/api
LLAMA_API_KEY="place your LLAMA API Key"
TOGETHER_API_KEY="place your Together API Key, in case you want to use Together instead of the Llama APIs"
GROQ_API_KEY="place your Groq API Key - this is for STT and TTS"
OPENAI_API_KEY="place your OpenAI Key to run the client"
META_ACCESS_TOKEN="place your WhatsApp generated Access token from the app"