Directory structure:
└── end-to-end-use-cases/
    ├── README.md
    ├── agents/
    │   ├── README.md
    │   ├── Agents_Tutorial/
    │   ├── calendar_assistant/
    │   │   ├── README.md
    │   │   └── assets/
    │   └── DeepLearningai_Course_Notebooks/
    │       └── README.md
    ├── ArticleSummarizer/
    │   ├── README.md
    │   ├── gradle.properties
    │   ├── gradlew
    │   ├── .gitignore
    │   ├── app/
    │   │   ├── .gitignore
    │   │   └── src/
    │   │       └── main/
    │   │           ├── java/
    │   │           │   └── com/
    │   │           │       └── example/
    │   │           │           └── llamaandroiddemo/
    │   │           └── res/
    │   │               ├── drawable/
    │   │               ├── layout/
    │   │               ├── mipmap-anydpi-v26/
    │   │               ├── mipmap-hdpi/
    │   │               ├── mipmap-mdpi/
    │   │               ├── mipmap-xhdpi/
    │   │               ├── mipmap-xxhdpi/
    │   │               ├── mipmap-xxxhdpi/
    │   │               ├── values/
    │   │               └── xml/
    │   └── gradle/
    │       └── wrapper/
    │           └── gradle-wrapper.properties
    ├── benchmarks/
    │   ├── README.md
    │   ├── inference/
    │   │   ├── README.md
    │   │   ├── requirements.txt
    │   │   ├── cloud/
    │   │   │   ├── README.md
    │   │   │   ├── aws/
    │   │   │   │   └── fmbench/
    │   │   │   │       ├── README.md
    │   │   │   │       └── img/
    │   │   │   └── azure/
    │   │   └── on_prem/
    │   │       ├── README.md
    │   │       └── vllm/
    │   └── llm_eval_harness/
    │       ├── README.md
    │       └── meta_eval/
    │           ├── README.md
    │           └── meta_template/
    │               ├── bbh/
    │               ├── gpqa/
    │               ├── gpqa_cot/
    │               ├── ifeval/
    │               ├── math_hard/
    │               ├── mmlu/
    │               └── mmlu_pro/
    ├── book-character-mindmap/
    │   ├── README.md
    │   ├── .gitignore
    │   ├── public/
    │   │   └── robots.txt
    │   ├── server/
    │   │   └── requirements.txt
    │   └── src/
    │       └── pages/
    │           ├── bookPage/
    │           │   └── components/
    │           └── homePage/
    │               └── components/
    ├── browser_use/
    │   ├── README.md
    │   └── agent/
    ├── coding/
    │   └── text2sql/
    │       ├── README.md
    │       ├── nba.txt
    │       ├── nba_roster.db
    │       └── requirements.txt
    ├── Contextual-Chunking-RAG/
    │   ├── README.md
    │   └── data/
    │       └── llama_article.txt
    ├── customerservice_chatbots/
    │   ├── README.md
    │   ├── messenger_chatbot/
    │   │   └── messenger_llama3.md
    │   ├── RAG_chatbot/
    │   │   ├── requirements.txt
    │   │   ├── data/
    │   │   └── vectorstore/
    │   │       ├── db_faiss/
    │   │       └── mongodb/
    │   └── whatsapp_chatbot/
    │       └── whatsapp_llama3.md
    ├── email_agent/
    │   ├── README.md
    │   └── requirements.txt
    ├── github_triage/
    │   ├── README.md
    │   ├── requirements.txt
    │   └── output/
    │       └── pytorch/
    │           └── pytorch/
    │               └── 2024-08-28_2024-08-28/
    │                   └── plots/
    ├── long_context/
    │   └── H2O/
    │       ├── README.md
    │       ├── requirements.txt
    │       ├── data/
    │       │   └── summarization/
    │       ├── src/
    │       └── utils/
    ├── Multi-Modal-RAG/
    │   ├── README.md
    │   ├── notebooks/
    │   └── scripts/
    ├── multilingual/
    │   ├── README.md
    │   └── img/
    ├── NotebookLlama/
    │   ├── README.md
    │   ├── requirements.txt
    │   ├── TTS_Notes.md
    │   └── resources/
    │       └── clean_extracted_text.txt
    ├── RAFT-Chatbot/
    │   ├── README.md
    │   └── images/
    ├── research_paper_analyzer/
    │   ├── README.md
    │   └── requirements.txt
    └── whatsapp_llama_4_bot/
        ├── README.md
        ├── requirements.txt
        └── .env

================================================
FILE: end-to-end-use-cases/README.md
================================================
<h1 align="center"> End to End Use Applications using various Llama Models </h1>

<p align="center">
  <a href="https://llama.developer.meta.com/join_waitlist?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Join_Waitlist-brightgreen?logo=meta" /></a>
  <a href="https://llama.developer.meta.com/docs?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Documentation-4BA9FE?logo=meta" /></a>
</p>
<p align="center">
  <a href="https://github.com/meta-llama/llama-models/blob/main/models/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Model cards" src="https://img.shields.io/badge/Llama_OSS-Model_cards-green?logo=meta" /></a>
  <a href="https://www.llama.com/docs/overview/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Documentation" src="https://img.shields.io/badge/Llama_OSS-Documentation-4BA9FE?logo=meta" /></a>
  <a href="https://huggingface.co/meta-llama"><img alt="Hugging Face meta-llama" src="https://img.shields.io/badge/Hugging_Face-meta--llama-yellow?logo=huggingface" /></a>
</p>
<p align="center">
  <a href="https://github.com/meta-llama/synthetic-data-kit"><img alt="Llama Tools Synthetic Data Kit" src="https://img.shields.io/badge/Llama_Tools-synthetic--data--kit-orange?logo=meta" /></a>
  <a href="https://github.com/meta-llama/llama-prompt-ops"><img alt="Llama Tools llama-prompt-ops" src="https://img.shields.io/badge/Llama_Tools-llama--prompt--ops-orange?logo=meta" /></a>
</p>

## [Building an Intelligent WhatsApp Bot with Llama 4 APIs](./whatsapp_llama_4_bot/README.md)
### A Step-by-Step Guide

Create a WhatsApp bot that leverages the power of Llama 4 APIs to provide intelligent and interactive responses. This guide will walk you through the process of building a bot that supports text, image, and audio interactions, making it versatile for various use cases.

- **Text Interaction**: Respond to text messages with accurate and contextually relevant answers.
- **Image Reasoning**: Analyze images to provide insights, descriptions, or answers related to the content.
- **Audio-to-Audio Interaction**: Transcribe audio messages to text, process them, and convert the response back to audio for seamless voice-based interaction (see the sketch below).

Get started with building your own WhatsApp bot using Llama 4 APIs today!
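To make the audio-to-audio flow concrete, here is a minimal sketch of how such a handler could be wired together. All three helpers are hypothetical stubs (the bot's actual ASR, Llama 4 API, and TTS calls live in its source); only the transcribe → reason → synthesize shape is the point.

```python
# Hypothetical sketch: each helper is a stub standing in for a real
# ASR, Llama 4 API, and TTS integration.
def transcribe(audio: bytes) -> str:
    return "What's the weather like today?"  # replace with a real ASR call

def ask_llama(text: str) -> str:
    return f"You asked: {text}"              # replace with a Llama 4 API call

def synthesize_speech(text: str) -> bytes:
    return text.encode("utf-8")              # replace with a real TTS call

def handle_audio_message(audio: bytes) -> bytes:
    """Transcribe an incoming voice note, reason over it, reply as audio."""
    return synthesize_speech(ask_llama(transcribe(audio)))

print(handle_audio_message(b"<opus-encoded voice note>"))
```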
## [Research Paper Analyzer with Llama4 Maverick](./research_paper_analyzer/README.md)
### Analyze Research Papers with Ease

Leverage Llama4 Maverick to retrieve references from an arXiv paper and ingest all their content for question-answering.

- **Long Context Length**: Process entire papers at once.
- **Comprehensive Analysis**: Get insights, descriptions, or answers related to the content.

Get started with analyzing research papers using Llama4 Maverick today!

## [Book Character Mind Map With Llama4 Maverick](./book-character-mindmap/README.md)
### Explore Book Characters and Storylines

Use Llama4 Maverick to process entire books at once and visualize character relationships and storylines.

- **Interactive Mind Maps**: Visualize relationships between characters and plot elements.
- **Book Summaries**: Get concise overviews of plots and themes.

Discover new insights into your favorite books!

## [Agentic Tutorial](./agents/):
### 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models

The 101 notebooks show how to apply Llama models and enable tool-calling functionality; the 201 notebook walks you through an end-to-end workflow of building an agent that can search two papers, fetch their details, and find their differences.

## [Benchmarks](./benchmarks/):
### A folder containing benchmark scripts

The scripts provide a throughput analysis and an introduction to `lm-evaluation-harness`, a tool for evaluating Llama models, including quantized models, with a focus on quality.

## [Browser Usage](./browser_use/):
### Demo of how to apply Llama models and use them for browsing the internet and completing tasks

## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb):
### Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience

This tool utilizes an off-the-shelf Llama model to analyze issues, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.

## [NBA2023-24](./coding/text2sql/quickstart.ipynb):
### Ask Llama 3 about Structured Data

This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season enters the playoffs, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players. A minimal sketch of this flow follows.
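The sketch below illustrates the LangChain-based text2sql pattern, assuming the `nba_roster.db` SQLite file from the text2sql folder and an OpenAI-compatible endpoint serving Llama 3; the base URL and model name are placeholders, and the quickstart notebook's actual chain may differ.

```python
from langchain.chains import create_sql_query_chain
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI

# Point LangChain at the SQLite roster DB and a Llama 3 chat endpoint.
db = SQLDatabase.from_uri("sqlite:///nba_roster.db")
llm = ChatOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY",
                 model="meta-llama/Meta-Llama-3-8B-Instruct", temperature=0)

# The chain asks the model to write a SQL query for the question;
# we then execute that query against the DB ourselves.
chain = create_sql_query_chain(llm, db)
sql = chain.invoke({"question": "Which players are on the Golden State Warriors?"})
print(db.run(sql))
```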
## [NotebookLlama](./NotebookLlama/):
### PDF to Podcast using Llama Models

A workflow showcasing how to use multiple Llama models to go from any PDF to a podcast, using open models to generate multi-speaker audio.

## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md):
### Building a Llama 3 Enabled WhatsApp Chatbot

This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot.

## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md):
### Building a Llama 3 Enabled Messenger Chatbot

This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot.

### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb))

A complete example of how to build a Llama 3 chatbot hosted in your browser that can answer questions based on your own data using retrieval augmented generation (RAG). A minimal sketch of the pattern follows.
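Below is a minimal, illustrative sketch of that RAG pattern, assuming a FAISS index like the one stored under `RAG_chatbot/vectorstore/db_faiss`; the embedding model and documents here are placeholders, not the notebook's exact setup.

```python
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embed a few documents and index them with FAISS (stand-ins for your own data).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
docs = ["Llama 3 comes in 8B and 70B parameter sizes.",
        "RAG retrieves relevant chunks and adds them to the prompt."]
index = FAISS.from_texts(docs, embeddings)

# Retrieve the chunks most relevant to the question and build a grounded prompt.
question = "What sizes does Llama 3 come in?"
context = "\n".join(d.page_content for d in index.similarity_search(question, k=2))
prompt = f"Answer using only this context:\n{context}\n\nQuestion: {question}"
# `prompt` would then be sent to a Llama 3 chat endpoint to produce the answer.
print(prompt)
```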

================================================
FILE: end-to-end-use-cases/agents/README.md
================================================
## Agents and Tool Calling

Structure:
- Agents_Tutorial: Showcases 101 and 201 notebooks with guidance for using tool calling with Llama models
- DeepLearningai_Course_Notebooks: Notebooks from the DL.ai courses teaching agents


================================================
FILE: end-to-end-use-cases/agents/calendar_assistant/README.md
================================================
# Google Calendar Assistant with Llama 3.2 3B Tool Calling

This notebook showcases how to go about building a digital assistant to schedule meetings with the Llama 3.2 3B model. The core concepts used to implement this are Prompt Engineering and Tool Calling. This demo shows how Llama can be used to interact with 3rd party apps like Google Contacts & Google Calendar and schedule a meeting requested by the user. Even though we are using prompt engineering to achieve this, the approach described doesn't degrade the model's ability to answer general queries. This approach can be extended to perform other tasks in a similar manner without affecting the quality of other tasks.

## Approach

Instead of using a complex system prompt with multiple conditions & expecting Llama to perform various tasks accurately out of the box, the approach here is to treat this as a 2-step process:

- Determine user intent - task classification
- Take action for the specific task using Tool Calling

In the diagram shown below,

- system prompt 1 determines the classification of the query
- In steps 2 & 3, we classify the task being requested.
- system prompt 2 is chosen based on the classification result
- Steps 4 & 5 implement the classified task.
- For the sake of the demo, we show 2 classes: General & Meeting

Both these tasks have a specific prompt. We use the same model with different system prompts depending on the classification result.

Additionally, this demo also showcases how Llama can be used to do double tool calling with 1 prompt. In the case of Meeting, Llama returns 2 function calls in Step 5:

```
<function=google_contact>{{"name": "John Constantine"}}</function>
<function=google_calendar>{{"date": "Mar 31 ", "time": "5:30 pm", "attendees": "John Constantine"}}</function>
```

## Examples

- General query

  **User Prompt**: `Tell me about Paris`

  **Prompt Classification**: `general`

  **Answer**:
  ```
  Paris, the City of Light, is the capital of France. Known for its stunning architecture, art museums, and romantic atmosphere, Paris is a popular tourist destination. The city is famous for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, which houses the Mona Lisa. Paris is also renowned for its fashion, cuisine, and wine. The city's charming streets and historic neighborhoods, like Montmartre and Le Marais, offer a glimpse
  ```

- Set up a Calendar Invite

  **User Prompt**: `Schedule a meeting with John Constantine on Mar 31 at 5:30 pm`

  **Prompt Classification**: `meeting`

  **Answer**:
  ```
  <function=google_contact>{{"name": "John Constantine"}}</function>
  <function=google_calendar>{{"date": "Mar 31", "time": "5:30 pm", "attendees": "John Constantine"}}</function>
  ```

### Actions based on tool calling output

- The google_contact function call returned by the model is used to call the [People API](https://developers.google.com/people) to look up the email address of the person of interest
- The email address from the previous step is used to call the [Calendar API](https://developers.google.com/calendar) along with the other information in the google_calendar tool calling output returned by the model

The end result is that a Google meeting is scheduled with the person of interest at the date & time specified. A sketch of how these tool-call strings can be parsed follows.
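As a rough illustration (not the notebook's exact code), the two tool-call strings above can be extracted with a small helper like this. It assumes the model emits single-brace JSON arguments at inference time; the doubled braces in the snippets above look like prompt-template escapes, but that is an inference, not something the README states.

```python
import json
import re

def parse_tool_calls(text: str) -> list[tuple[str, dict]]:
    """Extract (function_name, arguments) pairs from <function=...>...</function> spans."""
    pattern = r"<function=(\w+)>(\{.*?\})</function>"
    return [(name, json.loads(args)) for name, args in re.findall(pattern, text)]

output = ('<function=google_contact>{"name": "John Constantine"}</function>'
          '<function=google_calendar>{"date": "Mar 31", "time": "5:30 pm", '
          '"attendees": "John Constantine"}</function>')

for name, args in parse_tool_calls(output):
    print(name, args)  # dispatch each call to the People API / Calendar API wrapper
```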

================================================
FILE: end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md
================================================
# Quickstart Llama 3 Powered Agent Examples Ported from DeepLearning.ai Short Courses

The notebooks in this folder are ported from the 4 recent agent short courses on [Deeplearning.ai](https://www.deeplearning.ai) to use Llama 3 to build agent apps from scratch or with open source frameworks (LangChain, LlamaIndex, AutoGen).

1. [Functions Tools and Agents with LangChain L1 Function Calling](Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb)
2. [AI Agents in LangGraph L1 Build an Agent from Scratch](AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb)
3. [Building Agentic RAG with Llamaindex L1 Router Engine](Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb)
4. [AI Agentic Design Patterns with AutoGen L4 Tool Use and Conversational Chess](AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb)


================================================
FILE: end-to-end-use-cases/ArticleSummarizer/README.md
================================================
# Android Document Summarizer App

<img src="./screenshot.png">

This is a sample Android app that demonstrates Llama 4 multimodal and multilingual capabilities. The app allows the user to take a picture/screenshot of a document, and then summarize and translate it into any of the supported languages.

## Quick Start

1. Open the DocumentSummarizer folder in Android Studio
2. Update the `API_KEY` in `AppUtils.java`
3. Build the Android project
4. Inside the app, tap the settings icon on the top right
5. Configure the Remote URL endpoint (any supported provider that serves Llama 4 models, for example: https://api.together.xyz)
6. Select the desired model from the drop-down list. If you need to add more models, modify `ModelUtils.java`
7. Go back to the main chat window
8. Press the '+' button on the bottom left and select an image document (or take a picture of one!)
9. Select the 'globe' button on the bottom left and select your languages
10. Enter a prompt like "summarize this" and press Enter!

> **_NOTE:_** This is an example project to demonstrate the E2E flow. You should NOT use or store an API key directly on the client. Exposing your API key in client-side environments allows malicious users to take that key and make requests on your behalf. Requests should always be routed through your own backend server where you can keep your API key secure.

## Reporting Issues

If you encounter any bugs or issues while following this tutorial, please file a bug/issue on [Github](https://github.com/meta-llama/llama-cookbook/issues).


================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"

================================================
FILE: end-to-end-use-cases/ArticleSummarizer/.gitignore
================================================
# Gradle files
.gradle/
build/

# Local configuration file (sdk path, etc)
local.properties

# Log/OS Files
*.log

# Android Studio generated files and folders
captures/
.externalNativeBuild/
.cxx/
*.apk
output.json

# IntelliJ
*.iml
.idea/misc.xml
deploymentTargetDropDown.xml
render.experimental.xml

# Keystore files
*.jks
*.keystore

# Google Services (e.g. APIs or Firebase)
google-services.json

# Android Profiling
*.hprof

.DS_Store
*.jar
*.aar
*.so

================================================
FILE: end-to-end-use-cases/ArticleSummarizer/app/.gitignore
================================================
/build


================================================
FILE: end-to-end-use-cases/ArticleSummarizer/gradle/wrapper/gradle-wrapper.properties
================================================
#Mon Sep 25 11:23:11 PDT 2023
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists


================================================
FILE: end-to-end-use-cases/benchmarks/README.md
================================================
# Benchmarks

* inference - a folder containing benchmark scripts that apply a throughput analysis to Llama model inference on various backends, including on-prem, cloud, and on-device.
* llm_eval_harness - a folder that introduces `lm-evaluation-harness`, a tool for evaluating Llama models, including quantized models, with a focus on quality. We also include a recipe that calculates Llama 3.1 evaluation metrics using `lm-evaluation-harness`, and instructions for calculating Hugging Face Open LLM Leaderboard v2 metrics.


================================================
FILE: end-to-end-use-cases/benchmarks/inference/README.md
================================================
# Inference Throughput Benchmarks

In this folder we provide a series of benchmark scripts that apply a throughput analysis to Llama model inference on various backends:

* On-prem - Popular serving frameworks and containers (e.g. vLLM)
* Cloud API - Popular API services (e.g. Azure Model-as-a-Service or Serverless API)
* [**WIP**] On-device - Popular on-device inference solutions on mobile and desktop (e.g. ExecuTorch, MLC-LLM, Ollama)
* [**WIP**] Optimization - Popular optimization solutions for faster inference and quantization (e.g. AutoAWQ)

# Why

There are three major reasons we want to run these benchmarks and share them with our Llama community:

* Provide an inference throughput analysis based on real-world situations to help you select the best service or deployment for your scenario
* Provide a baseline measurement for validating various optimization solutions on different backends, so we can offer guidance on which solutions work best for your scenario
* Encourage the community to develop benchmarks on top of our work, so we can better quantify the latest proposed solutions combined with current popular frameworks, especially in this fast-moving area

# Parameters

Here are the parameters (if applicable) that you can configure for running the benchmark:

* **PROMPT** - Prompt sent in for inference (configure the length of prompt, choose from 5, 25, 50, 100, 500, 1k and 2k)
* **MAX_NEW_TOKENS** - Max number of tokens generated
* **CONCURRENT_LEVELS** - Max number of concurrent requests
* **MODEL_PATH** - Model source from Huggingface
* **MODEL_HEADERS** - Request headers
* **SAFE_CHECK** - Content safety check (either Azure service or simulated latency)
* **THRESHOLD_TPS** - Threshold TPS (threshold for tokens per second below which we deem the query to be slow)
* **TOKENIZER_PATH** - Tokenizer source
* **RANDOM_PROMPT_LENGTH** - Random prompt length (for pretrained models)
* **NUM_GPU** - Number of GPUs for request dispatch among multiple containers
* **TEMPERATURE** - Temperature for inference
* **TOP_P** - Top_p for inference
* **MODEL_ENDPOINTS** - Container endpoints
* Model parallelism or model replicas - Load one model into multiple GPUs, or run multiple model replicas on one instance. More detail in the README files for specific containers.

You can also configure other model hyperparameters as part of the request payload. All these parameters are stored in ```parameter.json``` and real prompts are stored in ```input.jsonl```. Running the script will load these configurations; an illustrative configuration is sketched below.
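For illustration only, a `parameter.json` along these lines could look as follows; the keys mirror the parameter names listed above, but the exact schema is defined by each backend's scripts and the values here are made up.

```python
import json

# Hypothetical parameter.json contents; see each backend's README for the real schema.
params = {
    "PROMPT": "What is the capital of France?",
    "MAX_NEW_TOKENS": 256,
    "CONCURRENT_LEVELS": [1, 2, 4, 8, 16, 32],
    "MODEL_PATH": "meta-llama/Meta-Llama-3-8B-Instruct",
    "MODEL_HEADERS": {"Content-Type": "application/json"},
    "SAFE_CHECK": True,
    "THRESHOLD_TPS": 7,
    "TOKENIZER_PATH": "meta-llama/Meta-Llama-3-8B-Instruct",
    "TEMPERATURE": 0.6,
    "TOP_P": 0.9,
    "MODEL_ENDPOINTS": ["http://localhost:8000/v1/chat/completions"],
}

with open("parameter.json", "w") as f:
    json.dump(params, f, indent=2)
```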
# Metrics

The benchmark will report these metrics per instance:

* Number of concurrent requests
* P50 Latency (ms)
* P99 Latency (ms)
* Requests per second (RPS)
* Output tokens per second
* Output tokens per second per GPU
* Input tokens per second
* Input tokens per second per GPU
* Average tokens per second per request

We intend to add these metrics in the future:

* Time to first token (TTFT)

The benchmark result will be displayed in the terminal output and saved as a CSV file (```performance_metrics.csv```) which you can export to spreadsheets. A sketch of how such metrics are derived follows.
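As a sketch of how these numbers fall out of raw timings (not the benchmark scripts' actual code), assume one latency measurement and one output-token count per request:

```python
import numpy as np

# Toy per-request measurements; the real scripts collect these from live runs.
latencies_ms = [812, 950, 1103, 998, 1240, 905, 1010, 1187]
output_tokens = [128, 126, 130, 127, 125, 129, 126, 128]
wall_clock_s = 5.1  # total duration of the benchmark run

p50, p99 = np.percentile(latencies_ms, [50, 99])
rps = len(latencies_ms) / wall_clock_s
output_tps = sum(output_tokens) / wall_clock_s
avg_tps_per_request = float(np.mean(
    [tok / (lat / 1000.0) for tok, lat in zip(output_tokens, latencies_ms)]))

print(f"P50: {p50:.0f} ms  P99: {p99:.0f} ms  RPS: {rps:.2f}  "
      f"out tok/s: {output_tps:.1f}  avg tok/s/request: {avg_tps_per_request:.1f}")
```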
# Getting Started

Please follow the ```README.md``` in each subfolder for instructions on how to set up and run these benchmarks.


================================================
FILE: end-to-end-use-cases/benchmarks/inference/requirements.txt
================================================
transformers
requests
azure-core
azure-ai-contentsafety
torch


================================================
FILE: end-to-end-use-cases/benchmarks/inference/cloud/README.md
================================================
# Llama-Cloud-API-Benchmark

This folder contains code to run an inference benchmark for Llama 2 models on cloud APIs from popular cloud service providers. The benchmark focuses on overall inference **throughput** when querying the API endpoint for output generation with different levels of concurrent requests. Remember that to send queries to the API endpoint, you are required to hold a subscription with the cloud service provider, and there will be a fee associated with it.

Disclaimer - The purpose of the code is to provide a configurable setup to measure inference throughput. It is not representative of the performance of these API services, and we do not plan to make comparisons between different API providers.

# Azure - Getting Started

To get started, there are certain steps we need to take to deploy the models:

<!-- markdown-link-check-disable -->
* Register for a valid Azure account with a subscription [here](https://azure.microsoft.com/en-us/free/search/?ef_id=_k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&gad_source=1&gclid=CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE)
<!-- markdown-link-check-enable -->
* Take a quick look at what [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) is and navigate to the website from the link in the article
* Follow the demos in the article to create a project and [resource](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal) group, or you can also follow the guide [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio)
* Select Llama models from the Model catalog
* Click the "Deploy" button
* Select Serverless API with Azure AI Content Safety. Note that currently this API service is offered for Llama 2 pretrained and chat models and the Llama 3 instruct model
* Select the project you created in the previous step
* Choose a deployment name, then go to deployment

Once deployed successfully, you should be assigned an API endpoint and a security key for inference.

For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio) for model deployment and inference.

Now, replace the endpoint url and API key in ```azure/parameters.json```. For the parameter `MODEL_ENDPOINTS`, with chat models the suffix should be `v1/chat/completions` and with pretrained models the suffix should be `v1/completions`.

Note that the API endpoint might implement a rate limit for token generation within a certain amount of time. If you encounter this error, you can try reducing `MAX_NEW_TOKENS` or starting with smaller `CONCURRENT_LEVELS`.

For `MODEL_PATH`, copy the model path from Huggingface under the meta-llama organization. For Llama 2, make sure you copy the path of the model in HF format. This model path is used to retrieve the corresponding tokenizer for your model of choice. Llama 3 uses a different tokenizer than Llama 2.
Once everything is configured, run the chat model benchmark with:

```
python chat_azure_api_benchmark.py
```

To run the pretrained model benchmark:

```
python pretrained_azure_api_benchmark.py
```

Once finished, the result will be written into a CSV file in the same directory, which can later be imported into a dashboard of your choice.


================================================
FILE: end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md
================================================
# Benchmark Llama models on AWS

The [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) tool provides a quick and easy way to benchmark the Llama family of models for price and performance on any AWS service, including [`Amazon SageMaker`](https://aws.amazon.com/solutions/guidance/generative-ai-deployments-using-amazon-sagemaker-jumpstart/), [`Amazon Bedrock`](https://aws.amazon.com/bedrock/), or `Amazon EKS` or `Amazon EC2` as `Bring your own endpoint`.

## The need for benchmarking

<!-- markdown-link-check-disable -->
Customers often wonder which AWS service is best to run Llama models for _my specific use-case_ and _my specific price performance requirements_. While model evaluation metrics are available on several leaderboards ([`HELM`](https://crfm.stanford.edu/helm/lite/latest/#/leaderboard), [`LMSys`](https://chat.lmsys.org/?leaderboard)), the price performance comparison can be notoriously hard to find and even harder to trust. In such a scenario, we think it is best to be able to run performance benchmarking yourself, either on your own dataset or on similar (task-wise, prompt-size-wise) open-source datasets such as [`LongBench`](https://huggingface.co/datasets/THUDM/LongBench) or [`QMSum`](https://paperswithcode.com/dataset/qmsum). This is the problem that [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) solves.
<!-- markdown-link-check-enable -->

## [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main): an open-source Python package for FM benchmarking on AWS

`FMBench` runs inference requests against endpoints that are either deployed through `FMBench` itself (as in the case of SageMaker) or are available as a fully-managed endpoint (as in the case of Bedrock) or as a bring-your-own endpoint. Metrics such as inference latency, transactions per minute, error rates, and cost per transaction are captured and presented in the form of a Markdown report containing explanatory text, tables, and figures. The figures and tables in the report provide insights into what might be the best serving stack (instance type, inference container, and configuration parameters) for a given Llama model for a given use-case.

The following figure gives an example of the price performance numbers that include inference latency, transactions per minute, and concurrency level for running the `Llama2-13b` model on different instance types available on SageMaker, using prompts for a Q&A task created from the [`LongBench`](https://huggingface.co/datasets/THUDM/LongBench) dataset; these prompts are between 3000 and 3840 tokens in length. **_Note that the numbers are hidden in this figure but you would be able to see them when you run `FMBench` yourself_**.
The following table (also included in the report) provides information about the best available instance type for that experiment<sup>1</sup>.

|Information	|Value	|
|---	|---	|
|experiment_name	|llama2-13b-inf2.24xlarge	|
|payload_file	|payload_en_3000-3840.jsonl	|
|instance_type	|ml.inf2.24xlarge	|
|concurrency	|**	|
|error_rate	|**	|
|prompt_token_count_mean	|3394	|
|prompt_token_throughput	|2400	|
|completion_token_count_mean	|31	|
|completion_token_throughput	|15	|
|latency_mean	|**	|
|latency_p50	|**	|
|latency_p95	|**	|
|latency_p99	|**	|
|transactions_per_minute	|**	|
|price_per_txn	|**	|

<sup>1</sup> ** represents values hidden on purpose; these are available when you run the tool yourself.

The report also includes latency vs. prompt size charts for different concurrency levels. As expected, inference latency increases as prompt size increases, but what is interesting to note is that the increase is much larger at higher concurrency levels (and this behavior varies with instance types).

### How to get started with `FMBench`

The following steps provide a [Quick start guide for `FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool#quickstart). For a more detailed DIY version, please see the [`FMBench Readme`](https://github.com/aws-samples/foundation-model-benchmarking-tool?tab=readme-ov-file#the-diy-version-with-gory-details).

1. Each `FMBench` run works with a configuration file that contains the information about the model, the deployment steps, and the tests to run. A typical `FMBench` workflow involves either directly using an already provided config file from the [`configs`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main/fmbench/configs) folder in the `FMBench` GitHub repo or editing an already provided config file as per your own requirements (say you want to try benchmarking on a different instance type, or a different inference container, etc.).

    >A simple config file with key parameters annotated is included in this repo, see [`config.yml`](./config.yml). This file benchmarks performance of Llama2-7b on an `ml.g5.xlarge` instance and an `ml.g5.2xlarge` instance. You can use this provided config file as it is for this Quickstart.

1. Launch the AWS CloudFormation template included in this repository using one of the buttons from the table below. The CloudFormation template creates the following resources within your AWS account: Amazon S3 buckets, an Amazon IAM role, and an Amazon SageMaker Notebook with this repository cloned. A read S3 bucket is created which contains all the files (configuration files, datasets) required to run `FMBench`, and a write S3 bucket is created which will hold the metrics and reports generated by `FMBench`. The CloudFormation stack takes about 5 minutes to create.

   |AWS Region                |     Link        |
   |:------------------------:|:-----------:|
   |us-east-1 (N. Virginia)    | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
   |us-west-2 (Oregon)    | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
1. Once the CloudFormation stack is created, navigate to SageMaker Notebooks and open the `fmbench-notebook`.

1. On the `fmbench-notebook`, open a Terminal and run the following commands.

    ```{.bash}
    conda create --name fmbench_python311 -y python=3.11 ipykernel
    source activate fmbench_python311;
    pip install -U fmbench
    ```

1. Now you are ready to run `fmbench` with the following command line. We will use a sample config file placed in the S3 bucket by the CloudFormation stack for a quick first run.

    1. We benchmark performance for the `Llama2-7b` model on a `ml.g5.xlarge` and a `ml.g5.2xlarge` instance type, using the `huggingface-pytorch-tgi-inference` inference container. This test takes about 30 minutes to complete and costs about $0.20.

    1. It uses a simple relationship of 750 words equaling 1000 tokens; to get a more accurate representation of token counts, use the `Llama2 tokenizer` (see the token-counting sketch after these steps). **_It is strongly recommended that for more accurate results on token throughput you use a tokenizer specific to the model you are testing rather than the default tokenizer. See instructions provided [here](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main?tab=readme-ov-file#the-diy-version-with-gory-details) on how to use a custom tokenizer_**.

        <!-- markdown-link-check-disable -->
        ```{.bash}
        account=`aws sts get-caller-identity | jq .Account | tr -d '"'`
        region=`aws configure get region`
        fmbench --config-file s3://sagemaker-fmbench-read-${region}-${account}/configs/llama2/7b/config-llama2-7b-g5-quick.yml >> fmbench.log 2>&1
        ```
        <!-- markdown-link-check-enable -->

    1. Open another terminal window and do a `tail -f` on the `fmbench.log` file to see all the traces being generated at runtime.

        ```{.bash}
        tail -f fmbench.log
        ```

1. The generated reports and metrics are available in the `sagemaker-fmbench-write-<replace_w_your_aws_region>-<replace_w_your_aws_account_id>` bucket. The metrics and report files are also downloaded locally in the `results` directory (created by `FMBench`), and the benchmarking report is available as a markdown file called `report.md` in the `results` directory. You can view the rendered Markdown report in the SageMaker notebook itself or download the metrics and report files to your machine for offline analysis.
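As promised above, here is a minimal token-counting sketch showing why the 750-words-per-1000-tokens heuristic is only approximate. The checkpoint name is an assumption (the Llama model repos on Hugging Face are gated, so you need approved access):

```python
# Count tokens with the model's own tokenizer instead of the word-count heuristic.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

text = "The quick brown fox jumps over the lazy dog. " * 100
words = len(text.split())
tokens = len(tokenizer.encode(text))
print(f"{words} words -> {tokens} tokens ({tokens / words:.2f} tokens/word)")
```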
## 🚨 Benchmarking Llama3 on Amazon Bedrock 🚨

Llama3 is now available on Bedrock (read the [blog post](https://aws.amazon.com/blogs/aws/metas-llama-3-models-are-now-available-in-amazon-bedrock/)), and you can now benchmark it using `FMBench`. Here is the config file for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on Bedrock.

<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock-llama3.yml) for `Llama3-8b-instruct` and `Llama3-70b-instruct`.
<!-- markdown-link-check-enable -->

## 🚨 Benchmarking Llama3 on Amazon SageMaker 🚨

Llama3 is now available on SageMaker (read the [blog post](https://aws.amazon.com/blogs/machine-learning/meta-llama-3-models-are-now-available-in-amazon-sagemaker-jumpstart/)), and you can now benchmark it using `FMBench`. Here are the config files for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on `ml.p4d.24xlarge`, `ml.inf2.24xlarge`, and `ml.g5.12xlarge` instances.

<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-instruct-g5-p4d.yml) for `Llama3-8b-instruct` on `ml.p4d.24xlarge` and `ml.g5.12xlarge`.
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/70b/config-llama3-70b-instruct-g5-p4d.yml) for `Llama3-70b-instruct` on `ml.p4d.24xlarge` and `ml.g5.48xlarge`.
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-inf2-g5.yml) for `Llama3-8b-instruct` on `ml.inf2.24xlarge` and `ml.g5.12xlarge`.
<!-- markdown-link-check-enable -->

## Benchmarking Llama2 on Amazon SageMaker

Llama2 models are available through SageMaker JumpStart as well as directly deployable from Hugging Face to a SageMaker endpoint. You can use `FMBench` to benchmark Llama2 on SageMaker for different combinations of instance types and inference containers.

<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g5-quick.yml) for `Llama2-7b` on `ml.g5.xlarge` and `ml.g5.2xlarge` instances, using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g4dn-g5-trt.yml) for `Llama2-7b` on a `ml.g4dn.12xlarge` instance, using the [Deep Java Library DeepSpeed container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-deepspeed0.12.6-cu121).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/13b/config-llama2-13b-inf2-g5-p4d.yml) for `Llama2-13b` on `ml.g5.12xlarge`, `ml.inf2.24xlarge`, and `ml.p4d.24xlarge` instances, using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04) and the [Deep Java Library & NeuronX container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-neuronx-sdk2.16.0).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-g5-p4d-trt.yml) for `Llama2-70b` on a `ml.p4d.24xlarge` instance, using the [Deep Java Library TensorRT container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-tensorrtllm0.7.1-cu122).
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-inf2-g5.yml) for `Llama2-70b` on a `ml.inf2.48xlarge` instance, using the [HuggingFace TGI with Optimum NeuronX container](763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-tgi-inference:1.13.1-optimum0.0.17-neuronx-py310-ubuntu22.04).
<!-- markdown-link-check-enable -->

## Benchmarking Llama2 on Amazon Bedrock

The Llama2-13b-chat and Llama2-70b-chat models are available on [Bedrock](https://aws.amazon.com/bedrock/llama/).
You can use `FMBench` to benchmark Llama2 on Bedrock for both on-demand throughput and provisioned throughput inference options.

<!-- markdown-link-check-disable -->
- [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock.yml) for `Llama2-13b-chat` and `Llama2-70b-chat` on Bedrock for on-demand throughput.
<!-- markdown-link-check-enable -->
- For testing provisioned throughput, simply replace the `ep_name` parameter in the `experiments` section of the config file with the ARN of your provisioned throughput.

## More

For bug reports, enhancement requests, and any questions, please create a [GitHub issue](https://github.com/aws-samples/foundation-model-benchmarking-tool/issues) on the `FMBench` repo.



================================================
FILE: end-to-end-use-cases/benchmarks/inference/on_prem/README.md
================================================

# Llama-On-Prem-Benchmark

This folder contains code to run inference benchmarks for Meta Llama 3 models on-prem with popular serving frameworks.
The benchmark focuses on overall inference **throughput** for running containers on one instance (single or multiple GPUs) that you can acquire from cloud service providers such as Azure and AWS. You can also run this benchmark on a local laptop or desktop.
We support benchmarking on these serving frameworks:
* [vLLM](https://github.com/vllm-project/vllm)

# vLLM - Getting Started

To get started, we first need to deploy containers on-prem as an API host. Follow the guidance [here](../../../../3p-integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem.

Note that in the common scenario where overall throughput is important, we suggest you prioritize deploying as many model replicas as possible to reach higher overall throughput and requests-per-second (RPS), rather than deploying one model container across multiple GPUs for model parallelism. Additionally, when deploying multiple model replicas, a higher-level wrapper is needed to handle load balancing; this is simulated in the benchmark scripts.

For example, suppose we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Meta Llama 3 70B instruct model, which is around 140GB with FP16. For deployment we can do:
* 1x70B model parallel on 8 GPUs: each GPU takes around 17.5GB of RAM for loading model weights.
* 2x70B models, each using 4 GPUs: each GPU takes around 35GB of RAM for loading model weights.
* 4x70B models, each using 2 GPUs: each GPU takes around 70GB of RAM for loading model weights. (Preferred configuration for max overall throughput. Note that you will have 4 endpoints hosted on different ports and the benchmark script will route requests to each model equally.)
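To make the load-balancing point concrete, here is a minimal sketch of the round-robin routing that the benchmark scripts simulate (the ports and payload shape are assumptions matching the vLLM OpenAI-compatible servers started below):

```python
# Round-robin client-side load balancing across model replica endpoints.
import itertools
import requests

ENDPOINTS = itertools.cycle([
    "http://localhost:8000/v1/chat/completions",  # replica 1
    "http://localhost:8001/v1/chat/completions",  # replica 2
])

def send_request(messages):
    url = next(ENDPOINTS)  # pick the next replica, so requests spread equally
    payload = {
        "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
        "messages": messages,
        "max_tokens": 256,
    }
    return requests.post(url, json=payload, timeout=600).json()
```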
Here are examples of deploying 2x70B chat models over 8 GPUs with vLLM.

```
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8000
CUDA_VISIBLE_DEVICES=4,5,6,7 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8001
```

Once you have finished deployment, you can use the command below to run the benchmark script in a separate terminal.

```
python chat_vllm_benchmark.py
```

<!-- markdown-link-check-disable -->
If you are going to use [Azure AI content check](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety), then you should install the dependencies as shown below in your terminal:
<!-- markdown-link-check-enable -->

```
pip install azure-ai-contentsafety azure-core
```

Besides chat models, we also provide benchmark scripts for running pretrained models for text completion tasks. To better simulate real traffic, we generate configurable random-token prompts as input. In this process, we select vocabulary tokens that are longer than 2 characters, so the generated words are closer to English rather than symbols.

However, random-token prompts can't be used for chat model benchmarks, since a chat model expects a valid question. When fed random prompts, chat models rarely provide answers that meet our `MAX_NEW_TOKEN` requirement, defeating the purpose of running throughput benchmarks. Hence, for chat models, the questions are copied over to form long inputs, such as the 2k and 4k inputs.

To run the pretrained model benchmark, follow the command below.

```
python pretrained_vllm_benchmark.py
```
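As a rough illustration of the random-token prompt generation described above, a minimal sketch (the tokenizer checkpoint and the longer-than-2-characters filter are assumptions; the actual benchmark script's logic may differ):

```python
# Build a random-token prompt from vocabulary entries whose decoded text is
# longer than 2 characters, so prompts look more like English words.
import random
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-70B-Instruct")

# Filtering the whole vocabulary decodes every token once; this takes a moment.
candidate_ids = [
    i for i in range(tokenizer.vocab_size)
    if len(tokenizer.decode([i]).strip()) > 2
]

def random_prompt(num_tokens: int = 2048) -> str:
    return tokenizer.decode(random.choices(candidate_ids, k=num_tokens))

print(random_prompt(32))
```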
Refer to more vLLM benchmark details on their official GitHub repo [here](https://github.com/vllm-project/vllm/tree/main/benchmarks).



================================================
FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/README.md
================================================

# Llama Model Evaluation

Llama-Recipes makes use of `lm-evaluation-harness` for evaluating our fine-tuned Meta Llama3 (or Llama2) models. It can also serve as a tool to evaluate quantized models, ensuring quality is maintained in lower precision, or to evaluate any other optimization applied to the model.

`lm-evaluation-harness` provides a wide range of [features](https://github.com/EleutherAI/lm-evaluation-harness?tab=readme-ov-file#overview):

- Over 60 standard academic benchmarks for LLMs, with hundreds of subtasks and variants implemented.
- Support for models loaded via [transformers](https://github.com/huggingface/transformers/) (including quantization via [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)), [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), and [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/), with a flexible tokenization-agnostic interface.
- Support for fast and memory-efficient inference with [vLLM](https://github.com/vllm-project/vllm).
- Support for commercial APIs including OpenAI and TextSynth.
- Support for evaluation on adapters (e.g. LoRA) supported in [HuggingFace's PEFT library](https://github.com/huggingface/peft).
- Support for local models and benchmarks.
- Evaluation with publicly available prompts ensures reproducibility and comparability between papers.
- Easy support for custom prompts and evaluation metrics.

The Language Model Evaluation Harness is also the backend for 🤗 [Hugging Face's (HF) popular Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).

## Setup

Before running the evaluation, ensure you have all the necessary dependencies installed.

### Dependencies

- Python 3.8+
- Your language model's dependencies

### Installation

Clone the lm-evaluation-harness repository and install it:

```bash
git clone https://github.com/EleutherAI/lm-evaluation-harness.git
cd lm-evaluation-harness
pip install -e .
```

### Quick Test

To run an evaluation of the Hugging Face `Llama3.1 8B` model on a single GPU, please run the following:

```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B --tasks hellaswag --device cuda:0 --batch_size 8
```

Multiple tasks can be run by separating them with a `,`, for example `--tasks hellaswag,arc`. To set the number of shots for few-shot evaluation, use `--num_fewshot`.

### PEFT Fine-tuned model Evaluation

In case you have fine-tuned your model using PEFT, you can set the path to the PEFT checkpoints using the `peft` key in `model_args`, as shown below:

```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8
```

### Limit the number of examples in benchmarks

There has been a study from IBM on [efficient benchmarking of LLMs](https://arxiv.org/pdf/2308.11696.pdf), with the main takeaway that, to identify whether a model is performing poorly, benchmarking on a wider range of tasks is more important than the number of examples in each task. This means you could run the evaluation harness with fewer examples per task to reach an initial decision on whether performance has regressed from the baseline. The number of examples can be set using the `--limit` flag with the desired number. For the full assessment, however, you would still need to run the full evaluation. Please read more in the paper linked above.

```bash
lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8 --limit 100
```
### Customized Llama Model

In case you have customized the Llama model, for example a quantized version of the model that is loaded differently from a normal HF model, you can follow [this guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage) and use `lm_eval.simple_evaluate()` to run the eval benchmarks.

You can also find the full task list [here](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks).
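Following that guide, a minimal sketch of what external-library usage can look like (assuming lm-eval 0.4.x; the model name and task here are placeholders, and you would substitute your own customized or quantized model object):

```python
# Wrap an already-loaded (possibly customized) model in lm-eval's HFLM wrapper,
# then call simple_evaluate() directly instead of going through the lm_eval CLI.
import lm_eval
from lm_eval.models.huggingface import HFLM
from transformers import AutoModelForCausalLM

my_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")  # your customized model here

lm_obj = HFLM(pretrained=my_model, batch_size=8)
results = lm_eval.simple_evaluate(model=lm_obj, tasks=["hellaswag"], num_fewshot=0)
print(results["results"]["hellaswag"])
```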
### Multi-GPU Evaluation with Hugging Face `accelerate`

`lm-evaluation-harness` supports three main ways of using Hugging Face's [accelerate 🚀](https://github.com/huggingface/accelerate) library for multi-GPU evaluation.

To perform *data-parallel evaluation* (where each GPU loads a **separate full copy** of the model), `lm-evaluation-harness` leverages the `accelerate` launcher as follows:

```bash
accelerate launch -m lm_eval --model hf \
    --model_args pretrained=meta-llama/Llama-3.1-8B \
    --tasks lambada_openai,arc_easy \
    --batch_size 16
```

(or via `accelerate launch --no-python lm_eval`).

For cases where your model can fit on a single GPU, this allows you to evaluate on K GPUs K times faster than on one.

**WARNING**: This setup does not work with FSDP model sharding, so in `accelerate config` FSDP must be disabled, or the NO_SHARD FSDP option must be used.

The second way of using `accelerate` for multi-GPU evaluation is when your model is *too large to fit on a single GPU.*

In this setting, run the library *outside the `accelerate` launcher*, but pass `parallelize=True` to `--model_args` as follows:

```
lm_eval --model hf \
    --tasks lambada_openai,arc_easy \
    --model_args pretrained=meta-llama/Llama-3.1-70B,parallelize=True \
    --batch_size 16
```

This means that your model's weights will be split across all available GPUs.

For more advanced users or even larger models, `lm-evaluation-harness` allows for the following arguments when `parallelize=True` as well:
- `device_map_option`: How to split model weights across available GPUs. Defaults to "auto".
- `max_memory_per_gpu`: the max GPU memory to use per GPU in loading the model.
- `max_cpu_memory`: the max amount of CPU memory to use when offloading the model weights to RAM.
- `offload_folder`: a folder where model weights will be offloaded to disk if needed.

There is also an option to run with tensor parallel and data parallel together. This will allow you to take advantage of both data parallelism and model sharding, and is especially useful for models that are too large to fit on a single GPU.

```
accelerate launch --multi_gpu --num_processes {nb_of_copies_of_your_model} \
    -m lm_eval --model hf \
    --model_args pretrained=meta-llama/Llama-3.1-70B \
    --tasks lambada_openai,arc_easy \
    --model_args parallelize=True \
    --batch_size 16
```

To learn more about model parallelism and how to use it with the `accelerate` library, see the [accelerate documentation](https://huggingface.co/docs/transformers/v4.15.0/en/parallelism).

### Tensor + Data Parallel and Optimized Inference with `vLLM`

`lm-evaluation-harness` also supports vLLM for faster inference on [supported model types](https://docs.vllm.ai/en/latest/models/supported_models.html), which is especially faster when splitting a model across multiple GPUs. To perform single-GPU or multi-GPU inference (tensor parallel, data parallel, or a combination of both), for example:

```bash
lm_eval --model vllm \
    --model_args pretrained={model_name},tensor_parallel_size={GPUs_per_model},dtype=auto,gpu_memory_utilization=0.8,data_parallel_size={model_replicas} \
    --tasks lambada_openai \
    --batch_size auto
```

To use vllm, do `pip install lm_eval[vllm]`. For a full list of supported vLLM configurations, please reference the [vLLM integration](https://github.com/EleutherAI/lm-evaluation-harness/blob/e74ec966556253fbe3d8ecba9de675c77c075bce/lm_eval/models/vllm_causallms.py) and the vLLM documentation.

vLLM occasionally differs in output from Huggingface. `lm-evaluation-harness` treats Huggingface as the reference implementation, and provides a script for checking the validity of vLLM results against HF.

> [!Tip]
> For fastest performance, `lm-evaluation-harness` recommends using `--batch_size auto` for vLLM whenever possible, to leverage its continuous batching functionality!

> [!Tip]
> Passing `max_model_len=4096` or some other reasonable default to vLLM through model args may cause speedups or prevent out-of-memory errors when trying to use auto batch size, such as for Mistral-7B-v0.1, which defaults to a maximum length of 32k.

For more details about `lm-evaluation-harness`, please check out their GitHub repo [README.md](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/README.md).

## Calculating Meta 3.1 Evaluation Metrics Using LM-Evaluation-Harness

The [meta_eval](./meta_eval/) folder provides a detailed guide on how to calculate the Meta Llama 3.1 evaluation metrics reported on our [Meta Llama website](https://llama.meta.com/) using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) and our [3.1 evals Huggingface collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f). By following the steps outlined, users can replicate an evaluation process that is similar to Meta's, for specific tasks, and compare their results with our reported metrics. While slight variations in results are expected due to differences in implementation and model behavior, we aim to provide a transparent method for evaluating Meta Llama 3 models using a third-party library. Please check the [README.md](./meta_eval/README.md) for more details.

## Reproducing HuggingFace Open-LLM-Leaderboard v2

In the HF leaderboard v2, the [LLMs are evaluated on 6 benchmarks](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) from the Language Model Evaluation Harness, as described below:

- **IFEval**: [IFEval](https://arxiv.org/abs/2311.07911) is a dataset designed to test a model’s ability to follow explicit instructions, such as “include keyword x” or “use format y.” The focus is on the model’s adherence to formatting instructions rather than the content generated, allowing for the use of strict and rigorous metrics.
- **BBH (Big Bench Hard)**: [BBH](https://arxiv.org/abs/2210.09261) is a subset of 23 challenging tasks from the BigBench dataset to evaluate language models. The tasks use objective metrics, are highly difficult, and have sufficient sample sizes for statistical significance. They include multistep arithmetic, algorithmic reasoning (e.g., boolean expressions, SVG shapes), language understanding (e.g., sarcasm detection, name disambiguation), and world knowledge. BBH performance correlates well with human preferences, providing valuable insights into model capabilities.
- **MATH**: [MATH](https://arxiv.org/abs/2103.03874) is a compilation of high-school level competition problems gathered from several sources, formatted consistently using LaTeX for equations and Asymptote for figures. Generations must fit a very specific output format. HuggingFace Open-LLM-Leaderboard v2 keeps only level 5 MATH questions and calls it MATH Level 5.
- **GPQA (Graduate-Level Google-Proof Q&A Benchmark)**: [GPQA](https://arxiv.org/abs/2311.12022) is a highly challenging knowledge dataset with questions crafted by PhD-level domain experts in fields like biology, physics, and chemistry. These questions are designed to be difficult for laypersons but relatively easy for experts. The dataset has undergone multiple rounds of validation to ensure both difficulty and factual accuracy. Access to GPQA is restricted through gating mechanisms to minimize the risk of data contamination. Consequently, HuggingFace Open-LLM-Leaderboard v2 does not provide plain text examples from this dataset, as requested by the authors.
- **MuSR (Multistep Soft Reasoning)**: [MuSR](https://arxiv.org/abs/2310.16049) is a new dataset consisting of algorithmically generated complex problems, each around 1,000 words in length. The problems include murder mysteries, object placement questions, and team allocation optimizations. Solving these problems requires models to integrate reasoning with long-range context parsing. Few models achieve better than random performance on this dataset.
- **MMLU-PRO (Massive Multitask Language Understanding - Professional)**: [MMLU-Pro](https://arxiv.org/abs/2406.01574) is a refined version of the MMLU dataset, which has been a standard for multiple-choice knowledge assessment. Recent research identified issues with the original MMLU, such as noisy data (some unanswerable questions) and decreasing difficulty due to advances in model capabilities and increased data contamination. MMLU-Pro addresses these issues by presenting models with 10 choices instead of 4, requiring reasoning on more questions, and undergoing expert review to reduce noise. As a result, MMLU-Pro is of higher quality and currently more challenging than the original.
In order to install the correct lm-evaluation-harness version, please check the Huggingface 🤗 Open LLM Leaderboard v2 [reproducibility section](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#reproducibility).

To run a leaderboard evaluation for `Llama-3.1-8B`, we can run the following:

```bash
accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4
```

Similarly, to run a leaderboard evaluation for `Llama-3.1-8B-Instruct`, we can run the following, adding `--apply_chat_template --fewshot_as_multiturn`:

```bash
accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4 --apply_chat_template --fewshot_as_multiturn
```

As for 70B models, it is required to run with tensor parallelism, as they can not fit on 1 GPU; therefore we can run the following for `Llama-3.1-70B-Instruct`:

```bash
lm_eval --model hf --batch_size 4 --model_args pretrained=meta-llama/Llama-3.1-70B-Instruct,parallelize=True --tasks leaderboard --log_samples --output_path eval_results --apply_chat_template --fewshot_as_multiturn
```



================================================
FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md
================================================

# Calculating Meta 3.x Evaluation Metrics Using LM-Evaluation-Harness

As Llama models gain popularity, evaluating these models has become increasingly important. We have released all the evaluation details for Llama 3.x models on Hugging Face as datasets in the [3.1 evals collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f) and the [3.2 evals collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0). This recipe demonstrates how to calculate the Llama 3.x reported benchmark numbers using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) library and our prompts from the 3.x evals datasets on selected tasks.

## Disclaimer

1. **This recipe is not the official implementation** of Llama evaluation. Since our internal eval repo isn't public, we want to provide this recipe as an aid for anyone who wants to use the datasets we released. It is based on public third-party libraries, and this implementation does not mirror Llama evaluation exactly, which may lead to minor differences in the produced numbers.
2. **Model Compatibility**: This tutorial is specifically for Llama 3 based models, as our prompts include Llama 3 special tokens, e.g. `<|start_header_id|>user<|end_header_id|>`. It will not work with models that are not based on Llama 3.

## Insights from Our Evaluation Process

Here are our insights about the differences, in terms of eval configurations and prompting methods, between this implementation and the Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard).

- **Prompts**: We use Chain-of-Thought (COT) prompts while the Hugging Face leaderboard does not. The prompts that define the output format are also different.
- **Metric calculation**: For the MMLU-Pro, BBH, and GPQA tasks, we ask the model to generate a response and score the parsed answer from the generated response, while the Hugging Face leaderboard evaluation compares the log likelihood of all label words, such as [(A), (B), (C), (D)].
- **Parsers**: For generative tasks, where the final answer needs to be parsed before scoring, the parser functions can differ between ours and the Hugging Face leaderboard evaluation, as our prompts that define the model output format are designed differently.
- **Inference**: We use an internal LLM inference solution that does not apply padding, while the Hugging Face leaderboard uses padding on the generative tasks (MATH and IFEVAL).
- **Tasks**: We run benchmarks on BBH and MMLU-Pro only for pretrained models, and on Math-Hard, IFeval, and GPQA only for instruct models.

Given those differences, the numbers from this recipe can not be compared to the numbers in the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard), even if the task names are the same.

## Environment setups

Please install lm-evaluation-harness and our llama-cookbook repo by following:

```
git clone git@github.com:meta-llama/llama-cookbook.git
cd llama-cookbook
pip install -U pip setuptools
pip install -e .
pip install -U antlr4_python3_runtime==4.11
pip install lm-eval[math,ifeval,sentencepiece,vllm]==0.4.3
cd end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval
```

To access our [3.1 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f), you must:
- Log in to the Hugging Face website, click the 3.1 evals dataset pages, and agree to the terms.
- Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine.

The same process can be followed to access the [3.2 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0).

It is recommended to read the dataset card to understand the meaning of each column and to use the viewer feature in the Hugging Face dataset to view our dataset. It is important to have some basic understanding of our dataset format and content before proceeding.

### Task Selection

Given the extensive number of tasks available (12 for pretrained models and 30 for instruct models), a subset of tasks has been chosen:

- **Tasks for 3.1 pretrained models**: BBH and MMLU-Pro
- **Tasks for 3.1 instruct models**: Math-Hard, IFeval, GPQA, and MMLU-Pro
- **Tasks for 3.2 pretrained models**: MMLU
- **Tasks for 3.2 instruct models**: MMLU, GPQA

These tasks are common evaluations, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard).

Here, we aim to get the benchmark numbers on the aforementioned tasks using the Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard). Please follow the instructions below to make the necessary modifications to use our eval prompts and get more eval metrics.

### Run eval tasks

1. We created [eval_config.yaml](./eval_config.yaml) to store all the arguments and hyperparameters. This is the main config file you need to change if you want to eval other models; part of eval_config.yaml looks like this:
```yaml
model_name: "meta-llama/Llama-3.1-8B-Instruct" # The name of the model to evaluate. This must be a valid Llama 3 based model name in the HuggingFace model hub."

evals_dataset: "meta-llama/Llama-3.1-8B-Instruct-evals" # The name of the 3.1 evals dataset to evaluate, please make sure this eval dataset corresponds to the model loaded. This must be a valid Llama 3.1 evals dataset name in the Llama 3.1 Evals collection.
# Must be one of the following ["meta-llama/Llama-3.1-8B-Instruct-evals","meta-llama/Llama-3.1-70B-Instruct-evals","meta-llama/Llama-3.1-405B-Instruct-evals","meta-llama/Llama-3.1-8B-evals","meta-llama/Llama-3.1-70B-evals","meta-llama/Llama-3.1-405B-evals","meta-llama/Llama-3.2-1B-evals","meta-llama/Llama-3.2-3B-evals", "meta-llama/Llama-3.2-1B-Instruct-evals", "meta-llama/Llama-3.2-3B-Instruct-evals"]

tasks: "meta_instruct" # Available tasks for 3.1 instruct model: "meta_math_hard", "meta_gpqa_cot", "meta_mmlu_pro_instruct", "meta_ifeval"; or just use "meta_instruct" to run all of them.
# Available tasks for 3.1 pretrain model: "meta_bbh", "meta_mmlu_pro_pretrain"; or just use "meta_pretrain" to run all of them.
# Available tasks for 3.2 instruct model: "meta_mmlu", "meta_math", "meta_gpqa"; or just use "meta_instruct" to run all of them.
# Available tasks for 3.2 pretrain model: "meta_mmlu"; or just use "meta_pretrain" to run all of them

tensor_parallel_size: 1 # The VLLM argument that specifies the tensor parallel size for the model, i.e. how many GPUs to use for each model copy.

data_parallel_size: 4 # The VLLM argument that specifies the data parallel size for the model, i.e. how many copies of the model will be used.
...
```

  Change `model_name` to the model name you want to eval on, and change the `evals_dataset` according to the model type and parameters. Remember to adjust the `tensor_parallel_size` to 2 or more to load the 70B models, and change the `data_parallel_size` accordingly so that `tensor_parallel_size * data_parallel_size` equals the number of GPUs you have. Please read the comments inside this yaml for detailed explanations of the other parameters.

2. We have already included all the related eval task yaml and python files in the [meta_template](./meta_template/) folder, which define all the task implementations. You do not need to change those manually; we will use [prepare_meta_eval.py](./prepare_meta_eval.py) to change them automatically later.

3. Then we can run [prepare_meta_eval.py](./prepare_meta_eval.py), which reads the configuration from [eval_config.yaml](./eval_config.yaml), copies everything in the template folder to a working folder `work_dir`, modifies those templates accordingly, prepares the dataset if needed, and prints out the CLI command to run `lm_eval`.

  To run [prepare_meta_eval.py](./prepare_meta_eval.py), we can do:

```
python prepare_meta_eval.py --config_path ./eval_config.yaml
```

  This script will load the default [eval_config.yaml](./eval_config.yaml) config and print out a `lm_eval` command to run the `meta_instruct` group tasks, which includes `meta_ifeval`, `meta_math_hard`, `meta_gpqa` and `meta_mmlu_pro_instruct`, for the `meta-llama/Llama-3.1-8B-Instruct` model using the `meta-llama/Llama-3.1-8B-Instruct-evals` dataset.
  An example output from [prepare_meta_eval.py](./prepare_meta_eval.py) looks like this:

```
lm_eval --model vllm --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=4,max_model_len=8192,add_bos_token=True,seed=42 --tasks meta_instruct --batch_size auto --output_path eval_results --include_path ./work_dir --seed 42 --log_samples
```

4. Then just copy the `lm_eval` command printed by [prepare_meta_eval.py](./prepare_meta_eval.py) back to your terminal and run it to get the results, which will be saved into the `eval_results` folder by default.

**NOTE**: As for `--model vllm`, here we will use VLLM inference instead of Hugging Face inference because of the padding issue. By default, for the generative tasks, the `lm-eval --model_args="{...}" --batch_size=auto` command will use the Hugging Face inference solution, which uses a static batch method with [left padding](https://github.com/EleutherAI/lm-evaluation-harness/blob/8ad598dfd305ece8c6c05062044442d207279a97/lm_eval/models/huggingface.py#L773) using the EOS_token for Llama models, but our internal evaluation loads the original python checkpoints and handles individual generation requests asynchronously without any padding. To simulate this, we will use the VLLM inference solution to do dynamic batching without any padding.

**NOTE**: As for `add_bos_token=True`, since our prompts in the evals dataset have already included all the special tokens required by the instruct model, such as `<|start_header_id|>user<|end_header_id|>`, we will not use the `--apply_chat_template` argument for instruct models anymore. However, we need to use the `add_bos_token=True` flag to add the BOS_token back during VLLM inference, as the BOS_token is removed by default in [this PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1465).

**NOTE**: For the `meta_math_hard` tasks, some of our internal math ground truth has been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which is later handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) can not fully handle those conversions, we will use the original ground truth from the original dataset [lighteval/MATH-Hard](https://www.oxen.ai/lighteval/MATH-Hard), by joining the tables on the original input questions. The `get_math_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) will handle this step and produce a local parquet dataset file.

Moreover, we have modified [math_hard/utils.py](./meta_template/math_hard/utils.py) to address two issues:

1. The original script only uses [a regular expression "Final Answer: The final answer is(.*?). I hope it is correct."](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L192) to get the final answer, because this format is shown in the previous 4-shot example prompts. However, our MATH Hard task uses 0-shot COT prompts that ask the model to put the final answer into the string format `Therefore, the final answer is: $\\boxed{answer}$. I hope it is correct.`, which can not be captured by the previous regular expression, so we parse the final answer out of `\\boxed{}` instead (a minimal sketch of this idea is shown after this list).
2. The [is_equiv(x1: str, x2: str)](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L144) function failed to parse some ground truths, as we noticed error logs like `[utils.py:158] couldn't parse one of [0,1) or [0,1)`, so all those questions would be marked as wrong. We raised [an issue to lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/issues/2212) about this problem, and for now we add a string equality check before going to the is_equiv() function as a temporary solution.
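As referenced in the first issue above, a minimal sketch of parsing a final answer out of `\\boxed{}` using brace matching (this only illustrates the idea; the actual parser in the modified [math_hard/utils.py](./meta_template/math_hard/utils.py) differs):

```python
# Extract the contents of the last \boxed{...} in a model response,
# matching braces so nested answers like \boxed{\frac{1}{2}} survive intact.
def extract_boxed_answer(response: str):
    start = response.rfind(r"\boxed{")
    if start == -1:
        return None
    i = start + len(r"\boxed{")
    depth = 1
    for j in range(i, len(response)):
        if response[j] == "{":
            depth += 1
        elif response[j] == "}":
            depth -= 1
            if depth == 0:
                return response[i:j]
    return None  # unbalanced braces: treat as unparseable

print(extract_boxed_answer(r"Therefore, the final answer is: $\boxed{\frac{1}{2}}$. I hope it is correct."))
```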
**NOTE**: For the `meta_ifeval` tasks, we have to use the original configs, such as `instruction_id_list` and `kwargs`, from [wis-k/instruction-following-eval](https://huggingface.co/datasets/wis-k/instruction-following-eval) in order to use the [lm-evaluation-harness IFeval evaluation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard/ifeval). We perform a similar join-back method, using the `get_ifeval_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py), to get a local parquet dataset file.

### Yaml Config Deep Dive

Here, we will use MMLU-Pro as an example to show the steps to create a yaml config with detailed explanations, so that people can follow this example to create other task configurations if they want. For more information, please read the lm-evaluation-harness [new task guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md).

**1. Define the config to load datasets**

We can use our 3.1 evals dataset as the source dataset with the corresponding subset, and define the test split as latest. For example, if we want to calculate the MMLU_Pro metric for the 3.1 8B instruct model, the following configs are needed, as explained below:

```yaml
task: meta_mmlu_pro_instruct
dataset_path: meta-llama/Llama-3.1-8B-Instruct-evals
dataset_name: Llama-3.1-8B-Instruct-evals__mmlu_pro__details
test_split: latest
```

If you want to run evaluation on 70B-Instruct, it is recommended to change the `dataset_path` and `dataset_name` from 8B to 70B. Even though 70B-instruct and 8B-instruct share the same prompts, the `is_correct` column, which can be used to get the difference between the current result and the reported results for each sample, is different.

**Note**: Config files for Llama-3.1-8B-Instruct are already provided in each task subfolder under the [meta_template folder](./meta_template/). Remember to change the eval dataset name according to the model type, and DO NOT use a pretrained evals dataset on instruct models or vice versa.

**2. Configure preprocessing, prompts and ground truth**

Here is the example yaml snippet in the MMLU-Pro config that handles dataset preprocessing, prompts and ground truth.

```yaml
process_docs: !function utils.process_docs
doc_to_text: !function utils.doc_to_text
doc_to_target: gold
```

- `process_docs`: Defines the preprocessing function for our datasets. In this case, we use the `process_docs` python function that is defined in [utils.py](./meta_template/mmlu_pro/utils.py). This function takes the original dataset and outputs a processed dataset that has an out_doc containing `problem`, which is the input question, and `gold`, which is the ground truth. We also renamed the `is_correct` column to `previously_is_correct` to allow a detailed per-sample comparison between the previously reported score and the current score. You must use an eval dataset and a model with the same parameters and same model type to get a valid comparison.
- `doc_to_text`: Defines the prompts. In the MMLU-Pro case, the `input_final_prompts` column always contains a list with one prompt, so we just use a python function that returns `input_final_prompts[0]`.
- `doc_to_target`: Defines the ground truth, which in the MMLU-Pro case is the `gold` that is derived from `input_correct_responses[0]`.
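For illustration, a minimal sketch of what those two hooks might look like (the real versions live in [utils.py](./meta_template/mmlu_pro/utils.py); the exact source column names here are assumptions based on the descriptions above):

```python
# Sketch of the preprocessing and prompt hooks referenced in the yaml above.
def process_docs(dataset):
    def _process(doc):
        return {
            "problem": doc["input_question"],            # the input question (assumed column name)
            "gold": doc["input_correct_responses"][0],   # the ground truth
            "previously_is_correct": doc["is_correct"],  # renamed for per-sample comparison
            "input_final_prompts": doc["input_final_prompts"],
        }
    return dataset.map(_process)  # Hugging Face `datasets` map

def doc_to_text(doc):
    # `input_final_prompts` always holds a list with a single prompt
    return doc["input_final_prompts"][0]
```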
**3. Configure task type and parser**

While Open LLM Leaderboard v2 uses the [multiple choice format](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#multiple-choice-format) for the MMLU-Pro, BBH, and GPQA tasks, comparing the log likelihood of all label words, such as [(A), (B), (C), (D)], we use the generative task option: we ask the model to generate a response in sentences given our carefully designed prompts, then use parsers to grab the final answer, and score that final answer based on the ground truth. Here is an example config in the MMLU-Pro that enables the generative task and defines the regex parser:

```yaml
filter_list:
  - name: "strict-match"
    filter:
      - function: "regex"
        group_select: -1
        regex_pattern: 'best answer is ([A-Z])'
      - function: "take_first"
```

Since the MMLU-Pro task uses 5-shot Chain-of-Thought (COT) prompts, and the prompts are designed with the explicit instruction "Your response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.", we use a simple and intuitive regex expression, `best answer is ([A-Z])`, to parse the model response and take the last appearance as the final answer; this final answer is then scored against the ground truth `gold` using the exact match method.
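In plain Python, the strict-match filter above behaves roughly like this sketch (find every match, keep the last one, per `group_select: -1`):

```python
import re

# Rough equivalent of the regex filter defined in the yaml above.
def parse_answer(response: str):
    matches = re.findall(r"best answer is ([A-Z])", response)
    return matches[-1] if matches else None  # last appearance wins

print(parse_answer("... reasoning ... The best answer is C. The best answer is B."))  # -> "B"
```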
**4. Define generation and metric config**

Then we need to define the generation and metric config, which looks like this:

```yaml
generation_kwargs:
  until: []
  do_sample: false
  temperature: 0
  max_gen_toks: 1024
num_fewshot: 0
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
```

Here we set `num_fewshot` to 0, as our prompts have already been converted to 5-shot, and the model generation will only stop if the generated output tokens exceed 1024, as stated in the [mmlu-pro eval details](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro). We set `do_sample` to false and `temperature` to 0, as stated in the `eval_config` column in the dataset. We use the metric `exact_match` for this task and calculate the `mean` as our task aggregated number.

**NOTE**: While we tried our best to create the template files, those configs and functions are created based on public third-party libraries and are not exactly the same as our internal implementation, so there is a chance that the eval numbers are slightly different.

**NOTE**: We used the average of `inst_level_strict_acc,none` and `prompt_level_strict_acc,none` to get the final number for `IFeval`, as stated [here](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#task-evaluations-and-parameters).

**NOTE**: On the [Llama website](https://llama.com/), we reported the `macro_avg` metric, which is the average of all subtask average scores, for the `MMLU-Pro` task, but here we are calculating the `micro_avg` metric, which is the average score over all the individual samples; those `micro_avg` numbers can be found in the [eval_details.md](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro).

**NOTE**: The eval numbers may be slightly different, as we observed around ±0.01 differences between evaluation runs, because the latest VLLM inference is not fully deterministic even with temperature=0. This behavior may be related to [this issue](https://github.com/vllm-project/vllm/issues/5404), or it may be expected due to 16-bit inference, as stated in [this comment](https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535) and [this comment](https://github.com/vllm-project/vllm/issues/4112#issuecomment-2071115725).

## Acknowledgement

This tutorial is inspired by the [leaderboard tasks implementation on the lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard) created by the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) team.

We also extend our gratitude to the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) github repo from [EleutherAI](https://www.eleuther.ai/).



================================================
FILE: end-to-end-use-cases/book-character-mindmap/README.md
================================================

# Book Character Mind Map With Llama4 Maverick

Book Mind is a web application that allows users to explore character relationships and storylines in books using AI-powered visualizations.

This leverages **Llama 4 Maverick**'s impressive 1M token context window to process entire books at once, enabling comprehensive analysis of complex narratives and character relationships across lengthy texts.

## Features

### Leverage Long Context Length

| Model | Meta Llama4 Maverick | Meta Llama4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
| ----- | -------------- | -------------- | -------------- | -------------- |
| Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |

Because of the long context length, Book Mind can process entire books at once, providing a comprehensive understanding of complex narratives and character relationships.

- Interactive Mind Maps: Visualize relationships between characters and plot elements.
- Book Summaries: Get concise overviews of plots and themes.

### Step-by-Step Instructions

We implemented a step-by-step approach to ensure the reliability of the model's outputs.

1. **Character Identification**: Identify all characters in the book and summarize their roles.
```
You are a highly detailed literary analyst AI. Your sole mission is to meticulously extract comprehensive information about characters and the *nuances* of their relationships from the provided text segment. This data will be used later to build a relationship graph.
```
2. **Character Relationships**: Determine the relationships between characters.
```
You are an expert data architect AI specializing in transforming literary analysis into structured graph data. Your task is to synthesize character and relationship information into a specific JSON format containing nodes and links, including a title and summary.
```
3. **JSON Format**: Output the results in a JSON format for easy parsing and visualization.
```
You are an extremely precise and strict JSON extractor.
Extract only the complete JSON object from the input. Get the last one if there are multiple.
```
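For the last step, a minimal sketch of extracting the last complete JSON object from a model response (illustrative only, not the app's actual parser):

```python
import json

# Scan the response for JSON objects and keep the last complete one,
# mirroring the "get the last one if there are multiple" instruction.
def extract_last_json(text: str):
    decoder = json.JSONDecoder()
    last, i = None, 0
    while (start := text.find("{", i)) != -1:
        try:
            obj, end = decoder.raw_decode(text, start)
            last, i = obj, end      # skip past the parsed object
        except json.JSONDecodeError:
            i = start + 1           # not a valid object here; keep scanning
    return last

print(extract_last_json('noise {"a": 1} more {"nodes": [], "links": []}'))
```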
### Ask the Book with Chat Interface

We also implemented a chat interface to interact with the book. Users can ask questions about the book's characters, plot, and relationships. The model will respond with a concise answer based on the book's content and the relationships between characters.

```
You are an expert search AI designed to help users find detailed information about character relationships from a book. Your task is to assist users in querying the relationship data extracted from the book.
```

## Getting Started

### Frontend Setup

To communicate with the [server/server.py](server/server.py), we use `React.js` and `axios`.

1. Install dependencies:
```
npm install
```
2. Run the application:
```
npm start
```

### Server Setup

We use `Flask` to serve the model's responses and `vllm` to run the **Llama 4 Maverick** model.

1. Install dependencies:
```
cd server
pip install -r requirements.txt
```
2. Run the server:
```
python server.py
```

## Get Copyright Free Books

- [Project Gutenberg](https://www.gutenberg.org/)
  - [Romeo and Juliet](https://www.gutenberg.org/ebooks/1513): 50,687 input tokens
  - [Moby-Dick; The Whale](https://www.gutenberg.org/ebooks/2701): 318,027 input tokens



================================================
FILE: end-to-end-use-cases/book-character-mindmap/.gitignore
================================================

# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*



================================================
FILE: end-to-end-use-cases/book-character-mindmap/public/robots.txt
================================================

# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:



================================================
FILE: end-to-end-use-cases/book-character-mindmap/server/requirements.txt
================================================

flask
flask-cors
asyncio
werkzeug
vllm
transformers



================================================
FILE: end-to-end-use-cases/browser_use/README.md
================================================

## Building an Intelligent Browser Agent with Llama 4 Scout
This project provides a comprehensive guide to creating an AI-powered browser agent capable of autonomously navigating and interacting with websites. By leveraging the capabilities of Llama 4 Scout, Playwright, and Together AI, this agent can perform tasks seamlessly while understanding both visual and textual content.

### Features

- Visual Understanding: Utilizes screenshots for visual comprehension of web pages.
- Autonomous Navigation: Capable of navigating and interacting with web elements.
- Natural Language Instructions: Executes tasks based on natural language commands.
- Persistent Session Management: Maintains browser sessions for continuous interaction.

### Example Tasks

- Search for a product on Amazon.
- Find the cheapest flight to Tokyo.
- Purchase tickets for the next Warriors game.

### What's in this Project?

- Environment setup instructions
- Browser automation guides using Playwright
- Structured prompting techniques for guiding the LLM in task execution
- Content comprehension utilizing Llama 4 Scout
- Creating a persistent and intelligent browser agent for real-world applications

### Demo

For a detailed explanation and demo video, visit: [Blog Post and Demo Video](https://miguelg719.github.io/browser-use-blog/)

### Prerequisite for Running the Notebook

- Before getting started, please make sure to set up Together.ai and get an API key from [here](https://www.together.ai/).

### Collaborators

Feel free to reach out with any questions or feedback!

- Miguel Gonzalez: [X](https://x.com/miguel_gonzf) | [LinkedIn](https://www.linkedin.com/in/gonzalezfernandezmiguel/)
- Dimitry Khorzov: [X](https://x.com/korzhov_dm) | [LinkedIn](https://www.linkedin.com/in/korzhovdm)



================================================
FILE: end-to-end-use-cases/coding/text2sql/README.md
================================================

## Text2SQL: Natural Language to SQL Interface

This project provides a set of scripts to convert natural language queries into SQL statements using Meta's Llama model. The goal is to enable users to interact with databases using natural language inputs, making it easier for non-technical users to access and analyze data.

For detailed instructions on setting up the environment, creating a database, and executing natural language queries using the Text2SQL interface, please refer to the quickstart.ipynb notebook.

### Structure:

- quickstart.ipynb: A Quick Demo of Text2SQL Using Llama 3.3. This Jupyter Notebook includes examples of how to use the interface to execute natural language queries on the sample data. It uses Llama 3.3 to answer questions about a SQLite database using LangChain and the Llama cloud provider Together.ai.
- nba.txt: A text file containing NBA roster information, which is used as sample data for demonstration purposes.
- txt2csv.py: A script that converts text data into a CSV format. This script is used to preprocess the input data before it is fed into csv2db.py.
- csv2db.py: A script that imports data from a CSV file into a SQLite database. This script is used to populate the database with sample data.
- nba_roster.db: A SQLite database file created from the nba.txt data, used to test the Text2SQL interface.

### Detailed steps on running the notebook:

- Before getting started, please make sure to set up Together.ai and get an API key from [here](https://www.together.ai/).
- First, please install the requirements from [here](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/coding/text2sql/requirements.txt) by running inside the folder:```git clone https://github.com/meta-llama/llama-cookbook.gitcd llama-cookbook/end-to-end-use-cases/coding/text2sql/pip install -r requirements.txt```### ContributingContributions are welcome! If you'd like to add new features or improve existing ones, please submit a pull request. We encourage contributions in the following areas:- Adding support for additional databases- Developing new interfaces or applications that use the Text2SQL interface================================================FILE: end-to-end-use-cases/coding/text2sql/nba.txt================================================Atlanta Hawks Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4397136.pngSaddiq Bey41	SF	24	6' 7"	215 lbs	Villanova	$4,556,983https://a.espncdn.com/i/headshots/nba/players/full/3037789.pngBogdan Bogdanovic13	SG	31	6' 5"	225 lbs	--	$18,700,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Kobe Bufkin	G	20	6' 4"	195 lbs	Michigan	$4,094,280https://a.espncdn.com/i/headshots/nba/players/full/3102529.pngClint Capela15	C	29	6' 10"	256 lbs	--	$20,616,000https://a.espncdn.com/i/headshots/nba/players/full/4277952.pngBruno Fernando0	PF	25	6' 10"	240 lbs	Maryland	$2,581,522https://a.espncdn.com/i/headshots/nba/players/full/4065656.pngTrent Forrest2	G	25	6' 4"	210 lbs	Florida State	--https://a.espncdn.com/i/headshots/nba/players/full/4432585.pngAJ Griffin14	F	20	6' 6"	222 lbs	Duke	$3,712,920https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Mouhamed Gueye	F	20	6' 11"	210 lbs	Washington State	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/4065732.pngDe'Andre Hunter12	SF	25	6' 8"	221 lbs	Virginia	$20,089,286https://a.espncdn.com/i/headshots/nba/players/full/4701230.pngJalen Johnson1	SF	21	6' 8"	219 lbs	Duke	$2,925,360https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jarkel Joiner	G	24	6' 1"	180 lbs	NC State	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Seth Lundy	G	23	6' 6"	220 lbs	Penn State	--https://a.espncdn.com/i/headshots/nba/players/full/3913180.pngGarrison Mathews25	SG	26	6' 6"	215 lbs	Lipscomb	$2,000,000https://a.espncdn.com/i/headshots/nba/players/full/4032.pngWesley Matthews23	SG	36	6' 4"	220 lbs	Marquette	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4004.pngPatty Mills8	PG	35	6' 0"	180 lbs	Saint Mary's	$6,802,950https://a.espncdn.com/i/headshots/nba/players/full/3907497.pngDejounte Murray5	SG	27	6' 5"	180 lbs	Washington	$18,214,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Miles Norris	F	23	6' 10"	220 lbs	UC Santa Barbara	--https://a.espncdn.com/i/headshots/nba/players/full/4431680.pngOnyeka Okongwu17	PF	22	6' 9"	240 lbs	USC	$8,109,063https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Keaton Wallace	G	24	6' 3"	185 lbs	UTSA	--https://a.espncdn.com/i/headshots/nba/players/full/4277905.pngTrae Young11	PG	25	6' 1"	164 lbs	Oklahoma	$40,064,220Boston Celtics Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4397885.pngDalano Banton	G	23	6' 7"	204 lbs	Nebraska	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4278031.pngOshae Brissett12	SF	25	6' 7"	210 lbs	Syracuse	
$2,165,000https://a.espncdn.com/i/headshots/nba/players/full/2566769.pngMalcolm Brogdon13	PG	30	6' 4"	229 lbs	Virginia	$22,500,000https://a.espncdn.com/i/headshots/nba/players/full/3917376.pngJaylen Brown7	SG	26	6' 6"	223 lbs	California	$31,830,357https://a.espncdn.com/i/headshots/nba/players/full/4432907.pngJustin Champagnie99	SF	22	6' 6"	206 lbs	Pittsburgh	--https://a.espncdn.com/i/headshots/nba/players/full/4576085.pngJD Davison20	SG	20	6' 1"	195 lbs	Alabama	--https://a.espncdn.com/i/headshots/nba/players/full/3989.pngBlake Griffin91	PF	34	6' 9"	250 lbs	Oklahoma	--https://a.espncdn.com/i/headshots/nba/players/full/4065804.pngSam Hauser30	SF	25	6' 7"	217 lbs	Virginia	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/3213.pngAl Horford42	C	37	6' 9"	240 lbs	Florida	$10,000,000https://a.espncdn.com/i/headshots/nba/players/full/3064560.pngLuke Kornet40	C	28	7' 2"	250 lbs	Vanderbilt	$2,413,304https://a.espncdn.com/i/headshots/nba/players/full/3133602.pngSvi Mykhailiuk	SG	26	6' 7"	205 lbs	Kansas	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3102531.pngKristaps Porzingis	C	28	7' 3"	240 lbs	--	$36,016,200https://a.espncdn.com/i/headshots/nba/players/full/4066354.pngPayton Pritchard11	PG	25	6' 1"	195 lbs	Oregon	$4,037,278https://a.espncdn.com/i/headshots/nba/players/full/4397424.pngNeemias Queta	C	24	7' 0"	245 lbs	Utah State	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4065648.pngJayson Tatum0	SF	25	6' 8"	210 lbs	Duke	$32,600,060https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jordan Walsh	G	19	6' 7"	205 lbs	Arkansas	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/3078576.pngDerrick White9	PG	29	6' 4"	190 lbs	Colorado	$18,357,143https://a.espncdn.com/i/headshots/nba/players/full/4066211.pngRobert Williams III44	C	25	6' 9"	237 lbs	Texas A&M	$11,571,429Brooklyn Nets Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4423545.pngDarius Bazley	PF	23	6' 9"	216 lbs	--	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3147657.pngMikal Bridges1	SF	27	6' 6"	209 lbs	Villanova	$21,700,000https://a.espncdn.com/i/headshots/nba/players/full/4278067.pngNic Claxton33	C	24	6' 11"	215 lbs	Georgia	$9,625,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Noah Clowney	F	19	6' 10"	210 lbs	Alabama	$3,089,520https://a.espncdn.com/i/headshots/nba/players/full/2580782.pngSpencer Dinwiddie26	PG	30	6' 6"	215 lbs	Colorado	$20,357,143https://a.espncdn.com/i/headshots/nba/players/full/2578185.pngDorian Finney-Smith28	PF	30	6' 7"	220 lbs	Florida	$13,932,008https://a.espncdn.com/i/headshots/nba/players/full/3138196.pngCameron Johnson2	SF	27	6' 8"	210 lbs	North Carolina	$25,679,348https://a.espncdn.com/i/headshots/nba/players/full/2583632.pngRoyce O'Neale00	PF	30	6' 6"	226 lbs	Baylor	$9,500,000https://a.espncdn.com/i/headshots/nba/players/full/4432194.pngDay'Ron Sharpe20	C	21	6' 9"	265 lbs	North Carolina	$2,210,040https://a.espncdn.com/i/headshots/nba/players/full/3907387.pngBen Simmons10	PG	27	6' 10"	240 lbs	LSU	$37,893,408https://a.espncdn.com/i/headshots/nba/players/full/4065697.pngDennis Smith Jr.	
PG	25	6' 2"	205 lbs	NC State	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4432174.pngCam Thomas24	SG	21	6' 3"	210 lbs	LSU	$2,240,160https://a.espncdn.com/i/headshots/nba/players/full/4277890.pngLonnie Walker IV	G	24	6' 4"	204 lbs	Miami	$2,019,706https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Dariq Whitehead	F	19	6' 7"	220 lbs	Duke	$2,966,040https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jalen Wilson	F	22	6' 8"	225 lbs	Kansas	--Charlotte Hornets Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Angelo Allegri16	F	24	6' 7"	215 lbs	Eastern Washington	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Amari Bailey	G	19	6' 5"	185 lbs	UCLA	--https://a.espncdn.com/i/headshots/nba/players/full/4432816.pngLaMelo Ball1	PG	22	6' 7"	180 lbs	--	$10,900,635https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Leaky Black	F	24	6' 9"	205 lbs	North Carolina	--https://a.espncdn.com/i/headshots/nba/players/full/4431712.pngJames Bouknight2	SG	23	6' 4"	190 lbs	UConn	$4,570,080https://a.espncdn.com/i/headshots/nba/players/full/4066383.pngMiles Bridges0	SF	25	6' 7"	225 lbs	Michigan State	$7,921,301https://a.espncdn.com/i/headshots/nba/players/full/4249.pngGordon Hayward20	SF	33	6' 7"	225 lbs	Butler	$31,500,000https://a.espncdn.com/i/headshots/nba/players/full/4431699.pngKai Jones23	PF	22	6' 11"	221 lbs	Texas	$3,047,880https://a.espncdn.com/i/headshots/nba/players/full/4683027.pngTheo Maledon9	PG	22	6' 5"	175 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/3138161.pngCody Martin11	SF	27	6' 6"	205 lbs	Nevada	$7,560,000https://a.espncdn.com/i/headshots/nba/players/full/4576086.pngBryce McGowens7	G	20	6' 6"	175 lbs	Nebraska	$1,719,865https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Nathan Mensah31	C	25	6' 10"	230 lbs	San Diego State	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Brandon Miller	F	20	6' 9"	200 lbs	Alabama	$10,880,400https://a.espncdn.com/i/headshots/nba/players/full/5144059.pngJames Nnaji	C	19	6' 11"	250 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4230547.pngFrank Ntilikina	PG	25	6' 4"	200 lbs	--	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3133603.pngKelly Oubre Jr.12	SG	27	6' 6"	203 lbs	Kansas	--https://a.espncdn.com/i/headshots/nba/players/full/4278076.pngNick Richards4	C	25	7' 0"	245 lbs	Kentucky	$5,000,000https://a.espncdn.com/i/headshots/nba/players/full/3074752.pngTerry Rozier3	SG	29	6' 1"	190 lbs	Louisville	$22,205,221https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Trevon Scott18	F	26	6' 8"	225 lbs	Cincinnati	--https://a.espncdn.com/i/headshots/nba/players/full/4397570.pngJaylen Sims30	G	24	6' 6"	210 lbs	UNC Wilmington	--https://a.espncdn.com/i/headshots/nba/players/full/4683686.pngNick Smith Jr.	G	19	6' 4"	170 lbs	Arkansas	$2,463,960https://a.espncdn.com/i/headshots/nba/players/full/4702233.pngJT Thor21	F	21	6' 9"	203 lbs	Auburn	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4278078.pngP.J. 
Washington25	PF	25	6' 7"	230 lbs	Kentucky	$16,847,826https://a.espncdn.com/i/headshots/nba/players/full/4701232.pngMark Williams5	C	21	7' 0"	240 lbs	Duke	$3,908,160Chicago Bulls Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4066421.pngLonzo Ball2	PG	25	6' 6"	190 lbs	UCLA	$20,465,117https://a.espncdn.com/i/headshots/nba/players/full/3133635.pngJevon Carter	PG	28	6' 1"	200 lbs	West Virginia	$6,190,476https://a.espncdn.com/i/headshots/nba/players/full/2991350.pngAlex Caruso6	SG	29	6' 5"	186 lbs	Texas A&M	$9,460,000https://a.espncdn.com/i/headshots/nba/players/full/2528693.pngTorrey Craig0	SF	32	6' 7"	221 lbs	South Carolina Upstate	$2,528,233https://a.espncdn.com/i/headshots/nba/players/full/3978.pngDeMar DeRozan11	SF	34	6' 6"	220 lbs	USC	$28,600,000https://a.espncdn.com/i/headshots/nba/players/full/4397002.pngAyo Dosunmu12	SG	23	6' 5"	200 lbs	Illinois	$6,481,481https://a.espncdn.com/i/headshots/nba/players/full/6585.pngAndre Drummond3	C	30	6' 11"	279 lbs	UConn	$3,360,000https://a.espncdn.com/i/headshots/nba/players/full/2596112.pngJavonte Green24	SF	30	6' 5"	205 lbs	Radford	--https://a.espncdn.com/i/headshots/nba/players/full/4067735.pngCarlik Jones22	G	25	6' 0"	174 lbs	Louisville	$1,922,896https://a.espncdn.com/i/headshots/nba/players/full/3064440.pngZach LaVine8	SG	28	6' 5"	200 lbs	UCLA	$40,064,220https://a.espncdn.com/i/headshots/nba/players/full/4432239.pngJustin Lewis34	F	21	6' 7"	245 lbs	Marquette	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Julian Phillips	F	19	6' 8"	197 lbs	Tennessee	$1,600,000https://a.espncdn.com/i/headshots/nba/players/full/4683622.pngMarko Simonovic19	C	23	7' 0"	220 lbs	--	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4279815.pngTerry Taylor32	F	23	6' 4"	230 lbs	Austin Peay	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4433159.pngDalen Terry25	F	21	6' 7"	200 lbs	Arizona	$3,350,760https://a.espncdn.com/i/headshots/nba/players/full/6478.pngNikola Vucevic9	C	32	6' 10"	260 lbs	USC	$18,518,519https://a.espncdn.com/i/headshots/nba/players/full/4395651.pngCoby White0	PG	23	6' 5"	195 lbs	North Carolina	$11,111,111https://a.espncdn.com/i/headshots/nba/players/full/4431687.pngPatrick Williams44	PF	22	6' 7"	215 lbs	Florida State	$9,835,881Cleveland Cavaliers Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4066328.pngJarrett Allen31	C	25	6' 9"	243 lbs	Texas	$20,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Emoni Bates21	F	19	6' 10"	170 lbs	Eastern Michigan	--https://a.espncdn.com/i/headshots/nba/players/full/4432173.pngSharife Cooper2	PG	22	6' 1"	176 lbs	Auburn	--https://a.espncdn.com/i/headshots/nba/players/full/3947156.pngMamadi Diakite21	F	26	6' 9"	228 lbs	Virginia	--https://a.espncdn.com/i/headshots/nba/players/full/4396907.pngDarius Garland10	PG	23	6' 1"	192 lbs	Vanderbilt	$34,005,250https://a.espncdn.com/i/headshots/nba/players/full/4065733.pngTy Jerome	SG	26	6' 5"	195 lbs	Virginia	$2,439,025https://a.espncdn.com/i/headshots/nba/players/full/3064559.pngDamian Jones15	C	28	6' 11"	245 lbs	Vanderbilt	$2,586,665https://a.espncdn.com/i/headshots/nba/players/full/2991043.pngCaris LeVert3	SG	29	6' 6"	205 lbs	Michigan	$15,384,616https://a.espncdn.com/i/headshots/nba/players/full/4066757.pngSam Merrill20	SG	27	6' 4"	205 lbs	Utah State	$1,997,238https://a.espncdn.com/i/headshots/nba/players/full/3908809.pngDonovan Mitchell45	SG	27	6' 1"	215 lbs	Louisville	
$33,162,030https://a.espncdn.com/i/headshots/nba/players/full/4432158.pngEvan Mobley4	PF	22	6' 11"	215 lbs	USC	$8,882,760https://a.espncdn.com/i/headshots/nba/players/full/4432815.pngIsaiah Mobley15	F	23	6' 8"	238 lbs	USC	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Pete Nance48	F	23	6' 11"	230 lbs	North Carolina	--https://a.espncdn.com/i/headshots/nba/players/full/2968361.pngRaul Neto19	PG	31	6' 2"	180 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/2990969.pngGeorges Niang	SF	30	6' 7"	230 lbs	Iowa State	$8,800,000https://a.espncdn.com/i/headshots/nba/players/full/4432822.pngIsaac Okoro35	SF	22	6' 5"	225 lbs	Auburn	$8,920,795https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Craig Porter Jr.40	G	23	6' 2"	186 lbs	Wichita State	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Justin Powell	G	--	6' 6"	197 lbs	Washington State	--https://a.espncdn.com/i/headshots/nba/players/full/4011.pngRicky Rubio13	PG	32	6' 2"	190 lbs	--	$6,146,342https://a.espncdn.com/i/headshots/nba/players/full/4065778.pngMax Strus31	SG	27	6' 5"	215 lbs	DePaul	$14,487,684https://a.espncdn.com/i/headshots/nba/players/full/6474.pngTristan Thompson13	C	32	6' 9"	254 lbs	Texas	--https://a.espncdn.com/i/headshots/nba/players/full/3912848.pngDean Wade32	PF	26	6' 9"	228 lbs	Kansas State	$5,709,877Dallas Mavericks Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4432161.pngGreg Brown III36	PF	22	6' 8"	217 lbs	Texas	--https://a.espncdn.com/i/headshots/nba/players/full/2326307.pngSeth Curry30	SG	33	6' 1"	190 lbs	Duke	$4,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Dexter Dennis38	G	24	6' 5"	210 lbs	Texas A&M	--https://a.espncdn.com/i/headshots/nba/players/full/3945274.pngLuka Doncic77	PG	24	6' 7"	230 lbs	--	$40,064,220https://a.espncdn.com/i/headshots/nba/players/full/3102528.pngDante Exum0	G	28	6' 5"	214 lbs	--	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/4432811.pngJosh Green8	SG	22	6' 6"	210 lbs	Arizona	$4,765,339https://a.espncdn.com/i/headshots/nba/players/full/2528210.pngTim Hardaway Jr.10	SF	31	6' 5"	205 lbs	Michigan	$17,897,728https://a.espncdn.com/i/headshots/nba/players/full/4868423.pngJaden Hardy1	G	21	6' 4"	198 lbs	--	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/2993370.pngRichaun Holmes20	F	29	6' 10"	240 lbs	Bowling Green	$12,046,020https://a.espncdn.com/i/headshots/nba/players/full/6442.pngKyrie Irving11	PG	31	6' 2"	191 lbs	Duke	$37,037,037https://a.espncdn.com/i/headshots/nba/players/full/3936099.pngDerrick Jones Jr.55	SF	26	6' 6"	210 lbs	UNLV	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/2960236.pngMaxi Kleber42	PF	31	6' 10"	240 lbs	--	$11,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146A.J. 
Lawson9	G	23	6' 6"	185 lbs	South Carolina	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Dereck Lively II2	C	19	7' 1"	234 lbs	Duke	$4,775,640https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Mike Miles Jr.14	G	21	6' 2"	205 lbs	TCU	--https://a.espncdn.com/i/headshots/nba/players/full/6461.pngMarkieff Morris88	PF	34	6' 9"	245 lbs	Kansas	--https://a.espncdn.com/i/headshots/nba/players/full/2531367.pngDwight Powell7	C	32	6' 10"	240 lbs	Stanford	$4,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Olivier-Maxence Prosper18	F	21	6' 8"	215 lbs	Marquette	$2,733,360https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jordan Walker16	SG	24	5' 11"	170 lbs	UAB	--https://a.espncdn.com/i/headshots/nba/players/full/4397033.pngJoe Wieskamp37	SF	24	6' 6"	205 lbs	Iowa	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/4066218.pngGrant Williams3	PF	24	6' 6"	236 lbs	Tennessee	$12,325,581Denver Nuggets Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4431767.pngChristian Braun0	G	22	6' 6"	220 lbs	Kansas	$2,949,120https://a.espncdn.com/i/headshots/nba/players/full/2581018.pngKentavious Caldwell-Pope5	SG	30	6' 5"	204 lbs	Georgia	$14,704,938https://a.espncdn.com/i/headshots/nba/players/full/4230557.pngVlatko Cancar31	SF	26	6' 8"	236 lbs	--	$2,234,359https://a.espncdn.com/i/headshots/nba/players/full/4278585.pngCollin Gillespie21	G	24	6' 3"	190 lbs	Villanova	--https://a.espncdn.com/i/headshots/nba/players/full/3064290.pngAaron Gordon50	PF	28	6' 8"	235 lbs	Arizona	$22,266,182https://a.espncdn.com/i/headshots/nba/players/full/2284101.pngJustin Holiday0	SF	34	6' 6"	180 lbs	Washington	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4065731.pngJay Huff	C	25	7' 1"	240 lbs	Virginia	--https://a.espncdn.com/i/headshots/nba/players/full/6443.pngReggie Jackson7	PG	33	6' 2"	208 lbs	Boston College	$5,000,000https://a.espncdn.com/i/headshots/nba/players/full/3112335.pngNikola Jokic15	C	28	6' 11"	284 lbs	--	$47,607,350https://a.espncdn.com/i/headshots/nba/players/full/3442.pngDeAndre Jordan6	C	35	6' 11"	265 lbs	Texas A&M	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4066240.pngBraxton Key	F	26	6' 8"	225 lbs	Virginia	--https://a.espncdn.com/i/headshots/nba/players/full/3936299.pngJamal Murray27	PG	26	6' 4"	215 lbs	Kentucky	$33,833,400https://a.espncdn.com/i/headshots/nba/players/full/4431690.pngZeke Nnaji22	PF	22	6' 9"	240 lbs	Arizona	$4,306,281https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jalen Pickett	G	23	6' 4"	202 lbs	Siena	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4278104.pngMichael Porter Jr.1	SF	25	6' 10"	218 lbs	Missouri	$33,386,850https://a.espncdn.com/i/headshots/nba/players/full/4305.pngIsh Smith14	PG	35	6' 0"	175 lbs	Wake Forest	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Julian Strawther	G	21	6' 7"	205 lbs	Gonzaga	$2,431,080https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Hunter Tyson	F	23	6' 8"	215 lbs	Clemson	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/4576087.pngPeyton Watson8	F	21	6' 8"	200 lbs	UCLA	$2,303,520Detroit Pistons Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4277848.pngMarvin Bagley III35	PF	24	6' 10"	235 lbs	Duke	$12,500,000https://a.espncdn.com/i/headshots/nba/players/full/4395676.pngBuddy Boeheim27	F	23	6' 6"	205 lbs	Syracuse	
--https://a.espncdn.com/i/headshots/nba/players/full/3593.pngBojan Bogdanovic44	SF	34	6' 7"	226 lbs	--	$20,000,000https://a.espncdn.com/i/headshots/nba/players/full/6429.pngAlec Burks5	SG	32	6' 6"	214 lbs	Colorado	$10,489,600https://a.espncdn.com/i/headshots/nba/players/full/4871137.pngMalcolm Cazalon20	G	--	6' 6"	185 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4432166.pngCade Cunningham2	PG	21	6' 7"	220 lbs	Oklahoma State	$11,055,240https://a.espncdn.com/i/headshots/nba/players/full/4080610.pngHamidou Diallo6	SG	25	6' 5"	202 lbs	Kentucky	--https://a.espncdn.com/i/headshots/nba/players/full/4433621.pngJalen Duren0	C	19	6' 11"	250 lbs	Memphis	$4,330,680https://a.espncdn.com/i/headshots/nba/players/full/2528794.pngJoe Harris12	SF	32	6' 6"	220 lbs	Virginia	$19,928,571https://a.espncdn.com/i/headshots/nba/players/full/4683024.pngKillian Hayes7	PG	22	6' 5"	195 lbs	--	$7,413,955https://a.espncdn.com/i/headshots/nba/players/full/4433218.pngJaden Ivey23	PG	21	6' 4"	195 lbs	Purdue	$7,641,480https://a.espncdn.com/i/headshots/nba/players/full/4277957.pngIsaiah Livers12	PF	25	6' 6"	232 lbs	Michigan	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/2488826.pngRodney McGruder17	G	32	6' 4"	205 lbs	Kansas State	--https://a.espncdn.com/i/headshots/nba/players/full/3059310.pngMonte Morris22	PG	28	6' 2"	183 lbs	Iowa State	$9,800,926https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jared Rhoden8	G	24	6' 6"	210 lbs	Seton Hall	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Marcus Sasser	G	23	6' 2"	195 lbs	Houston	$2,623,680https://a.espncdn.com/i/headshots/nba/players/full/4432810.pngIsaiah Stewart28	C	22	6' 8"	250 lbs	Washington	$5,266,713https://a.espncdn.com/i/headshots/nba/players/full/4684742.pngAusar Thompson	F	20	6' 7"	190 lbs	--	$7,977,480https://a.espncdn.com/i/headshots/nba/players/full/4432808.pngJames Wiseman13	C	22	7' 0"	240 lbs	Memphis	$12,119,400Golden State Warriors Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/3975.pngStephen Curry30	PG	35	6' 2"	185 lbs	Davidson	$51,915,615https://a.espncdn.com/i/headshots/nba/players/full/6589.pngDraymond Green23	PF	33	6' 6"	230 lbs	Michigan State	$22,321,429https://a.espncdn.com/i/headshots/nba/players/full/2327577.pngJaMychal Green1	F	33	6' 9"	227 lbs	Alabama	--https://a.espncdn.com/i/headshots/nba/players/full/2386.pngAndre Iguodala9	F	39	6' 6"	215 lbs	Arizona	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Trayce Jackson-Davis	F	23	6' 9"	245 lbs	Indiana	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/6446.pngCory Joseph18	PG	32	6' 3"	200 lbs	Texas	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4433247.pngJonathan Kuminga00	PF	20	6' 7"	225 lbs	--	$6,012,840https://a.espncdn.com/i/headshots/nba/players/full/4066790.pngAnthony Lamb40	F	25	6' 6"	227 lbs	Vermont	--https://a.espncdn.com/i/headshots/nba/players/full/3155535.pngKevon Looney5	F	27	6' 9"	222 lbs	UCLA	$7,500,000https://a.espncdn.com/i/headshots/nba/players/full/4432171.pngMoses Moody4	SG	21	6' 5"	211 lbs	Arkansas	$3,918,480https://a.espncdn.com/i/headshots/nba/players/full/2779.pngChris Paul	PG	38	6' 0"	175 lbs	Wake Forest	$30,800,000https://a.espncdn.com/i/headshots/nba/players/full/3134903.pngGary Payton II8	SG	30	6' 2"	195 lbs	Oregon State	$8,715,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Brandin Podziemski	G	20	6' 4"	203 lbs	Santa Clara	
$3,352,440https://a.espncdn.com/i/headshots/nba/players/full/4431720.pngLester Quinones25	G	22	6' 5"	205 lbs	Memphis	--https://a.espncdn.com/i/headshots/nba/players/full/3032978.pngDario Saric9	PF	29	6' 10"	225 lbs	--	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/6475.pngKlay Thompson11	SG	33	6' 6"	220 lbs	Washington State	$43,219,440https://a.espncdn.com/i/headshots/nba/players/full/3059319.pngAndrew Wiggins22	SF	28	6' 7"	197 lbs	Kansas	$24,330,357Houston Rockets Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/3415.pngD.J. Augustin4	G	35	5' 11"	183 lbs	Texas	--https://a.espncdn.com/i/headshots/nba/players/full/3155526.pngDillon Brooks	SF	27	6' 7"	225 lbs	Oregon	$22,627,671https://a.espncdn.com/i/headshots/nba/players/full/2991282.pngWillie Cauley-Stein2	C	30	7' 0"	240 lbs	Kentucky	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Darius Days5	F	23	6' 7"	245 lbs	LSU	--https://a.espncdn.com/i/headshots/nba/players/full/4433192.pngTari Eason17	F	22	6' 8"	215 lbs	LSU	$3,527,160https://a.espncdn.com/i/headshots/nba/players/full/4437244.pngJalen Green4	SG	21	6' 4"	186 lbs	--	$9,891,480https://a.espncdn.com/i/headshots/nba/players/full/3209.pngJeff Green32	PF	37	6' 8"	235 lbs	Georgetown	$9,600,000https://a.espncdn.com/i/headshots/nba/players/full/3922230.pngAaron Holiday	G	26	6' 0"	185 lbs	UCLA	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/5075393.pngTrevor Hudgins12	G	24	6' 0"	180 lbs	Northwest Missouri State	--https://a.espncdn.com/i/headshots/nba/players/full/2579294.pngFrank Kaminsky33	PF	30	7' 0"	240 lbs	Wisconsin	--https://a.espncdn.com/i/headshots/nba/players/full/3146557.pngJock Landale	C	27	6' 11"	255 lbs	Saint Mary's	$8,000,000https://a.espncdn.com/i/headshots/nba/players/full/4376.pngBoban Marjanovic51	C	35	7' 4"	290 lbs	--	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4397140.pngKevin Porter Jr.3	SG	23	6' 4"	203 lbs	USC	$15,860,000https://a.espncdn.com/i/headshots/nba/players/full/4871144.pngAlperen Sengun28	C	21	6' 11"	243 lbs	--	$3,536,280https://a.espncdn.com/i/headshots/nba/players/full/4432639.pngJabari Smith Jr.1	PF	20	6' 11"	220 lbs	Auburn	$9,326,520https://a.espncdn.com/i/headshots/nba/players/full/3136777.pngJae'Sean Tate8	SF	27	6' 5"	230 lbs	Ohio State	$6,500,000https://a.espncdn.com/i/headshots/nba/players/full/4684740.pngAmen Thompson	F	20	6' 7"	214 lbs	--	$8,809,320https://a.espncdn.com/i/headshots/nba/players/full/2991230.pngFred VanVleet	SG	29	6' 0"	197 lbs	Wichita State	$40,806,300https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Cam Whitmore	F	19	6' 7"	232 lbs	Villanova	$3,218,160Indiana Pacers Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4065670.pngBruce Brown	SF	27	6' 4"	202 lbs	Miami	$22,000,000https://a.espncdn.com/i/headshots/nba/players/full/4433254.pngKendall Brown10	SG	20	6' 8"	205 lbs	Baylor	--https://a.espncdn.com/i/headshots/nba/players/full/4396993.pngTyrese Haliburton0	PG	23	6' 5"	185 lbs	Iowa State	$5,808,435https://a.espncdn.com/i/headshots/nba/players/full/2990984.pngBuddy Hield24	SG	30	6' 4"	220 lbs	Oklahoma	$19,279,841https://a.espncdn.com/i/headshots/nba/players/full/3438.pngGeorge Hill7	PG	37	6' 4"	188 lbs	IUPUI	--https://a.espncdn.com/i/headshots/nba/players/full/3439.pngSerge Ibaka	F	34	6' 10"	235 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4432170.pngIsaiah Jackson22	SF	21	6' 10"	206 lbs	Kentucky	
$2,696,280https://a.espncdn.com/i/headshots/nba/players/full/3999.pngJames Johnson16	PF	36	6' 7"	240 lbs	Wake Forest	--https://a.espncdn.com/i/headshots/nba/players/full/4683634.pngBennedict Mathurin00	SG	21	6' 6"	195 lbs	Arizona	$6,916,080https://a.espncdn.com/i/headshots/nba/players/full/2530530.pngT.J. McConnell9	PG	31	6' 1"	190 lbs	Arizona	$8,700,000https://a.espncdn.com/i/headshots/nba/players/full/4395712.pngAndrew Nembhard2	PG	23	6' 5"	193 lbs	Gonzaga	$2,131,905https://a.espncdn.com/i/headshots/nba/players/full/4396909.pngAaron Nesmith23	SF	23	6' 5"	215 lbs	Vanderbilt	$5,634,257https://a.espncdn.com/i/headshots/nba/players/full/4277883.pngJordan Nwora13	SF	25	6' 8"	225 lbs	Louisville	$3,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Ben Sheppard	G	22	6' 6"	190 lbs	Belmont	$2,537,160https://a.espncdn.com/i/headshots/nba/players/full/4397189.pngJalen Smith25	PF	23	6' 9"	215 lbs	Maryland	$5,604,192https://a.espncdn.com/i/headshots/nba/players/full/2451037.pngDaniel Theis27	C	31	6' 8"	245 lbs	--	$9,108,387https://a.espncdn.com/i/headshots/nba/players/full/4278355.pngObi Toppin1	PF	25	6' 9"	220 lbs	Dayton	$6,803,012https://a.espncdn.com/i/headshots/nba/players/full/3133628.pngMyles Turner33	C	27	6' 11"	250 lbs	Texas	$20,975,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jarace Walker	F	20	6' 8"	240 lbs	Houston	$6,059,520https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Isaiah Wong	G	22	6' 4"	184 lbs	Miami	--https://a.espncdn.com/i/headshots/nba/players/full/2991011.pngGabe York32	G	30	6' 3"	190 lbs	Arizona	--LA Clippers Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/3416.pngNicolas Batum33	PF	34	6' 8"	230 lbs	--	$11,710,818https://a.espncdn.com/i/headshots/nba/players/full/4432162.pngBrandon Boston Jr.4	SG	21	6' 6"	188 lbs	Kentucky	$1,836,096https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Kobe Brown	G	23	6' 8"	250 lbs	Missouri	$2,413,320https://a.espncdn.com/i/headshots/nba/players/full/4066387.pngAmir Coffey7	SG	26	6' 7"	210 lbs	Minnesota	$3,666,667https://a.espncdn.com/i/headshots/nba/players/full/2490620.pngRobert Covington23	PF	32	6' 7"	209 lbs	Tennessee State	$11,692,308https://a.espncdn.com/i/headshots/nba/players/full/4433249.pngMoussa Diabate25	F	21	6' 11"	210 lbs	Michigan	--https://a.espncdn.com/i/headshots/nba/players/full/4251.pngPaul George13	F	33	6' 8"	220 lbs	Fresno State	$45,640,084https://a.espncdn.com/i/headshots/nba/players/full/4592492.pngBones Hyland5	PG	23	6' 2"	169 lbs	VCU	$2,306,400https://a.espncdn.com/i/headshots/nba/players/full/6450.pngKawhi Leonard2	SF	32	6' 7"	225 lbs	San Diego State	$45,640,084https://a.espncdn.com/i/headshots/nba/players/full/3907823.pngTerance Mann14	SG	26	6' 5"	215 lbs	Florida State	$10,576,923https://a.espncdn.com/i/headshots/nba/players/full/4702882.pngKJ Martin	F	22	6' 7"	215 lbs	--	$1,930,681https://a.espncdn.com/i/headshots/nba/players/full/4431828.pngKenyon Martin Jr.	
F	22	6' 7"	215 lbs	--	$1,930,681https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jordan Miller	G	23	6' 7"	195 lbs	Miami	--https://a.espncdn.com/i/headshots/nba/players/full/3914075.pngXavier Moon15	PG	28	6' 0"	165 lbs	Morehead State	--https://a.espncdn.com/i/headshots/nba/players/full/6462.pngMarcus Morris Sr.8	SF	34	6' 8"	218 lbs	Kansas	$17,116,279https://a.espncdn.com/i/headshots/nba/players/full/2488653.pngMason Plumlee44	C	33	7' 0"	254 lbs	Duke	$5,000,000https://a.espncdn.com/i/headshots/nba/players/full/2595516.pngNorman Powell24	G	30	6' 3"	215 lbs	UCLA	$18,000,000https://a.espncdn.com/i/headshots/nba/players/full/4397916.pngJason Preston17	PG	24	6' 3"	181 lbs	Ohio	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/3468.pngRussell Westbrook0	PG	34	6' 3"	200 lbs	UCLA	$3,835,738https://a.espncdn.com/i/headshots/nba/players/full/4017837.pngIvica Zubac40	C	26	7' 0"	240 lbs	--	$10,933,333Los Angeles Lakers Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Damion Baugh	G	23	6' 4"	194 lbs	TCU	--https://a.espncdn.com/i/headshots/nba/players/full/4432582.pngMax Christie10	G	20	6' 5"	190 lbs	Michigan State	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/6583.pngAnthony Davis3	PF	30	6' 10"	253 lbs	Kentucky	$40,600,080https://a.espncdn.com/i/headshots/nba/players/full/4066264.pngWenyen Gabriel35	F	26	6' 9"	205 lbs	Kentucky	--https://a.espncdn.com/i/headshots/nba/players/full/4066648.pngRui Hachimura28	PF	25	6' 8"	230 lbs	Gonzaga	$15,740,741https://a.espncdn.com/i/headshots/nba/players/full/4397077.pngJaxson Hayes10	C	23	7' 0"	220 lbs	Texas	$2,165,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jalen Hood-Schifino	G	20	6' 6"	215 lbs	Indiana	$3,695,040https://a.espncdn.com/i/headshots/nba/players/full/1966.pngLeBron James23	SF	38	6' 9"	250 lbs	--	$47,607,350https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Maxwell Lewis	F	21	6' 7"	205 lbs	Pepperdine	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/4431785.pngScotty Pippen Jr.14	G	22	6' 1"	185 lbs	Vanderbilt	--https://a.espncdn.com/i/headshots/nba/players/full/2990962.pngTaurean Prince	PF	29	6' 6"	218 lbs	Baylor	$7,455,000https://a.espncdn.com/i/headshots/nba/players/full/4066457.pngAustin Reaves15	SG	25	6' 5"	197 lbs	Oklahoma	$12,015,150https://a.espncdn.com/i/headshots/nba/players/full/4395627.pngCam Reddish	SF	24	6' 8"	217 lbs	Duke	$2,165,000https://a.espncdn.com/i/headshots/nba/players/full/3136776.pngD'Angelo Russell1	PG	27	6' 4"	193 lbs	Ohio State	$17,307,693https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Vincent Valerio-Bodon	SF	22	6' 9"	200 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4278077.pngJarred Vanderbilt2	PF	24	6' 8"	214 lbs	Kentucky	$4,698,000https://a.espncdn.com/i/headshots/nba/players/full/3137259.pngGabe Vincent	PG	27	6' 3"	200 lbs	UC Santa Barbara	$10,500,000https://a.espncdn.com/i/headshots/nba/players/full/3058254.pngChristian Wood	F	27	6' 9"	214 lbs	UNLV	$2,709,849Memphis Grizzlies Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/2991235.pngSteven Adams4	C	30	6' 11"	265 lbs	Pittsburgh	$12,600,000https://a.espncdn.com/i/headshots/nba/players/full/4593125.pngSanti Aldama7	PF	22	6' 11"	215 lbs	Loyola (MD)	$2,194,200https://a.espncdn.com/i/headshots/nba/players/full/4066320.pngDesmond Bane22	SG	25	6' 5"	215 lbs	TCU	
$3,845,083https://a.espncdn.com/i/headshots/nba/players/full/5148538.pngTarik Biberovic	F	22	6' 7"	218 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4432168.pngJosh Christopher	SG	21	6' 4"	215 lbs	Arizona State	$2,485,200https://a.espncdn.com/i/headshots/nba/players/full/3906665.pngBrandon Clarke15	PF	27	6' 8"	215 lbs	Gonzaga	$12,500,000https://a.espncdn.com/i/headshots/nba/players/full/4278378.pngJacob Gilyard0	G	25	5' 9"	160 lbs	Richmond	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Gregory Jackson II	F	18	6' 9"	215 lbs	South Carolina	--https://a.espncdn.com/i/headshots/nba/players/full/4277961.pngJaren Jackson Jr.13	PF	24	6' 11"	242 lbs	Michigan State	$27,102,202https://a.espncdn.com/i/headshots/nba/players/full/3913174.pngLuke Kennard10	SG	27	6' 5"	206 lbs	Duke	$15,418,363https://a.espncdn.com/i/headshots/nba/players/full/3134932.pngJohn Konchar46	SG	27	6' 5"	210 lbs	--	$2,400,000https://a.espncdn.com/i/headshots/nba/players/full/4592691.pngJake LaRavia3	F	21	6' 7"	235 lbs	Wake Forest	$3,199,920https://a.espncdn.com/i/headshots/nba/players/full/4585610.pngKenneth Lofton Jr.6	F	21	6' 8"	275 lbs	Louisiana Tech	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/4279888.pngJa Morant12	PG	24	6' 3"	174 lbs	Murray State	$34,005,250https://a.espncdn.com/i/headshots/nba/players/full/4593041.pngDavid Roddy27	F	22	6' 5"	255 lbs	Colorado State	$2,718,240https://a.espncdn.com/i/headshots/nba/players/full/3456.pngDerrick Rose	PG	34	6' 3"	200 lbs	Memphis	$3,196,448https://a.espncdn.com/i/headshots/nba/players/full/2990992.pngMarcus Smart	PG	29	6' 4"	220 lbs	Oklahoma State	$18,833,712https://a.espncdn.com/i/headshots/nba/players/full/4277964.pngXavier Tillman2	F	24	6' 8"	245 lbs	Michigan State	$1,930,681https://a.espncdn.com/i/headshots/nba/players/full/4715198.pngIsaiah Todd	PF	21	6' 9"	219 lbs	--	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4433137.pngZiaire Williams8	F	22	6' 9"	185 lbs	Stanford	$4,810,200https://a.espncdn.com/i/headshots/nba/players/full/4397227.pngVince Williams Jr.5	G	23	6' 6"	205 lbs	VCU	--Miami Heat Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4066261.pngBam Adebayo13	C	26	6' 9"	255 lbs	Kentucky	$32,600,060https://a.espncdn.com/i/headshots/nba/players/full/3934723.pngThomas Bryant13	C	26	6' 10"	248 lbs	Indiana	$2,528,233https://a.espncdn.com/i/headshots/nba/players/full/6430.pngJimmy Butler22	SF	34	6' 7"	230 lbs	Marquette	$45,183,960https://a.espncdn.com/i/headshots/nba/players/full/4278572.pngJamal Cain8	F	24	6' 7"	191 lbs	Oakland	--https://a.espncdn.com/i/headshots/nba/players/full/2184.pngUdonis Haslem40	PF	43	6' 8"	235 lbs	Florida	--https://a.espncdn.com/i/headshots/nba/players/full/4395725.pngTyler Herro14	PG	23	6' 5"	195 lbs	Kentucky	$27,000,000https://a.espncdn.com/i/headshots/nba/players/full/4291678.pngHaywood Highsmith24	F	26	6' 4"	220 lbs	Wheeling Jesuit	$1,902,137https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jaime Jaquez Jr.	
G	22	6' 7"	225 lbs	UCLA	$3,510,600https://a.espncdn.com/i/headshots/nba/players/full/4997528.pngNikola Jovic5	F	20	6' 10"	205 lbs	--	$2,352,000https://a.espncdn.com/i/headshots/nba/players/full/3449.pngKevin Love42	PF	35	6' 8"	251 lbs	UCLA	$3,835,738https://a.espncdn.com/i/headshots/nba/players/full/3012.pngKyle Lowry7	PG	37	6' 0"	196 lbs	Villanova	$29,682,540https://a.espncdn.com/i/headshots/nba/players/full/3138160.pngCaleb Martin16	SF	27	6' 5"	205 lbs	Nevada	$6,802,950https://a.espncdn.com/i/headshots/nba/players/full/2581190.pngJosh Richardson	SG	30	6' 6"	200 lbs	Tennessee	$2,891,467https://a.espncdn.com/i/headshots/nba/players/full/3157465.pngDuncan Robinson55	F	29	6' 7"	215 lbs	Michigan	$18,154,000https://a.espncdn.com/i/headshots/nba/players/full/4593049.pngOrlando Robinson25	C	23	7' 0"	235 lbs	Fresno State	$1,801,769https://a.espncdn.com/i/headshots/nba/players/full/4066993.pngDru Smith	G	25	6' 3"	203 lbs	Missouri	--Milwaukee Bucks Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/3135045.pngGrayson Allen12	SG	27	6' 4"	198 lbs	Duke	$8,925,000https://a.espncdn.com/i/headshots/nba/players/full/3032977.pngGiannis Antetokounmpo34	PF	28	6' 11"	243 lbs	--	$45,640,084https://a.espncdn.com/i/headshots/nba/players/full/3102533.pngThanasis Antetokounmpo43	SF	31	6' 7"	219 lbs	--	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3907822.pngMalik Beasley5	SG	26	6' 4"	187 lbs	Florida State	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/4432179.pngMarJon Beauchamp0	F	22	6' 7"	202 lbs	--	$2,609,400https://a.espncdn.com/i/headshots/nba/players/full/2578239.pngPat Connaughton24	SG	30	6' 5"	209 lbs	Notre Dame	$9,423,869https://a.espncdn.com/i/headshots/nba/players/full/6581.pngJae Crowder99	PF	33	6' 6"	235 lbs	Marquette	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/3423.pngGoran Dragic31	PG	37	6' 3"	190 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4397475.pngAJ Green20	G	23	6' 5"	207 lbs	Northern Iowa	$1,801,769https://a.espncdn.com/i/headshots/nba/players/full/3995.pngJrue Holiday21	PG	33	6' 4"	205 lbs	UCLA	$36,861,707https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Andre Jackson Jr.	G	21	6' 6"	200 lbs	UConn	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/6605.pngMeyers Leonard3	C	31	7' 0"	260 lbs	Illinois	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Chris Livingston	F	19	6' 6"	220 lbs	Kentucky	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/3448.pngBrook Lopez11	C	35	7' 0"	282 lbs	Stanford	$25,000,000https://a.espncdn.com/i/headshots/nba/players/full/3447.pngRobin Lopez	C	35	7' 0"	281 lbs	Stanford	--https://a.espncdn.com/i/headshots/nba/players/full/6609.pngKhris Middleton22	SF	32	6' 7"	222 lbs	Texas A&M	$29,320,988https://a.espncdn.com/i/headshots/nba/players/full/3064482.pngBobby Portis9	F	28	6' 10"	250 lbs	Arkansas	$11,710,818https://a.espncdn.com/i/headshots/nba/players/full/4277887.pngLindell Wigginton28	G	25	6' 1"	189 lbs	Iowa State	--Minnesota Timberwolves Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4278039.pngNickeil Alexander-Walker9	SG	25	6' 5"	205 lbs	Virginia Tech	$4,687,500https://a.espncdn.com/i/headshots/nba/players/full/2993874.pngKyle Anderson5	SF	30	6' 9"	230 lbs	UCLA	$9,219,512https://a.espncdn.com/i/headshots/nba/players/full/4278508.pngTroy Brown Jr.	
SF	24	6' 6"	215 lbs	Oregon	$4,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jaylen Clark	G	21	6' 5"	205 lbs	UCLA	--https://a.espncdn.com/i/headshots/nba/players/full/3195.pngMike Conley10	PG	35	6' 1"	175 lbs	Ohio State	$24,360,000https://a.espncdn.com/i/headshots/nba/players/full/4594268.pngAnthony Edwards5	SG	22	6' 4"	225 lbs	Georgia	$9,219,512https://a.espncdn.com/i/headshots/nba/players/full/4277951.pngLuka Garza55	C	24	6' 10"	243 lbs	Iowa	--https://a.espncdn.com/i/headshots/nba/players/full/3032976.pngRudy Gobert27	C	31	7' 1"	258 lbs	--	$41,000,000https://a.espncdn.com/i/headshots/nba/players/full/4578893.pngVit Krejci	F	23	6' 7"	195 lbs	--	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4431671.pngJaden McDaniels3	PF	22	6' 9"	185 lbs	Washington	$3,901,399https://a.espncdn.com/i/headshots/nba/players/full/3134916.pngJordan McLaughlin6	PG	27	6' 0"	185 lbs	USC	$2,320,000https://a.espncdn.com/i/headshots/nba/players/full/3915195.pngShake Milton	SG	26	6' 5"	205 lbs	SMU	$5,000,000https://a.espncdn.com/i/headshots/nba/players/full/4687718.pngJosh Minott8	F	21	6' 8"	205 lbs	Memphis	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/4592187.pngWendell Moore Jr.7	G	22	6' 5"	215 lbs	Duke	$2,421,720https://a.espncdn.com/i/headshots/nba/players/full/4432176.pngDaishen Nix15	SG	21	6' 4"	226 lbs	--	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4278541.pngJaylen Nowell4	SG	24	6' 4"	201 lbs	Washington	--https://a.espncdn.com/i/headshots/nba/players/full/4396971.pngNaz Reid11	C	24	6' 9"	264 lbs	LSU	$12,950,400https://a.espncdn.com/i/headshots/nba/players/full/6617.pngAustin Rivers25	SG	31	6' 4"	200 lbs	Duke	--https://a.espncdn.com/i/headshots/nba/players/full/3908336.pngMatt Ryan37	F	26	6' 6"	215 lbs	Chattanooga	--https://a.espncdn.com/i/headshots/nba/players/full/3136195.pngKarl-Anthony Towns32	C	27	7' 0"	248 lbs	Kentucky	$36,016,200New Orleans Pelicans Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4277869.pngJose Alvarado15	PG	25	6' 0"	179 lbs	Georgia Tech	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4869342.pngDyson Daniels11	G	20	6' 8"	200 lbs	--	$5,784,120https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jordan Hawkins24	G	21	6' 5"	195 lbs	UConn	$4,310,160https://a.espncdn.com/i/headshots/nba/players/full/2999409.pngWilly Hernangomez9	C	29	6' 11"	250 lbs	--	$2,559,942https://a.espncdn.com/i/headshots/nba/players/full/3913176.pngBrandon Ingram14	SF	26	6' 8"	190 lbs	Duke	$33,833,400https://a.espncdn.com/i/headshots/nba/players/full/4277813.pngHerbert Jones5	SF	24	6' 7"	206 lbs	Alabama	$12,015,150https://a.espncdn.com/i/headshots/nba/players/full/4395698.pngKira Lewis Jr.13	PG	22	6' 1"	170 lbs	Alabama	$5,722,116https://a.espncdn.com/i/headshots/nba/players/full/4432821.pngE.J. 
Liddell32	F	22	6' 6"	240 lbs	Ohio State	$1,801,769https://a.espncdn.com/i/headshots/nba/players/full/4278594.pngNaji Marshall8	SF	25	6' 7"	220 lbs	Xavier	$1,930,681https://a.espncdn.com/i/headshots/nba/players/full/2490149.pngCJ McCollum3	SG	32	6' 3"	190 lbs	Lehigh	$35,802,469https://a.espncdn.com/i/headshots/nba/players/full/4397688.pngTrey Murphy III25	SG	23	6' 8"	206 lbs	Virginia	$3,359,280https://a.espncdn.com/i/headshots/nba/players/full/2580365.pngLarry Nance Jr.22	PF	30	6' 8"	245 lbs	Wyoming	$10,375,000https://a.espncdn.com/i/headshots/nba/players/full/4431811.pngDereon Seabron0	G	23	6' 5"	183 lbs	NC State	--https://a.espncdn.com/i/headshots/nba/players/full/6477.pngJonas Valanciunas17	C	31	6' 11"	265 lbs	--	$15,435,000https://a.espncdn.com/i/headshots/nba/players/full/4395628.pngZion Williamson1	PF	23	6' 6"	284 lbs	Duke	$34,005,250https://a.espncdn.com/i/headshots/nba/players/full/2579258.pngCody Zeller44	C	30	6' 11"	240 lbs	Indiana	$2,019,706New York Knicks Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4395625.pngRJ Barrett9	SG	23	6' 6"	214 lbs	Duke	$23,883,929https://a.espncdn.com/i/headshots/nba/players/full/3934672.pngJalen Brunson11	PG	27	6' 1"	190 lbs	Villanova	$26,346,666https://a.espncdn.com/i/headshots/nba/players/full/3934673.pngDonte DiVincenzo	SG	26	6' 4"	203 lbs	Villanova	$10,960,000https://a.espncdn.com/i/headshots/nba/players/full/6588.pngEvan Fournier13	SG	30	6' 6"	205 lbs	--	$18,857,143https://a.espncdn.com/i/headshots/nba/players/full/4397014.pngQuentin Grimes6	SG	23	6' 4"	210 lbs	Houston	$2,385,720https://a.espncdn.com/i/headshots/nba/players/full/3062679.pngJosh Hart3	SG	28	6' 4"	215 lbs	Villanova	$12,960,000https://a.espncdn.com/i/headshots/nba/players/full/4222252.pngIsaiah Hartenstein55	C	25	7' 0"	250 lbs	--	$9,245,121https://a.espncdn.com/i/headshots/nba/players/full/3913220.pngDaQuan Jeffries8	SG	26	6' 5"	225 lbs	Western Texas Coll. (J.C.)	
$2,065,585https://a.espncdn.com/i/headshots/nba/players/full/4432645.pngTrevor Keels3	G	20	6' 4"	221 lbs	Duke	--https://a.espncdn.com/i/headshots/nba/players/full/4067325.pngNathan Knight	F	26	6' 8"	253 lbs	William & Mary	--https://a.espncdn.com/i/headshots/nba/players/full/5149711.pngJaylen Martin	G	19	6' 6"	196 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4431823.pngMiles McBride2	PG	23	6' 2"	195 lbs	West Virginia	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4395724.pngImmanuel Quickley5	SG	24	6' 3"	190 lbs	Kentucky	$4,171,548https://a.espncdn.com/i/headshots/nba/players/full/3064514.pngJulius Randle30	PF	28	6' 8"	250 lbs	Kentucky	$28,226,880https://a.espncdn.com/i/headshots/nba/players/full/4351852.pngMitchell Robinson23	C	25	7' 0"	240 lbs	--	$15,681,818https://a.espncdn.com/i/headshots/nba/players/full/4066392.pngIsaiah Roby18	F	25	6' 8"	230 lbs	Nebraska	$2,066,585https://a.espncdn.com/i/headshots/nba/players/full/4277922.pngJericho Sims45	C	24	6' 9"	250 lbs	Texas	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/4397235.pngDuane Washington Jr.4	PG	23	6' 2"	197 lbs	Ohio State	--https://a.espncdn.com/i/headshots/nba/players/full/3906786.pngDylan Windler	SG	26	6' 7"	196 lbs	Belmont	--Oklahoma City Thunder Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/6426.pngDavis Bertans	SF	30	6' 10"	225 lbs	--	$17,000,000https://a.espncdn.com/i/headshots/nba/players/full/4997526.pngOusmane Dieng13	F	20	6' 10"	220 lbs	--	$4,798,440https://a.espncdn.com/i/headshots/nba/players/full/4397020.pngLuguentz Dort5	G	24	6' 4"	220 lbs	Arizona State	$15,277,778https://a.espncdn.com/i/headshots/nba/players/full/4871145.pngJosh Giddey3	SG	20	6' 8"	216 lbs	--	$6,587,040https://a.espncdn.com/i/headshots/nba/players/full/4278073.pngShai Gilgeous-Alexander2	PG	25	6' 6"	195 lbs	Kentucky	$33,386,850https://a.espncdn.com/i/headshots/nba/players/full/4433255.pngChet Holmgren7	PF	21	7' 1"	208 lbs	Gonzaga	$10,386,000https://a.espncdn.com/i/headshots/nba/players/full/4395702.pngIsaiah Joe11	SG	24	6' 3"	165 lbs	Arkansas	$1,997,238https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Keyontae Johnson18	F	23	6' 6"	235 lbs	Kansas State	--https://a.espncdn.com/i/headshots/nba/players/full/4432819.pngTre Mann23	PG	22	6' 3"	184 lbs	Florida	$3,191,280https://a.espncdn.com/i/headshots/nba/players/full/3102532.pngVasilije Micic98	G	29	6' 5"	203 lbs	--	$7,723,000https://a.espncdn.com/i/headshots/nba/players/full/2527963.pngVictor Oladipo4	SG	31	6' 4"	213 lbs	Indiana	$9,450,000https://a.espncdn.com/i/headshots/nba/players/full/4683018.pngAleksej Pokusevski17	F	21	7' 0"	190 lbs	--	$5,009,633https://a.espncdn.com/i/headshots/nba/players/full/4432813.pngJeremiah Robinson-Earl50	PF	22	6' 9"	240 lbs	Villanova	$1,900,000https://a.espncdn.com/i/headshots/nba/players/full/4278046.pngOlivier Sarr30	C	24	7' 0"	240 lbs	Kentucky	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Cason Wallace22	G	19	6' 4"	193 lbs	Kentucky	$5,291,000https://a.espncdn.com/i/headshots/nba/players/full/4683749.pngTyTy Washington Jr.	
G	21	6' 3"	195 lbs	Kentucky	$2,320,440https://a.espncdn.com/i/headshots/nba/players/full/4066317.pngLindy Waters III12	F	26	6' 6"	210 lbs	Oklahoma State	$1,927,986https://a.espncdn.com/i/headshots/nba/players/full/4065653.pngJack White	F	26	6' 7"	225 lbs	Duke	$1,801,769https://a.espncdn.com/i/headshots/nba/players/full/4397183.pngAaron Wiggins21	SG	24	6' 5"	190 lbs	Maryland	$1,836,096https://a.espncdn.com/i/headshots/nba/players/full/4593803.pngJalen Williams8	F	22	6' 6"	218 lbs	Santa Clara	$4,558,680https://a.espncdn.com/i/headshots/nba/players/full/4432823.pngJaylin Williams6	F	21	6' 10"	245 lbs	Arkansas	$2,000,000https://a.espncdn.com/i/headshots/nba/players/full/3133626.pngKenrich Williams34	SF	28	6' 6"	210 lbs	TCU	$6,175,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146KJ Williams12	F	24	6' 10"	250 lbs	LSU	--Orlando Magic Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4432809.pngCole Anthony50	PG	23	6' 3"	185 lbs	North Carolina	$5,539,771https://a.espncdn.com/i/headshots/nba/players/full/4432573.pngPaolo Banchero5	PF	20	6' 10"	250 lbs	Duke	$11,608,080https://a.espncdn.com/i/headshots/nba/players/full/4348700.pngGoga Bitadze88	C	24	6' 11"	250 lbs	--	$2,062,585https://a.espncdn.com/i/headshots/nba/players/full/4712849.pngAnthony Black	G	19	6' 7"	200 lbs	Arkansas	$7,245,480https://a.espncdn.com/i/headshots/nba/players/full/4277847.pngWendell Carter Jr.34	C	24	6' 10"	270 lbs	Duke	$13,050,000https://a.espncdn.com/i/headshots/nba/players/full/2596108.pngMichael Carter-Williams11	G	31	6' 5"	190 lbs	Syracuse	--https://a.espncdn.com/i/headshots/nba/players/full/4066636.pngMarkelle Fultz20	PG	25	6' 4"	209 lbs	Washington	$17,000,000https://a.espncdn.com/i/headshots/nba/players/full/2999547.pngGary Harris14	SG	29	6' 4"	210 lbs	Michigan State	$13,000,000https://a.espncdn.com/i/headshots/nba/players/full/4067858.pngKevon Harris7	G	26	6' 6"	220 lbs	Stephen F. 
Austin	--https://a.espncdn.com/i/headshots/nba/players/full/4433623.pngCaleb Houstan2	G	20	6' 8"	210 lbs	Michigan	$2,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jett Howard	G	20	6' 8"	215 lbs	Michigan	$5,026,800https://a.espncdn.com/i/headshots/nba/players/full/2968436.pngJoe Ingles	SG	35	6' 8"	220 lbs	--	$11,000,000https://a.espncdn.com/i/headshots/nba/players/full/4065654.pngJonathan Isaac1	PF	25	6' 10"	230 lbs	Florida State	$17,400,000https://a.espncdn.com/i/headshots/nba/players/full/4397071.pngMac McClung9	SG	24	6' 2"	185 lbs	Texas Tech	--https://a.espncdn.com/i/headshots/nba/players/full/4278052.pngChuma Okeke3	PF	25	6' 8"	229 lbs	Auburn	$5,266,713https://a.espncdn.com/i/headshots/nba/players/full/4066851.pngMiye Oni	SG	26	6' 5"	206 lbs	Yale	--https://a.espncdn.com/i/headshots/nba/players/full/4409222.pngTrevelin Queen12	SG	26	6' 6"	190 lbs	New Mexico State	--https://a.espncdn.com/i/headshots/nba/players/full/3918423.pngAdmiral Schofield25	SF	26	6' 5"	241 lbs	Tennessee	--https://a.espncdn.com/i/headshots/nba/players/full/4432165.pngJalen Suggs4	SG	22	6' 5"	205 lbs	Gonzaga	$7,252,080https://a.espncdn.com/i/headshots/nba/players/full/4566434.pngFranz Wagner22	SF	22	6' 10"	220 lbs	Michigan	$5,508,720https://a.espncdn.com/i/headshots/nba/players/full/3150844.pngMoritz Wagner21	C	26	6' 11"	245 lbs	Michigan	$8,000,000https://a.espncdn.com/i/headshots/nba/players/full/4397040.pngBrandon Williams11	G	23	6' 2"	190 lbs	Arizona	--Philadelphia 76ers Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Marcus Bagley38	F	21	6' 8"	215 lbs	Arizona State	--https://a.espncdn.com/i/headshots/nba/players/full/4277919.pngMo Bamba	C	25	7' 0"	231 lbs	Texas	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3964.pngPatrick Beverley21	PG	35	6' 2"	180 lbs	Arkansas	$2,019,706https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Ricky Council IV37	G	22	6' 6"	205 lbs	Arkansas	--https://a.espncdn.com/i/headshots/nba/players/full/2580913.pngDewayne Dedmon14	C	34	6' 10"	245 lbs	USC	--https://a.espncdn.com/i/headshots/nba/players/full/4397107.pngDavid Duke Jr.	
G	23	6' 4"	207 lbs	Providence	--https://a.espncdn.com/i/headshots/nba/players/full/3059318.pngJoel Embiid21	C	29	7' 0"	280 lbs	Kansas	$46,900,000https://a.espncdn.com/i/headshots/nba/players/full/3988.pngDanny Green	F	36	6' 6"	215 lbs	North Carolina	--https://a.espncdn.com/i/headshots/nba/players/full/3992.pngJames Harden1	SG	34	6' 5"	220 lbs	Arizona State	$35,640,000https://a.espncdn.com/i/headshots/nba/players/full/2991055.pngMontrezl Harrell5	C	29	6' 7"	240 lbs	Louisville	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/6440.pngTobias Harris12	PF	31	6' 7"	226 lbs	Tennessee	$39,270,150https://a.espncdn.com/i/headshots/nba/players/full/2991155.pngDanuel House Jr.25	SF	30	6' 6"	220 lbs	Texas A&M	$4,310,250https://a.espncdn.com/i/headshots/nba/players/full/4397103.pngLouis King23	SF	24	6' 7"	205 lbs	Oregon	--https://a.espncdn.com/i/headshots/nba/players/full/3929325.pngFurkan Korkmaz30	SG	26	6' 7"	202 lbs	--	$5,370,370https://a.espncdn.com/i/headshots/nba/players/full/4431678.pngTyrese Maxey0	PG	22	6' 2"	200 lbs	Kentucky	$4,343,920https://a.espncdn.com/i/headshots/nba/players/full/4066436.pngDe'Anthony Melton8	SG	25	6' 2"	200 lbs	USC	$8,000,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Filip Petrusev	PF	23	6' 11"	234 lbs	Gonzaga	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/4278562.pngPaul Reed44	SF	24	6' 9"	210 lbs	DePaul	$7,723,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Terquavion Smith26	G	20	6' 4"	165 lbs	NC State	--https://a.espncdn.com/i/headshots/nba/players/full/4432164.pngJaden Springer11	G	20	6' 4"	205 lbs	Tennessee	$2,226,240https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Azuolas Tubelis29	F	21	6' 11"	245 lbs	Arizona	--https://a.espncdn.com/i/headshots/nba/players/full/3033.pngP.J. 
Tucker17	PF	38	6' 5"	245 lbs	Texas	$11,014,500Phoenix Suns Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4278129.pngDeandre Ayton22	C	25	7' 0"	250 lbs	Arizona	$32,459,438https://a.espncdn.com/i/headshots/nba/players/full/4066299.pngUdoka Azubuike	C	24	6' 10"	280 lbs	Kansas	--https://a.espncdn.com/i/headshots/nba/players/full/3136779.pngKeita Bates-Diop	SF	27	6' 8"	229 lbs	Ohio State	$2,364,614https://a.espncdn.com/i/headshots/nba/players/full/6580.pngBradley Beal	SG	30	6' 4"	207 lbs	Florida	$46,741,590https://a.espncdn.com/i/headshots/nba/players/full/6427.pngBismack Biyombo18	C	31	6' 8"	255 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4397102.pngBol Bol	C	23	7' 2"	220 lbs	Oregon	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/3136193.pngDevin Booker1	SG	26	6' 5"	206 lbs	Kentucky	$36,016,200https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Toumani Camara	F	23	6' 8"	220 lbs	Dayton	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/3202.pngKevin Durant35	PF	34	6' 10"	240 lbs	Texas	$47,649,433https://a.espncdn.com/i/headshots/nba/players/full/3914285.pngDrew Eubanks	PF	26	6' 10"	245 lbs	Oregon State	$2,346,614https://a.espncdn.com/i/headshots/nba/players/full/4278402.pngJordan Goodwin	G	24	6' 3"	200 lbs	Saint Louis	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/2595209.pngDamion Lee10	SG	30	6' 5"	210 lbs	Louisville	$2,528,233https://a.espncdn.com/i/headshots/nba/players/full/4278124.pngSaben Lee38	G	24	6' 2"	183 lbs	Vanderbilt	--https://a.espncdn.com/i/headshots/nba/players/full/3914283.pngChimezie Metu7	PF	26	6' 9"	225 lbs	USC	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/4065663.pngJosh Okogie2	SF	25	6' 4"	213 lbs	Georgia Tech	$2,815,937https://a.espncdn.com/i/headshots/nba/players/full/6619.pngTerrence Ross8	G	32	6' 7"	206 lbs	Washington	--https://a.espncdn.com/i/headshots/nba/players/full/3059307.pngIsh Wainright12	F	29	6' 5"	250 lbs	Baylor	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/2982334.pngT.J. 
Warren21	SF	30	6' 8"	220 lbs	NC State	--https://a.espncdn.com/i/headshots/nba/players/full/3133838.pngYuta Watanabe	SF	28	6' 8"	215 lbs	George Washington	$2,346,614Portland Trail Blazers Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4871140.pngIbou Badji41	C	20	7' 1"	240 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4397126.pngMoses Brown	C	23	7' 2"	245 lbs	UCLA	--https://a.espncdn.com/i/headshots/nba/players/full/4432588.pngJohn Butler Jr.21	F	20	7' 0"	190 lbs	Florida State	--https://a.espncdn.com/i/headshots/nba/players/full/2991070.pngJerami Grant9	SF	29	6' 8"	210 lbs	Syracuse	$27,586,207https://a.espncdn.com/i/headshots/nba/players/full/4683678.pngScoot Henderson0	G	19	6' 2"	196 lbs	--	$9,770,880https://a.espncdn.com/i/headshots/nba/players/full/4432192.pngKeon Johnson6	SG	21	6' 3"	185 lbs	Tennessee	$2,808,720https://a.espncdn.com/i/headshots/nba/players/full/4278075.pngKevin Knox II11	SF	24	6' 7"	215 lbs	Kentucky	--https://a.espncdn.com/i/headshots/nba/players/full/6606.pngDamian Lillard0	PG	33	6' 2"	195 lbs	Weber State	$45,640,084https://a.espncdn.com/i/headshots/nba/players/full/4395652.pngNassir Little10	SF	23	6' 5"	220 lbs	North Carolina	$6,250,000https://a.espncdn.com/i/headshots/nba/players/full/4066269.pngSkylar Mays8	SG	26	6' 4"	205 lbs	LSU	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Justin Minaya15	F	24	6' 7"	210 lbs	Providence	--https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Kris Murray8	F	23	6' 8"	215 lbs	Iowa	$2,847,480https://a.espncdn.com/i/headshots/nba/players/full/3102530.pngJusuf Nurkic27	C	29	7' 0"	290 lbs	--	$16,875,000https://a.espncdn.com/i/headshots/nba/players/full/4914336.pngShaedon Sharpe17	SG	20	6' 5"	200 lbs	Kentucky	$6,313,800https://a.espncdn.com/i/headshots/nba/players/full/4351851.pngAnfernee Simons1	SG	24	6' 3"	181 lbs	--	$24,107,143https://a.espncdn.com/i/headshots/nba/players/full/3907498.pngMatisse Thybulle4	SG	26	6' 5"	201 lbs	Washington	$10,500,000https://a.espncdn.com/i/headshots/nba/players/full/4432446.pngJabari Walker34	F	21	6' 7"	200 lbs	Colorado	$1,719,864https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jeenathan Williams49	G	24	6' 5"	205 lbs	Buffalo	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/3135047.pngJustise Winslow26	PF	27	6' 6"	222 lbs	Duke	--Sacramento Kings Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/6578.pngHarrison Barnes40	SF	31	6' 8"	225 lbs	North Carolina	$17,000,000https://a.espncdn.com/i/headshots/nba/players/full/3919454.pngTerence Davis3	SG	26	6' 4"	201 lbs	Ole Miss	--https://a.espncdn.com/i/headshots/nba/players/full/2489716.pngMatthew Dellavedova8	PG	33	6' 3"	200 lbs	Saint Mary's	--https://a.espncdn.com/i/headshots/nba/players/full/4592402.pngChris Duarte	SG	26	6' 5"	190 lbs	Oregon	$4,124,400https://a.espncdn.com/i/headshots/nba/players/full/4397693.pngKessler Edwards17	SF	23	6' 8"	215 lbs	Pepperdine	$1,927,896https://a.espncdn.com/i/headshots/nba/players/full/4702177.pngKeon Ellis23	G	23	6' 5"	175 lbs	Alabama	--https://a.espncdn.com/i/headshots/nba/players/full/4066736.pngJordan Ford1	G	25	6' 1"	175 lbs	Saint Mary's	--https://a.espncdn.com/i/headshots/nba/players/full/4066259.pngDe'Aaron Fox5	PG	25	6' 3"	185 lbs	Kentucky	$32,600,060https://a.espncdn.com/i/headshots/nba/players/full/4066372.pngKevin Huerter9	SG	25	6' 7"	198 lbs	Maryland	
$15,669,643https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Colby Jones	G	21	6' 6"	205 lbs	Xavier	$2,019,706https://a.espncdn.com/i/headshots/nba/players/full/2596107.pngAlex Len25	C	30	7' 1"	250 lbs	Maryland	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/3136196.pngTrey Lyles41	PF	27	6' 9"	234 lbs	Kentucky	$8,000,000https://a.espncdn.com/i/headshots/nba/players/full/3452.pngJaVale McGee00	C	35	7' 0"	270 lbs	Nevada	$5,734,280https://a.espncdn.com/i/headshots/nba/players/full/4278053.pngDavion Mitchell15	PG	25	6' 0"	202 lbs	Baylor	$5,063,760https://a.espncdn.com/i/headshots/nba/players/full/4066262.pngMalik Monk0	SG	25	6' 3"	200 lbs	Kentucky	$9,945,830https://a.espncdn.com/i/headshots/nba/players/full/4594327.pngKeegan Murray13	SF	23	6' 8"	215 lbs	Iowa	$8,409,000https://a.espncdn.com/i/headshots/nba/players/full/3155942.pngDomantas Sabonis10	PF	27	7' 0"	240 lbs	Gonzaga	$30,600,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Jalen Slawson	F	23	6' 7"	218 lbs	Furman	--https://a.espncdn.com/i/headshots/nba/players/full/3893014.pngSasha Vezenkov	F	28	6' 9"	225 lbs	--	$6,341,464San Antonio Spurs Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4870562.pngDominick Barlow26	F	20	6' 10"	220 lbs	--	--https://a.espncdn.com/i/headshots/nba/players/full/4397886.pngCharles Bassey28	C	22	6' 9"	230 lbs	Western Kentucky	$2,600,000https://a.espncdn.com/i/headshots/nba/players/full/2578240.pngKhem Birch99	C	30	6' 8"	233 lbs	UNLV	$6,985,000https://a.espncdn.com/i/headshots/nba/players/full/4565201.pngMalaki Branham22	G	20	6' 4"	180 lbs	Ohio State	$3,071,880https://a.espncdn.com/i/headshots/nba/players/full/2528779.pngReggie Bullock25	SF	32	6' 6"	205 lbs	North Carolina	$11,014,080https://a.espncdn.com/i/headshots/nba/players/full/4592479.pngJulian Champagnie30	F	22	6' 9"	210 lbs	St. 
John's	$3,000,000https://a.espncdn.com/i/headshots/nba/players/full/5081727.pngSidy Cissoko	G	19	6' 7"	225 lbs	--	$1,119,563https://a.espncdn.com/i/headshots/nba/players/full/4066650.pngZach Collins23	PF	25	6' 11"	250 lbs	Gonzaga	$7,700,000https://a.espncdn.com/i/headshots/nba/players/full/2534781.pngGorgui Dieng41	C	33	6' 10"	265 lbs	Louisville	--https://a.espncdn.com/i/headshots/nba/players/full/3133601.pngDevonte' Graham4	PG	28	6' 1"	195 lbs	Kansas	$12,100,000https://a.espncdn.com/i/headshots/nba/players/full/4395723.pngKeldon Johnson3	SF	23	6' 5"	220 lbs	Kentucky	$20,000,000https://a.espncdn.com/i/headshots/nba/players/full/4395626.pngTre Jones33	PG	23	6' 1"	185 lbs	Duke	$9,895,833https://a.espncdn.com/i/headshots/nba/players/full/4397008.pngRomeo Langford35	SG	23	6' 5"	216 lbs	Indiana	--https://a.espncdn.com/i/headshots/nba/players/full/4278580.pngSandro Mamukelashvili54	PF	24	6' 10"	240 lbs	Seton Hall	$2,019,760https://a.espncdn.com/i/headshots/nba/players/full/2528588.pngDoug McDermott17	SF	31	6' 6"	225 lbs	Creighton	$13,750,000https://a.espncdn.com/i/headshots/nba/players/full/3893016.pngCedi Osman16	SF	28	6' 7"	230 lbs	--	$6,718,842https://a.espncdn.com/i/headshots/nba/players/full/4610139.pngJeremy Sochan10	F	20	6' 8"	230 lbs	Baylor	$5,316,960https://a.espncdn.com/i/headshots/nba/players/full/4395630.pngDevin Vassell24	SG	23	6' 5"	200 lbs	Florida State	$5,887,899https://a.espncdn.com/i/headshots/nba/players/full/5104157.pngVictor Wembanyama	C	19	7' 3"	225 lbs	--	$12,160,680https://a.espncdn.com/i/headshots/nba/players/full/4683935.pngBlake Wesley14	G	20	6' 4"	190 lbs	Notre Dame	$2,504,640Toronto Raptors Roster 2023-24	NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4431679.pngPrecious Achiuwa5	PF	24	6' 8"	225 lbs	Memphis	$4,379,527https://a.espncdn.com/i/headshots/nba/players/full/3934719.pngO.G. 
Anunoby3	SF	26	6' 7"	232 lbs	Indiana	$18,642,857https://a.espncdn.com/i/headshots/nba/players/full/4433134.pngScottie Barnes4	SF	22	6' 8"	225 lbs	Florida State	$8,008,560https://a.espncdn.com/i/headshots/nba/players/full/6579.pngWill Barton1	G	32	6' 5"	181 lbs	Memphis	--https://a.espncdn.com/i/headshots/nba/players/full/3948153.pngChris Boucher25	PF	30	6' 9"	200 lbs	Oregon	$11,750,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Gradey Dick	G	19	6' 8"	205 lbs	Kansas	$4,536,720https://a.espncdn.com/i/headshots/nba/players/full/4066786.pngJeff Dowtin Jr.20	PG	26	6' 3"	177 lbs	Rhode Island	$2,109,706https://a.espncdn.com/i/headshots/nba/players/full/4066668.pngMalachi Flynn22	PG	25	6' 1"	175 lbs	San Diego State	$3,873,025https://a.espncdn.com/i/headshots/nba/players/full/4397251.pngRon Harper Jr.8	F	23	6' 5"	245 lbs	Rutgers	--https://a.espncdn.com/i/headshots/nba/players/full/4431778.pngChristian Koloko35	C	23	7' 0"	230 lbs	Arizona	$1,719,864https://a.espncdn.com/i/headshots/nba/players/full/4066731.pngJalen McDaniels7	PF	25	6' 9"	205 lbs	San Diego State	$4,516,000https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Markquis Nowell	G	23	5' 8"	160 lbs	Kansas State	--https://a.espncdn.com/i/headshots/nba/players/full/3134908.pngJakob Poeltl19	C	27	7' 1"	245 lbs	Utah	$19,500,000https://a.espncdn.com/i/headshots/nba/players/full/2594922.pngOtto Porter Jr.32	SF	30	6' 8"	198 lbs	Georgetown	$6,300,000https://a.espncdn.com/i/headshots/nba/players/full/3032979.pngDennis Schroder	PG	30	6' 1"	172 lbs	--	$12,405,000https://a.espncdn.com/i/headshots/nba/players/full/3149673.pngPascal Siakam43	PF	29	6' 8"	230 lbs	New Mexico State	$37,893,408https://a.espncdn.com/i/headshots/nba/players/full/4277843.pngGary Trent Jr.33	SG	24	6' 5"	209 lbs	Duke	$18,560,000https://a.espncdn.com/i/headshots/nba/players/full/3244.pngThaddeus Young21	PF	35	6' 8"	235 lbs	Georgia Tech	$8,000,000Utah Jazz Roster 2023-24NAME	POS	AGE	HT	WT	COLLEGE	SALARYhttps://a.espncdn.com/i/headshots/nba/players/full/4397018.pngOchai Agbaji30	G	23	6' 5"	215 lbs	Kansas	$4,114,200https://a.espncdn.com/i/headshots/nba/players/full/3912292.pngJarrell Brantley5	PF	27	6' 5"	250 lbs	Charleston	--https://a.espncdn.com/i/headshots/nba/players/full/2528426.pngJordan Clarkson00	PG	31	6' 5"	194 lbs	Missouri	$23,487,629https://a.espncdn.com/i/headshots/nba/players/full/3908845.pngJohn Collins20	PF	25	6' 9"	226 lbs	Wake Forest	$25,340,000https://a.espncdn.com/i/headshots/nba/players/full/2991139.pngKris Dunn11	G	29	6' 3"	205 lbs	Providence	$2,586,665https://a.espncdn.com/i/headshots/nba/players/full/3899664.pngSimone Fontecchio16	F	27	6' 7"	209 lbs	--	$3,044,872https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Keyonte George	G	19	6' 4"	185 lbs	Baylor	$3,889,800https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146Taylor Hendricks	F	19	6' 9"	210 lbs	UCF	$5,569,920https://a.espncdn.com/i/headshots/nba/players/full/4396991.pngTalen Horton-Tucker0	SG	22	6' 4"	234 lbs	Iowa State	$11,020,000https://a.espncdn.com/i/headshots/nba/players/full/4432817.pngJohnny Juzang33	G	22	6' 7"	215 lbs	UCLA	--https://a.espncdn.com/i/headshots/nba/players/full/4433136.pngWalker Kessler24	C	22	7' 0"	245 lbs	Auburn	$2,831,160https://a.espncdn.com/i/headshots/nba/players/full/4066336.pngLauri Markkanen23	PF	26	7' 0"	240 lbs	Arizona	$17,259,999https://a.espncdn.com/i/headshots/nba/players/full/2489663.pngKelly Olynyk41	PF	32	6' 11"	240 lbs	Gonzaga	
Kelly Olynyk41	PF	32	6' 11"	240 lbs	Gonzaga	$12,195,122
Luka Samanic91	F	23	6' 10"	227 lbs	--	$2,066,585
Brice Sensabaugh	F	19	6' 6"	235 lbs	Ohio State	$2,448,600
Collin Sexton2	PG	24	6' 2"	190 lbs	Alabama	$17,325,000
Juan Toscano-Anderson95	SF	30	6' 6"	209 lbs	Marquette	--
Omer Yurtseven77	C	25	6' 11"	275 lbs	Georgetown	$2,800,000

Washington Wizards Roster 2023-24
NAME	POS	AGE	HT	WT	COLLEGE	SALARY
Deni Avdija9	SF	22	6' 9"	210 lbs	--	$6,263,188
Patrick Baldwin Jr.7	F	20	6' 9"	220 lbs	Milwaukee	$2,337,720
Jared Butler	SG	23	6' 3"	193 lbs	Baylor	--
Xavier Cooks12	G	28	6' 8"	185 lbs	Winthrop	$1,719,864
Bilal Coulibaly	SG	19	6' 6"	195 lbs	--	$6,614,280
Johnny Davis1	G	21	6' 4"	195 lbs	Wisconsin	$5,050,800
Daniel Gafford21	C	24	6' 10"	234 lbs	Arkansas	$12,402,000
Danilo Gallinari	PF	35	6' 10"	236 lbs	--	$6,802,950
Taj Gibson67	F	38	6' 9"	232 lbs	USC	--
Anthony Gill16	PF	30	6' 8"	230 lbs	Virginia	$1,997,239
Tyus Jones	PG	27	6' 0"	196 lbs	Duke	$14,000,000
Corey Kispert24	SF	24	6' 6"	224 lbs	Gonzaga	$3,722,040
Kyle Kuzma33	SF	28	6' 9"	221 lbs	Utah	$25,568,182
Mike Muscala	C	32	6' 11"	240 lbs	Bucknell	$3,500,000
Kendrick Nunn20	G	28	6' 3"	190 lbs	Oakland	--
Eugene Omoruyi	F	26	6' 6"	235 lbs	Oregon	$1,927,896
Jordan Poole3	SG	24	6' 4"	194 lbs	Michigan	$27,955,357
Ryan Rollins2	G	21	6' 3"	180 lbs	Toledo	$1,719,864
Landry Shamet	SG	26	6' 4"	190 lbs	Wichita State	$10,250,000
Tristan Vukcevic	PF	20	7' 0"	225 lbs	--	--
Delon Wright55	SG	31	6' 5"	185 lbs	Utah	$8,195,122

================================================
FILE: end-to-end-use-cases/coding/text2sql/nba_roster.db
================================================
[Non-text file]

================================================
FILE: end-to-end-use-cases/coding/text2sql/requirements.txt
================================================
langchain
langchain-community
langchain-together

================================================
FILE: end-to-end-use-cases/Contextual-Chunking-RAG/README.md
================================================
# Contextual keywords generation for RAG using Llama-3.1

**Problem**: Independent chunking in traditional RAG systems leads to the loss of contextual information between chunks. This makes it difficult for LLMs to retrieve relevant data when the context (e.g., the subject or entity being discussed) is not explicitly repeated within individual chunks.

**Solution**: Generate keywords for each chunk that fill in the missing contextual information. These keywords (e.g., "BMW, X5, pricing") enrich the chunk with the necessary context, ensuring better retrieval accuracy. By embedding this enriched metadata, the system bridges gaps between related chunks, enabling effective query matching and accurate answer generation. [This article](https://medium.com/@ailabs/overcoming-independent-chunking-in-rag-systems-a-hybrid-approach-5d2c205b3732) explains the benefits of contextual chunking.

**Note**: This method does not require calling the LLM for each chunk separately, which makes it efficient.

**Getting started**

In this cookbook, we'll use DeepInfra for Llama inference services, so be sure to obtain an API key from https://deepinfra.com/. You'll also need a LlamaParse API key to parse PDF files, which can be obtained from https://www.llamaindex.ai/. Additionally, we will use the "jinaai/jina-embeddings-v2-base-en" model from HuggingFace to generate text embeddings locally.

Before getting started, update the <code>config.py</code> file as follows:

    DEEPINFRA_API_KEY="<your_api_key>"
    LLAMAPARSE_API_KEY="<your_api_key>"
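The idea in code: below is a minimal sketch (with hypothetical helper shapes, not the notebook's actual code) of generating keywords for all chunks in one batched Llama call and then embedding each chunk together with its keywords. The cookbook itself uses DeepInfra for inference and LlamaParse for parsing; `llm` here stands for any text-in/text-out completion callable:

```
from sentence_transformers import SentenceTransformer

def generate_chunk_keywords(llm, chunks):
    # One batched LLM call annotates ALL chunks, instead of one call per chunk.
    numbered = "\n\n".join(f"[{i}] {chunk}" for i, chunk in enumerate(chunks))
    prompt = (
        "For each numbered chunk below, output one line in the form "
        "'<index>: <comma-separated keywords>' that restores the missing "
        "context (subject, entity, topic).\n\n" + numbered
    )
    keywords = {}
    for line in llm(prompt).splitlines():
        idx, sep, kws = line.partition(":")
        idx = idx.strip().strip("[]")
        if sep and idx.isdigit():
            keywords[int(idx)] = kws.strip()
    return [keywords.get(i, "") for i in range(len(chunks))]

def embed_enriched_chunks(chunks, keywords):
    # Prepend the generated keywords to each chunk before embedding.
    model = SentenceTransformer("jinaai/jina-embeddings-v2-base-en", trust_remote_code=True)
    enriched = [f"{kws}\n{chunk}" for kws, chunk in zip(keywords, chunks)]
    return model.encode(enriched)
```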
================================================
FILE: end-to-end-use-cases/Contextual-Chunking-RAG/data/llama_article.txt
================================================
Understanding the LLaMA Model: A Breakthrough in Large Language Models

In recent years, large language models (LLMs) have revolutionized the field of natural language processing (NLP). Among them, Meta's LLaMA (Large Language Model Meta AI) has emerged as a powerful, efficient, and open-weight model that provides high-quality text generation capabilities while being more accessible than proprietary alternatives. This article explores the architecture, capabilities, and applications of LLaMA, along with its significance in the AI landscape.

1. Introduction to LLaMA

LLaMA is a family of autoregressive transformer-based models designed by Meta AI. Unlike massive models like OpenAI's GPT-4, which require extensive computational resources and are primarily closed-source, LLaMA aims to provide powerful language modeling in a more efficient and open format. The original LLaMA release included models ranging from 7 billion to 65 billion parameters, offering different levels of computational demand and performance.

The second iteration, LLaMA 2, introduced in 2023, further improved efficiency, accuracy, and usability. LLaMA 2 models are available in 7B, 13B, and 65B parameter variants, with optimized training methodologies and increased alignment with human preferences.

2. Architecture and Training

LLaMA follows the transformer architecture, the foundation of most modern language models. Key architectural improvements and training strategies include:

    Tokenization: LLaMA uses Byte Pair Encoding (BPE) for tokenization, ensuring better handling of various languages and token efficiency.

    Efficient Training: Trained on a diverse dataset containing publicly available and licensed data, LLaMA reduces reliance on proprietary sources. The training process leverages a causal decoder-only transformer, meaning it predicts tokens autoregressively while attending to previous context.

    Scaled Attention Mechanism: LLaMA incorporates Rotary Position Embeddings (RoPE) for efficient long-context understanding. This improves its ability to handle longer sequences compared to earlier models.
    Memory Optimization: Unlike some larger models requiring thousands of GPUs for inference, LLaMA's optimized weight distribution and efficient parameter scaling allow it to run on fewer computational resources while maintaining high performance.

The training data includes code, technical documents, research papers, and general text, making LLaMA well-suited for various NLP tasks, from answering questions to generating detailed content.

3. Performance and Benchmarks

LLaMA models have demonstrated impressive performance across multiple benchmarks. The 65B variant outperforms GPT-3 (175B) on several standard NLP tasks while using significantly fewer parameters. Key benchmarking results include:

    MMLU (Massive Multitask Language Understanding): LLaMA 2-65B achieves results comparable to GPT-4 in general knowledge and reasoning tasks.

    ARC (AI2 Reasoning Challenge): LLaMA models show strong problem-solving capabilities, particularly in logic-based questions.

    HellaSwag & PIQA: LLaMA performs well in commonsense reasoning, approaching human-level accuracy.

    Code Generation: Though not primarily designed for coding, LLaMA exhibits notable competence in generating and completing programming code snippets.

Despite being smaller than some competing models, LLaMA's efficiency enables it to achieve state-of-the-art performance per parameter count, making it a highly cost-effective solution.

4. Applications of LLaMA

The versatility of LLaMA enables a wide range of applications across industries, including:

    Chatbots and Virtual Assistants: LLaMA powers intelligent conversational AI systems, providing human-like responses with improved contextual understanding.

    Content Generation: From summarizing long documents to creating articles and reports, LLaMA is widely used for generating high-quality text.

    Programming Assistance: Developers use LLaMA to generate code snippets, debug errors, and improve software development efficiency.

    Scientific Research: The model helps researchers analyze papers, generate summaries, and assist in hypothesis generation.

    Education and Tutoring: LLaMA aids in personalized learning, answering students' queries and explaining complex topics interactively.

Its open-weight availability also allows organizations to fine-tune the model on proprietary data, making it adaptable for specialized use cases such as medical AI, legal document analysis, and multilingual NLP tasks.

5. Challenges and Limitations

Despite its advantages, LLaMA faces several challenges:

    Ethical Concerns: Like all LLMs, LLaMA can generate biased or misleading information. Efforts are ongoing to align the model with ethical AI principles.

    Computational Costs: Although LLaMA is optimized for efficiency, larger variants still require significant GPU resources for fine-tuning and inference.

    Context Length Limitations: While improved, LLaMA still has constraints on long-context reasoning compared to specialized extended-context models.

    Security Risks: Open-weight models pose potential risks for misuse, such as generating harmful or deceptive content. Responsible deployment and monitoring are necessary.

6. The Future of LLaMA

Meta continues to refine the LLaMA model family, with research focused on improving alignment, reducing biases, and extending context understanding. Future iterations may include:

    LLaMA 3 and Beyond: Expected advancements in parameter efficiency and multimodal capabilities.

    Better Fine-Tuning Techniques: Enhancing adaptability for domain-specific applications.
    Integration with Retrieval-Augmented Generation (RAG): Combining LLaMA with external knowledge sources for more accurate responses.

    Edge Deployment: Efforts to make LLaMA smaller and faster for local AI applications without cloud dependence.

As open-source AI research progresses, LLaMA remains a key player in democratizing access to powerful language models, enabling innovation across academia, business, and technology sectors.

7. Conclusion

LLaMA represents a significant step forward in making high-quality language models more accessible. By balancing efficiency, openness, and performance, it provides a compelling alternative to closed-source models like GPT-4. Whether for research, business applications, or general AI development, LLaMA offers a robust platform for advancing NLP capabilities while promoting transparency and innovation in AI.

================================================
FILE: end-to-end-use-cases/customerservice_chatbots/README.md
================================================
This repository contains various end-to-end use cases for building customer service chatbots using Meta's Llama 3. Below is an outline of the sub folders and their contents.

## Outline

- **[RAG_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot)** contains resources for building a Meta Llama 3 chatbot with Retrieval Augmented Generation (RAG). It contains a [notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) which shows a complete example of how to build a Meta Llama 3 chatbot hosted in your browser that can answer questions based on your own data. It covers:
    - The deployment process of Meta Llama 3 8B with the [Text-generation-inference](https://github.com/huggingface/text-generation-inference) framework as an API server.
    - A chatbot example built with [Gradio](https://github.com/gradio-app/gradio) and wired to the server.
    - Adding RAG capability with Meta Llama 3 specific knowledge based on our Getting Started [guide](https://ai.meta.com/llama/get-started/).
- **[ai_agent_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot)** contains a [Sales Bot with Llama3 - A Summarization and RAG Use Case notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb) that demonstrates building a sales chatbot using Llama3 for targeted product recommendations. The workflow involves:
    * Generating product review summaries using Llama3
    * Storing summaries in a vector database (Weaviate)
    * Leveraging Retrieval Augmented Generation (RAG) for intelligent sales interactions
- The **[messenger_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot)** section provides a step-by-step guide to building a Llama-enabled Messenger chatbot. It includes integration details with the Messenger Platform and a [demo video](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).
- The **[whatsapp_chatbot](https://github.com/Monireh2/llama-recipes/tree/main/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot)** folder contains a tutorial for creating a Llama 3 enabled WhatsApp chatbot, similar to the Messenger chatbot guide.
A demo video showcasing the use of iOS WhatsApp to send a question to a test phone number and receive a response generated by Llama 3 can be found [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).

## Additional Information

- **RAG Architecture:** The RAG method enhances LLMs by retrieving and augmenting data, allowing for more relevant and context-aware responses.
- **Development Tools:** The repository utilizes frameworks like LangChain and LlamaIndex for building LLM applications, and Gradio for creating chatbot UIs.

For more detailed information, please refer to the individual sub directory documentation and examples.

================================================
FILE: end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md
================================================
# Building a Llama 3 Enabled Messenger Chatbot

This step-by-step tutorial shows the complete process of building a Llama-enabled Messenger chatbot. A demo video of using the iOS Messenger app to send a question to a Facebook business page and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).

If you're interested in a Llama 3 enabled WhatsApp chatbot, see [here](../whatsapp_chatbot/whatsapp_llama3.md) for a tutorial.

## Overview

Messenger from Meta is a messaging service that allows a Facebook business page to respond to people who are interested in their business using the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview). The benefits of an intelligent and knowledgeable Llama 3 powered chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this tutorial, we'll cover the details of integrating Llama 3 with the Messenger Platform to build a basic Llama 3 enabled chatbot - for more Llama 3 application development and deployment demos, such as how to integrate your own data with Llama 3, see the recipes.

The diagram below shows the components and overall data flow of the Llama 3 enabled Messenger chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.

## Getting Started with Messenger Platform

1. A Facebook Page is required to send and receive messages using the Messenger Platform - see [here](https://www.facebook.com/business/help/461775097570076?id=939256796236247) for details about Facebook Pages and how to create a new page.

2. If you have followed the [Llama WhatsApp chatbot tutorial](../whatsapp_chatbot/whatsapp_llama3.md), or if you already have a Meta developer account and a business app, then you can skip this step. Otherwise, you need to first [create a Meta developer account](https://developers.facebook.com/) and then [create a business app](https://developers.facebook.com/docs/development/create-an-app/).

3. Add the Messenger product to your business app by going to your business app's Dashboard, clicking "Add Product" and then selecting "Messenger".

4. Configure a webhook to receive real time HTTP notifications: follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com and get your webhook's callback URL. You can skip the WHATSAPP_TOKEN part in the guide - the rest of the webhook setup is the same for WhatsApp and Messenger.
5. Open Messenger's API Settings, as shown in the screenshot below, then in "1. Configure webhooks", set the Callback URL and Verify Token set up in the previous step, and subscribe all message related fields for "Webhook Fields". Finally, in "2. Generate access tokens", connect your Facebook page (see step 1) and copy your page access token for later use.

## Writing Llama 3 Enabled Web App

The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud, to receive the user query sent by the webhook (covered in the next section), pass the query to Llama 3, and send the Llama 3 answer back to the webhook. For more information on how to use LangChain or LlamaIndex (another LLM app building framework) and other Llama 3 cloud providers or on-premise deployment options to develop Llama 3 apps, see the recipes.

First, let's create a new conda (or venv, if you prefer) environment and install all the required packages:

```
conda create -n messenger-llama python=3.8
conda activate messenger-llama
pip install langchain replicate flask requests uvicorn gunicorn
```

Then, create a Python file named [llama_messenger.py](llama_messenger.py) that creates a Llama 3 instance and defines an HTTP method `msgrcvd_page` to:

1. receive the user message forwarded by the webhook;
2. ask Llama 3 for the answer;
3. send the answer back to the sender using the Facebook graph API.

```
import langchain
from langchain.llms import Replicate
from flask import Flask
from flask import request
import os
import requests
import json

os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
llama3_8b_chat = "meta/meta-llama-3-8b-instruct"

llm = Replicate(
    model=llama3_8b_chat,
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
)

app = Flask(__name__)

@app.route('/msgrcvd_page', methods=['POST', 'GET'])
def msgrcvd_page():
    message = request.args.get('message')
    sender = request.args.get('sender')
    recipient = request.args.get('recipient')

    answer = llm(message)

    url = f"https://graph.facebook.com/v18.0/{recipient}/messages"
    params = {
        'recipient': '{"id": ' + sender + '}',
        'message': json.dumps({'text': answer}),
        'messaging_type': 'RESPONSE',
        'access_token': '<page_access_token>'
    }
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.post(url, params=params, headers=headers)
    return message + "<p/>" + answer
```

Replace <page_access_token> with the access token copied in step 5 "Open Messenger's API Settings" of the previous section.
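Before wiring up the webhook, you can sanity-check the Flask endpoint locally (an illustrative test, not part of the original tutorial). With dummy ids the Graph API call won't reach a real user, but assuming your Replicate token is set, you should still see the message and Llama's answer echoed back:

```
# with the app running (see the Running the Chatbot section below):
curl "http://localhost:5000/msgrcvd_page?sender=123&recipient=456&message=hello"
```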
Now it's time to modify the webhook to complete the whole app.

## Modifying the Webhook

Open your glitch.com webhook URL created earlier, and change your `app.js` to simply forward the user message and the user and page ids sent by the Messenger Platform to the Llama 3 enabled web app `llama_messenger.py` described in the previous section:

```
"use strict";

// Imports dependencies and set up http server
const request = require("request"),
  express = require("express"),
  body_parser = require("body-parser"),
  axios = require("axios").default,
  app = express().use(body_parser.json()); // creates express http server

// Sets server port and logs message on success
app.listen(process.env.PORT || 1337, () => console.log("webhook is listening"));

// Accepts POST requests at /webhook endpoint
app.post("/webhook", (req, res) => {
  // Parse the request body from the POST
  let body = req.body;

  let sender = req.body["entry"][0]["messaging"][0]['sender']['id']
  let recipient = req.body["entry"][0]["messaging"][0]['recipient']['id']
  let message = req.body["entry"][0]["messaging"][0]['message']['text']

  // Check if this is an event from a page subscription
  if (body.object === "page") {
    // Returns a '200 OK' response to all requests
    res.status(200).send("EVENT_RECEIVED");

    let url = "http://<web server public IP>:5000/msgrcvd_page?sender=" + sender + "&recipient=" + recipient + "&message=" + encodeURIComponent(message)
    console.log(url)

    axios.get(url)
      .then(response => {
        // Handle the response data
        console.log(response.data);
      })
      .catch(error => {
        // Handle errors
        console.error('Axios error:', error);
      });
  } else {
    // Return a '404 Not Found' if event is not from a page subscription
    res.sendStatus(404);
  }
});
// Accepts GET requests at the /webhook endpoint. You need this URL to set up the webhook initially.
// info on verification request payload: https://developers.facebook.com/docs/graph-api/webhooks/getting-started#verification-requests
app.get("/webhook", (req, res) => {
  /**
   * UPDATE YOUR VERIFY TOKEN
   * This will be the Verify Token value when you set up the webhook
  **/
  const verify_token = process.env.VERIFY_TOKEN;

  // Parse params from the webhook verification request
  let mode = req.query["hub.mode"];
  let token = req.query["hub.verify_token"];
  let challenge = req.query["hub.challenge"];

  // Check if a token and mode were sent
  if (mode && token) {
    // Check the mode and token sent are correct
    if (mode === "subscribe" && token === verify_token) {
      // Respond with 200 OK and challenge token from the request
      console.log("WEBHOOK_VERIFIED: " + token);
      res.status(200).send(challenge);
    } else {
      // Responds with '403 Forbidden' if verify tokens do not match
      res.sendStatus(403);
    }
  }
});
```

Remember to change <web server public IP>, which needs to be publicly visible, to the IP of the server where the Llama 3 enabled web app from the previous section runs.

Note: It's possible, and even recommended, to implement the webhook in Python and call Llama directly inside the webhook, instead of making an HTTP request (as the JavaScript code above does) to a Python app which calls Llama and sends the answer to Messenger.

## Running the Chatbot

On your web server, run the following command on a Terminal (see [here](https://flask.palletsprojects.com/en/3.0.x/deploying/gunicorn/) for more info on Gunicorn):

```
gunicorn -b 0.0.0.0:5000 llama_messenger:app
```

If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules.

Now you can open your Messenger app, select the Facebook page you connected in Messenger's API Settings, enter a message and receive Llama 3's answer shortly, as shown in the demo video at the beginning of this post.

To debug any possible issues, go to your glitch.com app log and copy the URL generated there when a Messenger message is sent, which looks like this:

```
http://<web server public IP>:5000/msgrcvd_page?sender=<user id>&recipient=<page id>&message=<msg>
```

Then open the URL in a browser to verify your web server can receive the message and the two ids, and generate a Llama answer before sending the answer back to Messenger.

================================================
FILE: end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt
================================================
gradio
pypdf
langchain
sentence-transformers
faiss-cpu
text-generation

================================================
FILE: end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md
================================================
# Building a Llama 3 Enabled WhatsApp Chatbot

This step-by-step tutorial shows the complete process of building a Llama 3 enabled WhatsApp chatbot. A demo video of using the iOS WhatsApp to send a question to a test phone number and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).

If you're interested in a Llama 3 enabled Messenger chatbot, see [here](../messenger_chatbot/messenger_llama3.md) for a tutorial.

## Overview

Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots.
The benefits of an intelligent and knowledgeable chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.

The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.

## Getting Started with WhatsApp Business Cloud API

First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:

1. Add the WhatsApp product to your business app;
2. Add a recipient number;
3. Send a test message;
4. Configure a webhook to receive real time HTTP notifications.

For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.

Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page, select your WhatsApp business app, and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run the command on a Terminal to send a test message to your WhatsApp.

Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later.
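For reference, the test-message curl command copied from the dashboard looks roughly like the following (illustrative placeholders shown here; copy the exact command from your own API Setup page):

```
curl -X POST "https://graph.facebook.com/v17.0/<Phone number ID>/messages" \
  -H "Authorization: Bearer <Temporary access token>" \
  -H "Content-Type: application/json" \
  -d '{"messaging_product": "whatsapp", "to": "<a recipient phone number>", "type": "template", "template": {"name": "hello_world", "language": {"code": "en_US"}}}'
```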
## Writing Llama 3 Enabled Web App

The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud, to receive the user query sent by the webhook (covered in the next section), pass the query to Llama 3, and send the Llama 3 answer back to the webhook. For more information on how to use LangChain and Replicate to develop Llama 3 apps, see the recipes - you may also replace Replicate with other Llama deployment options shown in the demo apps.

First, let's create a new conda (or venv, if you prefer) environment and install all the required packages:

```
conda create -n whatsapp-llama python=3.8
conda activate whatsapp-llama
pip install langchain replicate flask requests uvicorn gunicorn
```

Then, create a Python file named llama_chatbot.py with the following code, which defines a class `WhatsAppClient` and a method `send_text_message` to post a message (the answer generated by Llama 3 on a user query) to the WhatsApp Cloud API, which then sends the answer back to the WhatsApp user. Remember to set `WHATSAPP_API_TOKEN` and `WHATSAPP_CLOUD_NUMBER_ID` to the values you saved in the previous section.

```
import langchain
from langchain.llms import Replicate
from flask import Flask
from flask import request
import os
import requests
import json

class WhatsAppClient:
    API_URL = "https://graph.facebook.com/v17.0/"
    WHATSAPP_API_TOKEN = "<Temporary access token from your WhatsApp API Setup>"
    WHATSAPP_CLOUD_NUMBER_ID = "<Phone number ID from your WhatsApp API Setup>"

    def __init__(self):
        self.headers = {
            "Authorization": f"Bearer {self.WHATSAPP_API_TOKEN}",
            "Content-Type": "application/json",
        }
        self.API_URL = self.API_URL + self.WHATSAPP_CLOUD_NUMBER_ID

    def send_text_message(self, message, phone_number):
        payload = {
            "messaging_product": 'whatsapp',
            "to": phone_number,
            "type": "text",
            "text": {
                "preview_url": False,
                "body": message
            }
        }
        response = requests.post(f"{self.API_URL}/messages", json=payload, headers=self.headers)
        return response.status_code
```

Finally, add the code below to llama_chatbot.py, which creates a Llama 3 instance and defines an HTTP method `msgrcvd` to:

1. receive the user message forwarded by the webhook;
2. ask Llama 3 for the answer;
3. call the `WhatsAppClient`'s `send_text_message` with a recipient's phone number.

```
os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
llama3_8b_chat = "meta/meta-llama-3-8b-instruct"

llm = Replicate(
    model=llama3_8b_chat,
    model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
)
client = WhatsAppClient()
app = Flask(__name__)

@app.route("/")
def hello_llama():
    return "<p>Hello Llama 3</p>"

@app.route('/msgrcvd', methods=['POST', 'GET'])
def msgrcvd():
    message = request.args.get('message')
    answer = llm(message)
    client.send_text_message(answer, "<a recipient phone number from your WhatsApp API Setup>")
    return message + "<p/>" + answer
```

The complete script of llama_chatbot.py is [here](llama_chatbot.py).

Now it's time to modify the webhook to complete the whole app.

## Modifying the Webhook

Open your glitch.com webhook URL created earlier, and after this code snippet in app.js:

```
// message received!
console.log(req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]);
```

add the code below - remember to change <web server public IP>, which needs to be publicly visible, to the IP of the server where your Llama 3 enabled web app from the previous section runs:

```
let url = "http://<web server public IP>:5000/msgrcvd?message=" +
  req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]

axios.get(url)
  .then(response => {
    console.log(response.data);
  })
  .catch(error => {
    console.error('Axios error:', error);
  });
```

The code simply forwards the user message received by the WhatsApp Cloud Platform to the Llama 3 enabled web app llama_chatbot.py described in the previous section. Because the functionality of calling the WhatsApp Cloud API to send a message has been implemented in `send_text_message` in Python above, you can comment out the whole following code snippet in the original app.js:

```
// info on WhatsApp text message payload: https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
if (req.body.object) {
  ...
}
```
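To see where that deeply nested path in the webhook code comes from, here is an abridged, hypothetical webhook notification - the user's text lives under `entry[0].changes[0].value.messages[0].text.body`:

```
{
  "object": "whatsapp_business_account",
  "entry": [{
    "changes": [{
      "value": {
        "messages": [{
          "from": "<sender phone number>",
          "type": "text",
          "text": { "body": "who wrote the book godfather" }
        }]
      }
    }]
  }]
}
```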
Note: It's possible, and even recommended, to implement the webhook in Python and call Llama 3 directly inside the webhook, instead of making an HTTP request (as the JavaScript code above does) to a Python app which calls Llama 3 and sends the answer to WhatsApp.

## Running the Chatbot

On your web server, run the following command on a Terminal:

```
gunicorn -b 0.0.0.0:5000 llama_chatbot:app
```

If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules. Write down your web server's public IP, update the URL below with it, then open the URL in a browser to verify you can see the answer sent to your WhatsApp app, as well as shown in the browser:

```
http://<web server public IP>:5000/msgrcvd?message=who%20wrote%20the%20book%20godfather
```

Now you can open your WhatsApp app, enter a question and receive Llama 3's answer shortly, as shown in the demo video at the beginning of this post.

================================================
FILE: end-to-end-use-cases/email_agent/README.md
================================================
# Building A Llama Powered Email Agent

This app shows how to build an email agent powered by Llama 3.1 8B running locally via Ollama. We'll start by building, from scratch, a basic agent with the custom tool calling natively supported in Llama 3.1. The end goal is to cover all components of a production-ready agent app, acting as an assistant to your email, with a great user experience: intuitive, engaging, efficient and reliable. We'll use Gmail as an example, but any email client APIs can be used instead.

Currently implemented features include:

* search for emails and attachments
* get email detail
* reply to a specific email
* forward an email
* get summary of a PDF attachment
* draft and send an email

# Overview

Email is an essential app, and one of the top killer apps, that people use every day. A recent [State of AI Agents](https://www.langchain.com/stateofaiagents) survey by LangChain finds that "The top use cases for agents include performing research and summarization (58%), followed by streamlining tasks for personal productivity or assistance (53.5%)." Andrew Ng wrote a 5-part [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) series in March 2024 predicting that "AI agent workflows will drive massive AI progress this year". Deloitte published in November 2024 a report on [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) stating that "Through their ability to reason, plan, remember and act, AI agents address key limitations of typical language models."
and "Executive leaders should make moves now to prepare for and embrace this next era of intelligent organizational transformation." During Thanksgiving week, a new startup, [/dev/agents](https://sdsa.ai/), building the next-gen OS for AI agents, was in the spotlight. In December, Sequoia posted [here](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) saying 2024 has been the year of agents (an agent is an AI that can complete tasks - it not only tells you how to do something but also does it for you directly), and 2025 will be the year of networks of AI agents.

So what exactly is an AI agent, and how do you start building an agent app?

## What is an agent?

The concept of an agent is not new - in the 2010 third edition of Russell and Norvig's classic book Artificial Intelligence: A Modern Approach ("Modern" by 2010, two years before the deep learning revolution that started the truly modern AI), an agent is defined as "anything that can be viewed as perceiving its environment through sensors and acting upon that environment through actuators". These days, an AI agent basically means an LLM-powered agent - and if we treat natural language understanding as a type of sensor, an LLM agent is still a sub-category of the traditional agent.

Lilian Weng, in her popular June 2023 blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/), defines an LLM-powered agent system as having four key components:

* Planning and Reflection: can break down large tasks into smaller ones; can do self-reflection over past actions and self improve;
* Memory: can use contextual info and recall info over extended periods (for other components to use);
* Tool Use: can understand what external APIs to use for info or action not built into LLMs;
* Action: can actually run the tools.

Andrew Ng describes four [agentic design patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) as:

* Reflection
* Planning
* Tool calling
* Multi-agent collaboration, where "memory" is mentioned: Each agent implements its own workflow, has its own memory (itself a rapidly evolving area in agentic technology: how can an agent remember enough of its past interactions to perform better on upcoming ones?)

In Deloitte's [report](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf), AI agents are reasoning engines that can understand context, plan workflows, connect to external tools and data, and execute actions to achieve a defined goal.

In a November 2024 blog by Letta, [The AI agents stack](https://www.letta.com/blog/ai-agents-stack), an LLM powered agent is described as the combination of tool use, autonomous execution, and memory.

In addition, Harrison Chase defines an agent in the blog [What is an AI agent](https://blog.langchain.dev/what-is-an-agent/) as "a system that uses an LLM to decide the control flow of an application." Yet another simple [summary](https://www.felicis.com/insight/the-agentic-web) by Felicis of what an agent does is that an agent expands LLMs to go from chat to act: an agent can pair LLMs with external data, multi-step reasoning and planning, and act on the user's behalf.
All in all (see [Resources](#resources) for even more info), agents are systems that take a high-level task, use an LLM as a reasoning and planning engine, with the help of contextual info and long-term memory if needed, to decide what actions to take, reflect and improve on the actions, and eventually execute those actions to accomplish the task.

It's time to see an agent app in action and enjoy some coding. Below is a preview of the questions or requests one may ask the agent:

# Example Asks to Email Agent

* do i have any emails with attachments?
* what's the content of the email from LangSmith
* do i have emails with attachment larger than 1mb?
* what kind of attachments for the email with subject papers to read?
* give me a summary of the pdf thinking_llm.pdf
* Draft an email to xxx@gmail.com saying working on it and will keep you updated. thanks for your patience.
* send the draft
* do i have any emails with attachment larger than 10mb?
* how about 5mb
* reply to the email saying thanks for sharing!
* forward the email to xxx@gmail.com
* how many emails do i have from xxx@gmail.com?
* how about from yyy@gmail.com?

Below are screenshots of some example interactions with the agent:

# Setup and Installation

If you feel intimidated by the steps of the following Enable Gmail API section, you may want to check again the example asks (to see what you can ask the agent) and the example log (to see the whole conversation with the agent) - the devil's in the details, and all the glorious description of a powerful trendy agent may not mention the little details one has to deal with to build it.

## Enable Gmail API

1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
2. Create a new project by clicking the dropdown on the top left, then click NEW PROJECT.
3. Enter a Project name, then click CREATE.
4. Under "APIs & Services" > "Enabled APIs & services", search for "gmail" and then Enable the "Gmail API" for your project.
5. Under "APIs & Services" > "OAuth consent screen", click "GO TO NEW EXPERIENCE", then click "GET STARTED", enter App name, select your gmail as User support email, choose External under Audience, enter your gmail again as Contact Information, and finally check "I agree to the Google API Services" under Finish and click Continue - Create.
6. Again under "APIs & Services", go to Credentials. Click on + CREATE CREDENTIALS, then choose OAuth client ID (NOT API key). Select Desktop App (NOT Web application, because you're assumed to want to start your Gmail agent locally first) as the application type and name it. Click Create to generate your client ID and client secret.
7. Click Download JSON and rename the downloaded file as credentials.json. This file will be used in your Python script for authentication.

## Install Ollama with Llama 3.1 8B

Download Ollama (available for macOS, Linux, and Windows) [here](https://ollama.com/).
Then download and test run the Llama 3.1 8B model by running this on a Terminal:

```
ollama run llama3.1
```

This will download a quantized version of Llama 3.1 of the size 4.7GB.

## Install required packages

First, create a Conda or virtual env:

```
conda create -n emailagent python=3.10
conda activate emailagent
```

or

```
python -m venv emailagent
source emailagent/bin/activate  # on Linux, macOS
emailagent\Scripts\activate     # on Windows
```

Then install the required Python libraries:

```
git clone https://github.com/meta-llama/llama-cookbook
cd llama-cookbook/end-to-end-use-cases/email_agent
pip install -r requirements.txt
```

# Run Email Agent

To run the agent, you need to first copy the `credentials.json` file downloaded and renamed above in Step 7 of Enable Gmail API to the email_agent folder, then run:

```
python main.py --email <your_gmail_address>
```

The first time you run it, you'll get a prompt like this:

```
Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=xxxx
Enter the authorization code:
```

You need to copy the URL above and open it in a browser. If you Sign in with Google using the same Gmail you enabled for the Gmail API, then you'll see "You’ve been given access to an app that’s currently being tested. You should only continue if you know the developer that invited you."; otherwise, if you sign in with another Gmail, you'll see "Gmail Agent App has not completed the Google verification process. The app is currently being tested, and can only be accessed by developer-approved testers. If you think you should have access, contact the developer."

In the latter case, go to APIs & Services > OAuth consent screen > Test users, click the + ADD USERS button, and you'll see this message: While publishing status is set to "Testing", only test users are able to access the app. Allowed user cap prior to app verification is 100, and is counted over the entire lifetime of the app.

After clicking Continue, check the Select all checkbox to enable both settings required for running the agent:

```
View your email messages and settings.
Manage drafts and send emails.
```

Finally, copy the Authorization code and paste it into the Terminal, hit Enter and you'll see the agent's initial greeting (which will likely differ because the default temperature value 0.8 is used here - see [Ollama's model file](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values) for detail) such as:

```
Hello! I'm Email Agent, here to help you manage your email account with ease.

What would you like to do today? Do you want me to:

Check and respond to new emails
Compose a new email
Organize your inbox with filters or labels
Delete unwanted emails
Something else?

Let me know how I can assist you!

Your ask:
```
If you cancel here and run the command `python main.py --email <your_gmail_address>` again, you should see the agent greeting right away without the need to enter an authorization code, unless you enter a different Gmail address for the first time - in fact, for each authorized (added as a test user) Gmail address, a file `token_xxxx@gmail.com.pickle` will be created which contains the authorized token.

See the example asks and interaction log above for the types of asks you may enter.

# Implementation Notes

Notes here mainly cover how custom functions are defined, how Gmail API based functions are implemented, and how an Agent class is defined to handle memory for contextual chat and perform pre- and post-processing on the tool calling.

## Available Custom Functions (Tools) Definition

The `functions_prompt.py` defines the following six custom functions, as part of the system prompt (along with examples for each function call spec that Llama should return):

* list_emails_function
* get_email_function
* send_email_function (new, reply, forward)
* get_pdf_summary_function
* create_draft_function
* send_draft_function

Below is an example function call spec in JSON format, for user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb":

```
{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
```

Before LLMs, it'd be a REAL pain to cover ALL the possible user natural language inputs that can be and should be translated into the same semantic representation (if you've done Amazon Alexa Skill or Google Assistant development or any pre-LLM NLU work before, you'd know that the JSON format is the same as the intent-slots representation). Now LLMs such as Llama do the heavy lifting in translating a natural language open input into its semantic representation. But still, if you look at how the `list_emails_function` (which is used to search for emails based on a user query) is defined below, you'd see a lot of work would be needed to convert the user's asks to the filter values the Gmail API can accept:

```
list_emails_function = """
{
    "type": "function",
    "function": {
        "name": "list_emails",
        "description": "Return a list of emails matching an optionally specified query.",
        "parameters": {
            "type": "dict",
            "properties": [
                {
                    "maxResults": {
                        "type": "integer",
                        "description": "The default maximum number of emails to return is 100; the maximum allowed value for this field is 500."
                    }
                },
                {
                    "query": {
                        "type": "string",
                        "description": "One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than; 3) Status Filters: read, unread, starred, important; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) Logical operators (or, and, not)."
                    }
                }
            ],
            "required": []
        }
    }
}
"""
```
In fact, even though many hours of pre-processing work have been done to cover some test examples, not all of the examples in `functions_prompt.py` have been covered and tested.

## Actual Function Call Implementation

For each defined custom function call, its implementation using the Gmail API is present in `email_agent.py`. For example, `list_emails` is defined as follows:

```
def list_emails(query='', max_results=100):
    emails = []
    next_page_token = None
    while True:
        response = service.users().messages().list(
            userId=user_id,
            maxResults=max_results,
            pageToken=next_page_token,
            q=query
        ).execute()
        if 'messages' in response:
            for msg in response['messages']:
                sender, subject, received_time = get_email_info(msg['id'])
                emails.append(
                    {
                        "message_id": msg['id'],
                        "sender": sender,
                        "subject": subject,
                        "received_time": received_time
                    }
                )
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    return emails
```

The function will be called by our agent after a user ask such as "do i have emails with attachments larger than 5mb" gets Llama's response below:

```
{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
```

## The Agent class

Also implemented in `email_agent.py`, the Agent class uses 3 instance members to allow for contextually aware asks to the agent, giving it short-term memory:

1. `messages`: this list holds all the previous user asks and the function call results based on Llama's response to the user asks, making Llama able to answer follow-up questions such as "how about 5mb" (after the initial ask "attachments larger than 10mb") or "how about from yyy@gmail.com" (after the ask "any emails from xxx@gmail.com").
2. `emails`: this list holds the emails that match the user query, so follow-up questions such as "what kind of attachments for the email with subject xxx" can be answered.
3. `draft_id`: this is used to handle the ask "send the draft" after an initial ask such as "draft an email to xxx".

The `__call__` method of `Agent` includes the call to Llama with the `messages` and parses the Llama response if it's a tool calling spec in JSON; if Llama doesn't return a tool calling spec, it means it didn't find a custom tool for the user ask, so the Llama response is returned directly:

```
    try:
      res = json.loads(result.split("<|python_tag|>")[-1])
      function_name = res['name']
      parameters = res['parameters']
      return {"function_name": function_name,
              "parameters": parameters}
    except:
      return result
```

Also implemented there are both the pre-processing logic, mainly to convert some parameter values from Llama's responses to what the Gmail APIs can accept to make the API calls happy, and the post-processing logic to convert function call results to user-friendly natural language:

```
function_name = result["function_name"]
func = globals()[function_name]
parameters = result["parameters"]
... <pre-processing>
result = func(**parameters)
... <post-processing>
```

When you try out the app, you'll likely find that further pre- and post-processing is still needed to make it production ready.
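As an illustration of what the post-processing step might look like, here is a hypothetical formatter (not the repo's actual implementation) that turns a raw `list_emails` result into a user-friendly reply:

```
def format_list_emails_result(emails):
    # Hypothetical post-processing sketch: convert the tool call result
    # into natural language for the user.
    if not emails:
        return "I didn't find any emails matching your query."
    lines = [f"I found {len(emails)} matching email(s):"]
    for e in emails:
        lines.append(f"- '{e['subject']}' from {e['sender']} at {e['received_time']}")
    return "\n".join(lines)
```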
In a great video on [Vertical LLM Agents](https://www.youtube.com/watch?v=eBVi_sLaYsc), Jake Heller said "after passes frankly even like 100 tests the odds that it will do on any random distribution of user inputs of the next 100,000, 100% accurately is very high" and "by the time you've dealt with like all the edge cases... there might be dozens of things you build into your application to actually make it work well and then you get to the prompting piece and writing out tests and very specific prompts and the strategy for how you break down a big problem into step by step by step thinking and how you feed in the information how you format that information the right way". That's what all the business logic is about. We'll cover decomposing a complicated ask and multi-step reasoning in a future version of the app, and continue to explore the best possible way to streamline the pre- and post-processing.

## Debugging output

When running the app, the detailed Llama returns, pre-processed tool call specs and the actual tool calling results are shown inside the `-------------------------` block, e.g.:

```
-------------------------
Calling Llama...

Llama returned: {'function_name': 'list_emails', 'parameters': {'query': 'subject:papers to read has:attachment'}}.

Calling tool to access Gmail API: list_emails, {'query': 'subject:papers to read has:attachment'}...

Tool calling returned: [{'message_id': '1936ef72ad3f30e8', 'sender': 'xxx@gmail.com', 'subject': 'Fwd: papers to read', 'received_time': '2024-11-27 10:51:51 PST'}, {'message_id': '1936b819706a4923', 'sender': 'Jeff Tang <xxx@gmail.com>', 'subject': 'papers to read', 'received_time': '2024-11-26 18:44:19 PST'}]
-------------------------
```

# TODOs

1. Port the app to using the [Llama Stack](https://github.com/meta-llama/llama-stack) Agents API.
2. Improve the search, reply, forward, create email draft, and query about types of attachments.
3. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails.
4. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete.
5. Implement the async behavior of the agent - schedule an email to be sent later.
6. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods.
7. Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta).
8. Implement reflection - on the tool calling spec and results.
9. Introduce multiple-agent collaboration.
10. Implement the agent observability.
11. Compare different agent frameworks using the app as the case study.
12. Add and implement a test plan and productionize the app.

# Resources

1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/)
2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns)
3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents)
4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf)
5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack)
6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks)
7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/)
8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks)
9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web)
10. A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though.
11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 the year of networks of AI agents.

================================================
FILE: end-to-end-use-cases/email_agent/requirements.txt
================================================
google-auth==2.27.0
google-auth-oauthlib==0.4.6
google-auth-httplib2==0.1.0
google-api-python-client==2.34.0
pytz
beautifulsoup4
ollama
pypdf

================================================
FILE: end-to-end-use-cases/github_triage/README.md
================================================
# Automatic Issues Triaging with Llama

This tool utilizes an off-the-shelf Llama model to analyze, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.

## Features

The tool performs the following tasks:

* Fetches issue threads from a specified repository
* Analyzes issue discussions and generates annotations such as category, severity, component affected, etc.
* Categorizes all issues by theme
* Synthesizes key challenges faced by users, along with probable causes and remediations
* Generates a high-level executive summary providing insights on diagnosing and improving the developer experience

For a step-by-step look, check out the [walkthrough notebook](walkthrough.ipynb).

## Getting Started

### Installation

```bash
pip install -r requirements.txt
```

### Setup

1. **API Keys and Model Service**: Set your GitHub token for API calls. Some privileged information may not be available if you don't have push-access to the target repository.
2. **Model Configuration**: Set the appropriate values in the `model` section of [config.yaml](config.yaml) for using Llama via VLLM or Groq.
3. **JSON Schemas**: Edit the output JSON schemas in [config.yaml](config.yaml) to ensure consistency in outputs. VLLM supports JSON-decoding via the `guided_json` generation argument, while Groq requires passing the schema in the system prompt.

### Running the Tool

```bash
python triage.py --repo_name='meta-llama/llama-cookbook' --start_date='2024-08-14' --end_date='2024-08-27'
```

### Output

The tool generates:

* CSV files with `annotations`, `challenges`, and `overview` data, which can be persisted in SQL tables for downstream analyses and reporting.
* Graphical matplotlib plots of repository traffic, maintenance activity, and issue attributes.
* A PDF report for easier reading and sharing.

## Config

The tool's configuration is stored in [config.yaml](config.yaml). The following sections can be edited (a skeletal sketch follows this list):

* **Github Token**: Use a token that has push-access on the target repo.
* **model**: Specify the model service (`vllm` or `groq`) and set the endpoints and API keys as applicable.
* **prompts**: For each of the 3 tasks Llama does in this tool, we specify a prompt and an output JSON schema:
  * `parse_issue`: Parsing and generating annotations for the issues
  * `assign_category`: Assigns each issue to a category specified in an enum in the corresponding JSON schema
  * `get_overview`: Generates a high-level executive summary and analysis of all the parsed and generated data
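Putting the sections above together, a skeletal config.yaml might look like this (illustrative key names and placeholder values; see the actual [config.yaml](config.yaml) in the repo for the real schema):

```yaml
github_token: <token with push-access to the target repo>
model:
  service: vllm            # or groq
  endpoint: <inference endpoint URL>
  api_key: <API key, if applicable>
prompts:
  parse_issue:
    prompt: <instructions for parsing and annotating issues>
    json_schema: <output schema for annotations>
  assign_category:
    prompt: <instructions for assigning a category>
    json_schema: <schema with the category enum>
  get_overview:
    prompt: <instructions for the executive summary>
    json_schema: <output schema for the overview>
```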
### Running the Tool

```bash
python triage.py --repo_name='meta-llama/llama-cookbook' --start_date='2024-08-14' --end_date='2024-08-27'
```

### Output

The tool generates:

* CSV files with `annotations`, `challenges`, and `overview` data, which can be persisted in SQL tables for downstream analyses and reporting.
* Graphical matplotlib plots of repository traffic, maintenance activity, and issue attributes.
* A PDF report for easier reading and sharing.

## Config

The tool's configuration is stored in [config.yaml](config.yaml). The following sections can be edited:

* **Github Token**: Use a token that has push-access on the target repo.
* **model**: Specify the model service (`vllm` or `groq`) and set the endpoints and API keys as applicable.
* **prompts**: For each of the 3 tasks Llama performs in this tool, we specify a prompt and an output JSON schema:
  * `parse_issue`: Parses and generates annotations for the issues
  * `assign_category`: Assigns each issue to a category specified in an enum in the corresponding JSON schema
  * `get_overview`: Generates a high-level executive summary and analysis of all the parsed and generated data

## Troubleshooting

* If you encounter issues with API calls, ensure that your GitHub token is set correctly and that you have the necessary permissions.
* If you encounter issues with the model service, check the configuration values in [config.yaml](config.yaml).

================================================
FILE: end-to-end-use-cases/github_triage/requirements.txt
================================================
kaleido
plotly
openai
groq
fpdf

================================================
FILE: end-to-end-use-cases/long_context/H2O/README.md
================================================
## Run Llama with H2O for long context inference

### Overview:

Heavy-Hitter Oracle (H2O) is an efficient inference framework for LLMs. During the generative inference of transformers, the size of the KV cache grows linearly with the sequence length (prompt length + generation length), and the KV cache is usually significantly larger than the model parameters, which constrains the inference throughput. H2O identifies the critical KV pairs and evicts the unnecessary ones, maintaining a small cache size and thus improving throughput.

Besides, LLMs usually generalize poorly to sequences longer than those seen during training. H2O handles this issue by maintaining only the heavy-hitter tokens and the most recent tokens. Combined with the positional rolling strategy (reassigning each KV's position to its position in the KV cache instead of in the original sequence), H2O can process sequences much longer than the pretrained context window. Unlike other approaches, such as [Positional Interpolation](https://arxiv.org/abs/2306.15595), H2O is a KV cache policy and does not involve any training for long context processing.

The current implementation supports Llama-1/2/3, from 7B to 70B. Since H2O only maintains the most important KV pairs, it might miss some important information in the middle of the content for some knowledge-intensive tasks.

For more details, please refer to the paper: **https://arxiv.org/pdf/2306.14048**

**Note: this implementation is tested with transformers == 4.39.0**
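Conceptually, the eviction policy can be pictured with the following illustrative toy sketch (not the repo's implementation): keep the most recent tokens plus the tokens with the largest accumulated attention scores.

```python
import torch

def h2o_keep_indices(attn_scores: torch.Tensor, window: int, num_recent: int) -> torch.Tensor:
    """Toy H2O-style selection. `attn_scores[i]` is the accumulated attention
    mass token i has received; keep the `num_recent` most recent tokens plus
    enough heavy hitters to fill a cache of `window` slots."""
    seq_len = attn_scores.shape[0]
    if seq_len <= window:
        return torch.arange(seq_len)
    recent = torch.arange(seq_len - num_recent, seq_len)
    # Pick heavy hitters among the non-recent tokens.
    candidates = attn_scores[: seq_len - num_recent]
    heavy = torch.topk(candidates, window - num_recent).indices
    return torch.cat([heavy.sort().values, recent])

# Example: 12 cached tokens, keep a window of 6 (3 heavy hitters + 3 recent).
scores = torch.tensor([5., 0.1, 3., 0.2, 0.1, 4., 0.3, 0.2, 0.1, 0.2, 0.3, 0.1])
print(h2o_keep_indices(scores, window=6, num_recent=3))  # tensor([0, 2, 5, 9, 10, 11])
```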
### Evaluation on Summarization Tasks

The following example runs inference of Llama-2-7b and Meta-Llama-3-8B on the XSUM summarization task. We use `--enable_h2o_generation` to enable the H2O algorithm, which keeps only the heavy-hitter and the local KV pairs. Use `--num_window_length` to set the KV cache size; the numbers of local and heavy-hitter KV pairs each equal half of `--num_window_length` (optionally, the number of heavy hitters can also be set directly with `--num_heavy_hitter_tokens`). Also, use `--enable_position_rolling` to enable position rolling in the KV cache, which assigns each KV the position it has in the KV cache instead of its position in the original sequence. Enabling positional rolling is important when the sequence length exceeds the pretrained context window, e.g., 8K in Llama-3.

```
python run_summarization.py \
--input-path data/summarization/xsum.jsonl \
--output-path summarization_output/xsum_h2o.jsonl \
--model-name meta-llama/Meta-Llama-3-8B \
--enable_h2o_generation
```

##### **Results**

Expected results on XSUM (Rouge-2 score, the higher the better) from the above script on Llama-2/3 models. The sequence length of the inputs is ~2k. Here we constrain the size of the KV cache, allowing only n KVs to be written/read after the prefilling stage, where n ranges from **64** to **full** (in which we maintain all the KV pairs). With 128 KVs, performance roughly matches the full baseline (~2k KVs), while degradation is observed with 64 KVs. Also, maintaining a smaller KV cache reduces the I/O cost of KVs, so we can achieve better throughput.

| KV Cache Size | 64     | 128    | 256    | 512    | 1024   | Full   |
| ------------- | ------ | ------ | ------ | ------ | ------ | ------ |
| Llama-2-7B    | 0.0439 | 0.1127 | 0.1148 | 0.1182 | 0.1170 | 0.1164 |
| Llama-2-13B   | 0.1180 | 0.1217 | 0.1243 | 0.1291 | 0.1302 | 0.1332 |
| Llama-3-8B    | 0.1107 | 0.1189 | 0.1200 | 0.1347 | 0.1290 | 0.1311 |

### One Demo on Streaming to "Infinite" Context Length

The following example demonstrates generation at "infinite" sequence length. We use MT-Bench data and generate the context sample-by-sample. The KV cache keeps the KV pairs from the previous samples while maintaining a fixed size.

```
# run with full cache
# expected results: 1) normal generation at the early stage; 2) performance collapse and generation slowdown at the middle stage, because the sequence length exceeds the context window and the I/O cost of the KV cache constrains the throughput; 3) OOM error, then stop.
bash src/streaming.sh full

# run with h2o
# expected results: normal generation at all stages.
# adjust the number of heavy-hitter tokens with --num_heavy_hitter_tokens and the size of the KV cache with --num_window_length in src/streaming.sh
bash src/streaming.sh h2o
```

================================================
FILE: end-to-end-use-cases/long_context/H2O/requirements.txt
================================================
transformers
rouge
xopen
needlehaystack

================================================
FILE: end-to-end-use-cases/Multi-Modal-RAG/README.md
================================================
# End to End Tutorial on using Llama models for Multi-Modal RAG

## Recipe Overview: Multi-Modal RAG using `Llama-3.2-11B` model:

This is a complete workshop on labeling images using the new Llama 3.2-Vision models and performing RAG using the image-caption capabilities of the model.

- **Data Labeling and Preparation:** We start by downloading 5000 images of clothing items and labeling them using the `Llama-3.2-11B-Vision-Instruct` model
- **Cleaning Labels:** With the labels from the notebook above, we then clean the dataset and prepare it for RAG
- **Building Vector DB and RAG Pipeline:** With the final clean dataset, we use the descriptions and the 11B model to generate recommendations

## Requirements:

Before we start:

1. Please grab your HF CLI Token from [here](https://huggingface.co/settings/tokens)
2. Git clone [this dataset](https://huggingface.co/datasets/Sanyam/MM-Demo) inside the Multi-Modal-RAG folder: `git clone https://huggingface.co/datasets/Sanyam/MM-Demo` (Remember to thank the original author by upvoting the [Kaggle Dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full))
3. Make sure you grab a together.ai token [here](https://www.together.ai)

## Detailed Outline for running:

Order of running files: the notebooks establish the method of approaching the problem; once the method is established, we use the scripts to run it end to end.

- Notebook 1: `Part_1_Data_Preparation.ipynb`
- Script: `label_script.py`
- Notebook 2: `Part_2_Cleaning_Data_and_DB.ipynb`
- Notebook 3: `Part_3_RAG_Setup_and_Validation.ipynb`
- Script: `final_demo.py`

Here's the detailed outline:

### Step 1: Data Prep and Synthetic Labeling:

In this step, we start with an unlabeled dataset and use the image-captioning capability of the model to write a description of each image and categorize it.

[Notebook for Step 1](./notebooks/Part_1_Data_Preparation.ipynb) and [Script for Step 1](./scripts/label_script.py)

To run the script (remember to set N):

```
python scripts/label_script.py --hf_token "your_huggingface_token_here" \
    --input_path "../MM-Demo/images_compressed" \
    --output_path "../MM-Demo/output/" \
    --num_gpus N
```

The dataset consists of 5000 images with some meta-data.

The first half is preparing the dataset for labeling:
- Clean/remove corrupt images
- Some exploratory analysis to understand the existing distribution
- Merging categories of clothes to reduce complexity
- Balancing the dataset by randomly sampling images to get an equal distribution for retrieval

The second half consists of labeling the dataset. The Llama 3.2 11B model can only process one image at a time:
- We load a few images and test captioning
- We run this pipeline on random images and iterate on the prompt until we feel the model is giving good outputs
- Finally, we create a script to label all 5000 images on multiple GPUs

After running the script on the entire dataset, we have more data cleaning to perform.
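For reference, single-image captioning with the 11B vision model looks roughly like the sketch below. The prompt text and file name are illustrative; see [label_script.py](./scripts/label_script.py) for the actual multi-GPU labeling implementation:

```python
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)
processor = AutoProcessor.from_pretrained(model_id)

image = Image.open("example_clothing_item.jpg")  # illustrative file name
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Describe this clothing item and assign it a category."},
]}]

# Build the chat prompt, bind the image, and generate a caption.
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(processor.decode(output[0], skip_special_tokens=True))
```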
### Step 2: Cleaning up Synthetic Labels and preparing the dataset:

[Notebook for Step 2](./notebooks/Part_2_Cleaning_Data_and_DB.ipynb)

We notice that even after some fun prompt engineering, the model still hallucinates: there are some issues with the JSON formatting, and we notice that it hallucinates the label categories. Here is how we address this:

- Re-balance the dataset by mapping to the correct categories. This is useful to make sure we have an equal distribution in our dataset for retrieval
- Fix the descriptions so that we can create a CSV

Now we are ready to try our vector-db pipeline:

### Step 3: Notebook 3: MM-RAG using lance-db to validate the idea

[Notebook for Step 3](./notebooks/Part_3_RAG_Setup_and_Validation.ipynb) and [Final Demo Script](./scripts/final_demo.py)

With the cleaned descriptions and dataset, we can now store these in a vector db. Here are the steps:

- We create embeddings using the text descriptions of our clothes
- Use the 11B model to describe the uploaded image
- Ask the model to suggest complementary items to the upload
- Try to find similar or complementary images based on the upload

We try the approach with different retrieval methods.

Finally, we bring this all together in a Gradio app. For running the script:

```
python scripts/final_demo.py \
    --images_folder "../MM-Demo/compressed_images" \
    --csv_path "../MM-Demo/final_balanced_sample_dataset.csv" \
    --table_path "~/.lancedb" \
    --api_key "your_together_api_key" \
    --default_model "BAAI/bge-large-en-v1.5" \
    --use_existing_table
```
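The core retrieval step behind the demo can be sketched as follows. The table name and column names here are assumptions for illustration; the real schema is set up in the notebook and script above:

```python
import lancedb
from sentence_transformers import SentenceTransformer

# Assumed setup: a lancedb table of clothing descriptions with an embedding
# column named "vector" and a text column named "description".
db = lancedb.connect("~/.lancedb")
table = db.open_table("clothes")

encoder = SentenceTransformer("BAAI/bge-large-en-v1.5")
query = "a light jacket to wear over a white summer dress"
query_vec = encoder.encode(query)

# Nearest-neighbor search over the description embeddings.
results = table.search(query_vec).limit(5).to_pandas()
print(results["description"].tolist())
```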
Note: We can further improve the description prompt. You will notice that sometimes the description starts with the title of the clothing item, which causes retrieval of "similar" clothes instead of "complementary" items.

The demo flow is:
- Upload an image
- The 11B model describes the image
- We retrieve complementary clothes to wear based on the description
- You can keep the loop going by chatting with the model

## Resources used: Credit and Thanks

List of models and resources used in the showcase:

Firstly, thanks to the author for providing the dataset on which we base our exercise [here](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full)

- [Llama-3.2-11B-Vision-Instruct Model](https://www.llama.com/docs/how-to-guides/vision-capabilities/)
- [Lance-db for vector database](https://lancedb.com)
- [This Kaggle dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full)
- [HF Dataset](https://huggingface.co/datasets/Sanyam/MM-Demo) - since the output of the model can be non-deterministic on every run, we use the uploaded dataset to give a universal experience
- [Together API for demo](https://www.together.ai)

================================================
FILE: end-to-end-use-cases/multilingual/README.md
================================================
# Extending Llama to a new language

Authored by: Sarvam team

In this recipe, we will see how to add a new language to the Llama family of models. The steps are quite general and can easily be adapted to other models as well. Using this recipe, you should be able to replicate the findings of [OpenHathi](https://huggingface.co/sarvamai/OpenHathi-7B-Hi-v0.1-Base). Please read more about OpenHathi [here](https://x.com/SarvamAI/status/1734645628288831557).

## Data

The original OpenHathi model uses a combination of [Sangraha](https://huggingface.co/datasets/ai4bharat/sangraha) and Wikipedia as its primary data sources. If the reader is interested in using these sources, they would also have to preprocess the data: clean, filter, and deduplicate. See [Setu](https://github.com/AI4Bharat/setu) for an easy way to do this at scale.

In this tutorial, we will use the [Varta](https://huggingface.co/datasets/rahular/varta) dataset, which contains 40M+ news articles taken from [DailyHunt](https://m.dailyhunt.in/). Since this data is already high-quality, we can skip the pre-processing step mentioned above. We will use the Hindi subset here, but you can add any other language present in the dataset by just passing the right language code (advanced users can also tweak the code to add multiple languages at once).

## Tokenizer

Our first step towards augmenting a new language to an LLM is creating a better tokenizer. We define 'better' in terms of fertility score, i.e., the number of in-language tokens present in the tokenizer. Note that we should add new tokens without disturbing the original vocabulary, so creating a better tokenizer usually involves 2 steps: (i) building a new, in-language-only tokenizer, and (ii) merging this new tokenizer with the original.

### Building the in-language tokenizer

For this, we will first download and prepare the data for training the tokenizer:

```
python prepare_data.py --split=validation --lang=hi --docs_to_sample=10000 --save_path=./data
```

Here we sample 10,000 Hindi documents from the validation split (we should ideally sample from the training split, but this is much faster) and save them as a text file inside `./data`.
Next, we use this text to train a Hindi-only [sentencepiece](https://github.com/google/sentencepiece) tokenizer with a vocabulary size of 16,000.

```
python train_tokenizer.py --data_file=./data/hi.txt --save_path=./hi_tokenizer --vocab_size=16000
```

This creates a new sentencepiece Hindi tokenizer and saves it in `./hi_tokenizer`.

### Merging the tokenizers

This process can again be divided into 2 steps (a sketch of both steps follows the tokenizer test below):

- add new tokens to the original Llama2 tokenizer without disturbing its original vocabulary in any way
- expand the input and output embedding matrices of Llama2 to be equal to the new vocabulary size

We can do the first step by (i) downloading Llama2's `tokenizer.model` file, (ii) loading our Hindi `tokenizer.model` file, (iii) appending the Hindi tokens to Llama2 tokenizer's vocabulary if they are not already present, and (iv) saving the extended tokenizer for future use. All this can be done by running

```
python extend_tokenizer.py --new_tokenizer_path=./hi_tokenizer --extended_tokenizer_save_path=./extended_tokenizer
```

Now you have a new Llama2 tokenizer that works the same way on English text but can efficiently tokenize Hindi words as well. You can also test to see if it works as intended:

```
>>> from transformers import LlamaTokenizer
>>> llama_tokenizer = LlamaTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat-hf')
>>> our_tokenizer = LlamaTokenizer.from_pretrained('./extended_tokenizer')
>>> for i in range(len(llama_tokenizer)):
...     assert llama_tokenizer.convert_ids_to_tokens(i) == our_tokenizer.convert_ids_to_tokens(i), f"Token mismatch at index {i}."
...
>>> text = "मैं एक अच्छा हाथी हूँ"
>>> llama_tokenizer.tokenize(text)
['▁', 'म', 'ै', 'ं', '▁', '<0xE0>', '<0xA4>', '<0x8F>', 'क', '▁', 'अ', 'च', '्', '<0xE0>', '<0xA4>', '<0x9B>', 'ा', '▁', 'ह', 'ा', 'थ', 'ी', '▁', 'ह', 'ू', '<0xE0>', '<0xA4>', '<0x81>']
>>> our_tokenizer.tokenize(text)
['▁मैं', '▁एक', '▁अच', '्', 'छा', '▁हाथी', '▁हूँ']
```
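Under the hood, this kind of merge can be done with the sentencepiece protobuf API, followed by resizing the model's embeddings with `transformers`. The following is a hedged sketch rather than the exact `extend_tokenizer.py` code; file paths are illustrative:

```python
from sentencepiece import sentencepiece_model_pb2 as sp_pb2

# Load both sentencepiece models (paths are illustrative).
base = sp_pb2.ModelProto()
with open("llama2_tokenizer.model", "rb") as f:
    base.ParseFromString(f.read())
hindi = sp_pb2.ModelProto()
with open("hi_tokenizer/tokenizer.model", "rb") as f:
    hindi.ParseFromString(f.read())

# Append Hindi pieces that the Llama2 vocabulary doesn't already contain.
existing = {p.piece for p in base.pieces}
for p in hindi.pieces:
    if p.piece not in existing:
        base.pieces.append(sp_pb2.ModelProto.SentencePiece(piece=p.piece, score=0.0))

with open("extended_tokenizer/tokenizer.model", "wb") as f:
    f.write(base.SerializeToString())

# Second step, before training: grow Llama2's input/output embedding
# matrices to match the extended vocabulary, e.g.
# model.resize_token_embeddings(len(extended_tokenizer))
```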
## Continual pre-training

OpenHathi uses a two-stage pre-training process:

- Phase 1: learn to translate paragraphs of text (use translated text as context and generate the original text, ~15B tokens)
- Phase 2: bilingual next-token prediction (train on text where the language changes after every sentence, ~15B tokens)

Note: OpenHathi's final data mixture also contains monolingual data and romanized transliterations.

We can easily create data for both phases using any translation model. OpenHathi uses [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2). We provide sample code for both phases below.

### Phase 1

With the assumption that we don't have source-native data, let us first get some English data to translate.

```
from datasets import load_dataset

ds = load_dataset("rahular/varta", split="train", streaming=True)

english_paragraphs = []
for d in ds:
    if d["langCode"] != "en": continue
    english_paragraphs.append(" ".join(d["text"].split("\n")))
```

Now, our goal is to create data in the format `{translated_paragraph}\n\n{english_paragraph}`. We can use the `translate_paragraph` function ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L150)) from the IndicTrans2 codebase to do this easily.

```
quantization = ""
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
ip = IndicProcessor(inference=True)

phase1_data = []
for para in english_paragraphs:
    trans_para = translate_paragraph(para, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
    phase1_data.append({"text": f"{trans_para}\n\n{para}"})

# if you want to save it for future use, you can do so easily with HF datasets
from datasets import Dataset
phase1_ds = Dataset.from_list(phase1_data)
phase1_ds.save_to_disk("data/phase1")
```

### Phase 2

This is almost the same as phase 1, except that we have to replace the original sentences in an alternating manner to get the data in the required format. We can use the `split_sentences` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L60)) and `batch_translate` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L109)) functions to do this.

```
quantization = ""
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
ip = IndicProcessor(inference=True)

phase2_data = []
for para in english_paragraphs:
    en_sents = split_sentences(para, "eng_Latn")
    trans_sents = batch_translate(en_sents, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
    final_para = []
    for idx, (en_sent, trans_sent) in enumerate(zip(en_sents, trans_sents)):
        sent_to_append = en_sent if idx % 2 == 0 else trans_sent
        final_para.append(sent_to_append)
    phase2_data.append({"text": " ".join(final_para)})

# if you want to save it for future use, you can do so easily with HF datasets
from datasets import Dataset
phase2_ds = Dataset.from_list(phase2_data)
phase2_ds.save_to_disk("data/phase2")
```

### Train

Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`.

OpenHathi was trained on 64 A100 80GB GPUs. Here are the hyperparameters used and other training details:

- maximum learning rate: 2e-4
- minimum learning rate: 2e-6
- optimizer: AdamW (weight decay = 0.1)
- beta1: 0.9
- beta2: 0.95
- lora rank: 128
- lora alpha: 64
- lora trainable: q_proj, v_proj, k_proj, o_proj, gate_proj, down_proj, up_proj
- lora dropout: 0.05
- block size: 4096
- global batch size: 4M tokens
- input and output embeddings are trainable
- lr schedule: cosine decay with warmup (warmup ratio = 0.1, number of cycles = 3)
- deepspeed stage 2
- dtype: bfloat16

The resulting (partial) loss plots from the OpenHathi training are shown below: Phase 1 train loss, Phase 1 eval loss, Phase 2 train loss, Phase 2 eval loss.

================================================
FILE: end-to-end-use-cases/NotebookLlama/README.md
================================================
## NotebookLlama: An Open Source version of NotebookLM

[Listen to audio from the example here](./resources/_podcast.mp3)

This is a guided series of tutorials/notebooks that can be taken as a reference or course to build a PDF-to-Podcast workflow.
You will also learn from our experiments with Text-to-Speech models.

It assumes zero knowledge of LLMs, prompting, and audio models; everything is covered in the respective notebooks.

### Outline:

Here is the step-by-step thought (pun intended) for the task:

- Step 1: Pre-process PDF: Use `Llama-3.2-1B-Instruct` to pre-process the PDF and save it in a `.txt` file.
- Step 2: Transcript Writer: Use the `Llama-3.1-70B-Instruct` model to write a podcast transcript from the text.
- Step 3: Dramatic Re-Writer: Use the `Llama-3.1-8B-Instruct` model to make the transcript more dramatic.
- Step 4: Text-To-Speech Workflow: Use `parler-tts/parler-tts-mini-v1` and `bark/suno` to generate a conversational podcast.

Note 1: In Step 1, we prompt the 1B model not to modify or summarize the text, but strictly to clean up extra characters or garbage characters that might get picked up due to encoding from the PDF. Please see the prompt in Notebook 1 for more details.

Note 2: For Step 2, you can also use the `Llama-3.1-8B-Instruct` model; we recommend experimenting to see if you notice any differences. The 70B model was used here because it gave slightly more creative podcast transcripts for the tested examples.

Note 3: For Step 4, please try to extend the approach with other models. These models were chosen based on a sample prompt and worked best; newer models might sound better. Please see [Notes](./TTS_Notes.md) for some of the sample tests.

### Detailed steps on running the notebook:

Requirements: a GPU server or an API provider for using the 70B, 8B and 1B Llama models.

For running the 70B model, you will need a GPU with around 140GB of aggregated memory to infer in bfloat-16 precision.

Note: For our GPU-poor friends, you can also use the 8B and lower models for the entire pipeline. There is no strong recommendation; the pipeline below is what worked best in the first few tests. You should try and see what works best for you!

- Before getting started, please make sure to log in using the `huggingface cli` and then launch your jupyter notebook server to make sure you are able to download the Llama models. You'll need your Hugging Face access token, which you can get at your Settings page [here](https://huggingface.co/settings/tokens). Then run `huggingface-cli login` and copy and paste your Hugging Face access token to complete the login, so the scripts can download Hugging Face models if needed.

- First, please install the requirements by running the following inside the folder:

```
git clone https://github.com/meta-llama/llama-recipes
cd llama-recipes/end-to-end-use-cases/NotebookLlama/
pip install -r requirements.txt
```

- Notebook 1: This notebook processes the PDF into a `.txt` file using the new feather-light model. Update the first cell with a link to the PDF you would like to use; it can be any link, but remember to set it in the first cell of the notebook. Please try changing the prompts for the `Llama-3.2-1B-Instruct` model and see if you can improve the results.

- Notebook 2: This notebook will take in the processed output from Notebook 1 and creatively convert it into a podcast transcript using the `Llama-3.1-70B-Instruct` model. If you are GPU rich, please feel free to test with the 405B model! Please try experimenting with the system prompts for the model and see if you can improve the results, and try the 8B model as well here to see if there is a huge difference!

- Notebook 3: This notebook takes the transcript from earlier and prompts `Llama-3.1-8B-Instruct` to add more dramatization and interruptions in the conversations. There is also a key factor here: we return a tuple of conversation turns, which makes our lives easier later. Yes, studying Data Structures 101 was actually useful for once! For our TTS logic, we use two different models that behave differently with certain prompts, so we prompt the model to add specifics for each speaker accordingly. Please again try changing the system prompt and see if you can improve the results. We encourage testing the feather-light 3B and 1B models as well at this stage.

- Notebook 4: Finally, we take the results from the last notebook and convert them into a podcast. We use the `parler-tts/parler-tts-mini-v1` and `bark/suno` models for a conversation. The speakers and the prompt for the parler model were decided based on experimentation and suggestions from the model authors. Please try experimenting; you can find more details in the resources section.

#### Note: Right now there is one issue: Parler needs transformers 4.43.3 or earlier, while steps 1 to 3 of the pipeline need the latest version, so we just switch versions in the last notebook.
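To see why the (speaker, text) tuple structure from Notebook 3 helps, here is a hedged sketch of the routing loop Notebook 4 effectively performs. The `synthesize_*` helpers are illustrative stand-ins for the Parler and Bark calls shown in [TTS_Notes](./TTS_Notes.md), not the notebook's actual code:

```python
import numpy as np

SAMPLE_RATE = 24000

def synthesize_parler(text: str) -> np.ndarray:
    # Stand-in for the parler-tts call used for Speaker 1.
    return np.zeros(SAMPLE_RATE, dtype=np.float32)  # 1s of silence as a placeholder

def synthesize_bark(text: str) -> np.ndarray:
    # Stand-in for the suno/bark call used for Speaker 2.
    return np.zeros(SAMPLE_RATE, dtype=np.float32)

# Notebook 3 returns the transcript as (speaker, text) tuples, so each turn
# can be routed to the right TTS model and the audio simply concatenated.
transcript = [
    ("Speaker 1", "Welcome to our podcast on knowledge distillation!"),
    ("Speaker 2", "Umm, so... what exactly gets distilled? [laughs]"),
]

segments = []
for speaker, text in transcript:
    synth = synthesize_parler if speaker == "Speaker 1" else synthesize_bark
    segments.append(synth(text))

podcast = np.concatenate(segments)
```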
### Next-Improvements/Further ideas:

- Speech-model experimentation: the TTS model is the limiting factor in how natural the podcast will sound. This can probably be improved with a better pipeline and with the help of someone more knowledgeable - PRs are welcome! :)
- LLM vs LLM debate: another approach to writing the podcast would be to have two agents debate the topic of interest and write the podcast outline. Right now we use a single LLM (70B) to write the podcast outline
- Testing 405B for writing the transcripts
- Better prompting
- Support for ingesting a website, audio file, YouTube links, and more. Again, we welcome community PRs!

### Resources for further learning:

- https://betterprogramming.pub/text-to-audio-generation-with-bark-clearly-explained-4ee300a3713a
- https://colab.research.google.com/drive/1dWWkZzvu7L9Bunq9zvD-W02RFUXoW-Pd?usp=sharing
- https://colab.research.google.com/drive/1eJfA2XUa-mXwdMy7DoYKVYHI1iTd9Vkt?usp=sharing#scrollTo=NyYQ--3YksJY
- https://replicate.com/suno-ai/bark?prediction=zh8j6yddxxrge0cjp9asgzd534
- https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c

================================================
FILE: end-to-end-use-cases/NotebookLlama/requirements.txt
================================================
# Core dependencies
PyPDF2>=3.0.0
torch>=2.0.0
transformers>=4.46.0
accelerate>=0.27.0
rich>=13.0.0
ipywidgets>=8.0.0
tqdm>=4.66.0

# Optional but recommended
jupyter>=1.0.0
ipykernel>=6.0.0

================================================
FILE: end-to-end-use-cases/NotebookLlama/TTS_Notes.md
================================================
### Notes from TTS Experimentation

For the TTS pipeline, *all* of the top models from HuggingFace and Reddit were tried.
The goal was to use models that were easy to set up and sounded less robotic, with the ability to include sound effects like laughter, etc.

#### Parler-TTS

Minimal code to run their models:

```
import torch
import IPython.display as ipd
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")

# Define text and description
text_prompt = "This is where the actual words to be spoken go"
description = """Laura's voice is expressive and dramatic in delivery, speaking at a fast pace with a very close recording that almost has no background noise."""

input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(text_prompt, return_tensors="pt").input_ids.to(device)

generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()
ipd.Audio(audio_arr, rate=model.config.sampling_rate)
```

The really cool aspect of these models is the ability to prompt the `description`, which can change the speaker profile and pacing of the outputs.

Surprisingly, Parler's mini model sounded more natural. In their [repo](https://github.com/huggingface/parler-tts/blob/main/INFERENCE.md#speaker-consistency) they share names of speakers that we can use in the prompt.

#### Suno/Bark

Minimal code to run bark (the processor/model setup lines assume the transformers port of Bark):

```
import torch
from IPython.display import Audio
from transformers import AutoProcessor, BarkModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# assumes the transformers port of Bark with the suno/bark checkpoint
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark").to(device)

voice_preset = "v2/en_speaker_6"
sampling_rate = 24000

text_prompt = """
Exactly! [sigh] And the distillation part is where you take a LARGE-model, and compress-it down into a smaller, more efficient model that can run on devices with limited resources.
"""
inputs = processor(text_prompt, voice_preset=voice_preset).to(device)

speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
```

Similar to the parler models, suno has a [library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c) of speakers.

v9 from their library sounded robotic, so we use Parler for our first speaker and the best one from bark for the other.

The incredible thing about the Bark model is the ability to add sound effects: `[Laugh]`, `[Gasps]`, `[Sigh]`, `[clears throat]`. Making words capital causes the model to emphasize them, and adding `-` gives a break in the text. We utilize this knowledge when we re-write the transcript using the 8B model to add effects to our transcript.

Note: The authors suggest using `...`. However, this didn't work as effectively as adding a hyphen during trials.

#### Hyper-parameters:

Bark models have two parameters we can tweak: `temperature` and `semantic_temperature`.

Below are the notes from a sweep; the prompt and speaker were fixed, and this was a vibe test to see which combination gives the best results.
The values below are `temperature` and `semantic_temperature`, respectively.

First, fix `temperature` and sweep `semantic_temperature`:

- `0.7`, `0.2`: Quite bland and boring
- `0.7`, `0.3`: An improvement over the previous one
- `0.7`, `0.4`: Further improvement
- `0.7`, `0.5`: This one didn't work
- `0.7`, `0.6`: So-so, didn't stand out
- `0.7`, `0.7`: The best so far
- `0.7`, `0.8`: Further improvement
- `0.7`, `0.9`: Mixed feelings on this one

Now sweep the `temperature`:

- `0.1`, `0.9`: Very robotic
- `0.2`, `0.9`: Less robotic but not convincing
- `0.3`, `0.9`: Slight improvement, still not fun
- `0.4`, `0.9`: Still has a robotic tinge
- `0.5`, `0.9`: The laugh was weird on this one, but the voice modulates so much it feels like the speaker is changing
- `0.6`, `0.9`: Most consistent voice, but has a robotic after-taste
- `0.7`, `0.9`: Very robotic, and the laugh was weird
- `0.8`, `0.9`: Completely ignored the laughter, but it was more natural
- `0.9`, `0.9`: We have a winner, probably

After this, about 30 more sweeps were done with the promising combinations. The best results are at:

```
speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
```

### Notes from other models that were tested:

Promising directions to explore in the future:

- [MeloTTS](https://huggingface.co/myshell-ai/MeloTTS-English): the most popular (ever) on HuggingFace
- [WhisperSpeech](https://huggingface.co/WhisperSpeech/WhisperSpeech) sounded quite natural as well
- [F5-TTS](https://github.com/SWivid/F5-TTS) was the latest release at this time; however, it felt a bit robotic
- E2-TTS: r/locallama claims this to be a little better; however, it didn't pass the vibe test
- [xTTS](https://coqui.ai/blog/tts/open_xtts): has great documentation and also seems promising

#### Some more models that weren't tested:

In other words, we leave this as an exercise to readers :D

- [Fish-Speech](https://huggingface.co/fishaudio/fish-speech-1.4)
- [MMS-TTS-Eng](https://huggingface.co/facebook/mms-tts-eng)
- [Metavoice](https://huggingface.co/metavoiceio/metavoice-1B-v0.1)
- [Hifigan](https://huggingface.co/nvidia/tts_hifigan)
- [TTS-Tacotron2](https://huggingface.co/speechbrain/tts-tacotron2-ljspeech)
- [VALL-E X](https://github.com/Plachtaa/VALL-E-X)

================================================
FILE: end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt
================================================
Knowledge Distillation is a methodology that transfers advanced capabilities from leading proprietary Large Language Models (LLMs) to their open-source counterparts, such as LLaMA and Mistral. This paper presents a comprehensive survey of KD's role in imparting advanced knowledge.

Abstract —In the era of Large Language Models, Knowledge Distillation emerges as a pivotal methodology for transferring advanced capabilities from proprietary LLMs to open-source counterparts, facilitating their self-improvement by employing themselves as teachers.

xamined through a meticulous survey that delves into the foundational pillars of algorithm, skill, and verticalization, which form the backbone of knowledge distillation and deep learning models.
The survey provides a comprehensive examination of key mechanisms within the knowledge distillation framework, specifically focusing on the enhancement of cognitive abilities and their practical implications across various fields, with a particular emphasis on the interplay between data augmentation (DA) and knowledge distillation.en-source LLMs, this survey highlights the potential for more accessible, efficient, and powerful AI solutions.Most importantly, we advocate for compliance with legal terms that regulate the use of LLMs, ensuring ethical and lawful application of knowledge distillation.An associated Github repository is available at https://github.com/Tebmer/Awesome-Knowledge-Distillation-of-LLMs. Index Terms - Large language models, knowledge distillation, data augmentation, skill distillation, supervised fine-tuningsophisticated problem-solving capabilities, the core significance of these large language models (LLMs) lies in their emergent abilities, enabling them to tackle a diverse array of tasks with remarkable proficiency.their remarkable capabilities, have some notable limitations, particularly when considering the advantages offered by open-source models, such as GPT-4 and Gemini. These models are often expensive, with substantial usage fees and restricted access, making them inaccessible to individuals and smaller organizations.ng restrictions and costs. In contrast, open-source LLMs like LLaMA and Mistral bring several advantages. Accessibility and adaptability are key benefits, as they are more readily available to a broader range of users, including researchers and organizations.ts. One of the most significant limitations is the smaller model scale, resulting in lower performance on real-world tasks with multiple instructions (Zheng et al., 2023a). Models with fewer parameters struggle to capture the depth and breadth of knowledge embodied in larger models like GPT-4. Additionally, the pre-training investment in these open-source models is typically less substantial. This reduced investment can lead to a narrower range of pre-training data, potentially limiting their understanding and handling of diverse or specialized topics (Liang et al., 2022; Sun et al., 2024a). Fine-tuning steps are often fewer due to resource constraints, hindering model optimization for specific tasks or industries.ary models becomes apparent when compared to highly fine-tuned proprietary LLMs. Primarily, the disparity between proprietary and open-source LLMs becomes evident, with proprietary models excelling in complex scenarios, while open-source models excel in a wide range of scenarios. Knowledge distillation, a technique that leverages the advanced capabilities of proprietary models, is used to enhance the competencies of open-source models. This process is similar to transferring the performance of a skilled teacher to a student.tillation of LLMs, where a small seed of knowledge is used to prompt the LLM to generate more data with respect to a specific skill or domain (Taori et al., 2023). 
Furthermore, KD retains its fundamental role in compressing LLMs, making them more efficient without significant loss in performance.advanced context following and instruction following****key aspects of knowledge distillation*** **contextual understanding**: in-context learning and instruction following* **alignment with user intents**: human values/principles and thinking patterns like chain-of-thought* **NLP task specialization**: semantic understanding and code generation**critical skills for various applications*** **healthcare**: accuracy and contextual knowledge* **law**: contextual knowledge and precision* **science**: contextual knowledge and precisionned in the era of LLMs, the benefits of knowledge distillation in the era of LLMs are multifaceted and transformative. Through a suite of distillation techniques, the gap between proprietary and open-source models narrows and is filled. This process streamlines computational requirements and enhances environmental sustainability of AI operations, as open-source models become more proficient with lower overhead.ch domains. The escalating need for a comprehensive survey on the knowledge distillation of LLMs stems from the rapidly evolving landscape of AI and the increasing complexity of these models. The ability to efficiently and effectively distill knowledge from proprietary LLMs to open-source ones becomes a practical necessity. This is driven by the need to bridge the knowledge gap between the proprietary and open-source LLMs.This need is driven by the 3 models mentioned, including Student, Vicuna, Opt, GPT, and others. These models are being used in various sectors such as law, healthcare, finance, and science, and the ability to distill knowledge from them is becoming increasingly important.synthesizefeedbackFeedback input outputSelf-Knowledge outputinputinput YlabelLabelingExpansion X,Y demonstrationsexpandFeature featureinput,outputextractSec.4Sec.5 Sec.3.1Sec.3.2 Fig. 2: An overview of this survey on knowledge distillation of large language modelses emerging, but there is still much to be learned from the era of Large Language Models (LLMs). In this section, we provide a foundational overview of knowledge distillation, highlighting the role of data augmentation (DA) in this context.Traditional techniques, such as supervised fine-tuning, have shown promise in distilling knowledge from LLMs. However, the increasing complexity of these models requires careful consideration of the trade-offs between accuracy and computational resources. To further explore the possibilities of knowledge distillation, we examine methods involving supervised fine-tuning, such as incremental learning and transfer learning.Supervised fine-tuning involves training a model on a smaller dataset with the goal of adapting to a specific task or domain. This approach has shown significant improvement in various NLP tasks, but may not be scalable to large-scale applications. In contrast, transfer learning offers a more flexible approach, where a model is trained on a smaller dataset and then fine-tuned on a larger dataset. This can lead to improved performance on a variety of tasks, but requires careful selection of the target dataset.Another approach is divergence and similarity, which involve exploring the differences and similarities between the knowledge distillation process and traditional machine learning. 
Reinforcement learning and ranking optimization are also gaining attention, particularly in the context of knowledge distillation, where the goal is to optimize the distillation process itself. These methods can improve the efficiency and effectiveness of knowledge distillation, but require careful consideration of the trade-offs between exploration and exploitation.Skill distillation focuses on enhancing student models to improve their understanding of the task and their ability to perform well on NLP tasks. This can be achieved through various methods, including data augmentation, feature learning, and attention mechanisms. By incorporating these techniques, student models can better understand the context and intentions of the user, leading to improved performance across a variety of tasks.We propose several strategies for skill distillation, including:mmendation systems, and the evaluation of text generation. In §5, we delve into domain-specific vertical distillation, demonstrating how knowledge distillation techniques are applied in specialized fields such as law, healthcare, finance, and science, highlighting their practical implications and transformative impact. The survey reveals open problems in §6, highlighting current challenges and gaps in knowledge distillation research that present opportunities for future work.large, complex model to a smaller, more efficient model, mitigating the challenges of computational demands and resource constraints in deploying large-scale models in practical applications. This process, prior to the era of Large Language Models (LLMs), focused on compacting complex neural networks for deployment in resource-constrained environments, such as mobile devices or edge computing platforms, where computational efficiency was paramount.al., 2022a), Alpaca (Taori et al., 2023), Code Alpaca (Chaudhary, 2023) Self-Align (Sun et al., 2024b), WizardLM (Xu et al., 2023a), WizardCoder (Luo et al., 2023a), WizardMath (Luo et al., 2023b), AugGPT (Dai et al., 2023a), TDG (He et al., 2023b), CurationUltraChat (Ding et al., 2023b), Phi-1 (Gunasekar et al., 2023), Phi-1.5 (Li et al., 2023a), Phi-2 (Mar, 2023), Magicoder (Wei et al., 2023), WaveCoder (Yu et al., 2024), ZeroGen (Ye et al., 2022), InPars (Bonifacio et al., 2022)Self-Align (Sun et al., 2024b), RLCD (Yang et al., 2024a), ImpDistill (Jung et al., 2023), LMSI (Huang et al., 2023a), ReST (Gulcehre et al., 2023), Self-Rewarding (Yuan et al., 2024a), Baize (Xu et al., 2023b), STaR (Zelikman et al., 2022) DistillationSupervised Fine-TuningAlpaca (Taori et al., 2023), Vicuna (Chiang et al., 2023), WizardLM (Xu et al., 2023a), Self-Instruct (Wang et al., 2022a), Baize (Xu et al., 2023b), STaR (Zelikman et al., 2022), Divergence and SimilarityDistilGPT (Sanh et al., 2019), f-Distill (Wen et al., 2023), MiniLLM (Gu et al., 2024) TED (Liang et al., 2023a), GKD (Agarwal et al., 2024), BabyLlama (Timiryasov and Tastet, 2023) Reinforcement LearningCAI (Bai et al., 2022a), UltraFeedback (Cui et al., 2023a), WizardMath (Luo et al., 2023b), MiniLLM (Gu et al., 2024), GKD (Agarwal et al., 2024), GPT3 Reward (Kwon et al., 2023) Rank OptimizationollowingInstruction FollowingSelf-Instruct Wang et al., 2022a, Alpaca Taori et al., 2023, Vicuna Chiang et al., 2023, WizardLM Xu et al., 2023a, Orca Mukherjee et al., 2023, Orca2 Mitra et al., 2023, WizardMath Luo et al., 2023b, Llama-GPT4 Peng et al., 2023a, Multi-turn Dialogue Chiang et al., 2023, Baize Xu et al., 2023b, UltraLLaMA Ding et al., 2023b, CAMEL Li et al., 2023b, 
OpenChat Wang et al., 2023c, Zephyr Tunstall et al., 2023, RAG Kang et al., 2023a, SAIL Luo et al., 2023c, Self-RAG Asai et al., 2023, AlignmentThinking PatternYe et al., 2023, Orca Mukherjee et al., 2023, Orca2 Wang et al., 2023d, AFT Cheng et al., 2023, KnowPAT Zhang et al., 2023a, PreferenceCAI Bai et al., 2022a, GPT-3 Reward Kwon et al., 2023, ILF Scheurer et al., 2023, ALMoST Kim et al., 2023a, RLEF Roit et al., 2023i et al., 2022a), Align Honesty (Yang et al., 2023a), SANDBOX (Liu et al., 2023b), Self-Align (Sun et al., 2024b), UltraFeedback (Cui et al., 2023a), RLCD (Yang et al., 2024a), AgentToolformer (Schick et al., 2023), Graph-ToolFormer (Zhang, 2023), Gorilla (Patil et al., 2023), ToolAlpaca (Tang et al., 2023a), ToolLLM (Qin et al., 2023a), CRAFT (Yuan et al., 2023a), Confucius (Gao et al., 2023b), MLLM-Tool (Wang et al., 2024), α-UMi (Shen et al., 2024), PlanningFireAct (Chen et al., 2023b), AgentTuning (Zeng et al., 2023a), Lumos (Yin et al., 2023a), AUTOACT (Qiao et al., 2024), TPTU-v2 (Kong et al., 2023), NLP Task SpecializationNLUAugGPT (Dai et al., 2023a), GPT Annotation (Gilardi et al., 2023), (Ding et al., 2023a), TDG (He et al., 2023b), SunGen (Gao et al., 2023a), Mix Distill (Chenglin et al., 2023), Annollm (He et al., 2023a), UDG (Wang et al., 2021a), ZeroGen (Ye et al., 2024)al., 2023 GPT-3 Labeling Wang et al., 2021b BioGPT Guo et al., 2023a ChatGPT NMT Yang and Nicolai, 2023 Information RetrievalQUILL Srinivasan et al., 2022 Promptgator Dai et al., 2023b InPars Bonifacio et al., 2022 AugTriever Meng et al., 2023 Sun et al., 2023a RankVicuna Pradeep et al., 2023a RankZephyr Pradeep et al., 2023b ExaRanker Ferraretto et al., 2023 Recommendation NDR Mysore et al., 2023 InstrcutRec Zhang et al., 2023b ONCE Liu et al., 2023c Text Generation Evaluation PandaLM Wang et al., 2023b Prometheus Kim et al., 2024 InstructScore Xu et al., 2023d TigerScore Jiang et al., 2023c Auto-J Li et al., 2024a CodeCodeAlpaca Chaudhary, 2023 CodeLlama Rozi `ere et al., 2023 Magicoder Wei et al., 2023 Phi-1 Gunasekar et al., 2023 PERsD Chen et al., 2023 MFTCoder Liu et al., 2023d WaveCoder Yu et al., 2023et al., 2023e), SVIT (Zhao et al., 2023b), LVIS-Instruct4V (Wang et al., 2023e), Shikra (Chen et al., 2023c), LSKD (Park et al., 2023), DetGPT (Pi et al., 2023; Zhao et al., 2023c), LRV (Liu et al., 2023f), NExT-GPT (Wu et al., 2023b), Valley (Luo et al., 2023d), ILuvUI (Jiang et al., 2023d), StableLLaVA (Li et al., 2023c), PointLLM (Xu et al., 2023e), Verticalization DistillationLaw (Huang et al., 2023b; Cui et al., 2023b); Medical & Healthcare (Zhang et al., 2023c; Chen et al., 2023d); Finance (Zhang and Yang, 2023); Science (Xie et al., 2023a; Zhang et al., 2024) and Misc. (Dan et al., 2023; Guo et al., 2023b) Fig. 3: Taxonomy of Knowledge Distillation of Large Language Models"r network, often through techniques like soft target training, where the student learns from the softened softmax output of the teacher.The distillation of knowledge from larger models to smaller ones is a technique used to improve the performance of AI models. In this context, distillation refers to the process of distilling the knowledge from a larger model into a smaller model, allowing it to learn from the teacher model's output.The current era of knowledge distillation in large language models (LLMs) has shifted the focus from mere architecture compression to a more nuanced process of knowledge elicitation and transfer. 
This paradigm change is largely due to the immense knowledge that LLMs like GPT-4 and Gemini possess. The parameters of LLMs make it challenging to compress them using pruning or quantization techniques.size, the current focus in llm-based knowledge distillation is to extract and transfer the rich, nuanced understanding that these models have developed the key to this modern approach lies in carefully designed prompts that elicit specific knowledge or capabilities from the llms, tapping into their understanding and capabilities in various domains ranging from natural language understanding to more complex cognitive tasks like reasoning and problem-solvingexplicit training objectives. This era of knowledge distillation also emphasizes the transfer of abstract qualities such as reasoning patterns and preference alignment. This is in stark contrast to the earlier focus on output replication, indicating a shift towards a more holistic and comprehensive transfer of cognitive capabilities. The current techniques involve not just the replication of outputs, but also the emulation of thought processes and decision-making patterns of the teacher model. This involves complex strategies like chain-of-thought prompting, where the student model learns the reasoning process of the teacher, enhancing its problem-solving and decision-making capabilities. 2.2 Relation to Data Augmentation (DA)llation, Unlike traditional techniques such as paraphrasing, or back-translation, which primarily aim at expanding the training dataset in a somewhat mechanical manner. DA within the context of LLMs focuses on the generation of novel, context-rich training data tailored to specific domains and skills. This innovation is driven by the unique capabilities of LLMs to generate coherent, diverse, and intricate data samples that closely mimic the nuanced understanding and cognitive abilities of human experts in various fields.ource models, through Deep Learning Models (LLMs) are prompted to create targeted, high-quality datasets that are not merely larger in volume but also rich in diversity and specificity. This approach enables the distillation process to be more effective, ensuring that the distilled models replicate the teacher model's output behavior and embody its deep-seated understanding and cognitive strategies. The significance and necessity of Data Augmentation (DA) for achieving Knowledge Domains (KD) in the LLM era cannot be overstated. DA acts as a force multiplier, enabling the distilled models to acquire and refine capabilities that would otherwise require exponentially larger datasets and computational resources. It facilitates a more nuanced and effective transfer of knowledge, focusing on the qualitative aspects of learning rather than quantitative expansion.er of LLMs empowers open-source models with the ability to approximate the contextual adeptness, ethical alignment, and deep semantic insights characteristic of their proprietary counterparts thereby democratizing access to advanced AI capabilities and fostering innovation across a broader spectrum of applications and users 2 3 Survey Scope Building on the discussions introduced earlier this survey aims to comprehensively explore the landscape of knowledge distillation within the context of LLMs following a meticulously structured taxonomy as in Figure 3 the survey’s scope is delineated through three primary facets each encapsulating a range of subtopics and methodologiesundations and methodologies of knowledge distillation. 
It includes an in-depth exploration of processes involved in constructing knowledge from teacher models (e.g., proprietary LLMs) and integrating this knowledge into student models (e.g., open-source LLMs). Under the umbrella of 'knowledge', we delve into strategies such as labeling, expansion, curation, feature understanding, and feedback mechanisms. The exploration seeks to uncover the various ways in which knowledge can be identified, expanded, and curated for effective distillation. This subsection examines learning approaches like supervised fine-tuning, divergence minimization, and reinforcement learning techniques.

ow algorithms enable knowledge transfer, allowing open-source models to replicate and sometimes surpass proprietary capabilities. Skill Distillation examines specific competencies and capabilities enhanced through Knowledge Distillation. Contextual discussions follow (Taori et al., 2023; Luo et al., 2023c), including instruction following and retrieval-augmented generation (RAG) capabilities. Alignment research investigates thinking patterns, persona/preference modeling, and value alignment. The 'agent' category focuses on skills like tool usage and planning. NLP task specialization (Dai et al., 2023a; Jung et al., 2023; Chaudhary, 2023) is examined through lenses like natural language understanding (NLU), natural language processing (NLP).

tion, and Code Generation**Finally, the survey explores how Knowledge Distillation (KD) enhances Large Language Models (LLMs) in interpreting and integrating multiple forms of input, enriching their utility and applicability across various contexts.

Verticalization Distillation
This section examines the application of KD across diverse domains, providing insights into how distilled LLMs can be tailored for specialized fields such as Law, Medical & Healthcare (Wang et al., 2023a), Finance (Zhang and Yang, 2023), Science (Zhang et al., 2024), among others. This exploration showcases the practical implications of KD techniques and highlights their transformative impact on domain-specific AI solutions. Through detailed analysis and examples, this part aims to demonstrate the versatility and efficacy of KD in adapting LLMs to diverse domains.

stem. by navigating through these facets, this survey endeavors to provide an extensive and nuanced analysis of knowledge distillation in the era of LLMs. it serves as a guide for researchers, practitioners, and enthusiasts in the field, shedding light on current methodologies, challenges, and opportunities for innovation in this rapidly evolving domain.

across a range of applications.

Distillation Pipeline in LLM Era

================================================
FILE: end-to-end-use-cases/RAFT-Chatbot/README.md
================================================
## Chatbot Recipe:

As the popularity of our Meta Llama 3 models grows, we've seen a surge in demand to adapt them to specific domains, enabling businesses to better serve their customers. For example, a company might have a vast collection of plain text documents related to their custom domain and want to create a chatbot that can answer client questions.

In response to this demand, we're exploring the possibility of building a Llama chatbot that can answer Llama-related questions using our Meta Llama 3 models. In this tutorial, we'll demonstrate how to do just that. While our Meta Llama 3 70B Instruct model is an excellent candidate, its production costs are relatively high.
To reduce these costs, we'll focus on creating a Llama chatbot based on the Meta Llama 3 8B Instruct model, aiming to achieve similar accuracy while minimizing inference costs.

One common approach to produce a model based on new domain data is **fine-tuning**. The idea is to start from a pre-trained model that already has some knowledge of language from its pre-training and adapt it to a new domain. However, a [recent paper](https://arxiv.org/pdf/2405.05904) highlights the risk of using supervised fine-tuning to update LLMs' knowledge, presenting empirical evidence that acquiring new knowledge through fine-tuning is correlated with hallucinations with respect to preexisting knowledge. Fine-tuning can also be costly if the domain knowledge has to be updated frequently.

Another solution is to use **RAG (Retrieval-Augmented Generation)**, which combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). RAG operates by first retrieving relevant information from a database using a query generated by the LLM. This retrieved information is then integrated into the LLM's query input, enabling it to generate more accurate and contextually relevant text. This helps to reduce LLM hallucination, as the related documents are provided to the LLM, and it has a lower cost for updating the domain knowledge.

In this tutorial, we'll use **Retrieval Augmented Fine Tuning (RAFT)**, a technique that combines fine-tuning with RAG to better utilize custom domain text data. RAFT is a general recipe for fine-tuning a pre-trained Large Language Model (LLM) to a domain-specific RAG setting. It helps the LLM to better utilize custom domain text data by ignoring documents that don't help in answering the question. This approach can create a more factual model and reduce LLM hallucinations during inference.

The process involves preparing training data with each data point containing:

* A question (Q)
* A set of documents (D)
* A corresponding Chain-of-Thought style answer (A*) generated from one of the documents (D*)

RAFT tries to teach the models to differentiate between two types of documents:

* Oracle documents (D*): documents from which the answer to the question can be deduced
* Distractor documents (Di): documents that do not contain answer-relevant information

The following graph illustrates the main RAFT concepts:

For more information on RAFT, please refer to their [blog post](https://gorilla.cs.berkeley.edu/blogs/9_raft.html).

## Fine-tuning Llama

To build a Llama bot, we need to collect relevant text data. Ideally, we would include a vast range of Llama-related web documents, but for demo purposes, we'll focus on official documents. For example, we can use the raw text from official web pages listed in [Getting started with Meta Llama](https://llama.meta.com/get-started/), excluding the FAQ page since some evaluation questions will come from there.

We have two options to obtain the text data: using a local folder or web crawling. For the local folder option, we can download the desired documents in PDF, Text, or Markdown format to the "data" folder specified in the [raft.yaml](./raft.yaml) file. Langchain DirectoryLoader will load the files in that folder, but it may also ask us to install more package dependencies if the file formats are not supported natively.

Alternatively, we can create a sitemap XML file, similar to the example below, and put the file path in the [raft.yaml](./raft.yaml) file, so that a Langchain SitemapLoader can retrieve all the text from the web pages:

```xml
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url>
    <loc>http://llama.meta.com/responsible-use-guide/</loc>
  </url>
  <!-- more URLs -->
</urlset>
```
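For orientation, loading such a sitemap with LangChain looks roughly like the sketch below. This is illustrative; raft.py drives this through raft.yaml, and the local-file usage here is an assumption:

```python
from langchain_community.document_loaders.sitemap import SitemapLoader

# Assumes the sitemap XML above is saved locally as "sitemap.xml" and that
# the langchain-community package is installed.
loader = SitemapLoader(web_path="sitemap.xml", is_local=True)
docs = loader.load()
print(len(docs), docs[0].metadata.get("source"))
```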
## Create RAFT Dataset

To create a RAFT dataset from the prepared documents, we can use the Meta Llama 3 70B Instruct model, either through APIs from LLM cloud providers or by hosting a local VLLM server.

For this example, we'll demonstrate how to create a VLLM OpenAI-compatible server that hosts Meta Llama 3 70B Instruct locally and generates the RAFT dataset.

**Local Server Setup**

First, ensure VLLM is installed. Then run the following command to start the VLLM server:

```bash
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
```

**Note**: Make sure the port is available. The server requires at least 135GB of GPU memory, so we need to use multiple GPUs in tensor-parallel mode.

**Querying the Server**

Once the server is ready, query it using the following command in another terminal:

```bash
python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 4
```

If you prefer to use a cloud API, replace the endpoint URL with the cloud provider's URL and set the API key using the `-k` flag or environment variables.

**RAFT Dataset Generation**

The [raft.py](raft.py) script reads all documents from local or web sources, depending on the settings, and splits the data into text chunks of 1000 characters using RecursiveCharacterTextSplitter.

Then it applies the `question_prompt_template` defined in [raft.yaml](raft.yaml) to each chunk, prompting the Meta Llama 3 70B model to generate a question list (by default, 4 questions per list) for each text chunk. For each question and corresponding text chunk, we generate a Chain-of-Thought (COT) style answer using Meta Llama 3 70B Instruct APIs.

Once we have the COT answers, we can create a dataset where each sample contains an "instruction" section. This section includes some unrelated chunks called distractors (by default, we add 4 distractors). In the original RAFT method, there is an oracle probability P (by default, 80%) that a related document will be included. This means that there is a 1-P (by default, 20%) chance that no related document is provided, and the RAFT model should still try to predict the COT answer label, as stated in the blog: "By removing the oracle documents in some instances of the training data, we are compelling the model to memorize domain-knowledge."

**Modification to Add Refusal Examples**

In this tutorial, we made an important modification by adding additional refusal examples (by default, this refusal probability is 5%). When the related documents are not presented, we set the COT answer label to "Sorry, I don't know the answer to this question because related documents are not found. Please try again." Our hypothesis is that this will increase answer precision and reduce chatbot hallucination. In real-world production scenarios, we prefer that the chatbot refuses to answer when not enough context is provided, so that we can detect this refusal signal and mitigate the risk of producing wrong or misleading answers (e.g., we can ask a human agent to take over the conversation to better serve customers).
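Putting the probabilities above together, the sample-assembly logic can be sketched as follows. This is an illustrative sketch; raft.py's actual implementation differs in its details:

```python
import random

REFUSAL = ("Sorry, I don't know the answer to this question because "
           "related documents are not found. Please try again.")

def build_raft_sample(question, cot_answer, oracle_doc, distractor_pool,
                      oracle_p=0.8, refusal_p=0.05, num_distractors=4):
    """Illustrative RAFT sample assembly: 4 distractors, oracle included
    with probability P, and a refusal label for a small fraction of the
    oracle-free samples. Assumes len(distractor_pool) >= num_distractors."""
    docs = random.sample(distractor_pool, num_distractors)
    if random.random() < refusal_p:
        # Refusal example: no oracle, and the label is a refusal.
        answer = REFUSAL
    elif random.random() < oracle_p:
        # Oracle included: the COT answer is grounded in it.
        docs.insert(random.randrange(num_distractors + 1), oracle_doc)
        answer = cot_answer
    else:
        # Oracle withheld: the model must rely on memorized domain knowledge.
        answer = cot_answer
    return {"question": question, "context": docs, "cot_answer": answer}
```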
**RAFT Format JSON Example**

Here is a RAFT format JSON example from our saved `raft.jsonl` file:

```json
{
  "id": "seed_task_228",
  "type": "general",
  "question": "What is the context length supported by Llama 3 models?",
  "context": {
    "sentences": [
      [
        "DISTRACT_DOCS 1",
        "DISTRACT_DOCS 2",
        "We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
        "DISTRACT_DOCS 3",
        "DISTRACT_DOCS 4"
      ]
    ],
    "title": [
      [
        "placeholder_title",
        "placeholder_title",
        "placeholder_title",
        "placeholder_title",
        "placeholder_title"
      ]
    ]
  },
  "oracle_context": "We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
  "cot_answer": "Here's the step-by-step reasoning to answer the question:\n\n1. The question asks about the context length supported by Llama 3 models.\n2. In the context, we need to find the relevant information about Llama 3 models and their context length.\n3. The relevant sentence is: \"This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2.\"\n##begin_quote## This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. ##end_quote##\n4. From this sentence, we can see that Llama 3 models support a context length of 8K.\n\n<ANSWER>: 8K",
  "instruction": "<DOCUMENT> DISTRACT_DOCS 1 <\/DOCUMENT>...<DOCUMENT> DISTRACT_DOCS 4 <\/DOCUMENT>\nWhat is the context length supported by Llama 3 models?"
}
```

As shown in the above example, we have a "question" section for the generated question, a "cot_answer" section for the generated COT answer (where the final answer is added after the "<ANSWER>:" token), and an "instruction" section that contains all the documents (each document wrapped in `<DOCUMENT>` and `</DOCUMENT>` tags) with the generated question appended at the end. This "instruction" section will be the input during fine-tuning, and the "cot_answer" will be the output label that the loss is calculated on.
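Schematically, the custom dataset loader (the real logic lives in raft_dataset.py, referenced in the fine-tuning command below) maps each record to an input/label pair along these lines; this is a sketch, and chat-template wrapping is omitted:

```python
import json

def load_raft_pairs(path="raft.jsonl"):
    """Sketch: map each RAFT record to a fine-tuning (input, label) pair."""
    pairs = []
    with open(path) as f:
        for line in f:
            record = json.loads(line)
            pairs.append({
                "input": record["instruction"],   # documents + question
                "label": record["cot_answer"],    # COT answer; loss computed on this
            })
    return pairs
```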
## Creating an Evaluation Set

To create a reliable evaluation set, it's ideal to use human-annotated question and answer pairs. This ensures that the questions are relevant and the answers are accurate. However, human annotation is time-consuming and costly. For demonstration purposes, we'll use a subset of the validation set, which is never used during fine-tuning. We only need to keep the "question" section and the final answer section, marked by the `<ANSWER>` tag in "cot_answer". We'll manually check each example and select only the good ones, making sure the questions are general enough to be used as web search engine queries and are related to Llama. We'll also use some QA pairs from our FAQ page, with modifications. This results in 72 question and answer pairs as our evaluation set, saved as `eval_llama.json`.

## Fine-Tuning Steps

Once the RAFT dataset is ready in JSON format, we can start fine-tuning. Unfortunately, the LoRA method didn't produce good results, so we'll use the full fine-tuning method. We can use the following commands as an example in the llama-cookbook main folder:

```bash
export PATH_TO_ROOT_FOLDER=./raft-8b
export PATH_TO_RAFT_JSON=recipes/use_cases/end2end-recipes/raft/output/raft.jsonl
torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py --enable_fsdp --lr 1e-5 --context_length 8192 --num_epochs 1 --batch_size_training 1 --model_name meta-Llama/Meta-Llama-3-8B-Instruct --dist_checkpoint_root_folder $PATH_TO_ROOT_FOLDER --dist_checkpoint_folder fine-tuned --use_fast_kernels --dataset "custom_dataset" --custom_dataset.test_split "test" --custom_dataset.file "recipes/finetuning/datasets/raft_dataset.py" --use-wandb --run_validation True --custom_dataset.data_path $PATH_TO_RAFT_JSON
```

For more details on multi-GPU fine-tuning, please refer to [multigpu_finetuning.md](../../getting-started/finetuning/multigpu_finetuning.md) in the finetuning recipe.

Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using the following command:

```bash
python src/llama_cookbook/inference/checkpoint_converter_fsdp_hf.py --fsdp_checkpoint_path "$PATH_TO_ROOT_FOLDER/fine-tuned-meta-Llama/Meta-Llama-3-8B-Instruct" --consolidated_model_path "$PATH_TO_ROOT_FOLDER"
```

For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../getting-started/finetuning/multigpu_finetuning.md) in the inference/local_inference recipe.

## Evaluation Steps

Once we have the RAFT model, we need to evaluate its performance. In this tutorial, we'll not only use traditional evaluation methods (e.g., calculating exact match rate or ROUGE score) but also use an LLM as a judge to score model-generated answers.

We'll launch a VLLM server to host our converted model from `PATH_TO_ROOT_FOLDER`. To make things easier, we can rename the model folder to `raft-8b`.

```bash
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model raft-8b --port 8000 --disable-log-requests
```

Similarly, if we want the 8B Instruct baseline, we can launch an 8B model VLLM server instead:

```bash
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-8B-Instruct --port 8000 --disable-log-requests
```
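Before running the full evaluation, we can sanity-check whichever server is up with any OpenAI-compatible client. This is a sketch assuming the `openai` Python package; the `model` field must match the name the server was launched with:

```python
# Sketch: query the local VLLM OpenAI-compatible server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
resp = client.chat.completions.create(
    model="raft-8b",  # or meta-Llama/Meta-Llama-3-8B-Instruct for the baseline
    messages=[{"role": "user", "content": "What is the context length supported by Llama 3 models?"}],
)
print(resp.choices[0].message.content)
```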
On another terminal, we can use another Meta Llama 3 70B Instruct model as a judge to compare the answers from the RAFT 8B model with the ground truth and get a score. To do this, we need to host a second Meta Llama 3 70B Instruct VLLM server locally with the following command, making sure the port is not in use:

```bash
CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
```

Then, we can pass the ports to the eval script to evaluate our RAFT model once our `raft-8b` VLLM server is running:

```bash
CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m raft-8b -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
```

To evaluate the 8B baseline, we can use the following command once our 8B VLLM server is running:

```bash
CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m meta-Llama/Meta-Llama-3-8B-Instruct -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
```

**NOTE**: Please ensure that the `--model` used when creating the VLLM server matches the `-m` passed to raft_eval.py; otherwise, VLLM will raise a `model not found` error. By default, the RAFT model is called "raft-8b". Here, `-u` specifies the RAFT model endpoint URL, `-j` specifies the judge model endpoint URL, and `-r` defines how many top-k documents RAG should retrieve.

The [raft_eval.py](./raft_eval.py) script loads questions from the evaluation set, generates answers from the models and models+RAG, and compares the generated answers with the ground truth to compute evaluation metrics, such as the ROUGE score and the LLM-as-judge score. It then saves those metrics and evaluation details to eval logs.
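The LLM-as-judge step boils down to prompting the 70B server to grade each generated answer against the ground truth. The prompt wording and YES/NO parsing below are illustrative assumptions, not the exact raft_eval.py logic:

```python
# Sketch: score one generated answer with the 70B judge server.
from openai import OpenAI

judge = OpenAI(base_url="http://localhost:8001/v1", api_key="EMPTY")

def judge_answer(question: str, ground_truth: str, generated: str) -> bool:
    prompt = (
        "You are grading a chatbot answer.\n"
        f"Question: {question}\n"
        f"Ground truth: {ground_truth}\n"
        f"Candidate answer: {generated}\n"
        "Reply YES if the candidate matches the ground truth, otherwise NO."
    )
    resp = judge.chat.completions.create(
        model="meta-Llama/Meta-Llama-3-70B-Instruct",
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content.strip().upper().startswith("YES")
```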
## Experiment Results

**Overview**

During our experiments, we encountered issues when using only the Llama website data, which consisted of 1980+ RAFT examples generated from 327K characters of text. We believed this initial data was insufficient, so we created an additional PyTorch RAFT dataset using text from the official [PyTorch blogs](https://pytorch.org/blog/) and [PyTorch tutorials](https://pytorch.org/tutorials/). This new dataset contains 20K+ RAFT examples generated from 4.7 million characters. We combined both datasets to create an `all_data` dataset. We then fine-tuned the 8B model on each dataset separately for 1 epoch with a learning rate of 1e-5, resulting in three RAFT models: `llama_only`, `pytorch_only`, and `all_data`.

**Evaluation on non-RAG baseline**

First, we ran a non-RAG baseline, using just the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct models, to see if our models can already answer some questions without any fine-tuning or external knowledge base. The LLM score, i.e., the percentage of answers marked correct by the LLM-as-judge, is 47.9% for 8B and 59.2% for 70B. Clearly, some of this information has already been learned by the Meta Llama 3 models during pre-training.

**Evaluation on RAG baseline**

Then we tested these 3 RAFT models with Langchain RAG, along with the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct RAG baselines, using RAG document top-k retrieval parameters of 3, 5, and 7. We deployed a Meta Llama 3 70B Instruct model as the judge to score our model-generated answers against the ground truth in our evaluation set. The LLM scores are shown below:

Our results showed that the RAFT models performed similarly to the 8B RAG baseline, but noticeably worse than the 70B RAG baseline when context documents were limited (top_k <= 5). However, when top_k = 7, the RAFT models' performance increased markedly, with the `all_data` 8B model achieving a score of 76.06%, which beats the 70B baseline's 74.65%.

**Refusal Examples**

We also analyzed the number of refusal examples, where the model responded with "Sorry, I do not know." The `all_data` model was more cautious and tended to refuse to answer, whereas the `llama_only` RAFT model did not learn to refuse at all, likely due to its limited dataset size.

**Precision Analysis**

We calculated the precision of our model answers, which represents the likelihood of producing a correct answer when the model decides to respond. The formula used was $\text{precision} = \frac{LLMScore}{1-\frac{numRefusal}{totalQA}}$.

Note that the 8B and 70B RAG baselines never refused to answer, so their precision is equal to their LLM score. Our `all_data` and `pytorch_only` models tended to refuse to answer when the provided documents were limited (top_k < 5), but when they did generate an answer, the likelihood of it being correct was higher. Specifically, when top_k = 7, the `all_data` RAFT model had an 82.97% likelihood of producing a correct answer when it decided to respond, outperforming the 70B baseline.

**Example Comparisons**

Here are some examples where our `all_data` RAFT model correctly answered questions that the 70B baseline failed to answer:

```
Comparing interested question: What tokenizer is used as the basis for the special tokens in Meta Llama
ground_truth: tiktoken
True all_data_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is tiktoken.
False 70B_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is SentencePiece.
```

```
Comparing interested question: What is the license under which the Llama Guard model and its weights are released?
ground_truth: The license is the same as Llama 3, which can be found in the LICENSE file and is accompanied by the Acceptable Use Policy.
True all_data_RAG_answers: <ANSWER>: The license under which the Llama Guard model and its weights are released is the same as Llama 3, and the [LICENSE](../LICENSE) file contains more information about the license.
False 70B_RAG_answers: <ANSWER>: The Llama Guard model and its weights are licensed under the Llama 2 Community license.
```

**Key Takeaways**

From our experiments, we learned:

1. A few thousand RAFT examples are insufficient; at least 10K examples are recommended.
2. The LLM-as-judge is not always reliable; we noticed that some answers were scored incorrectly.
3. The chunk_size for RAFT documents and RAG documents should be the same.
4. The RAFT method appears to help the LLM differentiate related documents from distractors rather than forcing it to memorize the training data, since adding PyTorch data as extra training data still helped our Llama chatbot answer Llama questions. More research experiments will be needed to understand this better.
## Local Inference Steps

Once we have evaluated and refined our RAFT model, we can deploy it locally and interact with it by asking questions manually. To do this, run the following command:

```bash
python recipes/inference/local_inference/inference.py --model_name raft-8b
```

For more details, please check the [local_inference recipe](../../getting-started/inference/local_inference/README.md).

## Acknowledgement

Finally, we would like to extend special thanks to Tianjun Zhang, the first author of the [RAFT paper](https://arxiv.org/pdf/2403.10131), for collaborating with us on this tutorial and providing valuable guidance throughout our experiments. Our code is also partially inspired by the [RAFT section of the Gorilla GitHub repository](https://github.com/ShishirPatil/gorilla/tree/main/raft).

================================================
FILE: end-to-end-use-cases/research_paper_analyzer/README.md
================================================

# Research Paper Analyzer with Llama4 Maverick

This tool leverages the **Llama 4 Maverick** model to retrieve the references of an arXiv paper and ingest all of their content for question-answering, without using any RAG to store this information.

## Features

### Leverage Long Context Length

| Model | Meta Llama4 Maverick | Meta Llama4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
| ----- | -------------- | -------------- | -------------- | -------------- |
| Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |

Because of the long context length, the analyzer can process all the reference paper content at once, so you can ask questions about the paper without worrying about the context length.

## Getting Started

1. Install dependencies:

```
pip install -r requirements.txt
```

2. Run the application:

```
python research_analyzer.py
```

3. Open the gradio interface on localhost in the browser.
4. Provide a paper URL such as https://arxiv.org/abs/2305.11135
5. Press "Ingest", wait for the paper to be processed, and ask questions about it.
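Under the hood, the flow is simple enough to sketch: download the PDF, extract its text, and send the whole thing to the model in a single long-context request. The Together model id below is an assumption, and the actual logic lives in research_analyzer.py:

```python
# Sketch: fetch an arXiv PDF, extract its text, and ask Llama 4 Maverick
# about it in one long-context request (no RAG, no vector store).
import io

import requests
from PyPDF2 import PdfReader
from together import Together

pdf = requests.get("https://arxiv.org/pdf/2305.11135", timeout=60)
reader = PdfReader(io.BytesIO(pdf.content))
text = "\n".join(page.extract_text() or "" for page in reader.pages)

client = Together()  # reads TOGETHER_API_KEY from the environment
resp = client.chat.completions.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",  # assumed model id
    messages=[{"role": "user",
               "content": f"Paper text:\n{text}\n\nSummarize the main contributions."}],
)
print(resp.choices[0].message.content)
```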
================================================
FILE: end-to-end-use-cases/research_paper_analyzer/requirements.txt
================================================

gradio
together
requests
pypdf2

================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/README.md
================================================

# WhatsApp and Llama 4 APIs: Build your own multi-modal chatbot

Welcome to the WhatsApp Llama4 Bot! This bot leverages the power of the Llama 4 APIs to provide intelligent and interactive responses to users via WhatsApp. It supports text, image, and audio interactions, making it a versatile tool for various use cases.

## Key Features

- **Text Interaction**: Users can send text messages to the bot, which are processed using the Llama 4 APIs to generate accurate and contextually relevant responses.
- **Image Reasoning**: The bot can analyze images sent by users, providing insights, descriptions, or answers related to the image content.
- **Audio-to-Audio Interaction**: Users can send audio messages, which are transcribed to text, processed by Llama 4, and converted back to audio for a seamless voice-based interaction.

## Technical Overview

### Architecture

- **FastAPI**: The bot is built using FastAPI, a modern web framework for building APIs with Python.
- **Asynchronous Processing**: Utilizes `httpx` for making asynchronous HTTP requests to external APIs, ensuring efficient handling of media files.
- **Environment Configuration**: Uses `dotenv` to manage environment variables, keeping sensitive information like API keys secure.

Please refer to the high-level architecture below, which explains the integrations:

### Important Integrations

- **WhatsApp API**: Facilitates sending and receiving messages, images, and audio files.
- **Llama4 Model**: Provides advanced natural language processing capabilities for generating responses.
- **Groq API**: Handles speech-to-text (STT) and text-to-speech (TTS) conversions, enabling the audio-to-audio feature.
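To make the architecture concrete, here is a minimal sketch of the FastAPI webhook surface such a bot exposes. The route paths, env var names, and payload handling are assumptions, not the actual ec2_endpoints.py code:

```python
# Sketch: minimal FastAPI webhook for WhatsApp Cloud API callbacks.
import os

from dotenv import load_dotenv
from fastapi import FastAPI, Request, Response

load_dotenv()
app = FastAPI()
VERIFY_TOKEN = os.getenv("VERIFY_TOKEN", "changeme")  # assumed env var

@app.get("/webhook")
async def verify(request: Request):
    # WhatsApp verifies the callback URL with a challenge handshake.
    params = request.query_params
    if params.get("hub.verify_token") == VERIFY_TOKEN:
        return Response(content=params.get("hub.challenge"))
    return Response(status_code=403)

@app.post("/webhook")
async def receive(request: Request):
    payload = await request.json()
    # Route incoming text / image / audio messages to the Llama 4 pipeline here.
    print(payload)
    return {"status": "received"}
```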
## Here are the steps to set up with the WhatsApp Business Cloud API

First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:

1. Add the WhatsApp product to your business app;
2. Add a recipient number;
3. Send a test message;
4. Configure a webhook to receive real-time HTTP notifications.

For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.

Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page and select your WhatsApp business app; you should be able to copy the curl command (as shown in App Dashboard - WhatsApp - API Setup - Step 2) and run the command in a terminal to send a test message to your WhatsApp.

Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" from the API Setup page above; these will be used later.

## Setup and Installation

### Step 1: Clone the Repository

```bash
git clone https://github.com/meta-llama/llama-cookbook.git
cd llama-cookbook/end-to-end-use-cases/whatsapp-llama4-bot
```

### Step 2: Install Dependencies

Ensure you have Python installed, then run the following command to install the required packages:

```bash
pip install -r requirements.txt
```

### Step 3: Configure Environment Variables

Create a `.env` file in the project directory and add your API keys and other configuration details as follows:

```plaintext
ACCESS_TOKEN=your_whatsapp_access_token
WHATSAPP_API_URL=your_whatsapp_api_url
TOGETHER_API_KEY=your_llama4_api_key
GROQ_API_KEY=your_groq_api_key
PHONE_NUMBER_ID=your_phone_number_id
```

### Step 4: Run the Application

On your EC2 instance, run the following command in a terminal to start the FastAPI server:

```bash
uvicorn ec2_endpoints:app --host 0.0.0.0 --port 5000
```

Note: If you use Amazon EC2 as your web server, make sure port 5000 is added to your EC2 instance's security group's inbound rules.
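As a sketch of how the bot replies through the WhatsApp Cloud API using the variables configured above (the payload shape follows Meta's Cloud API documentation; the helper name is illustrative):

```python
# Sketch: send a WhatsApp text reply via the Cloud API with httpx.
import os

import httpx
from dotenv import load_dotenv

load_dotenv()

async def send_text(to_number: str, body: str) -> None:
    headers = {"Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}"}
    payload = {
        "messaging_product": "whatsapp",
        "to": to_number,
        "type": "text",
        "text": {"body": body},
    }
    async with httpx.AsyncClient() as client:
        resp = await client.post(os.environ["WHATSAPP_API_URL"],
                                 headers=headers, json=payload)
        resp.raise_for_status()
```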
## License

This project is licensed under the MIT License.

## Contributing

We welcome contributions to enhance the capabilities of this bot. Please feel free to submit issues or pull requests.

================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/requirements.txt
================================================

aiohappyeyeballs==2.6.1
aiohttp==3.11.16
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
async-timeout==5.0.1
attrs==25.3.0
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
distro==1.9.0
dotenv==0.9.9
eval_type_backport==0.2.2
exceptiongroup==1.2.2
fastapi==0.115.12
filelock==3.18.0
frozenlist==1.5.0
groq==0.22.0
h11==0.14.0
httpcore==1.0.8
httpx==0.28.1
idna==3.10
markdown-it-py==3.0.0
mdurl==0.1.2
multidict==6.4.3
numpy==2.2.4
pillow==11.2.1
propcache==0.3.1
pyarrow==19.0.1
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
python-dotenv==1.1.0
requests==2.32.3
rich==13.9.4
shellingham==1.5.4
sniffio==1.3.1
starlette==0.46.2
tabulate==0.9.0
together==1.5.5
tqdm==4.67.1
typer==0.15.2
typing-inspection==0.4.0
typing_extensions==4.13.2
urllib3==2.4.0
uvicorn==0.34.1
yarl==1.19.0

================================================
FILE: end-to-end-use-cases/whatsapp_llama_4_bot/.env
================================================

# WhatsApp Business Phone Number ID (NOT the phone number itself)
PHONE_NUMBER_ID="place your whatsapp phone number id"

# Full URL to send WhatsApp messages (use correct version and phone number ID)
WHATSAPP_API_URL="place the Graph API request URL, i.e. https://graph.facebook.com/v{version}/{phone_number_id}/messages"

# Your custom backend/agent endpoint (e.g., for LLM-based processing)
AGENT_URL=https://your-agent-url.com/api

LLAMA_API_KEY="place your Llama API Key"
TOGETHER_API_KEY="place your Together API Key, in case you want to use Together instead of the Llama APIs"
GROQ_API_KEY="place your Groq API Key - this is for STT and TTS"
OPENAI_API_KEY="place your OpenAI Key to run the client"
META_ACCESS_TOKEN="place your WhatsApp generated Access token from the app"