- Directory structure:
- └── end-to-end-use-cases/
- ├── README.md
- ├── agents/
- │ ├── README.md
- │ ├── Agents_Tutorial/
- │ ├── calendar_assistant/
- │ │ ├── README.md
- │ │ └── assets/
- │ └── DeepLearningai_Course_Notebooks/
- │ └── README.md
- ├── ArticleSummarizer/
- │ ├── README.md
- │ ├── gradle.properties
- │ ├── gradlew
- │ ├── .gitignore
- │ ├── app/
- │ │ ├── .gitignore
- │ │ └── src/
- │ │ └── main/
- │ │ ├── java/
- │ │ │ └── com/
- │ │ │ └── example/
- │ │ │ └── llamaandroiddemo/
- │ │ └── res/
- │ │ ├── drawable/
- │ │ ├── layout/
- │ │ ├── mipmap-anydpi-v26/
- │ │ ├── mipmap-hdpi/
- │ │ ├── mipmap-mdpi/
- │ │ ├── mipmap-xhdpi/
- │ │ ├── mipmap-xxhdpi/
- │ │ ├── mipmap-xxxhdpi/
- │ │ ├── values/
- │ │ └── xml/
- │ └── gradle/
- │ └── wrapper/
- │ └── gradle-wrapper.properties
- ├── benchmarks/
- │ ├── README.md
- │ ├── inference/
- │ │ ├── README.md
- │ │ ├── requirements.txt
- │ │ ├── cloud/
- │ │ │ ├── README.md
- │ │ │ ├── aws/
- │ │ │ │ └── fmbench/
- │ │ │ │ ├── README.md
- │ │ │ │ └── img/
- │ │ │ └── azure/
- │ │ └── on_prem/
- │ │ ├── README.md
- │ │ └── vllm/
- │ └── llm_eval_harness/
- │ ├── README.md
- │ └── meta_eval/
- │ ├── README.md
- │ └── meta_template/
- │ ├── bbh/
- │ ├── gpqa/
- │ ├── gpqa_cot/
- │ ├── ifeval/
- │ ├── math_hard/
- │ ├── mmlu/
- │ └── mmlu_pro/
- ├── book-character-mindmap/
- │ ├── README.md
- │ ├── .gitignore
- │ ├── public/
- │ │ └── robots.txt
- │ ├── server/
- │ │ └── requirements.txt
- │ └── src/
- │ └── pages/
- │ ├── bookPage/
- │ │ └── components/
- │ └── homePage/
- │ └── components/
- ├── browser_use/
- │ ├── README.md
- │ └── agent/
- ├── coding/
- │ └── text2sql/
- │ ├── README.md
- │ ├── nba.txt
- │ ├── nba_roster.db
- │ └── requirements.txt
- ├── Contextual-Chunking-RAG/
- │ ├── README.md
- │ └── data/
- │ └── llama_article.txt
- ├── customerservice_chatbots/
- │ ├── README.md
- │ ├── messenger_chatbot/
- │ │ └── messenger_llama3.md
- │ ├── RAG_chatbot/
- │ │ ├── requirements.txt
- │ │ ├── data/
- │ │ └── vectorstore/
- │ │ ├── db_faiss/
- │ │ └── mongodb/
- │ └── whatsapp_chatbot/
- │ └── whatsapp_llama3.md
- ├── email_agent/
- │ ├── README.md
- │ └── requirements.txt
- ├── github_triage/
- │ ├── README.md
- │ ├── requirements.txt
- │ └── output/
- │ └── pytorch/
- │ └── pytorch/
- │ └── 2024-08-28_2024-08-28/
- │ └── plots/
- ├── long_context/
- │ └── H2O/
- │ ├── README.md
- │ ├── requirements.txt
- │ ├── data/
- │ │ └── summarization/
- │ ├── src/
- │ └── utils/
- ├── Multi-Modal-RAG/
- │ ├── README.md
- │ ├── notebooks/
- │ └── scripts/
- ├── multilingual/
- │ ├── README.md
- │ └── img/
- ├── NotebookLlama/
- │ ├── README.md
- │ ├── requirements.txt
- │ ├── TTS_Notes.md
- │ └── resources/
- │ └── clean_extracted_text.txt
- ├── RAFT-Chatbot/
- │ ├── README.md
- │ └── images/
- ├── research_paper_analyzer/
- │ ├── README.md
- │ └── requirements.txt
- └── whatsapp_llama_4_bot/
- ├── README.md
- ├── requirements.txt
- └── .env
- ================================================
- FILE: end-to-end-use-cases/README.md
- ================================================
- <h1 align="center"> End-to-End Use Case Applications using various Llama Models </h1>
- <p align="center">
- <a href="https://llama.developer.meta.com/join_waitlist?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Join_Waitlist-brightgreen?logo=meta" /></a>
- <a href="https://llama.developer.meta.com/docs?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img src="https://img.shields.io/badge/Llama_API-Documentation-4BA9FE?logo=meta" /></a>
- </p>
- <p align="center">
- <a href="https://github.com/meta-llama/llama-models/blob/main/models/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Model cards" src="https://img.shields.io/badge/Llama_OSS-Model_cards-green?logo=meta" /></a>
- <a href="https://www.llama.com/docs/overview/?utm_source=llama-cookbook&utm_medium=readme&utm_campaign=end_to_end"><img alt="Llama Documentation" src="https://img.shields.io/badge/Llama_OSS-Documentation-4BA9FE?logo=meta" /></a>
- <a href="https://huggingface.co/meta-llama"><img alt="Hugging Face meta-llama" src="https://img.shields.io/badge/Hugging_Face-meta--llama-yellow?logo=huggingface" /></a>
- </p>
- <p align="center">
- <a href="https://github.com/meta-llama/synthetic-data-kit"><img alt="Llama Tools Syntethic Data Kit" src="https://img.shields.io/badge/Llama_Tools-synthetic--data--kit-orange?logo=meta" /></a>
- <a href="https://github.com/meta-llama/llama-prompt-ops"><img alt="Llama Tools Syntethic Data Kit" src="https://img.shields.io/badge/Llama_Tools-llama--prompt--ops-orange?logo=meta" /></a>
- </p>
- ## [Building an Intelligent WhatsApp Bot with Llama 4 APIs](./whatsapp_llama_4_bot/README.md)
- ### A Step-by-Step Guide
- Create a WhatsApp bot that leverages the power of Llama 4 APIs to provide intelligent and interactive responses. This guide will walk you through the process of building a bot that supports text, image, and audio interactions, making it versatile for various use cases.
- - **Text Interaction**: Respond to text messages with accurate and contextually relevant answers.
- - **Image Reasoning**: Analyze images to provide insights, descriptions, or answers related to the content.
- - **Audio-to-Audio Interaction**: Transcribe audio messages to text, process them, and convert back to audio for seamless voice-based interaction.
- Get started with building your own WhatsApp bot using Llama 4 APIs today!
- ## [Research Paper Analyzer with Llama 4 Maverick](./research_paper_analyzer/README.md)
- ### Analyze Research Papers with Ease
- Leverage Llama 4 Maverick to retrieve the references of an arXiv paper and ingest all of their content for question answering.
- - **Long Context Length**: Process entire papers at once.
- - **Comprehensive Analysis**: Get insights, descriptions, or answers related to the content.
- Get started with analyzing research papers using Llama 4 Maverick today!
- ## [Book Character Mind Map With Llama 4 Maverick](./book-character-mindmap/README.md)
- ### Explore Book Characters and Storylines
- Use Llama 4 Maverick to process entire books at once and visualize character relationships and storylines.
- - **Interactive Mind Maps**: Visualize relationships between characters and plot elements.
- - **Book Summaries**: Get concise overviews of plots and themes.
- Discover new insights into your favorite books!
- ## [Agentic Tutorial](./agents/):
- ### 101 and 201 tutorials on performing Tool Calling and building an Agentic Workflow using Llama Models
- The 101 notebooks show how to apply Llama models and enable tool-calling functionality; the 201 notebook walks you through an end-to-end workflow of building an agent that can search two papers, fetch their details, and find their differences.
- ## [Benchmarks](./benchmarks/):
- ### A folder of benchmark scripts
- The scripts apply a throughput analysis and provide an introduction to `lm-evaluation-harness`, a tool for evaluating Llama models, including quantized models, with a focus on quality
- ## [Browser Usage](./browser_use/):
- ### Demo of how to apply Llama models and use them for browsing the internet and completing tasks
- ## [Automatic Triaging of Github Repositories](./github_triage/walkthrough.ipynb):
- ### Use Llama to automatically triage issues in an OSS repository and generate insights to improve community experience
- This tool utilizes an off-the-shelf Llama model to analyze issues, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.
- ## [NBA2023-24](./coding/text2sql/quickstart.ipynb):
- ### Ask Llama 3 about Structured Data
- This demo app shows how to use LangChain and Llama 3 to let users ask questions about **structured** data stored in a SQL DB. As the 2023-24 NBA season enters the playoffs, we use the NBA roster info saved in a SQLite DB to show you how to ask Llama 3 questions about your favorite teams or players.
- ## [NotebookLlama](./NotebookLlama/):
- ### PDF to Podcast using Llama Models
- A workflow showcasing how to use multiple Llama models to go from any PDF to a podcast, using open models to generate multi-speaker audio
- ## [WhatsApp Chatbot](./customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md):
- ### Building a Llama 3 Enabled WhatsApp Chatbot
- This step-by-step tutorial shows how to use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to build a Llama 3 enabled WhatsApp chatbot.
- ## [Messenger Chatbot](./customerservice_chatbots/messenger_chatbot/messenger_llama3.md):
- ### Building a Llama 3 Enabled Messenger Chatbot
- This step-by-step tutorial shows how to use the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview) to build a Llama 3 enabled Messenger chatbot.
- ### RAG Chatbot Example (running [locally](./customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb))
- A complete example of how to build a Llama 3 chatbot hosted on your browser that can answer questions based on your own data using retrieval augmented generation (RAG).
- ================================================
- FILE: end-to-end-use-cases/agents/README.md
- ================================================
- ## Agents and Tool Calling
- Structure:
- - Agents_Tutorial: 101 and 201 notebooks with guidance on using tool calling with Llama models
- - DeepLearningai_Course_Notebooks: Notebooks ported from the DeepLearning.ai short courses on agents
- ================================================
- FILE: end-to-end-use-cases/agents/calendar_assistant/README.md
- ================================================
- # Google Calendar Assistant with Llama 3.2 3B Tool Calling
- This notebook showcases how to build a digital assistant that schedules meetings using the Llama 3.2 3B model. The core concepts used to implement this are prompt engineering and tool calling. The demo shows how Llama can interact with third-party apps like Google Contacts & Google Calendar and schedule a meeting requested by the user. Even though we use prompt engineering to achieve this, the approach doesn't degrade the model's ability to answer general queries, and it can be extended to other tasks in a similar manner without affecting the quality of those tasks.
- ## Approach
- Instead of using a complex system prompt with multiple conditions and expecting Llama to perform various tasks accurately out of the box, the approach here is to treat this as a two-step process:
- - Determine user intent - Task classification
- - Take action for the specific task using Tool Calling
- In the diagram shown below,
- - system prompt 1 determines the classification of the query
- - In steps 2 & 3, we classify the task being requested.
- - system prompt 2 is chosen based on the classification result
- - Steps 4 & 5 implement the classified task.
- - For the sake of the demo, we show two classes: General & Meeting
- 
- Both these tasks have a specific prompt. We use the same model with different system prompts depending on the classification result.
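- As a rough illustration of this two-step flow (a schematic sketch only, not the notebook's actual code; `call_llama` is a hypothetical stand-in for however you invoke the Llama 3.2 3B model, and the prompt strings are illustrative):
- ```python
- def call_llama(system_prompt: str, user_prompt: str) -> str:
-     # Hypothetical helper: wire this up to your Llama 3.2 3B endpoint.
-     raise NotImplementedError
- 
- CLASSIFY_PROMPT = "Classify the request with one word: 'general' or 'meeting'."
- TASK_PROMPTS = {
-     "general": "Answer the user's question concisely.",
-     "meeting": "Return <function=...> tool calls to schedule the meeting.",
- }
- 
- def respond(user_prompt: str) -> str:
-     # Steps 1-3: classify the intent using system prompt 1
-     intent = call_llama(CLASSIFY_PROMPT, user_prompt).strip().lower()
-     # Steps 4-5: run the task with the system prompt chosen for that class
-     system = TASK_PROMPTS.get(intent, TASK_PROMPTS["general"])
-     return call_llama(system, user_prompt)
- ```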
- Additionally, this demo showcases how Llama can perform double tool calling with one prompt: in the Meeting case, Llama returns two function calls in Step 5
- ```
- <function=google_contact>{"name": "John Constantine"}</function>
- <function=google_calendar>{"date": "Mar 31", "time": "5:30 pm", "attendees": "John Constantine"}</function>
- ```
- ## Examples
- - General query
- **User Prompt** : `Tell me about Paris`
- **Prompt Classification**: `general`
- **Answer**:
- ```
- Paris, the City of Light, is the capital of France. Known for its stunning architecture, art museums, and romantic atmosphere, Paris is a popular tourist destination. The city is famous for its iconic landmarks such as the Eiffel Tower, Notre-Dame Cathedral, and the Louvre Museum, which houses the Mona Lisa. Paris is also renowned for its fashion, cuisine, and wine. The city's charming streets and historic neighborhoods, like Montmartre and Le Marais, offer a glimpse
- ```
- - Set up a Calendar Invite
- **User Prompt** : `Schedule a meeting with John Constantine on Mar 31 at 5:30 pm`
- **Prompt Classification**: `meeting`
- **Answer**:
- ```
- <function=google_contact>{"name": "John Constantine"}</function>
- <function=google_calendar>{"date": "Mar 31", "time": "5:30 pm", "attendees": "John Constantine"}</function>
- ```
- ### Actions based on tool calling output
- - The `google_contact` function call returned by the model is used to call the [People API](https://developers.google.com/people) to look up the email address of the person of interest
- - The email address from the previous step is used to call the [Calendar API](https://developers.google.com/calendar), along with the other information in the `google_calendar` tool-calling output returned by the model
- The end result is that a Google Calendar meeting is scheduled with the person of interest at the specified date & time.
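- A minimal sketch of parsing this tool-calling output before dispatching to the Google APIs (the regex and helper are ours and assume the arguments are plain JSON; the actual People/Calendar API calls are omitted):
- ```python
- import json
- import re
- 
- # Matches <function=NAME>{...}</function> fragments like the Step 5 output.
- TOOL_CALL_RE = re.compile(r"<function=(\w+)>(\{.*?\})</function>")
- 
- def parse_tool_calls(model_output: str):
-     """Return a list of (tool_name, arguments) pairs."""
-     return [(name, json.loads(args))
-             for name, args in TOOL_CALL_RE.findall(model_output)]
- 
- reply = ('<function=google_contact>{"name": "John Constantine"}</function>'
-          '<function=google_calendar>{"date": "Mar 31", "time": "5:30 pm", '
-          '"attendees": "John Constantine"}</function>')
- for tool, args in parse_tool_calls(reply):
-     print(tool, args)  # dispatch to the People API / Calendar API here
- ```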
- ================================================
- FILE: end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks/README.md
- ================================================
- # Quickstart Llama 3 Powered Agent Examples Ported from DeepLearning.ai Short Courses
- The notebooks in this folder are ported from the 4 recent agent short courses on [Deeplearning.ai](https://www.deeplearning.ai) to use Llama 3 to build agent apps from scratch or with open source frameworks (LangChain, LlamaIndex, AutoGen).
- 1. [Functions Tools and Agents with LangChain L1 Function Calling](Functions_Tools_and_Agents_with_LangChain_L1_Function_Calling.ipynb)
- 2. [AI Agents in LangGraph L1 Build an Agent from Scratch](AI_Agents_in_LangGraph_L1_Build_an_Agent_from_Scratch.ipynb)
- 3. [Building Agentic RAG with Llamaindex L1 Router Engine](Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb)
- 4. [AI Agentic Design Patterns with AutoGen L4 Tool Use and Conversational Chess](AI_Agentic_Design_Patterns_with_AutoGen_L4_Tool_Use_and_Conversational_Chess.ipynb)
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/README.md
- ================================================
- # Android Document Summarizer App
- <img src="./screenshot.png">
- This is a sample Android app that demonstrates Llama 4 multimodal and multilingual capabilities. It allows users to take a picture or screenshot of a document, then summarize and translate it into any of the supported languages.
- ## Quick Start
- 1. Open the ArticleSummarizer folder in Android Studio
- 2. Update the `API_KEY` in `AppUtils.java`
- 3. Build the Android Project
- 4. Inside the app, tap the settings icon in the top right
- 5. Configure the Remote URL endpoint (any supported provider that serves Llama 4 models, e.g. https://api.together.xyz)
- 6. Select the desired model from the drop-down list. If you need to add more models, modify `ModelUtils.java`
- 7. Go back to the Main chat window
- 8. Press the '+' button on the bottom left and select an image document (or take a picture of one!)
- 9. Select the 'globe' button on the bottom left and select your languages
- 10. Enter a prompt like "summarize this" and press Enter!
- > **_NOTE:_** This is an example project to demonstrate the E2E flow. You should NOT use or store an API key directly on the client. Exposing your API key in client-side environments allows malicious users to take that key and make requests on your behalf. Requests should always be routed through your own backend server, where you can keep your API key secure.
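- One possible shape for that backend (a minimal sketch, not part of this app; FastAPI, `httpx`, the provider URL, and the environment variable name are all assumptions):
- ```python
- import os
- 
- import httpx
- from fastapi import FastAPI, Request
- 
- app = FastAPI()
- PROVIDER_URL = "https://api.together.xyz/v1/chat/completions"  # example provider
- 
- @app.post("/chat")
- async def chat(request: Request):
-     body = await request.json()  # the Android client never sees the key below
-     async with httpx.AsyncClient() as client:
-         r = await client.post(
-             PROVIDER_URL,
-             headers={"Authorization": f"Bearer {os.environ['PROVIDER_API_KEY']}"},
-             json=body,
-             timeout=60,
-         )
-     return r.json()
- ```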
- ## Reporting Issues
- If you encounter any bugs or issues while following this tutorial, please file a bug/issue on [GitHub](https://github.com/meta-llama/llama-cookbook/issues).
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/gradle.properties
- ================================================
- # Project-wide Gradle settings.
- # IDE (e.g. Android Studio) users:
- # Gradle settings configured through the IDE *will override*
- # any settings specified in this file.
- # For more details on how to configure your build environment visit
- # http://www.gradle.org/docs/current/userguide/build_environment.html
- # Specifies the JVM arguments used for the daemon process.
- # The setting is particularly useful for tweaking memory settings.
- org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
- # When configured, Gradle will run in incubating parallel mode.
- # This option should only be used with decoupled projects. More details, visit
- # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
- # org.gradle.parallel=true
- # AndroidX package structure to make it clearer which packages are bundled with the
- # Android operating system, and which are packaged with your app's APK
- # https://developer.android.com/topic/libraries/support-library/androidx-rn
- android.useAndroidX=true
- # Kotlin code style for this project: "official" or "obsolete":
- kotlin.code.style=official
- # Enables namespacing of each library's R class so that its R class includes only the
- # resources declared in the library itself and none from the library's dependencies,
- # thereby reducing the size of the R class for that library
- android.nonTransitiveRClass=true
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/gradlew
- ================================================
- #!/usr/bin/env sh
- #
- # Copyright 2015 the original author or authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- ##############################################################################
- ##
- ## Gradle start up script for UN*X
- ##
- ##############################################################################
- # Attempt to set APP_HOME
- # Resolve links: $0 may be a link
- PRG="$0"
- # Need this for relative symlinks.
- while [ -h "$PRG" ] ; do
- ls=`ls -ld "$PRG"`
- link=`expr "$ls" : '.*-> \(.*\)$'`
- if expr "$link" : '/.*' > /dev/null; then
- PRG="$link"
- else
- PRG=`dirname "$PRG"`"/$link"
- fi
- done
- SAVED="`pwd`"
- cd "`dirname \"$PRG\"`/" >/dev/null
- APP_HOME="`pwd -P`"
- cd "$SAVED" >/dev/null
- APP_NAME="Gradle"
- APP_BASE_NAME=`basename "$0"`
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
- DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
- # Use the maximum available, or set MAX_FD != -1 to use that value.
- MAX_FD="maximum"
- warn () {
- echo "$*"
- }
- die () {
- echo
- echo "$*"
- echo
- exit 1
- }
- # OS specific support (must be 'true' or 'false').
- cygwin=false
- msys=false
- darwin=false
- nonstop=false
- case "`uname`" in
- CYGWIN* )
- cygwin=true
- ;;
- Darwin* )
- darwin=true
- ;;
- MINGW* )
- msys=true
- ;;
- NONSTOP* )
- nonstop=true
- ;;
- esac
- CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
- # Determine the Java command to use to start the JVM.
- if [ -n "$JAVA_HOME" ] ; then
- if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
- # IBM's JDK on AIX uses strange locations for the executables
- JAVACMD="$JAVA_HOME/jre/sh/java"
- else
- JAVACMD="$JAVA_HOME/bin/java"
- fi
- if [ ! -x "$JAVACMD" ] ; then
- die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
- Please set the JAVA_HOME variable in your environment to match the
- location of your Java installation."
- fi
- else
- JAVACMD="java"
- which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
- Please set the JAVA_HOME variable in your environment to match the
- location of your Java installation."
- fi
- # Increase the maximum file descriptors if we can.
- if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
- MAX_FD_LIMIT=`ulimit -H -n`
- if [ $? -eq 0 ] ; then
- if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
- MAX_FD="$MAX_FD_LIMIT"
- fi
- ulimit -n $MAX_FD
- if [ $? -ne 0 ] ; then
- warn "Could not set maximum file descriptor limit: $MAX_FD"
- fi
- else
- warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
- fi
- fi
- # For Darwin, add options to specify how the application appears in the dock
- if $darwin; then
- GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
- fi
- # For Cygwin or MSYS, switch paths to Windows format before running java
- if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
- APP_HOME=`cygpath --path --mixed "$APP_HOME"`
- CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
- JAVACMD=`cygpath --unix "$JAVACMD"`
- # We build the pattern for arguments to be converted via cygpath
- ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
- SEP=""
- for dir in $ROOTDIRSRAW ; do
- ROOTDIRS="$ROOTDIRS$SEP$dir"
- SEP="|"
- done
- OURCYGPATTERN="(^($ROOTDIRS))"
- # Add a user-defined pattern to the cygpath arguments
- if [ "$GRADLE_CYGPATTERN" != "" ] ; then
- OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
- fi
- # Now convert the arguments - kludge to limit ourselves to /bin/sh
- i=0
- for arg in "$@" ; do
- CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
- CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
- if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
- eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
- else
- eval `echo args$i`="\"$arg\""
- fi
- i=`expr $i + 1`
- done
- case $i in
- 0) set -- ;;
- 1) set -- "$args0" ;;
- 2) set -- "$args0" "$args1" ;;
- 3) set -- "$args0" "$args1" "$args2" ;;
- 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
- 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
- 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
- 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
- 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
- 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
- esac
- fi
- # Escape application args
- save () {
- for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
- echo " "
- }
- APP_ARGS=`save "$@"`
- # Collect all arguments for the java command, following the shell quoting and substitution rules
- eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
- exec "$JAVACMD" "$@"
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/.gitignore
- ================================================
- # Gradle files
- .gradle/
- build/
- # Local configuration file (sdk path, etc)
- local.properties
- # Log/OS Files
- *.log
- # Android Studio generated files and folders
- captures/
- .externalNativeBuild/
- .cxx/
- *.apk
- output.json
- # IntelliJ
- *.iml
- .idea/
- misc.xml
- deploymentTargetDropDown.xml
- render.experimental.xml
- # Keystore files
- *.jks
- *.keystore
- # Google Services (e.g. APIs or Firebase)
- google-services.json
- # Android Profiling
- *.hprof
- .DS_Store
- *.jar
- *.aar
- *.so
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/app/.gitignore
- ================================================
- /build
- ================================================
- FILE: end-to-end-use-cases/ArticleSummarizer/gradle/wrapper/gradle-wrapper.properties
- ================================================
- #Mon Sep 25 11:23:11 PDT 2023
- distributionBase=GRADLE_USER_HOME
- distributionPath=wrapper/dists
- distributionUrl=https\://services.gradle.org/distributions/gradle-8.0-bin.zip
- zipStoreBase=GRADLE_USER_HOME
- zipStorePath=wrapper/dists
- ================================================
- FILE: end-to-end-use-cases/benchmarks/README.md
- ================================================
- # Benchmarks
- * inference - a folder containing benchmark scripts that apply a throughput analysis for Llama model inference on various backends, including on-prem, cloud, and on-device.
- * llm_eval_harness - a folder that introduces `lm-evaluation-harness`, a tool for evaluating Llama models (including quantized models) with a focus on quality. We also include a recipe that calculates Llama 3.1 evaluation metrics using `lm-evaluation-harness`, along with instructions for calculating Hugging Face Open LLM Leaderboard v2 metrics.
- ================================================
- FILE: end-to-end-use-cases/benchmarks/inference/README.md
- ================================================
- # Inference Throughput Benchmarks
- In this folder we provide a series of benchmark scripts that apply a throughput analysis for Llama model inference on various backends:
- * On-prem - Popular serving frameworks and containers (e.g. vLLM)
- * Cloud API - Popular API services (e.g. Azure Model-as-a-Service or Serverless API)
- * [**WIP**] On-device - Popular on-device inference solutions on mobile and desktop (e.g. ExecuTorch, MLC-LLM, Ollama)
- * [**WIP**] Optimization - Popular optimization solutions for faster inference and quantization (e.g. AutoAWQ)
- # Why
- There are three major reasons we want to run these benchmarks and share them with our Llama community:
- * Provide an inference throughput analysis based on real-world scenarios to help you select the best service or deployment for your use case
- * Provide a baseline measurement for validating various optimization solutions on different backends, so we can offer guidance on which solutions work best for your scenario
- * Encourage the community to develop benchmarks on top of our work, so we can better quantify the latest proposed solutions combined with currently popular frameworks in this fast-moving area
- # Parameters
- Here are the parameters (if applicable) that you can configure for running the benchmark:
- * **PROMPT** - Prompt sent in for inference (configure the length of prompt, choose from 5, 25, 50, 100, 500, 1k and 2k)
- * **MAX_NEW_TOKENS** - Max number of tokens generated
- * **CONCURRENT_LEVELS** - Max number of concurrent requests
- * **MODEL_PATH** - Model source from Huggingface
- * **MODEL_HEADERS** - Request headers
- * **SAFE_CHECK** - Content safety check (either Azure service or simulated latency)
- * **THRESHOLD_TPS** - Threshold TPS (threshold for tokens per second below which we deem the query to be slow)
- * **TOKENIZER_PATH** - Tokenizer source
- * **RANDOM_PROMPT_LENGTH** - Random prompt length (for pretrained models)
- * **NUM_GPU** - Number of GPUs for request dispatch among multiple containers
- * **TEMPERATURE** - Temperature for inference
- * **TOP_P** - Top_p for inference
- * **MODEL_ENDPOINTS** - Container endpoints
- * Model parallelism or model replicas - Load one model across multiple GPUs, or run multiple model replicas on one instance. More details are in the README files for specific containers.
- You can also configure other model hyperparameters as part of the request payload.
- All these parameters are stored in ```parameter.json``` and real prompts are stored in ```input.jsonl```. Running the script will load these configurations.
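- As a rough sketch of what such a configuration might contain (the field names are taken from the list above; the values are illustrative, and each backend's README defines the exact schema):
- ```python
- import json
- 
- # Illustrative parameter.json contents; adjust per the backend's README.
- params = {
-     "MAX_NEW_TOKENS": 256,
-     "CONCURRENT_LEVELS": [1, 2, 4, 8, 16, 32],
-     "MODEL_PATH": "meta-llama/Meta-Llama-3-8B-Instruct",
-     "MODEL_HEADERS": {"Content-Type": "application/json"},
-     "SAFE_CHECK": True,
-     "THRESHOLD_TPS": 7,
-     "TEMPERATURE": 0.6,
-     "TOP_P": 0.9,
-     "MODEL_ENDPOINTS": ["http://localhost:8000/v1/chat/completions"],
- }
- 
- with open("parameter.json", "w") as f:
-     json.dump(params, f, indent=2)
- ```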
- # Metrics
- The benchmark will report these metrics per instance:
- * Number of concurrent requests
- * P50 Latency (ms)
- * P99 Latency (ms)
- * Requests per second (RPS)
- * Output tokens per second
- * Output tokens per second per GPU
- * Input tokens per second
- * Input tokens per second per GPU
- * Average tokens per second per request
- We intend to add these metrics in the future:
- * Time to first token (TTFT)
-
- The benchmark results will be displayed in the terminal output and saved as a CSV file (```performance_metrics.csv```), which you can import into a spreadsheet.
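- To make the latency and throughput definitions concrete, here is a small post-processing sketch (the per-request record fields and the sequential wall-clock assumption are ours, not the scripts' actual internals):
- ```python
- import statistics
- 
- # One record per completed request; field names are illustrative.
- requests_log = [
-     {"latency_ms": 812.0, "input_tokens": 120, "output_tokens": 256},
-     {"latency_ms": 944.5, "input_tokens": 95, "output_tokens": 256},
- ]
- 
- latencies = sorted(r["latency_ms"] for r in requests_log)
- cuts = statistics.quantiles(latencies, n=100)
- p50, p99 = cuts[49], cuts[98]
- wall_clock_s = sum(latencies) / 1000  # valid only if requests ran sequentially
- out_tps = sum(r["output_tokens"] for r in requests_log) / wall_clock_s
- print(f"P50={p50:.1f} ms  P99={p99:.1f} ms  output tokens/s={out_tps:.1f}")
- ```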
- # Getting Started
- Please follow the ```README.md``` in each subfolder for instructions on how to set up and run these benchmarks.
- ================================================
- FILE: end-to-end-use-cases/benchmarks/inference/requirements.txt
- ================================================
- transformers
- requests
- azure-core
- azure-ai-contentsafety
- torch
- ================================================
- FILE: end-to-end-use-cases/benchmarks/inference/cloud/README.md
- ================================================
- # Llama-Cloud-API-Benchmark
- This folder contains code to run an inference benchmark for Llama 2 models on cloud APIs from popular cloud service providers. The benchmark focuses on overall inference **throughput** for querying the API endpoint for output generation with different levels of concurrent requests. Remember that to send queries to the API endpoint, you must have a subscription with the cloud service provider, and there will be an associated fee.
- Disclaimer - The purpose of this code is to provide a configurable setup to measure inference throughput. It is not representative of the performance of these API services, and we do not plan to make comparisons between different API providers.
- # Azure - Getting Started
- To get started, there are certain steps we need to take to deploy the models:
- <!-- markdown-link-check-disable -->
- * Register for a valid Azure account with subscription [here](https://azure.microsoft.com/en-us/free/search/?ef_id=_k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&OCID=AIDcmm5edswduu_SEM__k_CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE_k_&gad_source=1&gclid=CjwKCAiA-P-rBhBEEiwAQEXhH5OHAJLhzzcNsuxwpa5c9EJFcuAjeh6EvZw4afirjbWXXWkiZXmU2hoC5GoQAvD_BwE)
- <!-- markdown-link-check-enable -->
- * Take a quick look at what [Azure AI Studio](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio?tabs=home) is and navigate to the website from the link in the article
- * Follow the demos in the article to create a project and [resource group](https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/manage-resource-groups-portal), or follow the guide [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio)
- * Select Llama models from Model catalog
- * Click the "Deploy" button
- * Select Serverless API with Azure AI Content Safety. Note that this API service is currently offered for the Llama 2 pretrained and chat models and the Llama 3 instruct model
- * Select the project you created in the previous step
- * Choose a deployment name, then go to the deployment
- Once deployed successfully, you should be assigned an API endpoint and a security key for inference.
- For more information, you should consult Azure's official documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-llama?tabs=azure-studio) for model deployment and inference.
- Now, replace the endpoint URL and API key in ```azure/parameters.json```. For the parameter `MODEL_ENDPOINTS`, the suffix should be `v1/chat/completions` for chat models and `v1/completions` for pretrained models.
- Note that the API endpoint might implement a rate limit on token generation within a certain amount of time. If you encounter such an error, try reducing `MAX_NEW_TOKEN` or starting with a smaller `CONCURRENT_LEVELS`.
- For `MODEL_PATH`, copy the model path from Hugging Face under the meta-llama organization. For Llama 2, make sure you copy the path of the model in HF format. This model path is used to retrieve the corresponding tokenizer for your model of choice. Llama 3 uses a different tokenizer than Llama 2.
- Once everything is configured, run the chat model benchmark:
- ```python chat_azure_api_benchmark.py```
- To run the pretrained model benchmark:
- ```python pretrained_azure_api_benchmark.py```
- Once finished, the results will be written to a CSV file in the same directory, which can later be imported into a dashboard of your choice.
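- Before launching a full run, it can help to sanity-check the endpoint with a single request. A minimal sketch assuming the chat-completions suffix mentioned above (the placeholder URL and key are yours to fill in; the exact auth header shape may vary, so check your deployment page):
- ```python
- import requests
- 
- ENDPOINT = "https://<your-endpoint-from-azure>/v1/chat/completions"
- API_KEY = "<your-api-key>"
- 
- resp = requests.post(
-     ENDPOINT,
-     headers={"Authorization": f"Bearer {API_KEY}",
-              "Content-Type": "application/json"},
-     json={"messages": [{"role": "user", "content": "Say hello in one sentence."}],
-           "max_tokens": 64},
-     timeout=60,
- )
- resp.raise_for_status()
- print(resp.json()["choices"][0]["message"]["content"])
- ```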
- ================================================
- FILE: end-to-end-use-cases/benchmarks/inference/cloud/aws/fmbench/README.md
- ================================================
- # Benchmark Llama models on AWS
- The [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) tool provides a quick and easy way to benchmark the Llama family of models for price and performance on any AWS service, including [`Amazon SageMaker`](https://aws.amazon.com/solutions/guidance/generative-ai-deployments-using-amazon-sagemaker-jumpstart/), [`Amazon Bedrock`](https://aws.amazon.com/bedrock/), `Amazon EKS`, or `Amazon EC2` as `Bring your own endpoint`.
- ## The need for benchmarking
- <!-- markdown-link-check-disable -->
- Customers often wonder what the best AWS service is to run Llama models for _my specific use case_ and _my specific price performance requirements_. While model evaluation metrics are available on several leaderboards ([`HELM`](https://crfm.stanford.edu/helm/lite/latest/#/leaderboard), [`LMSys`](https://chat.lmsys.org/?leaderboard)), the price performance comparison can be notoriously hard to find and even harder to trust. In such a scenario, we think it is best to run performance benchmarking yourself, either on your own dataset or on similar (task-wise, prompt-size-wise) open-source datasets such as [`LongBench`](https://huggingface.co/datasets/THUDM/LongBench) and [`QMSum`](https://paperswithcode.com/dataset/qmsum). This is the problem that [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main) solves.
- <!-- markdown-link-check-enable -->
- ## [`FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main): an open-source Python package for FM benchmarking on AWS
- `FMBench` runs inference requests against endpoints that are either deployed through `FMBench` itself (as in the case of SageMaker) or available as a fully managed endpoint (as in the case of Bedrock) or as a bring-your-own endpoint. Metrics such as inference latency, transactions per minute, error rates, and cost per transaction are captured and presented in a Markdown report containing explanatory text, tables, and figures. The figures and tables in the report provide insights into what might be the best serving stack (instance type, inference container, and configuration parameters) for a given Llama model and use case.
- The following figure gives an example of the price performance numbers, including inference latency, transactions per minute, and concurrency level, for running the `Llama2-13b` model on different instance types available on SageMaker, using prompts for a Q&A task created from the [`LongBench`](https://huggingface.co/datasets/THUDM/LongBench) dataset; these prompts are between 3000 and 3840 tokens in length. **_Note that the numbers are hidden in this figure but you would be able to see them when you run `FMBench` yourself_**.
- 
- The following table (also included in the report) provides information about the best available instance type for that experiment<sup>1</sup>.
- |Information |Value |
- |--- |--- |
- |experiment_name |llama2-13b-inf2.24xlarge |
- |payload_file |payload_en_3000-3840.jsonl |
- |instance_type |ml.inf2.24xlarge |
- |concurrency |** |
- |error_rate |** |
- |prompt_token_count_mean |3394 |
- |prompt_token_throughput |2400 |
- |completion_token_count_mean |31 |
- |completion_token_throughput |15 |
- |latency_mean |** |
- |latency_p50 |** |
- |latency_p95 |** |
- |latency_p99 |** |
- |transactions_per_minute |** |
- |price_per_txn |** |
- <sup>1</sup> ** represents values hidden on purpose; these are available when you run the tool yourself.
- The report also includes latency vs. prompt size charts for different concurrency levels. As expected, inference latency increases as prompt size increases, but interestingly the increase is much larger at higher concurrency levels (and this behavior varies with instance type).
- 
- ### How to get started with `FMBench`
- The following steps provide a [Quick start guide for `FMBench`](https://github.com/aws-samples/foundation-model-benchmarking-tool#quickstart). For a more detailed DIY version, please see the [`FMBench Readme`](https://github.com/aws-samples/foundation-model-benchmarking-tool?tab=readme-ov-file#the-diy-version-with-gory-details).
- 1. Each `FMBench` run works with a configuration file that contains information about the model, the deployment steps, and the tests to run. A typical `FMBench` workflow involves either directly using an already provided config file from the [`configs`](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main/fmbench/configs) folder in the `FMBench` GitHub repo or editing an already provided config file to suit your own requirements (say, benchmarking on a different instance type or with a different inference container).
- >A simple config file with key parameters annotated is included in this repo; see [`config.yml`](./config.yml). This file benchmarks the performance of Llama2-7b on an `ml.g5.xlarge` instance and an `ml.g5.2xlarge` instance. You can use this config file as-is for this Quickstart.
- 1. Launch the AWS CloudFormation template included in this repository using one of the buttons from the table below. The CloudFormation template creates the following resources within your AWS account: Amazon S3 buckets, an AWS IAM role, and an Amazon SageMaker Notebook with this repository cloned. A read S3 bucket is created containing all the files (configuration files, datasets) required to run `FMBench`, and a write S3 bucket is created to hold the metrics and reports generated by `FMBench`. The CloudFormation stack takes about 5 minutes to create.
- |AWS Region | Link |
- |:------------------------:|:-----------:|
- |us-east-1 (N. Virginia) | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
- |us-west-2 (Oregon) | [<img src="./img/CFT.png">](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=fmbench&templateURL=https://aws-blogs-artifacts-public.s3.amazonaws.com/artifacts/ML-FMBT/template.yml) |
- 1. Once the CloudFormation stack is created, navigate to SageMaker Notebooks and open the `fmbench-notebook`.
- 1. On the `fmbench-notebook` open a Terminal and run the following commands.
- ```{.bash}
- conda create --name fmbench_python311 -y python=3.11 ipykernel
- source activate fmbench_python311;
- pip install -U fmbench
- ```
- 1. Now you are ready to run `fmbench` with the following command line. We will use a sample config file placed in the S3 bucket by the CloudFormation stack for a quick first run.
- 1. We benchmark performance for the `Llama2-7b` model on an `ml.g5.xlarge` and an `ml.g5.2xlarge` instance type, using the `huggingface-pytorch-tgi-inference` inference container. This test takes about 30 minutes to complete and costs about $0.20.
- 1. It uses a simple rule of thumb that 750 words equal 1000 tokens; to get a more accurate representation of token counts, use the `Llama2 tokenizer`. **_It is strongly recommended that for more accurate results on token throughput you use a tokenizer specific to the model you are testing rather than the default tokenizer. See instructions provided [here](https://github.com/aws-samples/foundation-model-benchmarking-tool/tree/main?tab=readme-ov-file#the-diy-version-with-gory-details) on how to use a custom tokenizer_**.
- <!-- markdown-link-check-disable -->
- ```{.bash}
- account=`aws sts get-caller-identity | jq .Account | tr -d '"'`
- region=`aws configure get region`
- fmbench --config-file s3://sagemaker-fmbench-read-${region}-${account}/configs/llama2/7b/config-llama2-7b-g5-quick.yml >> fmbench.log 2>&1
- ```
- <!-- markdown-link-check-enable -->
- 1. Open another terminal window and do a `tail -f` on the `fmbench.log` file to see all the traces being generated at runtime.
- ```{.bash}
- tail -f fmbench.log
- ```
- 1. The generated reports and metrics are available in the `sagemaker-fmbench-write-<replace_w_your_aws_region>-<replace_w_your_aws_account_id>` bucket. The metrics and report files are also downloaded locally into the `results` directory (created by `FMBench`), and the benchmarking report is available as a Markdown file called `report.md` in that directory. You can view the rendered Markdown report in the SageMaker notebook itself or download the metrics and report files to your machine for offline analysis (a small sketch follows).
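- If you prefer to pull the results programmatically rather than from the notebook, here is a minimal sketch using `boto3`; the `results/` key prefix is an assumption, so adjust it to match what you actually see in your write bucket.
- ```{.python}
- # Minimal sketch: list FMBench outputs in the write bucket for offline analysis.
- # The "results/" prefix is an assumption; adjust it to what you see in your bucket.
- import boto3
- 
- session = boto3.Session()
- region = session.region_name
- account = boto3.client("sts").get_caller_identity()["Account"]
- bucket = f"sagemaker-fmbench-write-{region}-{account}"
- 
- s3 = boto3.client("s3")
- resp = s3.list_objects_v2(Bucket=bucket, Prefix="results/")
- for obj in resp.get("Contents", []):
-     print(obj["Key"])
- ```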
- ## 🚨 Benchmarking Llama3 on Amazon Bedrock 🚨
- Llama3 is now available on Bedrock (read [blog post](https://aws.amazon.com/blogs/aws/metas-llama-3-models-are-now-available-in-amazon-bedrock/)), and you can now benchmark it using `FMBench`. Here is the config file for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on Bedrock.
- <!-- markdown-link-check-disable -->
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock-llama3.yml) for `Llama3-8b-instruct` and `Llama3-70b-instruct`.
- <!-- markdown-link-check-enable -->
- ## 🚨 Benchmarking Llama3 on Amazon SageMaker 🚨
- Llama3 is now available on SageMaker (read [blog post](https://aws.amazon.com/blogs/machine-learning/meta-llama-3-models-are-now-available-in-amazon-sagemaker-jumpstart/)), and you can now benchmark it using `FMBench`. Here are the config files for benchmarking `Llama3-8b-instruct` and `Llama3-70b-instruct` on `ml.p4d.24xlarge`, `ml.inf2.24xlarge` and `ml.g5.12xlarge` instances.
- <!-- markdown-link-check-disable -->
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-instruct-g5-p4d.yml) for `Llama3-8b-instruct` on `ml.p4d.24xlarge` and `ml.g5.12xlarge`.
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/70b/config-llama3-70b-instruct-g5-p4d.yml) for `Llama3-70b-instruct` on `ml.p4d.24xlarge` and `ml.g5.48xlarge`.
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama3/8b/config-llama3-8b-inf2-g5.yml) for `Llama3-8b-instruct` on `ml.inf2.24xlarge` and `ml.g5.12xlarge`.
- <!-- markdown-link-check-enable -->
- ## Benchmarking Llama2 on Amazon SageMaker
- Llama2 models are available through SageMaker JumpStart as well as directly deployable from Hugging Face to a SageMaker endpoint. You can use `FMBench` to benchmark Llama2 on SageMaker for different combinations of instance types and inference containers.
- <!-- markdown-link-check-disable -->
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g5-quick.yml) for `Llama2-7b` on `ml.g5.xlarge` and `ml.g5.2xlarge` instances, using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04).
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/7b/config-llama2-7b-g4dn-g5-trt.yml) for `Llama2-7b` on `ml.g4dn.12xlarge` instance using the [Deep Java Library DeepSpeed container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-deepspeed0.12.6-cu121).
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/13b/config-llama2-13b-inf2-g5-p4d.yml) for `Llama2-13b` on `ml.g5.12xlarge`, `ml.inf2.24xlarge` and `ml.p4d.24xlarge` instances using the [Hugging Face TGI container](763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04) and the [Deep Java Library & NeuronX container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-neuronx-sdk2.16.0).
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-g5-p4d-trt.yml) for `Llama2-70b` on `ml.p4d.24xlarge` instance using the [Deep Java Library TensorRT container](763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.26.0-tensorrtllm0.7.1-cu122).
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/llama2/70b/config-llama2-70b-inf2-g5.yml) for `Llama2-70b` on `ml.inf2.48xlarge` instance using the [HuggingFace TGI with Optimum NeuronX container](763104351884.dkr.ecr.{region}.amazonaws.com/huggingface-pytorch-tgi-inference:1.13.1-optimum0.0.17-neuronx-py310-ubuntu22.04).
- <!-- markdown-link-check-enable -->
- ## Benchmarking Llama2 on Amazon Bedrock
- The Llama2-13b-chat and Llama2-70b-chat models are available on [Bedrock](https://aws.amazon.com/bedrock/llama/). You can use `FMBench` to benchmark Llama2 on Bedrock for both on-demand throughput and provisioned throughput inference options.
- <!-- markdown-link-check-disable -->
- - [Config file](https://github.com/aws-samples/foundation-model-benchmarking-tool/blob/main/src/fmbench/configs/bedrock/config-bedrock.yml) for `Llama2-13b-chat` and `Llama2-70b-chat` on Bedrock for on-demand throughput.
- <!-- markdown-link-check-enable -->
- - For testing provisioned throughput, simply replace the `ep_name` parameter in the `experiments` section of the config file with the ARN of your provisioned throughput. A small sketch of scripting this edit follows.
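- If you want to script that edit, here is a minimal sketch using PyYAML; the exact layout of the `experiments` section is an assumption based on the description above, and the ARN shown is a placeholder, so verify both against your config file.
- ```{.python}
- # Minimal sketch: point an FMBench config at a Bedrock provisioned throughput ARN.
- # The layout of the `experiments` section is an assumption; verify it against
- # your config file. The ARN below is a placeholder, not a real resource.
- import yaml
- 
- with open("config-bedrock.yml") as f:
-     config = yaml.safe_load(f)
- 
- for experiment in config.get("experiments", []):
-     experiment["ep_name"] = "arn:aws:bedrock:us-east-1:111122223333:provisioned-model/example"
- 
- with open("config-bedrock-provisioned.yml", "w") as f:
-     yaml.safe_dump(config, f)
- ```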
- ## More...
- For bug reports, enhancement requests and any questions please create a [GitHub issue](https://github.com/aws-samples/foundation-model-benchmarking-tool/issues) on the `FMBench` repo.
- ================================================
- FILE: end-to-end-use-cases/benchmarks/inference/on_prem/README.md
- ================================================
- # Llama-On-Prem-Benchmark
- This folder contains code to run inference benchmarks for Meta Llama 3 models on-prem with popular serving frameworks.
- The benchmark focuses on overall inference **throughput** for running containers on one instance (single or multiple GPUs) that you can acquire from cloud service providers such as Azure and AWS. You can also run this benchmark on a local laptop or desktop.
- We support benchmarking on these serving frameworks:
- * [vLLM](https://github.com/vllm-project/vllm)
- # vLLM - Getting Started
- To get started, we first need to deploy containers on-prem as an API host. Follow the guidance [here](../../../../3p-integrations/llama_on_prem.md#setting-up-vllm-with-llama-3) to deploy vLLM on-prem.
- Note that in the common scenario where overall throughput is important, we suggest prioritizing the deployment of as many model replicas as possible to reach higher overall throughput and requests per second (RPS), rather than deploying one model container across multiple GPUs for model parallelism. When deploying multiple model replicas, a higher-level wrapper is needed to handle the load balancing; in the benchmark scripts this is simulated (see the sketch after the benchmark command below).
- For example, we have an instance from Azure that has 8xA100 80G GPUs, and we want to deploy the Meta Llama 3 70B instruct model, which is around 140GB with FP16. So for deployment we can do:
- * 1x70B model, parallelized across 8 GPUs; each GPU uses around 17.5GB of RAM for loading model weights.
- * 2x70B models, each using 4 GPUs; each GPU uses around 35GB of RAM for loading model weights.
- * 4x70B models, each using 2 GPUs; each GPU uses around 70GB of RAM for loading model weights. (Preferred configuration for max overall throughput. Note that you will have 4 endpoints hosted on different ports, and the benchmark script will route requests to each model equally.)
- Here are examples for deploying 2x70B chat models over 8 GPUs with vLLM.
- ```
- CUDA_VISIBLE_DEVICES=0,1,2,3 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8000
- CUDA_VISIBLE_DEVICES=4,5,6,7 python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3.1-70B-Instruct --tensor-parallel-size 4 --disable-log-requests --port 8001
- ```
- Once you have finished deployment, you can use the command below to run benchmark scripts in a separate terminal.
- ```
- python chat_vllm_benchmark.py
- ```
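- The load balancing that the benchmark script simulates is conceptually a round-robin over the replica endpoints. Here is a minimal sketch of that idea; the actual routing logic in `chat_vllm_benchmark.py` may differ.
- ```
- # Minimal sketch of round-robin load balancing over two vLLM replicas.
- # The real routing lives in chat_vllm_benchmark.py and may differ; this only
- # illustrates distributing requests equally across the endpoints.
- import itertools
- import requests
- 
- ENDPOINTS = [
-     "http://localhost:8000/v1/completions",
-     "http://localhost:8001/v1/completions",
- ]
- endpoint_cycle = itertools.cycle(ENDPOINTS)
- 
- def send_request(prompt: str) -> str:
-     url = next(endpoint_cycle)  # pick the next replica in round-robin order
-     payload = {
-         "model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-         "prompt": prompt,
-         "max_tokens": 64,
-     }
-     resp = requests.post(url, json=payload, timeout=120)
-     resp.raise_for_status()
-     return resp.json()["choices"][0]["text"]
- ```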
- <!-- markdown-link-check-disable -->
- If you are going to use [Azure AI content check](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety), then you should install dependencies as shown below in your terminal:
- <!-- markdown-link-check-enable -->
- ```
- pip install azure-ai-contentsafety azure-core
- ```
- Besides chat models, we also provide benchmark scripts for running pretrained models on text completion tasks. To better simulate real traffic, we generate configurable random-token prompts as input. In this process, we select vocabulary entries longer than 2 characters so that the generated words read closer to English rather than symbols (a minimal sketch of this idea follows below).
- However, random-token prompts can't be used for chat model benchmarks, since a chat model expects a valid question. Fed random prompts, chat models rarely produce answers that meet our `MAX_NEW_TOKEN` requirement, defeating the purpose of running throughput benchmarks. Hence, for chat models, the questions are copied over to form long inputs, such as the 2k and 4k inputs.
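- Here is a minimal sketch of how such a random-token prompt could be generated with a Hugging Face tokenizer; the sampling logic in `pretrained_vllm_benchmark.py` may differ, and access to the Llama tokenizer requires accepting the model terms on Hugging Face.
- ```
- # Minimal sketch: build a random-token prompt of configurable length.
- # The actual sampling in pretrained_vllm_benchmark.py may differ; we keep
- # only vocabulary entries that decode to alphabetic strings longer than two
- # characters, so the prompt reads closer to English than to symbols.
- import random
- from transformers import AutoTokenizer
- 
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3.1-70B-Instruct")
- 
- candidate_ids = [
-     tok_id
-     for tok_id in range(tokenizer.vocab_size)
-     if (text := tokenizer.decode([tok_id]).strip()).isalpha() and len(text) > 2
- ]
- 
- def random_prompt(num_tokens: int) -> str:
-     ids = random.choices(candidate_ids, k=num_tokens)
-     return tokenizer.decode(ids)
- 
- print(random_prompt(128))
- ```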
- To run pretrained model benchmark, follow the command below.
- ```
- python pretrained_vllm_benchmark.py
- ```
- For more vLLM benchmark details, refer to their official GitHub repo [here](https://github.com/vllm-project/vllm/tree/main/benchmarks).
- ================================================
- FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/README.md
- ================================================
- # Llama Model Evaluation
- Llama-Recipes makes use of `lm-evaluation-harness` for evaluating our fine-tuned Meta Llama3 (or Llama2) models. It can also serve as a tool to evaluate quantized models, ensuring quality is preserved at lower precision or under other optimizations applied to the model.
- `lm-evaluation-harness` provides a wide range of [features](https://github.com/EleutherAI/lm-evaluation-harness?tab=readme-ov-file#overview):
- - Over 60 standard academic benchmarks for LLMs, with hundreds of subtasks and variants implemented.
- - Support for models loaded via [transformers](https://github.com/huggingface/transformers/) (including quantization via [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ)), [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), and [Megatron-DeepSpeed](https://github.com/microsoft/Megatron-DeepSpeed/), with a flexible tokenization-agnostic interface.
- - Support for fast and memory-efficient inference with [vLLM](https://github.com/vllm-project/vllm).
- - Support for commercial APIs including OpenAI and TextSynth.
- - Support for evaluation on adapters (e.g. LoRA) supported in [HuggingFace's PEFT library](https://github.com/huggingface/peft).
- - Support for local models and benchmarks.
- - Evaluation with publicly available prompts ensures reproducibility and comparability between papers.
- - Easy support for custom prompts and evaluation metrics.
- The Language Model Evaluation Harness is also the backend for 🤗 [Hugging Face's (HF) popular Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
- ## Setup
- Before running the evaluation, ensure you have all the necessary dependencies installed.
- ### Dependencies
- - Python 3.8+
- - Your language model's dependencies
- ### Installation
- Clone the lm-evaluation-harness repository and install it:
- ```bash
- git clone https://github.com/EleutherAI/lm-evaluation-harness.git
- cd lm-evaluation-harness
- pip install -e .
- ```
- ### Quick Test
- To run evaluation for the Hugging Face `Llama3.1 8B` model on a single GPU, please run the following:
- ```bash
- lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B --tasks hellaswag --device cuda:0 --batch_size 8
- ```
- Multiple tasks can be specified by separating them with `,`, for example `--tasks hellaswag,arc`.
- To set the number of shots for few-shot evaluation, use the `--num_fewshot` flag.
- ### PEFT Fine-tuned model Evaluation
- If you have fine-tuned your model using PEFT, you can set the path to the PEFT checkpoints via the `peft` key in `model_args`, as shown below:
- ```bash
- lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8
- ```
- ### Limit the number of examples in benchmarks
- There has been a study from [IBM on efficient benchmarking of LLMs](https://arxiv.org/pdf/2308.11696.pdf), whose main takeaway is that, to identify whether a model is performing poorly, benchmarking on a wider range of tasks matters more than the number of examples in each task. This means you can run the evaluation harness with fewer examples per task to make an initial call on whether performance has regressed from the baseline. The number of examples can be limited using the `--limit` flag with the desired number, but for a full assessment you would need to run the full evaluation. Please read more in the paper linked above.
- ```bash
- lm_eval --model hf --model_args pretrained=meta-llama/Llama-3.1-8B,dtype="float",peft=../peft_output --tasks hellaswag --num_fewshot 10 --device cuda:0 --batch_size 8 --limit 100
- ```
- ### Customized Llama Model
- If you have customized the Llama model, for example a quantized version that loads differently from a normal HF model, you can follow [this guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md#external-library-usage) to use `lm_eval.simple_evaluate()` to run the eval benchmarks (a minimal sketch is shown below).
- You can also find the full task list [here](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks).
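- Here is a minimal sketch of the external-library pattern from that guide; it assumes your customized model and tokenizer are compatible with the harness's `HFLM` wrapper, and a stock HF model stands in for your custom one.
- ```python
- # Minimal sketch of lm_eval external-library usage. A stock HF model stands
- # in for your customized model; quantized models with custom loading may
- # need their own LM subclass instead of HFLM.
- import lm_eval
- from lm_eval.models.huggingface import HFLM
- from transformers import AutoModelForCausalLM, AutoTokenizer
- 
- model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")
- 
- lm = HFLM(pretrained=model, tokenizer=tokenizer, batch_size=8)
- results = lm_eval.simple_evaluate(model=lm, tasks=["hellaswag"], num_fewshot=0)
- print(results["results"]["hellaswag"])
- ```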
- ### Multi-GPU Evaluation with Hugging Face `accelerate`
- `lm-evaluation-harness` supports three main ways of using Hugging Face's [accelerate 🚀](https://github.com/huggingface/accelerate) library for multi-GPU evaluation.
- To perform *data-parallel evaluation* (where each GPU loads a **separate full copy** of the model), `lm-evaluation-harness` leverages the `accelerate` launcher as follows:
- ```bash
- accelerate launch -m lm_eval --model hf \
- --model_args pretrained=meta-llama/Llama-3.1-8B \
- --tasks lambada_openai,arc_easy \
- --batch_size 16
- ```
- (or via `accelerate launch --no-python lm_eval`).
- For cases where your model can fit on a single GPU, this allows you to evaluate on K GPUs K times faster than on one.
- **WARNING**: This setup does not work with FSDP model sharding, so in `accelerate config` FSDP must be disabled, or the NO_SHARD FSDP option must be used.
- The second way of using `accelerate` for multi-GPU evaluation is when your model is *too large to fit on a single GPU.*
- In this setting, run the library *outside the `accelerate` launcher*, but passing `parallelize=True` to `--model_args` as follows:
- ```
- lm_eval --model hf \
- --tasks lambada_openai,arc_easy \
- --model_args pretrained=meta-llama/Llama-3.1-70B,parallelize=True \
- --batch_size 16
- ```
- This means that your model's weights will be split across all available GPUs.
- For more advanced users or even larger models, `lm-evaluation-harness` allows for the following arguments when `parallelize=True` as well:
- - `device_map_option`: How to split model weights across available GPUs. Defaults to `"auto"`.
- - `max_memory_per_gpu`: The max GPU memory to use per GPU when loading the model.
- - `max_cpu_memory`: The max amount of CPU memory to use when offloading the model weights to RAM.
- - `offload_folder`: A folder where model weights will be offloaded to disk if needed.
- There is also an option to run with tensor parallel and data parallel together. This will allow you to take advantage of both data parallelism and model sharding, and is especially useful for models that are too large to fit on a single GPU.
- ```
- accelerate launch --multi_gpu --num_processes {nb_of_copies_of_your_model} \
- -m lm_eval --model hf \
- --model_args pretrained=meta-llama/Llama-3.1-70B \
- --tasks lambada_openai,arc_easy \
- --model_args parallelize=True \
- --batch_size 16
- ```
- To learn more about model parallelism and how to use it with the `accelerate` library, see the [accelerate documentation](https://huggingface.co/docs/transformers/v4.15.0/en/parallelism)
- ### Tensor + Data Parallel and Optimized Inference with `vLLM`
- `lm-evaluation-harness` also supports vLLM for faster inference on [supported model types](https://docs.vllm.ai/en/latest/models/supported_models.html), which is especially beneficial when splitting a model across multiple GPUs. It works for single-GPU or multi-GPU inference (tensor parallel, data parallel, or a combination of both), for example:
- ```bash
- lm_eval --model vllm \
- --model_args pretrained={model_name},tensor_parallel_size={GPUs_per_model},dtype=auto,gpu_memory_utilization=0.8,data_parallel_size={model_replicas} \
- --tasks lambada_openai \
- --batch_size auto
- ```
- To use vLLM, run `pip install lm_eval[vllm]`. For a full list of supported vLLM configurations, please reference the [vLLM integration](https://github.com/EleutherAI/lm-evaluation-harness/blob/e74ec966556253fbe3d8ecba9de675c77c075bce/lm_eval/models/vllm_causallms.py) and the vLLM documentation.
- vLLM occasionally differs in output from Hugging Face. `lm-evaluation-harness` treats Hugging Face as the reference implementation and provides a script for checking the validity of vLLM results against HF.
- > [!Tip]
- > For fastest performance, `lm-evaluation-harness` recommends using `--batch_size auto` for vLLM whenever possible, to leverage its continuous batching functionality!
- > [!Tip]
- > Passing `max_model_len=4096` or some other reasonable default to vLLM through model args may cause speedups or prevent out-of-memory errors when trying to use auto batch size, such as for Mistral-7B-v0.1 which defaults to a maximum length of 32k.
- For more details about `lm-evaluation-harness`, please check out their GitHub repo [README.md](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/README.md).
- ## Calculating Meta 3.1 Evaluation Metrics Using LM-Evaluation-Harness
- The [meta_eval](./meta_eval/) folder provides a detailed guide on how to calculate the Meta Llama 3.1 evaluation metrics reported on our [Meta Llama website](https://llama.meta.com/) using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) and our [3.1 evals Huggingface collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f). By following the steps outlined, users can replicate an evaluation process similar to Meta's for specific tasks and compare their results with our reported metrics. While slight variations in results are expected due to differences in implementation and model behavior, we aim to provide a transparent method for evaluating Meta Llama 3 models using a third-party library. Please check the [README.md](./meta_eval/README.md) for more details.
- ## Reproducing HuggingFace Open-LLM-Leaderboard v2
- In the HF leaderboard v2, the LLMs are evaluated on 6 benchmarks from the Language Model Evaluation Harness, as described below:
- - **IFEval**: [IFEval](https://arxiv.org/abs/2311.07911) is a dataset designed to test a model’s ability to follow explicit instructions, such as “include keyword x” or “use format y.” The focus is on the model’s adherence to formatting instructions rather than the content generated, allowing for the use of strict and rigorous metrics.
- - **BBH (Big Bench Hard)**: [BBH](https://arxiv.org/abs/2210.09261) is a subset of 23 challenging tasks from the BigBench dataset to evaluate language models. The tasks use objective metrics, are highly difficult, and have sufficient sample sizes for statistical significance. They include multistep arithmetic, algorithmic reasoning (e.g., boolean expressions, SVG shapes), language understanding (e.g., sarcasm detection, name disambiguation), and world knowledge. BBH performance correlates well with human preferences, providing valuable insights into model capabilities.
- - **MATH**: [MATH](https://arxiv.org/abs/2103.03874) is a compilation of high-school-level competition problems gathered from several sources, formatted consistently using LaTeX for equations and Asymptote for figures. Generations must fit a very specific output format. HuggingFace Open-LLM-Leaderboard v2 keeps only level-5 MATH questions and calls it MATH Level 5.
- - **GPQA (Graduate-Level Google-Proof Q&A Benchmark)**: [GPQA](https://arxiv.org/abs/2311.12022) is a highly challenging knowledge dataset with questions crafted by PhD-level domain experts in fields like biology, physics, and chemistry. These questions are designed to be difficult for laypersons but relatively easy for experts. The dataset has undergone multiple rounds of validation to ensure both difficulty and factual accuracy. Access to GPQA is restricted through gating mechanisms to minimize the risk of data contamination. Consequently, HuggingFace Open-LLM-Leaderboard v2 does not provide plain text examples from this dataset, as requested by the authors.
- - **MuSR (Multistep Soft Reasoning)**: [MuSR](https://arxiv.org/abs/2310.16049) is a new dataset consisting of algorithmically generated complex problems, each around 1,000 words in length. The problems include murder mysteries, object placement questions, and team allocation optimizations. Solving these problems requires models to integrate reasoning with long-range context parsing. Few models achieve better than random performance on this dataset.
- - **MMLU-PRO (Massive Multitask Language Understanding - Professional)**: [MMLU-Pro](https://arxiv.org/abs/2406.01574) is a refined version of the MMLU dataset, which has been a standard for multiple-choice knowledge assessment. Recent research identified issues with the original MMLU, such as noisy data (some unanswerable questions) and decreasing difficulty due to advances in model capabilities and increased data contamination. MMLU-Pro addresses these issues by presenting models with 10 choices instead of 4, requiring reasoning on more questions, and undergoing expert review to reduce noise. As a result, MMLU-Pro is of higher quality and currently more challenging than the original.
- In order to install correct lm-evaluation-harness version, please check the Huggingface 🤗 Open LLM Leaderboard v2 [reproducibility section](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#reproducibility).
- To run a leaderboard evaluation for `Llama-3.1-8B`, we can run the following:
- ```bash
- accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4
- ```
- Similarly to run a leaderboard evaluation for `Llama-3.1-8B-Instruct`, we can run the following, using `--apply_chat_template --fewshot_as_multiturn`:
- ```bash
- accelerate launch -m lm_eval --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,dtype=bfloat16 --log_samples --output_path eval_results --tasks leaderboard --batch_size 4 --apply_chat_template --fewshot_as_multiturn
- ```
- For 70B models, tensor parallelism is required since they cannot fit on a single GPU; we can therefore run the following for `Llama-3.1-70B-Instruct`:
- ```bash
- lm_eval --model hf --batch_size 4 --model_args pretrained=meta-llama/Llama-3.1-70B-Instruct,parallelize=True --tasks leaderboard --log_samples --output_path eval_results --apply_chat_template --fewshot_as_multiturn
- ```
- ================================================
- FILE: end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval/README.md
- ================================================
- # Calculating Meta 3.x Evaluation Metrics Using LM-Evaluation-Harness
- As Llama models gain popularity, evaluating these models has become increasingly important. We have released all the evaluation details for Llama 3.x models on Hugging Face as datasets in the [3.1 evals collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f) and the [3.2 evals collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0). This recipe demonstrates how to calculate the Llama 3.x reported benchmark numbers using the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main) library and our prompts from the 3.x evals datasets on selected tasks.
- ## Disclaimer
- 1. **This recipe is not the official implementation** of Llama evaluation. Since our internal eval repo isn't public, we want to provide this recipe as an aid for anyone who wants to use the datasets we released. It is based on public third-party libraries and does not exactly mirror Llama's internal evaluation, which may lead to minor differences in the produced numbers.
- 2. **Model Compatibility**: This tutorial is specifically for Llama 3 based models, as our prompts include Llama 3 special tokens, e.g. `<|start_header_id|>user<|end_header_id|>`. It will not work with models that are not based on Llama 3.
- ## Insights from Our Evaluation Process
- Here are our insights about the differences in terms of the eval configurations and prompting methods between this implementation and Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard).
- - **Prompts**: We use Chain-of-Thought (CoT) prompts while the Hugging Face leaderboard does not. The prompts that define the output format are also different.
- - **Metric calculation**: For the MMLU-Pro, BBH, and GPQA tasks, we ask the model to generate a response and score the answer parsed from that response, while the Hugging Face leaderboard evaluation compares the log-likelihood of all label words, such as [(A), (B), (C), (D)].
- - **Parsers**: For generative tasks, where the final answer needs to be parsed before scoring, our parser functions can differ from those of the Hugging Face leaderboard evaluation, as our prompts that define the model output format are designed differently.
- - **Inference**: We use an internal LLM inference solution that does not apply padding, while the Hugging Face leaderboard uses padding on the generative tasks (MATH and IFEVAL).
- - **Tasks**: We run benchmarks on BBH and MMLU-Pro only for pretrained models, and on Math-Hard, IFeval, and GPQA only for instruct models.
- Given those differences, the numbers from this recipe cannot be compared to the numbers in the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard), even if the task names are the same.
- ## Environment setups
- Please install lm-evaluation-harness and our llama-cookbook repo as follows:
- ```
- git clone git@github.com:meta-llama/llama-cookbook.git
- cd llama-cookbook
- pip install -U pip setuptools
- pip install -e .
- pip install -U antlr4_python3_runtime==4.11
- pip install lm-eval[math,ifeval,sentencepiece,vllm]==0.4.3
- cd end-to-end-use-cases/benchmarks/llm_eval_harness/meta_eval
- ```
- To access our [3.1 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-31-evals-66a2c5a14c2093e58298ac7f), you must:
- - Log in to the Hugging Face website and click the 3.1 evals dataset pages and agree to the terms.
- - Follow the [Hugging Face authentication instructions](https://huggingface.co/docs/huggingface_hub/en/quick-start#authentication) to gain read access for your machine.
- The same process can be followed to access the [3.2 evals Hugging Face collection](https://huggingface.co/collections/meta-llama/llama-32-evals-66f44b3d2df1c7b136d821f0).
- It is recommended to read the dataset card to understand the meaning of each column, and to use the dataset viewer on Hugging Face to browse the dataset. It is important to have a basic understanding of the dataset format and content before proceeding; a minimal loading sketch is shown below.
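- Here is a minimal sketch for inspecting one of the 3.1 evals datasets locally; it assumes you have accepted the dataset terms on Hugging Face and are authenticated (e.g., via `huggingface-cli login`).
- ```python
- # Minimal sketch: load and inspect the 3.1 evals MMLU-Pro details locally.
- # Assumes the dataset terms have been accepted and the machine is
- # authenticated with Hugging Face.
- from datasets import load_dataset
- 
- ds = load_dataset(
-     "meta-llama/Llama-3.1-8B-Instruct-evals",
-     name="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
-     split="latest",
- )
- print(ds.column_names)
- print(ds[0]["input_final_prompts"][0][:500])
- ```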
- ### Task Selection
- Given the extensive number of tasks available (12 for pretrained models and 30 for instruct models), a subset of tasks is chosen:
- - **Tasks for 3.1 pretrained models**: BBH and MMLU-Pro
- - **Tasks for 3.1 instruct models**: Math-Hard, IFeval, GPQA, and MMLU-Pro
- - **Tasks for 3.2 pretrained models**: MMLU
- - **Tasks for 3.2 instruct models**: MMLU, GPQA
- These tasks are common evaluations, many of which overlap with the Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard).
- Here, we aim to get the benchmark numbers on the aforementioned tasks using Hugging Face [leaderboard implementation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard). Please follow the instructions below to make necessary modifications to use our eval prompts and get more eval metrics.
- ### Run eval tasks
- 1. We created [eval_config.yaml](./eval_config.yaml) to store all the arguments and hyperparameters. This is the main config file you need to change if you want to evaluate other models; a part of eval_config.yaml looks like this:
- ```yaml
- model_name: "meta-llama/Llama-3.1-8B-Instruct" # The name of the model to evaluate. This must be a valid Llama 3 based model name in the HuggingFace model hub."
- evals_dataset: "meta-llama/Llama-3.1-8B-Instruct-evals" # The name of the 3.1 evals dataset to evaluate, please make sure this eval dataset corresponds to the model loaded. This must be a valid Llama 3.1 evals dataset name in the Llama 3.1 Evals collection.
- # Must be one of the following ["meta-llama/Llama-3.1-8B-Instruct-evals","meta-llama/Llama-3.1-70B-Instruct-evals","meta-llama/Llama-3.1-405B-Instruct-evals","meta-llama/Llama-3.1-8B-evals","meta-llama/Llama-3.1-70B-evals","meta-llama/Llama-3.1-405B-evals","meta-llama/Llama-3.2-1B-evals","meta-llama/Llama-3.2-3B-evals", "meta-llama/Llama-3.2-1B-Instruct-evals", "meta-llama/Llama-3.2-3B-Instruct-evals"]
- tasks: "meta_instruct" # Available tasks for 3.1 instruct model: "meta_math_hard", "meta_gpqa_cot", "meta_mmlu_pro_instruct", "meta_ifeval"; or just use "meta_instruct" to run all of them.
- # Available tasks for 3.1 pretrain model: "meta_bbh", "meta_mmlu_pro_pretrain"; or just use "meta_pretrain" to run all of them.
- # Available tasks for 3.2 instruct model: "meta_mmlu", "meta_math", "meta_gpqa"; or just use "meta_instruct" to run all of them.
- # Available tasks for 3.2 pretrain model: "meta_mmlu"; or just use "meta_pretrain" to run all of them
- tensor_parallel_size: 1 # The vLLM argument that specifies the tensor parallel size for the model, i.e., how many GPUs to use for one model copy.
- data_parallel_size: 4 # The vLLM argument that specifies the data parallel size for the model, i.e., how many copies of the model will be used.
- ...
- ```
- Change `model_name` to the model you want to evaluate and change `evals_dataset` according to the model type and parameters. Remember to adjust `tensor_parallel_size` to 2 or more to load the 70B models, and change `data_parallel_size` accordingly so that `tensor_parallel_size * data_parallel_size` equals the number of GPUs you have. Please read the comments inside this yaml for detailed explanations of the other parameters.
- 2. We already included all the related eval task yaml and python files in the [meta_template](./meta_template/) folder, which define all the task implementations. You do not need to change those manually; we will use [prepare_meta_eval.py](./prepare_meta_eval.py) to change them automatically later.
- 3. Then we can run [prepare_meta_eval.py](./prepare_meta_eval.py), which reads the configuration from [eval_config.yaml](./eval_config.yaml), copies everything in the template folder to a working folder `work_dir`, modifies those templates accordingly, prepares the dataset if needed, and prints out the CLI command to run `lm_eval`.
- To run the [prepare_meta_eval.py](./prepare_meta_eval.py), we can do:
- ```
- python prepare_meta_eval.py --config_path ./eval_config.yaml
- ```
- This script will load the default [eval_config.yaml](./eval_config.yaml) config and print out an `lm_eval` command to run the `meta_instruct` group tasks, which include `meta_ifeval`, `meta_math_hard`, `meta_gpqa` and `meta_mmlu_pro_instruct`, for the `meta-llama/Llama-3.1-8B-Instruct` model using the `meta-llama/Llama-3.1-8B-Instruct-evals` dataset.
- An example output from [prepare_meta_eval.py](./prepare_meta_eval.py) looks like this:
- ```
- lm_eval --model vllm --model_args pretrained=meta-llama/Llama-3.1-8B-Instruct,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=4,max_model_len=8192,add_bos_token=True,seed=42 --tasks meta_instruct --batch_size auto --output_path eval_results --include_path ./work_dir --seed 42 --log_samples
- ```
- 4. Then just copy the `lm_eval` command printed by [prepare_meta_eval.py](./prepare_meta_eval.py) back to your terminal and run it to get the results, which will be saved into the `eval_results` folder by default.
- **NOTE**: As for `--model vllm`, here we use vLLM inference instead of Hugging Face inference because of a padding issue. By default, for generative tasks, the `lm-eval --model_args="{...}" --batch_size=auto` command uses the Hugging Face inference solution, which applies a static batch method with [left padding](https://github.com/EleutherAI/lm-evaluation-harness/blob/8ad598dfd305ece8c6c05062044442d207279a97/lm_eval/models/huggingface.py#L773) using the EOS token for Llama models, whereas our internal evaluation loads the original Python checkpoints and handles each generation request asynchronously without any padding. To simulate this, we use the vLLM inference solution to do dynamic batching without any padding.
- **NOTE**: As for `add_bos_token=True`, since our prompts in the evals dataset already include all the special tokens required by the instruct model, such as `<|start_header_id|>user<|end_header_id|>`, we do not use the `--apply_chat_template` argument for instruct models anymore. However, we need the `add_bos_token=True` flag to add the BOS token back during vLLM inference, as the BOS token is removed by default in [this PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1465).
- **NOTE**: For `meta_math_hard` tasks, some of our internal math ground truths have been converted to scientific notation, e.g. `6\sqrt{7}` has been converted to `1.59e+1`, which is later handled by our internal math evaluation functions. As the lm-evaluation-harness [math evaluation utils.py](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py) cannot fully handle those conversions, we use the original ground truth from the original dataset [lighteval/MATH-Hard](https://www.oxen.ai/lighteval/MATH-Hard) by joining the tables on the original input questions. The `get_math_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) handles this step and produces a local parquet dataset file.
- Moreover, we have modified this [math_hard/utils.py](./meta_template/math_hard/utils.py) to address two issues:
- 1. This Python script only uses [the regular expression "Final Answer: The final answer is(.*?). I hope it is correct."](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L192) to get the final answer, because that format appears in the preceding 4-shot example prompts. However, our MATH-Hard task uses 0-shot CoT prompts that ask the model to put the final answer into the string format `Therefore, the final answer is: $\\boxed{answer}$. I hope it is correct.`, which cannot be captured by the previous regular expression, so we parse the final answer from `\\boxed{}` instead (a simplified sketch follows this list).
- 2. The [is_equiv(x1: str, x2: str)](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/tasks/leaderboard/math/utils.py#L144) function failed to parse some ground truths; we noticed error logs like `[utils.py:158] couldn't parse one of [0,1) or [0,1)`, meaning all those questions would be marked as wrong. We raised [an issue with lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/issues/2212) about this problem and, as a temporary solution, add a string equality check before falling through to the is_equiv() function.
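- Here is a simplified illustration of the `\boxed{}` parsing described above; the actual parser in [math_hard/utils.py](./meta_template/math_hard/utils.py) handles more edge cases (for example, answers containing nested braces need balanced-brace parsing rather than this regex).
- ```python
- # Simplified illustration of extracting the final answer from a 0-shot CoT
- # response ending with "... the final answer is: $\boxed{answer}$. ...".
- # The real parser in math_hard/utils.py handles more edge cases; answers
- # with nested braces would need balanced-brace parsing instead.
- import re
- 
- def extract_boxed_answer(response):
-     matches = re.findall(r"\\boxed\{(.*?)\}", response)
-     return matches[-1] if matches else None  # take the last \boxed{} occurrence
- 
- text = r"Therefore, the final answer is: $\boxed{24}$. I hope it is correct."
- print(extract_boxed_answer(text))  # -> 24
- ```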
- **NOTE**: For `meta_ifeval` tasks, we have to use the original configs, such as `instruction_id_list` and `kwargs`, from [wis-k/instruction-following-eval](https://huggingface.co/datasets/wis-k/instruction-following-eval) in order to use the [lm-evaluation-harness IFeval evaluation](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard/ifeval). We perform a similar join-back using the `get_ifeval_data` function in [prepare_meta_eval.py](./prepare_meta_eval.py) to get a local parquet dataset file.
- ### Yaml Config Deep Dive
- Here, we will use MMLU-Pro as an example to show the steps to create a yaml config with detailed explanations, so that people can follow this example to create other task configurations if they want. For more information, please read the lm-evaluation-harness [new task guide](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md).
- **1. Define the config to load datasets**
- We can use our 3.1 evals dataset as the source dataset with the corresponding subset, and set the test split to `latest`. For example, if we want to calculate the MMLU-Pro metric for 3.1 8B Instruct, the following config is needed, as explained below:
- ```yaml
- task: meta_mmlu_pro_instruct
- dataset_path: meta-llama/Llama-3.1-8B-Instruct-evals
- dataset_name: Llama-3.1-8B-Instruct-evals__mmlu_pro__details
- test_split: latest
- ```
- If you want to run evaluation on 70B-Instruct, it is recommended to change the `dataset_path` and `dataset_name` from 8B to 70B: even though 70B-Instruct and 8B-Instruct share the same prompts, the `is_correct` column, which can be used to compare the current result with the reported result for each sample, is different.
- **Note**: Config files for Llama-3.1-8B-Instruct are already provided in each task subfolder under [meta_template folder](./meta_template/). Remember to change the eval dataset name according to the model type and DO NOT use pretrained evals dataset on instruct models or vice versa.
- **2. Configure preprocessing, prompts and ground truth**
- Here is the example yaml snippet in the MMLU-Pro that handles dataset preprocess, prompts and ground truth.
- ```yaml
- process_docs: !function utils.process_docs
- doc_to_text: !function utils.doc_to_text
- doc_to_target: gold
- ```
- - `process_docs`: Defines the preprocessing function for our datasets. In this case, we use the `process_docs` Python function defined in [utils.py](./meta_template/mmlu_pro/utils.py). This function takes the original dataset and outputs a processed dataset whose documents contain `problem` (the input question) and `gold` (the ground truth). We also rename the `is_correct` column to `previously_is_correct` to allow a detailed per-sample comparison between the previously reported score and the current score. You must use an eval dataset and a model with the same parameters and model type to get a valid comparison (a minimal sketch of these helpers follows this list).
- - `doc_to_text`: Defines the prompts. In the MMLU-Pro case, the `input_final_prompts` column always contains a list with a single prompt, so we just use a Python function that returns `input_final_prompts[0]`.
- - `doc_to_target`: Defines the ground truth, which in the MMLU-Pro case is `gold`, derived from `input_correct_responses[0]`.
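- Here is a minimal sketch of what such helpers could look like; the authoritative implementations live in [utils.py](./meta_template/mmlu_pro/utils.py), and the `input_question` source column below is an assumption for illustration.
- ```python
- # Minimal sketch of the preprocessing helpers described above; the real
- # versions live in meta_template/mmlu_pro/utils.py. The `input_question`
- # column name is an assumption for illustration.
- import datasets
- 
- def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
-     def _process_doc(doc):
-         return {
-             "problem": doc["input_question"],            # input question (assumed column)
-             "gold": doc["input_correct_responses"][0],   # ground truth
-             "previously_is_correct": doc["is_correct"],  # kept for per-sample comparison
-             "input_final_prompts": doc["input_final_prompts"],
-         }
-     return dataset.map(_process_doc)
- 
- def doc_to_text(doc) -> str:
-     # input_final_prompts is a list containing a single prompt
-     return doc["input_final_prompts"][0]
- ```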
- **3. Configure task type and parser**
- While Open LLM Leaderboard v2 uses the [multiple choice format](https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/new_task_guide.md#multiple-choice-format) for the MMLU-Pro, BBH, and GPQA tasks, comparing the log-likelihood of all label words such as [(A), (B), (C), (D)], we use the generative task option: we ask the model to generate a response in sentences given our carefully designed prompts, then use parsers to grab the final answer, and score that final answer against the ground truth. Here is an example config in MMLU-Pro that enables the generative task and defines the regex parser:
- ```yaml
- filter_list:
- - name: "strict-match"
- filter:
- - function: "regex"
- group_select: -1
- regex_pattern: 'best answer is ([A-Z])'
- - function: "take_first"
- ```
- Since the MMLU-Pro task uses 5-shot Chain-of-Thought (CoT) prompts designed with the explicit instruction "Your response should end with \"The best answer is [the_answer_letter].\" where the [the_answer_letter] is a letter from the provided choices.", we use the simple and intuitive regex `best answer is ([A-Z])` to parse the model response, take the last occurrence as the final answer, and score that final answer against the ground truth `gold` using the exact-match method (see the short demo below).
- **4. Define generation and metric config**
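- For illustration, here is how that filter behaves in plain Python: find every match of the regex and keep the last one, which is what `group_select: -1` does before `take_first` is applied.
- ```python
- # Plain-Python illustration of the "strict-match" filter above: findall
- # plus group_select: -1 keeps the last regex match in the response.
- import re
- 
- response = (
-     "Let's think step by step... Comparing the options, "
-     "the best answer is B. On reflection, the best answer is C."
- )
- matches = re.findall(r"best answer is ([A-Z])", response)
- answer = matches[-1] if matches else None  # group_select: -1 -> last occurrence
- print(answer)  # -> C
- ```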
- Then we need to define the generation and metric config, which looks like this:
- ```yaml
- generation_kwargs:
- until: []
- do_sample: false
- temperature: 0
- max_gen_toks: 1024
- num_fewshot: 0
- metric_list:
- - metric: exact_match
- aggregation: mean
- higher_is_better: true
- ignore_case: true
- ignore_punctuation: true
- ```
- Here we set `num_fewshot` to 0 as our prompts have already been converted to 5-shot, and generation only stops once the generated output exceeds 1024 tokens, as stated in the [mmlu-pro eval details](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro). We set `do_sample` to false and `temperature` to 0, as stated in the `eval_config` column in the dataset. We use the `exact_match` metric for this task and calculate the `mean` as our aggregated task number.
- **NOTE**: While we tried our best to create the template files, those configs and functions are based on public third-party libraries and are not exactly the same as our internal implementation, so the eval numbers may be slightly different.
- **NOTE**: We used the average of `inst_level_strict_acc,none` and `prompt_level_strict_acc,none` to get the final number for `IFeval` as stated [here](https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about#task-evaluations-and-parameters).
- **NOTE**: On the [Llama website](https://llama.com/), we reported the `macro_avg` metric, which is the average of all subtask average scores, for the `MMLU-Pro` task, but here we are calculating the `micro_avg` metric, which is the average score over all individual samples; those `micro_avg` numbers can be found in the [eval_details.md](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/eval_details.md#mmlu-pro).
- **NOTE**: The eval numbers may be slightly different, as we observed around ±0.01 differences between evaluation runs because the latest vLLM inference is not fully deterministic even with temperature=0. This behavior may be related to [this issue](https://github.com/vllm-project/vllm/issues/5404), or it may be expected due to 16-bit inference, as stated in [this comment](https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535) and [this comment](https://github.com/vllm-project/vllm/issues/4112#issuecomment-2071115725).
- ## Acknowledgement
- This tutorial is inspired by [leaderboard tasks implementation on the lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks/leaderboard) created by Hugging Face [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) team.
- We also extend our gratitude to the [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) github repo from [EleutherAI](https://www.eleuther.ai/).
- ================================================
- FILE: end-to-end-use-cases/book-character-mindmap/README.md
- ================================================
- # Book Character Mind Map With Llama4 Maverick
- 
- Book Mind is a web application that allows users to explore character relationships and storylines in books using AI-powered visualizations.
- This leverages **Llama 4 Maverick**'s impressive 1M-token context window to process entire books at once, enabling comprehensive analysis of complex narratives and character relationships across lengthy texts.
- ## Features
- ### Leverage Long Context Length
- | Model | Meta Llama4 Maverick | Meta Llama4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
- | ----- | -------------- | -------------- | -------------- | -------------- |
- | Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |
- Because of the long context length, Book Mind can process entire books at once, providing a comprehensive understanding of complex narratives and character relationships.
- - Interactive Mind Maps: Visualize relationships between characters and plot elements.
- - Book Summaries: Get concise overviews of plots and themes.
- ### Step-by-Step Instructions
- We implemented a step-by-step approach to ensure the reliability of the model's outputs.
- 1. **Character Identification**: Identify all characters in the book and summarize their roles.
- ```
- You are a highly detailed literary analyst AI. Your sole mission is to meticulously extract comprehensive information about characters and the *nuances* of their relationships from the provided text segment. This data will be used later to build a relationship graph.
- ```
- 2. **Character Relationships**: Determine the relationships between characters.
- ```
- You are an expert data architect AI specializing in transforming literary analysis into structured graph data. Your task is to synthesize character and relationship information into a specific JSON format containing nodes and links, including a title and summary.
- ```
- 3. **JSON Format**: Output the results in JSON format for easy parsing and visualization (a parsing sketch follows this list).
- ```
- You are an extremely precise and strict JSON extractor.
- Extract only the complete JSON object from the input. Get the last one if there are multiple.
- ```
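- Conceptually, the final extraction step pulls the last complete JSON object out of the model's reply. Here is a minimal sketch of that behavior; the app's actual parsing in [server/server.py](server/server.py) may differ.
- ```
- # Minimal sketch: extract the last complete JSON object from a model reply.
- # The actual parsing in server/server.py may differ; this only illustrates
- # the "get the last one if there are multiple" behavior described above.
- import json
- 
- def extract_last_json(text):
-     decoder = json.JSONDecoder()
-     objects, idx = [], 0
-     while True:
-         start = text.find("{", idx)
-         if start == -1:
-             break
-         try:
-             obj, end = decoder.raw_decode(text, start)
-             objects.append(obj)
-             idx = end  # continue scanning after this object
-         except json.JSONDecodeError:
-             idx = start + 1
-     return objects[-1] if objects else None
- 
- print(extract_last_json('noise {"a": 1} more {"b": 2} tail'))  # -> {'b': 2}
- ```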
- ### Ask the Book with Chat Interface
- We also implemented a chat interface to interact with the book. Users can ask questions about the book's characters, plot, and relationships. The model will respond with a concise answer based on the book's content and the relationships between characters.
- ```
- You are an expert search AI designed to help users find detailed information about character relationships from a book. Your task is to assist users in querying the relationship data extracted from the book.
- ```
- 
- ## Getting Started
- ### Frontend Setup
- To communicate with the [server/server.py](server/server.py), we use `React.js` and `axios`.
- 1. Install dependencies:
- ```
- npm install
- ```
- 2. Run the application:
- ```
- npm start
- ```
- ### Server Setup
- We use `Flask` to serve the model's responses and `vllm` to run the **Llama 4 Maverick** model.
- 1. Install dependencies:
- ```
- cd server
- pip install -r requirements.txt
- ```
- 2. Run the server:
- ```
- python server.py
- ```
- ## Get Copyright-Free Books
- - [Project Gutenberg](https://www.gutenberg.org/)
- - [Romeo and Juliet](https://www.gutenberg.org/ebooks/1513): 50,687 input tokens
- - [Moby-Dick; The Whale](https://www.gutenberg.org/ebooks/2701): 318,027 input tokens
- ================================================
- FILE: end-to-end-use-cases/book-character-mindmap/.gitignore
- ================================================
- # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
- # dependencies
- /node_modules
- /.pnp
- .pnp.js
- # testing
- /coverage
- # production
- /build
- # misc
- .DS_Store
- .env.local
- .env.development.local
- .env.test.local
- .env.production.local
- npm-debug.log*
- yarn-debug.log*
- yarn-error.log*
- ================================================
- FILE: end-to-end-use-cases/book-character-mindmap/public/robots.txt
- ================================================
- # https://www.robotstxt.org/robotstxt.html
- User-agent: *
- Disallow:
- ================================================
- FILE: end-to-end-use-cases/book-character-mindmap/server/requirements.txt
- ================================================
- flask
- flask-cors
- asyncio
- werkzeug
- vllm
- transformers
- ================================================
- FILE: end-to-end-use-cases/browser_use/README.md
- ================================================
- ## Building an Intelligent Browser Agent with Llama 4 Scout
- This project provides a comprehensive guide to creating an AI-powered browser agent capable of autonomously navigating and interacting with websites. By leveraging the capabilities of Llama 4 Scout, Playwright, and Together AI, this agent can perform tasks seamlessly while understanding both visual and textual content.
- ### Features
- - Visual Understanding: Utilizes screenshots for visual comprehension of web pages.
- - Autonomous Navigation: Capable of navigating and interacting with web elements.
- - Natural Language Instructions: Executes tasks based on natural language commands.
- - Persistent Session Management: Maintains browser sessions for continuous interaction.
- ### Example Tasks
- - Search for a product on Amazon.
- - Find the cheapest flight to Tokyo.
- - Purchase tickets for the next Warriors game.
- ### What's in this Project?
- - Environment setup instructions
- - Browser automation guides using Playwright
- - Structured prompting techniques for guiding the LLM in task execution
- - Content comprehension utilizing Llama 4 Scout
- - Creating a persistent and intelligent browser agent for real-world applications
- ### Demo
- For a detailed explanation and demo video, visit: [Blog Post and Demo Video](https://miguelg719.github.io/browser-use-blog/)
- ### Prerequisite for Running the Notebook
- - Before getting started, please make sure to set up a Together.ai account and get an API key from [here](https://www.together.ai/).
- ### Collaborators
- Feel free to reach out with any questions or feedback!
- - Miguel Gonzalez: [X](https://x.com/miguel_gonzf) | [LinkedIn](https://www.linkedin.com/in/gonzalezfernandezmiguel/)
- - Dimitry Khorzov: [X](https://x.com/korzhov_dm) | [LinkedIn](https://www.linkedin.com/in/korzhovdm)
- ================================================
- FILE: end-to-end-use-cases/coding/text2sql/README.md
- ================================================
- ## Text2SQL: Natural Language to SQL Interface
- This project provides a set of scripts to convert natural language queries into SQL statements using Meta's Llama model. The goal is to enable users to interact with databases using natural language inputs, making it easier for non-technical users to access and analyze data.
- For detailed instructions on setting up the environment, creating a database, and executing natural language queries using the Text2SQL interface, please refer to the quickstart.ipynb notebook.
- ### Structure:
- - quickstart.ipynb: A Quick Demo of Text2SQL Using Llama 3.3. This Jupyter Notebook includes examples of how to use the interface to execute natural language queries on the sample data. It uses Llama 3.3 to answer questions about a SQLite database using LangChain and the Llama cloud provider Together.ai.
- - nba.txt: A text file containing NBA roster information, which is used as sample data for demonstration purposes.
- - txt2csv.py: A script that converts text data into a CSV format. This script is used to preprocess the input data before it is fed into csv2db.py.
- - csv2db.py: A script that imports data from a CSV file into a SQLite database. This script is used to populate the database with sample data.
- - nba_roster.db: A SQLite database file created from the nba.txt data, used to test the Text2SQL interface.
- ### Detailed steps on running the notebook:
- - Before getting started, please make sure to set up a Together.ai account and get an API key from [here](https://www.together.ai/).
- - First, please install the requirements from [here](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/coding/text2sql/requirements.txt) by running the following inside the folder:
- ```
- git clone https://github.com/meta-llama/llama-cookbook.git
- cd llama-cookbook/end-to-end-use-cases/coding/text2sql/
- pip install -r requirements.txt
- ```
- ### Contributing
- Contributions are welcome! If you'd like to add new features or improve existing ones, please submit a pull request. We encourage contributions in the following areas:
- - Adding support for additional databases
- - Developing new interfaces or applications that use the Text2SQL interface
- ================================================
- FILE: end-to-end-use-cases/coding/text2sql/nba.txt
- ================================================
- Atlanta Hawks Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4397136.png
- Saddiq Bey41 SF 24 6' 7" 215 lbs Villanova $4,556,983
- https://a.espncdn.com/i/headshots/nba/players/full/3037789.png
- Bogdan Bogdanovic13 SG 31 6' 5" 225 lbs -- $18,700,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Kobe Bufkin G 20 6' 4" 195 lbs Michigan $4,094,280
- https://a.espncdn.com/i/headshots/nba/players/full/3102529.png
- Clint Capela15 C 29 6' 10" 256 lbs -- $20,616,000
- https://a.espncdn.com/i/headshots/nba/players/full/4277952.png
- Bruno Fernando0 PF 25 6' 10" 240 lbs Maryland $2,581,522
- https://a.espncdn.com/i/headshots/nba/players/full/4065656.png
- Trent Forrest2 G 25 6' 4" 210 lbs Florida State --
- https://a.espncdn.com/i/headshots/nba/players/full/4432585.png
- AJ Griffin14 F 20 6' 6" 222 lbs Duke $3,712,920
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Mouhamed Gueye F 20 6' 11" 210 lbs Washington State $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/4065732.png
- De'Andre Hunter12 SF 25 6' 8" 221 lbs Virginia $20,089,286
- https://a.espncdn.com/i/headshots/nba/players/full/4701230.png
- Jalen Johnson1 SF 21 6' 8" 219 lbs Duke $2,925,360
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jarkel Joiner G 24 6' 1" 180 lbs NC State --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Seth Lundy G 23 6' 6" 220 lbs Penn State --
- https://a.espncdn.com/i/headshots/nba/players/full/3913180.png
- Garrison Mathews25 SG 26 6' 6" 215 lbs Lipscomb $2,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4032.png
- Wesley Matthews23 SG 36 6' 4" 220 lbs Marquette $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4004.png
- Patty Mills8 PG 35 6' 0" 180 lbs Saint Mary's $6,802,950
- https://a.espncdn.com/i/headshots/nba/players/full/3907497.png
- Dejounte Murray5 SG 27 6' 5" 180 lbs Washington $18,214,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Miles Norris F 23 6' 10" 220 lbs UC Santa Barbara --
- https://a.espncdn.com/i/headshots/nba/players/full/4431680.png
- Onyeka Okongwu17 PF 22 6' 9" 240 lbs USC $8,109,063
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Keaton Wallace G 24 6' 3" 185 lbs UTSA --
- https://a.espncdn.com/i/headshots/nba/players/full/4277905.png
- Trae Young11 PG 25 6' 1" 164 lbs Oklahoma $40,064,220
- Boston Celtics Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4397885.png
- Dalano Banton G 23 6' 7" 204 lbs Nebraska $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4278031.png
- Oshae Brissett12 SF 25 6' 7" 210 lbs Syracuse $2,165,000
- https://a.espncdn.com/i/headshots/nba/players/full/2566769.png
- Malcolm Brogdon13 PG 30 6' 4" 229 lbs Virginia $22,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/3917376.png
- Jaylen Brown7 SG 26 6' 6" 223 lbs California $31,830,357
- https://a.espncdn.com/i/headshots/nba/players/full/4432907.png
- Justin Champagnie99 SF 22 6' 6" 206 lbs Pittsburgh --
- https://a.espncdn.com/i/headshots/nba/players/full/4576085.png
- JD Davison20 SG 20 6' 1" 195 lbs Alabama --
- https://a.espncdn.com/i/headshots/nba/players/full/3989.png
- Blake Griffin91 PF 34 6' 9" 250 lbs Oklahoma --
- https://a.espncdn.com/i/headshots/nba/players/full/4065804.png
- Sam Hauser30 SF 25 6' 7" 217 lbs Virginia $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/3213.png
- Al Horford42 C 37 6' 9" 240 lbs Florida $10,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3064560.png
- Luke Kornet40 C 28 7' 2" 250 lbs Vanderbilt $2,413,304
- https://a.espncdn.com/i/headshots/nba/players/full/3133602.png
- Svi Mykhailiuk SG 26 6' 7" 205 lbs Kansas $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3102531.png
- Kristaps Porzingis C 28 7' 3" 240 lbs -- $36,016,200
- https://a.espncdn.com/i/headshots/nba/players/full/4066354.png
- Payton Pritchard11 PG 25 6' 1" 195 lbs Oregon $4,037,278
- https://a.espncdn.com/i/headshots/nba/players/full/4397424.png
- Neemias Queta C 24 7' 0" 245 lbs Utah State $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4065648.png
- Jayson Tatum0 SF 25 6' 8" 210 lbs Duke $32,600,060
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jordan Walsh G 19 6' 7" 205 lbs Arkansas $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/3078576.png
- Derrick White9 PG 29 6' 4" 190 lbs Colorado $18,357,143
- https://a.espncdn.com/i/headshots/nba/players/full/4066211.png
- Robert Williams III44 C 25 6' 9" 237 lbs Texas A&M $11,571,429
- Brooklyn Nets Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4423545.png
- Darius Bazley PF 23 6' 9" 216 lbs -- $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3147657.png
- Mikal Bridges1 SF 27 6' 6" 209 lbs Villanova $21,700,000
- https://a.espncdn.com/i/headshots/nba/players/full/4278067.png
- Nic Claxton33 C 24 6' 11" 215 lbs Georgia $9,625,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Noah Clowney F 19 6' 10" 210 lbs Alabama $3,089,520
- https://a.espncdn.com/i/headshots/nba/players/full/2580782.png
- Spencer Dinwiddie26 PG 30 6' 6" 215 lbs Colorado $20,357,143
- https://a.espncdn.com/i/headshots/nba/players/full/2578185.png
- Dorian Finney-Smith28 PF 30 6' 7" 220 lbs Florida $13,932,008
- https://a.espncdn.com/i/headshots/nba/players/full/3138196.png
- Cameron Johnson2 SF 27 6' 8" 210 lbs North Carolina $25,679,348
- https://a.espncdn.com/i/headshots/nba/players/full/2583632.png
- Royce O'Neale00 PF 30 6' 6" 226 lbs Baylor $9,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4432194.png
- Day'Ron Sharpe20 C 21 6' 9" 265 lbs North Carolina $2,210,040
- https://a.espncdn.com/i/headshots/nba/players/full/3907387.png
- Ben Simmons10 PG 27 6' 10" 240 lbs LSU $37,893,408
- https://a.espncdn.com/i/headshots/nba/players/full/4065697.png
- Dennis Smith Jr. PG 25 6' 2" 205 lbs NC State $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4432174.png
- Cam Thomas24 SG 21 6' 3" 210 lbs LSU $2,240,160
- https://a.espncdn.com/i/headshots/nba/players/full/4277890.png
- Lonnie Walker IV G 24 6' 4" 204 lbs Miami $2,019,706
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Dariq Whitehead F 19 6' 7" 220 lbs Duke $2,966,040
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jalen Wilson F 22 6' 8" 225 lbs Kansas --
- Charlotte Hornets Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Angelo Allegri16 F 24 6' 7" 215 lbs Eastern Washington --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Amari Bailey G 19 6' 5" 185 lbs UCLA --
- https://a.espncdn.com/i/headshots/nba/players/full/4432816.png
- LaMelo Ball1 PG 22 6' 7" 180 lbs -- $10,900,635
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Leaky Black F 24 6' 9" 205 lbs North Carolina --
- https://a.espncdn.com/i/headshots/nba/players/full/4431712.png
- James Bouknight2 SG 23 6' 4" 190 lbs UConn $4,570,080
- https://a.espncdn.com/i/headshots/nba/players/full/4066383.png
- Miles Bridges0 SF 25 6' 7" 225 lbs Michigan State $7,921,301
- https://a.espncdn.com/i/headshots/nba/players/full/4249.png
- Gordon Hayward20 SF 33 6' 7" 225 lbs Butler $31,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4431699.png
- Kai Jones23 PF 22 6' 11" 221 lbs Texas $3,047,880
- https://a.espncdn.com/i/headshots/nba/players/full/4683027.png
- Theo Maledon9 PG 22 6' 5" 175 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/3138161.png
- Cody Martin11 SF 27 6' 6" 205 lbs Nevada $7,560,000
- https://a.espncdn.com/i/headshots/nba/players/full/4576086.png
- Bryce McGowens7 G 20 6' 6" 175 lbs Nebraska $1,719,865
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Nathan Mensah31 C 25 6' 10" 230 lbs San Diego State --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Brandon Miller F 20 6' 9" 200 lbs Alabama $10,880,400
- https://a.espncdn.com/i/headshots/nba/players/full/5144059.png
- James Nnaji C 19 6' 11" 250 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4230547.png
- Frank Ntilikina PG 25 6' 4" 200 lbs -- $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3133603.png
- Kelly Oubre Jr.12 SG 27 6' 6" 203 lbs Kansas --
- https://a.espncdn.com/i/headshots/nba/players/full/4278076.png
- Nick Richards4 C 25 7' 0" 245 lbs Kentucky $5,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3074752.png
- Terry Rozier3 SG 29 6' 1" 190 lbs Louisville $22,205,221
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Trevon Scott18 F 26 6' 8" 225 lbs Cincinnati --
- https://a.espncdn.com/i/headshots/nba/players/full/4397570.png
- Jaylen Sims30 G 24 6' 6" 210 lbs UNC Wilmington --
- https://a.espncdn.com/i/headshots/nba/players/full/4683686.png
- Nick Smith Jr. G 19 6' 4" 170 lbs Arkansas $2,463,960
- https://a.espncdn.com/i/headshots/nba/players/full/4702233.png
- JT Thor21 F 21 6' 9" 203 lbs Auburn $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4278078.png
- P.J. Washington25 PF 25 6' 7" 230 lbs Kentucky $16,847,826
- https://a.espncdn.com/i/headshots/nba/players/full/4701232.png
- Mark Williams5 C 21 7' 0" 240 lbs Duke $3,908,160
- Chicago Bulls Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4066421.png
- Lonzo Ball2 PG 25 6' 6" 190 lbs UCLA $20,465,117
- https://a.espncdn.com/i/headshots/nba/players/full/3133635.png
- Jevon Carter PG 28 6' 1" 200 lbs West Virginia $6,190,476
- https://a.espncdn.com/i/headshots/nba/players/full/2991350.png
- Alex Caruso6 SG 29 6' 5" 186 lbs Texas A&M $9,460,000
- https://a.espncdn.com/i/headshots/nba/players/full/2528693.png
- Torrey Craig0 SF 32 6' 7" 221 lbs South Carolina Upstate $2,528,233
- https://a.espncdn.com/i/headshots/nba/players/full/3978.png
- DeMar DeRozan11 SF 34 6' 6" 220 lbs USC $28,600,000
- https://a.espncdn.com/i/headshots/nba/players/full/4397002.png
- Ayo Dosunmu12 SG 23 6' 5" 200 lbs Illinois $6,481,481
- https://a.espncdn.com/i/headshots/nba/players/full/6585.png
- Andre Drummond3 C 30 6' 11" 279 lbs UConn $3,360,000
- https://a.espncdn.com/i/headshots/nba/players/full/2596112.png
- Javonte Green24 SF 30 6' 5" 205 lbs Radford --
- https://a.espncdn.com/i/headshots/nba/players/full/4067735.png
- Carlik Jones22 G 25 6' 0" 174 lbs Louisville $1,922,896
- https://a.espncdn.com/i/headshots/nba/players/full/3064440.png
- Zach LaVine8 SG 28 6' 5" 200 lbs UCLA $40,064,220
- https://a.espncdn.com/i/headshots/nba/players/full/4432239.png
- Justin Lewis34 F 21 6' 7" 245 lbs Marquette --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Julian Phillips F 19 6' 8" 197 lbs Tennessee $1,600,000
- https://a.espncdn.com/i/headshots/nba/players/full/4683622.png
- Marko Simonovic19 C 23 7' 0" 220 lbs -- $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4279815.png
- Terry Taylor32 F 23 6' 4" 230 lbs Austin Peay $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4433159.png
- Dalen Terry25 F 21 6' 7" 200 lbs Arizona $3,350,760
- https://a.espncdn.com/i/headshots/nba/players/full/6478.png
- Nikola Vucevic9 C 32 6' 10" 260 lbs USC $18,518,519
- https://a.espncdn.com/i/headshots/nba/players/full/4395651.png
- Coby White0 PG 23 6' 5" 195 lbs North Carolina $11,111,111
- https://a.espncdn.com/i/headshots/nba/players/full/4431687.png
- Patrick Williams44 PF 22 6' 7" 215 lbs Florida State $9,835,881
- Cleveland Cavaliers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4066328.png
- Jarrett Allen31 C 25 6' 9" 243 lbs Texas $20,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Emoni Bates21 F 19 6' 10" 170 lbs Eastern Michigan --
- https://a.espncdn.com/i/headshots/nba/players/full/4432173.png
- Sharife Cooper2 PG 22 6' 1" 176 lbs Auburn --
- https://a.espncdn.com/i/headshots/nba/players/full/3947156.png
- Mamadi Diakite21 F 26 6' 9" 228 lbs Virginia --
- https://a.espncdn.com/i/headshots/nba/players/full/4396907.png
- Darius Garland10 PG 23 6' 1" 192 lbs Vanderbilt $34,005,250
- https://a.espncdn.com/i/headshots/nba/players/full/4065733.png
- Ty Jerome SG 26 6' 5" 195 lbs Virginia $2,439,025
- https://a.espncdn.com/i/headshots/nba/players/full/3064559.png
- Damian Jones15 C 28 6' 11" 245 lbs Vanderbilt $2,586,665
- https://a.espncdn.com/i/headshots/nba/players/full/2991043.png
- Caris LeVert3 SG 29 6' 6" 205 lbs Michigan $15,384,616
- https://a.espncdn.com/i/headshots/nba/players/full/4066757.png
- Sam Merrill20 SG 27 6' 4" 205 lbs Utah State $1,997,238
- https://a.espncdn.com/i/headshots/nba/players/full/3908809.png
- Donovan Mitchell45 SG 27 6' 1" 215 lbs Louisville $33,162,030
- https://a.espncdn.com/i/headshots/nba/players/full/4432158.png
- Evan Mobley4 PF 22 6' 11" 215 lbs USC $8,882,760
- https://a.espncdn.com/i/headshots/nba/players/full/4432815.png
- Isaiah Mobley15 F 23 6' 8" 238 lbs USC --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Pete Nance48 F 23 6' 11" 230 lbs North Carolina --
- https://a.espncdn.com/i/headshots/nba/players/full/2968361.png
- Raul Neto19 PG 31 6' 2" 180 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/2990969.png
- Georges Niang SF 30 6' 7" 230 lbs Iowa State $8,800,000
- https://a.espncdn.com/i/headshots/nba/players/full/4432822.png
- Isaac Okoro35 SF 22 6' 5" 225 lbs Auburn $8,920,795
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Craig Porter Jr.40 G 23 6' 2" 186 lbs Wichita State --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Justin Powell G -- 6' 6" 197 lbs Washington State --
- https://a.espncdn.com/i/headshots/nba/players/full/4011.png
- Ricky Rubio13 PG 32 6' 2" 190 lbs -- $6,146,342
- https://a.espncdn.com/i/headshots/nba/players/full/4065778.png
- Max Strus31 SG 27 6' 5" 215 lbs DePaul $14,487,684
- https://a.espncdn.com/i/headshots/nba/players/full/6474.png
- Tristan Thompson13 C 32 6' 9" 254 lbs Texas --
- https://a.espncdn.com/i/headshots/nba/players/full/3912848.png
- Dean Wade32 PF 26 6' 9" 228 lbs Kansas State $5,709,877
- Dallas Mavericks Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4432161.png
- Greg Brown III36 PF 22 6' 8" 217 lbs Texas --
- https://a.espncdn.com/i/headshots/nba/players/full/2326307.png
- Seth Curry30 SG 33 6' 1" 190 lbs Duke $4,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Dexter Dennis38 G 24 6' 5" 210 lbs Texas A&M --
- https://a.espncdn.com/i/headshots/nba/players/full/3945274.png
- Luka Doncic77 PG 24 6' 7" 230 lbs -- $40,064,220
- https://a.espncdn.com/i/headshots/nba/players/full/3102528.png
- Dante Exum0 G 28 6' 5" 214 lbs -- $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/4432811.png
- Josh Green8 SG 22 6' 6" 210 lbs Arizona $4,765,339
- https://a.espncdn.com/i/headshots/nba/players/full/2528210.png
- Tim Hardaway Jr.10 SF 31 6' 5" 205 lbs Michigan $17,897,728
- https://a.espncdn.com/i/headshots/nba/players/full/4868423.png
- Jaden Hardy1 G 21 6' 4" 198 lbs -- $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/2993370.png
- Richaun Holmes20 F 29 6' 10" 240 lbs Bowling Green $12,046,020
- https://a.espncdn.com/i/headshots/nba/players/full/6442.png
- Kyrie Irving11 PG 31 6' 2" 191 lbs Duke $37,037,037
- https://a.espncdn.com/i/headshots/nba/players/full/3936099.png
- Derrick Jones Jr.55 SF 26 6' 6" 210 lbs UNLV $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/2960236.png
- Maxi Kleber42 PF 31 6' 10" 240 lbs -- $11,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- A.J. Lawson9 G 23 6' 6" 185 lbs South Carolina --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Dereck Lively II2 C 19 7' 1" 234 lbs Duke $4,775,640
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Mike Miles Jr.14 G 21 6' 2" 205 lbs TCU --
- https://a.espncdn.com/i/headshots/nba/players/full/6461.png
- Markieff Morris88 PF 34 6' 9" 245 lbs Kansas --
- https://a.espncdn.com/i/headshots/nba/players/full/2531367.png
- Dwight Powell7 C 32 6' 10" 240 lbs Stanford $4,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Olivier-Maxence Prosper18 F 21 6' 8" 215 lbs Marquette $2,733,360
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jordan Walker16 SG 24 5' 11" 170 lbs UAB --
- https://a.espncdn.com/i/headshots/nba/players/full/4397033.png
- Joe Wieskamp37 SF 24 6' 6" 205 lbs Iowa $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/4066218.png
- Grant Williams3 PF 24 6' 6" 236 lbs Tennessee $12,325,581
- Denver Nuggets Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4431767.png
- Christian Braun0 G 22 6' 6" 220 lbs Kansas $2,949,120
- https://a.espncdn.com/i/headshots/nba/players/full/2581018.png
- Kentavious Caldwell-Pope5 SG 30 6' 5" 204 lbs Georgia $14,704,938
- https://a.espncdn.com/i/headshots/nba/players/full/4230557.png
- Vlatko Cancar31 SF 26 6' 8" 236 lbs -- $2,234,359
- https://a.espncdn.com/i/headshots/nba/players/full/4278585.png
- Collin Gillespie21 G 24 6' 3" 190 lbs Villanova --
- https://a.espncdn.com/i/headshots/nba/players/full/3064290.png
- Aaron Gordon50 PF 28 6' 8" 235 lbs Arizona $22,266,182
- https://a.espncdn.com/i/headshots/nba/players/full/2284101.png
- Justin Holiday0 SF 34 6' 6" 180 lbs Washington $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4065731.png
- Jay Huff C 25 7' 1" 240 lbs Virginia --
- https://a.espncdn.com/i/headshots/nba/players/full/6443.png
- Reggie Jackson7 PG 33 6' 2" 208 lbs Boston College $5,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3112335.png
- Nikola Jokic15 C 28 6' 11" 284 lbs -- $47,607,350
- https://a.espncdn.com/i/headshots/nba/players/full/3442.png
- DeAndre Jordan6 C 35 6' 11" 265 lbs Texas A&M $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4066240.png
- Braxton Key F 26 6' 8" 225 lbs Virginia --
- https://a.espncdn.com/i/headshots/nba/players/full/3936299.png
- Jamal Murray27 PG 26 6' 4" 215 lbs Kentucky $33,833,400
- https://a.espncdn.com/i/headshots/nba/players/full/4431690.png
- Zeke Nnaji22 PF 22 6' 9" 240 lbs Arizona $4,306,281
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jalen Pickett G 23 6' 4" 202 lbs Siena $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4278104.png
- Michael Porter Jr.1 SF 25 6' 10" 218 lbs Missouri $33,386,850
- https://a.espncdn.com/i/headshots/nba/players/full/4305.png
- Ish Smith14 PG 35 6' 0" 175 lbs Wake Forest --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Julian Strawther G 21 6' 7" 205 lbs Gonzaga $2,431,080
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Hunter Tyson F 23 6' 8" 215 lbs Clemson $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/4576087.png
- Peyton Watson8 F 21 6' 8" 200 lbs UCLA $2,303,520
- Detroit Pistons Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4277848.png
- Marvin Bagley III35 PF 24 6' 10" 235 lbs Duke $12,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395676.png
- Buddy Boeheim27 F 23 6' 6" 205 lbs Syracuse --
- https://a.espncdn.com/i/headshots/nba/players/full/3593.png
- Bojan Bogdanovic44 SF 34 6' 7" 226 lbs -- $20,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/6429.png
- Alec Burks5 SG 32 6' 6" 214 lbs Colorado $10,489,600
- https://a.espncdn.com/i/headshots/nba/players/full/4871137.png
- Malcolm Cazalon20 G -- 6' 6" 185 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4432166.png
- Cade Cunningham2 PG 21 6' 7" 220 lbs Oklahoma State $11,055,240
- https://a.espncdn.com/i/headshots/nba/players/full/4080610.png
- Hamidou Diallo6 SG 25 6' 5" 202 lbs Kentucky --
- https://a.espncdn.com/i/headshots/nba/players/full/4433621.png
- Jalen Duren0 C 19 6' 11" 250 lbs Memphis $4,330,680
- https://a.espncdn.com/i/headshots/nba/players/full/2528794.png
- Joe Harris12 SF 32 6' 6" 220 lbs Virginia $19,928,571
- https://a.espncdn.com/i/headshots/nba/players/full/4683024.png
- Killian Hayes7 PG 22 6' 5" 195 lbs -- $7,413,955
- https://a.espncdn.com/i/headshots/nba/players/full/4433218.png
- Jaden Ivey23 PG 21 6' 4" 195 lbs Purdue $7,641,480
- https://a.espncdn.com/i/headshots/nba/players/full/4277957.png
- Isaiah Livers12 PF 25 6' 6" 232 lbs Michigan $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/2488826.png
- Rodney McGruder17 G 32 6' 4" 205 lbs Kansas State --
- https://a.espncdn.com/i/headshots/nba/players/full/3059310.png
- Monte Morris22 PG 28 6' 2" 183 lbs Iowa State $9,800,926
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jared Rhoden8 G 24 6' 6" 210 lbs Seton Hall --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Marcus Sasser G 23 6' 2" 195 lbs Houston $2,623,680
- https://a.espncdn.com/i/headshots/nba/players/full/4432810.png
- Isaiah Stewart28 C 22 6' 8" 250 lbs Washington $5,266,713
- https://a.espncdn.com/i/headshots/nba/players/full/4684742.png
- Ausar Thompson F 20 6' 7" 190 lbs -- $7,977,480
- https://a.espncdn.com/i/headshots/nba/players/full/4432808.png
- James Wiseman13 C 22 7' 0" 240 lbs Memphis $12,119,400
- Golden State Warriors Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/3975.png
- Stephen Curry30 PG 35 6' 2" 185 lbs Davidson $51,915,615
- https://a.espncdn.com/i/headshots/nba/players/full/6589.png
- Draymond Green23 PF 33 6' 6" 230 lbs Michigan State $22,321,429
- https://a.espncdn.com/i/headshots/nba/players/full/2327577.png
- JaMychal Green1 F 33 6' 9" 227 lbs Alabama --
- https://a.espncdn.com/i/headshots/nba/players/full/2386.png
- Andre Iguodala9 F 39 6' 6" 215 lbs Arizona --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Trayce Jackson-Davis F 23 6' 9" 245 lbs Indiana $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/6446.png
- Cory Joseph18 PG 32 6' 3" 200 lbs Texas $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4433247.png
- Jonathan Kuminga00 PF 20 6' 7" 225 lbs -- $6,012,840
- https://a.espncdn.com/i/headshots/nba/players/full/4066790.png
- Anthony Lamb40 F 25 6' 6" 227 lbs Vermont --
- https://a.espncdn.com/i/headshots/nba/players/full/3155535.png
- Kevon Looney5 F 27 6' 9" 222 lbs UCLA $7,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4432171.png
- Moses Moody4 SG 21 6' 5" 211 lbs Arkansas $3,918,480
- https://a.espncdn.com/i/headshots/nba/players/full/2779.png
- Chris Paul PG 38 6' 0" 175 lbs Wake Forest $30,800,000
- https://a.espncdn.com/i/headshots/nba/players/full/3134903.png
- Gary Payton II8 SG 30 6' 2" 195 lbs Oregon State $8,715,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Brandin Podziemski G 20 6' 4" 203 lbs Santa Clara $3,352,440
- https://a.espncdn.com/i/headshots/nba/players/full/4431720.png
- Lester Quinones25 G 22 6' 5" 205 lbs Memphis --
- https://a.espncdn.com/i/headshots/nba/players/full/3032978.png
- Dario Saric9 PF 29 6' 10" 225 lbs -- $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/6475.png
- Klay Thompson11 SG 33 6' 6" 220 lbs Washington State $43,219,440
- https://a.espncdn.com/i/headshots/nba/players/full/3059319.png
- Andrew Wiggins22 SF 28 6' 7" 197 lbs Kansas $24,330,357
- Houston Rockets Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/3415.png
- D.J. Augustin4 G 35 5' 11" 183 lbs Texas --
- https://a.espncdn.com/i/headshots/nba/players/full/3155526.png
- Dillon Brooks SF 27 6' 7" 225 lbs Oregon $22,627,671
- https://a.espncdn.com/i/headshots/nba/players/full/2991282.png
- Willie Cauley-Stein2 C 30 7' 0" 240 lbs Kentucky --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Darius Days5 F 23 6' 7" 245 lbs LSU --
- https://a.espncdn.com/i/headshots/nba/players/full/4433192.png
- Tari Eason17 F 22 6' 8" 215 lbs LSU $3,527,160
- https://a.espncdn.com/i/headshots/nba/players/full/4437244.png
- Jalen Green4 SG 21 6' 4" 186 lbs -- $9,891,480
- https://a.espncdn.com/i/headshots/nba/players/full/3209.png
- Jeff Green32 PF 37 6' 8" 235 lbs Georgetown $9,600,000
- https://a.espncdn.com/i/headshots/nba/players/full/3922230.png
- Aaron Holiday G 26 6' 0" 185 lbs UCLA $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/5075393.png
- Trevor Hudgins12 G 24 6' 0" 180 lbs Northwest Missouri State --
- https://a.espncdn.com/i/headshots/nba/players/full/2579294.png
- Frank Kaminsky33 PF 30 7' 0" 240 lbs Wisconsin --
- https://a.espncdn.com/i/headshots/nba/players/full/3146557.png
- Jock Landale C 27 6' 11" 255 lbs Saint Mary's $8,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4376.png
- Boban Marjanovic51 C 35 7' 4" 290 lbs -- $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4397140.png
- Kevin Porter Jr.3 SG 23 6' 4" 203 lbs USC $15,860,000
- https://a.espncdn.com/i/headshots/nba/players/full/4871144.png
- Alperen Sengun28 C 21 6' 11" 243 lbs -- $3,536,280
- https://a.espncdn.com/i/headshots/nba/players/full/4432639.png
- Jabari Smith Jr.1 PF 20 6' 11" 220 lbs Auburn $9,326,520
- https://a.espncdn.com/i/headshots/nba/players/full/3136777.png
- Jae'Sean Tate8 SF 27 6' 5" 230 lbs Ohio State $6,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4684740.png
- Amen Thompson F 20 6' 7" 214 lbs -- $8,809,320
- https://a.espncdn.com/i/headshots/nba/players/full/2991230.png
- Fred VanVleet SG 29 6' 0" 197 lbs Wichita State $40,806,300
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Cam Whitmore F 19 6' 7" 232 lbs Villanova $3,218,160
- Indiana Pacers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4065670.png
- Bruce Brown SF 27 6' 4" 202 lbs Miami $22,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4433254.png
- Kendall Brown10 SG 20 6' 8" 205 lbs Baylor --
- https://a.espncdn.com/i/headshots/nba/players/full/4396993.png
- Tyrese Haliburton0 PG 23 6' 5" 185 lbs Iowa State $5,808,435
- https://a.espncdn.com/i/headshots/nba/players/full/2990984.png
- Buddy Hield24 SG 30 6' 4" 220 lbs Oklahoma $19,279,841
- https://a.espncdn.com/i/headshots/nba/players/full/3438.png
- George Hill7 PG 37 6' 4" 188 lbs IUPUI --
- https://a.espncdn.com/i/headshots/nba/players/full/3439.png
- Serge Ibaka F 34 6' 10" 235 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4432170.png
- Isaiah Jackson22 SF 21 6' 10" 206 lbs Kentucky $2,696,280
- https://a.espncdn.com/i/headshots/nba/players/full/3999.png
- James Johnson16 PF 36 6' 7" 240 lbs Wake Forest --
- https://a.espncdn.com/i/headshots/nba/players/full/4683634.png
- Bennedict Mathurin00 SG 21 6' 6" 195 lbs Arizona $6,916,080
- https://a.espncdn.com/i/headshots/nba/players/full/2530530.png
- T.J. McConnell9 PG 31 6' 1" 190 lbs Arizona $8,700,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395712.png
- Andrew Nembhard2 PG 23 6' 5" 193 lbs Gonzaga $2,131,905
- https://a.espncdn.com/i/headshots/nba/players/full/4396909.png
- Aaron Nesmith23 SF 23 6' 5" 215 lbs Vanderbilt $5,634,257
- https://a.espncdn.com/i/headshots/nba/players/full/4277883.png
- Jordan Nwora13 SF 25 6' 8" 225 lbs Louisville $3,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Ben Sheppard G 22 6' 6" 190 lbs Belmont $2,537,160
- https://a.espncdn.com/i/headshots/nba/players/full/4397189.png
- Jalen Smith25 PF 23 6' 9" 215 lbs Maryland $5,604,192
- https://a.espncdn.com/i/headshots/nba/players/full/2451037.png
- Daniel Theis27 C 31 6' 8" 245 lbs -- $9,108,387
- https://a.espncdn.com/i/headshots/nba/players/full/4278355.png
- Obi Toppin1 PF 25 6' 9" 220 lbs Dayton $6,803,012
- https://a.espncdn.com/i/headshots/nba/players/full/3133628.png
- Myles Turner33 C 27 6' 11" 250 lbs Texas $20,975,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jarace Walker F 20 6' 8" 240 lbs Houston $6,059,520
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Isaiah Wong G 22 6' 4" 184 lbs Miami --
- https://a.espncdn.com/i/headshots/nba/players/full/2991011.png
- Gabe York32 G 30 6' 3" 190 lbs Arizona --
- LA Clippers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/3416.png
- Nicolas Batum33 PF 34 6' 8" 230 lbs -- $11,710,818
- https://a.espncdn.com/i/headshots/nba/players/full/4432162.png
- Brandon Boston Jr.4 SG 21 6' 6" 188 lbs Kentucky $1,836,096
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Kobe Brown G 23 6' 8" 250 lbs Missouri $2,413,320
- https://a.espncdn.com/i/headshots/nba/players/full/4066387.png
- Amir Coffey7 SG 26 6' 7" 210 lbs Minnesota $3,666,667
- https://a.espncdn.com/i/headshots/nba/players/full/2490620.png
- Robert Covington23 PF 32 6' 7" 209 lbs Tennessee State $11,692,308
- https://a.espncdn.com/i/headshots/nba/players/full/4433249.png
- Moussa Diabate25 F 21 6' 11" 210 lbs Michigan --
- https://a.espncdn.com/i/headshots/nba/players/full/4251.png
- Paul George13 F 33 6' 8" 220 lbs Fresno State $45,640,084
- https://a.espncdn.com/i/headshots/nba/players/full/4592492.png
- Bones Hyland5 PG 23 6' 2" 169 lbs VCU $2,306,400
- https://a.espncdn.com/i/headshots/nba/players/full/6450.png
- Kawhi Leonard2 SF 32 6' 7" 225 lbs San Diego State $45,640,084
- https://a.espncdn.com/i/headshots/nba/players/full/3907823.png
- Terance Mann14 SG 26 6' 5" 215 lbs Florida State $10,576,923
- https://a.espncdn.com/i/headshots/nba/players/full/4702882.png
- KJ Martin F 22 6' 7" 215 lbs -- $1,930,681
- https://a.espncdn.com/i/headshots/nba/players/full/4431828.png
- Kenyon Martin Jr. F 22 6' 7" 215 lbs -- $1,930,681
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jordan Miller G 23 6' 7" 195 lbs Miami --
- https://a.espncdn.com/i/headshots/nba/players/full/3914075.png
- Xavier Moon15 PG 28 6' 0" 165 lbs Morehead State --
- https://a.espncdn.com/i/headshots/nba/players/full/6462.png
- Marcus Morris Sr.8 SF 34 6' 8" 218 lbs Kansas $17,116,279
- https://a.espncdn.com/i/headshots/nba/players/full/2488653.png
- Mason Plumlee44 C 33 7' 0" 254 lbs Duke $5,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/2595516.png
- Norman Powell24 G 30 6' 3" 215 lbs UCLA $18,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4397916.png
- Jason Preston17 PG 24 6' 3" 181 lbs Ohio $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/3468.png
- Russell Westbrook0 PG 34 6' 3" 200 lbs UCLA $3,835,738
- https://a.espncdn.com/i/headshots/nba/players/full/4017837.png
- Ivica Zubac40 C 26 7' 0" 240 lbs -- $10,933,333
- Los Angeles Lakers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Damion Baugh G 23 6' 4" 194 lbs TCU --
- https://a.espncdn.com/i/headshots/nba/players/full/4432582.png
- Max Christie10 G 20 6' 5" 190 lbs Michigan State $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/6583.png
- Anthony Davis3 PF 30 6' 10" 253 lbs Kentucky $40,600,080
- https://a.espncdn.com/i/headshots/nba/players/full/4066264.png
- Wenyen Gabriel35 F 26 6' 9" 205 lbs Kentucky --
- https://a.espncdn.com/i/headshots/nba/players/full/4066648.png
- Rui Hachimura28 PF 25 6' 8" 230 lbs Gonzaga $15,740,741
- https://a.espncdn.com/i/headshots/nba/players/full/4397077.png
- Jaxson Hayes10 C 23 7' 0" 220 lbs Texas $2,165,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jalen Hood-Schifino G 20 6' 6" 215 lbs Indiana $3,695,040
- https://a.espncdn.com/i/headshots/nba/players/full/1966.png
- LeBron James23 SF 38 6' 9" 250 lbs -- $47,607,350
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Maxwell Lewis F 21 6' 7" 205 lbs Pepperdine $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/4431785.png
- Scotty Pippen Jr.14 G 22 6' 1" 185 lbs Vanderbilt --
- https://a.espncdn.com/i/headshots/nba/players/full/2990962.png
- Taurean Prince PF 29 6' 6" 218 lbs Baylor $7,455,000
- https://a.espncdn.com/i/headshots/nba/players/full/4066457.png
- Austin Reaves15 SG 25 6' 5" 197 lbs Oklahoma $12,015,150
- https://a.espncdn.com/i/headshots/nba/players/full/4395627.png
- Cam Reddish SF 24 6' 8" 217 lbs Duke $2,165,000
- https://a.espncdn.com/i/headshots/nba/players/full/3136776.png
- D'Angelo Russell1 PG 27 6' 4" 193 lbs Ohio State $17,307,693
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Vincent Valerio-Bodon SF 22 6' 9" 200 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4278077.png
- Jarred Vanderbilt2 PF 24 6' 8" 214 lbs Kentucky $4,698,000
- https://a.espncdn.com/i/headshots/nba/players/full/3137259.png
- Gabe Vincent PG 27 6' 3" 200 lbs UC Santa Barbara $10,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/3058254.png
- Christian Wood F 27 6' 9" 214 lbs UNLV $2,709,849
- Memphis Grizzlies Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/2991235.png
- Steven Adams4 C 30 6' 11" 265 lbs Pittsburgh $12,600,000
- https://a.espncdn.com/i/headshots/nba/players/full/4593125.png
- Santi Aldama7 PF 22 6' 11" 215 lbs Loyola (MD) $2,194,200
- https://a.espncdn.com/i/headshots/nba/players/full/4066320.png
- Desmond Bane22 SG 25 6' 5" 215 lbs TCU $3,845,083
- https://a.espncdn.com/i/headshots/nba/players/full/5148538.png
- Tarik Biberovic F 22 6' 7" 218 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4432168.png
- Josh Christopher SG 21 6' 4" 215 lbs Arizona State $2,485,200
- https://a.espncdn.com/i/headshots/nba/players/full/3906665.png
- Brandon Clarke15 PF 27 6' 8" 215 lbs Gonzaga $12,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4278378.png
- Jacob Gilyard0 G 25 5' 9" 160 lbs Richmond --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Gregory Jackson II F 18 6' 9" 215 lbs South Carolina --
- https://a.espncdn.com/i/headshots/nba/players/full/4277961.png
- Jaren Jackson Jr.13 PF 24 6' 11" 242 lbs Michigan State $27,102,202
- https://a.espncdn.com/i/headshots/nba/players/full/3913174.png
- Luke Kennard10 SG 27 6' 5" 206 lbs Duke $15,418,363
- https://a.espncdn.com/i/headshots/nba/players/full/3134932.png
- John Konchar46 SG 27 6' 5" 210 lbs -- $2,400,000
- https://a.espncdn.com/i/headshots/nba/players/full/4592691.png
- Jake LaRavia3 F 21 6' 7" 235 lbs Wake Forest $3,199,920
- https://a.espncdn.com/i/headshots/nba/players/full/4585610.png
- Kenneth Lofton Jr.6 F 21 6' 8" 275 lbs Louisiana Tech $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/4279888.png
- Ja Morant12 PG 24 6' 3" 174 lbs Murray State $34,005,250
- https://a.espncdn.com/i/headshots/nba/players/full/4593041.png
- David Roddy27 F 22 6' 5" 255 lbs Colorado State $2,718,240
- https://a.espncdn.com/i/headshots/nba/players/full/3456.png
- Derrick Rose PG 34 6' 3" 200 lbs Memphis $3,196,448
- https://a.espncdn.com/i/headshots/nba/players/full/2990992.png
- Marcus Smart PG 29 6' 4" 220 lbs Oklahoma State $18,833,712
- https://a.espncdn.com/i/headshots/nba/players/full/4277964.png
- Xavier Tillman2 F 24 6' 8" 245 lbs Michigan State $1,930,681
- https://a.espncdn.com/i/headshots/nba/players/full/4715198.png
- Isaiah Todd PF 21 6' 9" 219 lbs -- $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4433137.png
- Ziaire Williams8 F 22 6' 9" 185 lbs Stanford $4,810,200
- https://a.espncdn.com/i/headshots/nba/players/full/4397227.png
- Vince Williams Jr.5 G 23 6' 6" 205 lbs VCU --
- Miami Heat Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4066261.png
- Bam Adebayo13 C 26 6' 9" 255 lbs Kentucky $32,600,060
- https://a.espncdn.com/i/headshots/nba/players/full/3934723.png
- Thomas Bryant13 C 26 6' 10" 248 lbs Indiana $2,528,233
- https://a.espncdn.com/i/headshots/nba/players/full/6430.png
- Jimmy Butler22 SF 34 6' 7" 230 lbs Marquette $45,183,960
- https://a.espncdn.com/i/headshots/nba/players/full/4278572.png
- Jamal Cain8 F 24 6' 7" 191 lbs Oakland --
- https://a.espncdn.com/i/headshots/nba/players/full/2184.png
- Udonis Haslem40 PF 43 6' 8" 235 lbs Florida --
- https://a.espncdn.com/i/headshots/nba/players/full/4395725.png
- Tyler Herro14 PG 23 6' 5" 195 lbs Kentucky $27,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4291678.png
- Haywood Highsmith24 F 26 6' 4" 220 lbs Wheeling Jesuit $1,902,137
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jaime Jaquez Jr. G 22 6' 7" 225 lbs UCLA $3,510,600
- https://a.espncdn.com/i/headshots/nba/players/full/4997528.png
- Nikola Jovic5 F 20 6' 10" 205 lbs -- $2,352,000
- https://a.espncdn.com/i/headshots/nba/players/full/3449.png
- Kevin Love42 PF 35 6' 8" 251 lbs UCLA $3,835,738
- https://a.espncdn.com/i/headshots/nba/players/full/3012.png
- Kyle Lowry7 PG 37 6' 0" 196 lbs Villanova $29,682,540
- https://a.espncdn.com/i/headshots/nba/players/full/3138160.png
- Caleb Martin16 SF 27 6' 5" 205 lbs Nevada $6,802,950
- https://a.espncdn.com/i/headshots/nba/players/full/2581190.png
- Josh Richardson SG 30 6' 6" 200 lbs Tennessee $2,891,467
- https://a.espncdn.com/i/headshots/nba/players/full/3157465.png
- Duncan Robinson55 F 29 6' 7" 215 lbs Michigan $18,154,000
- https://a.espncdn.com/i/headshots/nba/players/full/4593049.png
- Orlando Robinson25 C 23 7' 0" 235 lbs Fresno State $1,801,769
- https://a.espncdn.com/i/headshots/nba/players/full/4066993.png
- Dru Smith G 25 6' 3" 203 lbs Missouri --
- Milwaukee Bucks Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/3135045.png
- Grayson Allen12 SG 27 6' 4" 198 lbs Duke $8,925,000
- https://a.espncdn.com/i/headshots/nba/players/full/3032977.png
- Giannis Antetokounmpo34 PF 28 6' 11" 243 lbs -- $45,640,084
- https://a.espncdn.com/i/headshots/nba/players/full/3102533.png
- Thanasis Antetokounmpo43 SF 31 6' 7" 219 lbs -- $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3907822.png
- Malik Beasley5 SG 26 6' 4" 187 lbs Florida State $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/4432179.png
- MarJon Beauchamp0 F 22 6' 7" 202 lbs -- $2,609,400
- https://a.espncdn.com/i/headshots/nba/players/full/2578239.png
- Pat Connaughton24 SG 30 6' 5" 209 lbs Notre Dame $9,423,869
- https://a.espncdn.com/i/headshots/nba/players/full/6581.png
- Jae Crowder99 PF 33 6' 6" 235 lbs Marquette $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/3423.png
- Goran Dragic31 PG 37 6' 3" 190 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4397475.png
- AJ Green20 G 23 6' 5" 207 lbs Northern Iowa $1,801,769
- https://a.espncdn.com/i/headshots/nba/players/full/3995.png
- Jrue Holiday21 PG 33 6' 4" 205 lbs UCLA $36,861,707
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Andre Jackson Jr. G 21 6' 6" 200 lbs UConn $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/6605.png
- Meyers Leonard3 C 31 7' 0" 260 lbs Illinois --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Chris Livingston F 19 6' 6" 220 lbs Kentucky $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/3448.png
- Brook Lopez11 C 35 7' 0" 282 lbs Stanford $25,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3447.png
- Robin Lopez C 35 7' 0" 281 lbs Stanford --
- https://a.espncdn.com/i/headshots/nba/players/full/6609.png
- Khris Middleton22 SF 32 6' 7" 222 lbs Texas A&M $29,320,988
- https://a.espncdn.com/i/headshots/nba/players/full/3064482.png
- Bobby Portis9 F 28 6' 10" 250 lbs Arkansas $11,710,818
- https://a.espncdn.com/i/headshots/nba/players/full/4277887.png
- Lindell Wigginton28 G 25 6' 1" 189 lbs Iowa State --
- Minnesota Timberwolves Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4278039.png
- Nickeil Alexander-Walker9 SG 25 6' 5" 205 lbs Virginia Tech $4,687,500
- https://a.espncdn.com/i/headshots/nba/players/full/2993874.png
- Kyle Anderson5 SF 30 6' 9" 230 lbs UCLA $9,219,512
- https://a.espncdn.com/i/headshots/nba/players/full/4278508.png
- Troy Brown Jr. SF 24 6' 6" 215 lbs Oregon $4,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jaylen Clark G 21 6' 5" 205 lbs UCLA --
- https://a.espncdn.com/i/headshots/nba/players/full/3195.png
- Mike Conley10 PG 35 6' 1" 175 lbs Ohio State $24,360,000
- https://a.espncdn.com/i/headshots/nba/players/full/4594268.png
- Anthony Edwards5 SG 22 6' 4" 225 lbs Georgia $9,219,512
- https://a.espncdn.com/i/headshots/nba/players/full/4277951.png
- Luka Garza55 C 24 6' 10" 243 lbs Iowa --
- https://a.espncdn.com/i/headshots/nba/players/full/3032976.png
- Rudy Gobert27 C 31 7' 1" 258 lbs -- $41,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4578893.png
- Vit Krejci F 23 6' 7" 195 lbs -- $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4431671.png
- Jaden McDaniels3 PF 22 6' 9" 185 lbs Washington $3,901,399
- https://a.espncdn.com/i/headshots/nba/players/full/3134916.png
- Jordan McLaughlin6 PG 27 6' 0" 185 lbs USC $2,320,000
- https://a.espncdn.com/i/headshots/nba/players/full/3915195.png
- Shake Milton SG 26 6' 5" 205 lbs SMU $5,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4687718.png
- Josh Minott8 F 21 6' 8" 205 lbs Memphis $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/4592187.png
- Wendell Moore Jr.7 G 22 6' 5" 215 lbs Duke $2,421,720
- https://a.espncdn.com/i/headshots/nba/players/full/4432176.png
- Daishen Nix15 SG 21 6' 4" 226 lbs -- $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4278541.png
- Jaylen Nowell4 SG 24 6' 4" 201 lbs Washington --
- https://a.espncdn.com/i/headshots/nba/players/full/4396971.png
- Naz Reid11 C 24 6' 9" 264 lbs LSU $12,950,400
- https://a.espncdn.com/i/headshots/nba/players/full/6617.png
- Austin Rivers25 SG 31 6' 4" 200 lbs Duke --
- https://a.espncdn.com/i/headshots/nba/players/full/3908336.png
- Matt Ryan37 F 26 6' 6" 215 lbs Chattanooga --
- https://a.espncdn.com/i/headshots/nba/players/full/3136195.png
- Karl-Anthony Towns32 C 27 7' 0" 248 lbs Kentucky $36,016,200
- New Orleans Pelicans Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4277869.png
- Jose Alvarado15 PG 25 6' 0" 179 lbs Georgia Tech $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4869342.png
- Dyson Daniels11 G 20 6' 8" 200 lbs -- $5,784,120
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jordan Hawkins24 G 21 6' 5" 195 lbs UConn $4,310,160
- https://a.espncdn.com/i/headshots/nba/players/full/2999409.png
- Willy Hernangomez9 C 29 6' 11" 250 lbs -- $2,559,942
- https://a.espncdn.com/i/headshots/nba/players/full/3913176.png
- Brandon Ingram14 SF 26 6' 8" 190 lbs Duke $33,833,400
- https://a.espncdn.com/i/headshots/nba/players/full/4277813.png
- Herbert Jones5 SF 24 6' 7" 206 lbs Alabama $12,015,150
- https://a.espncdn.com/i/headshots/nba/players/full/4395698.png
- Kira Lewis Jr.13 PG 22 6' 1" 170 lbs Alabama $5,722,116
- https://a.espncdn.com/i/headshots/nba/players/full/4432821.png
- E.J. Liddell32 F 22 6' 6" 240 lbs Ohio State $1,801,769
- https://a.espncdn.com/i/headshots/nba/players/full/4278594.png
- Naji Marshall8 SF 25 6' 7" 220 lbs Xavier $1,930,681
- https://a.espncdn.com/i/headshots/nba/players/full/2490149.png
- CJ McCollum3 SG 32 6' 3" 190 lbs Lehigh $35,802,469
- https://a.espncdn.com/i/headshots/nba/players/full/4397688.png
- Trey Murphy III25 SG 23 6' 8" 206 lbs Virginia $3,359,280
- https://a.espncdn.com/i/headshots/nba/players/full/2580365.png
- Larry Nance Jr.22 PF 30 6' 8" 245 lbs Wyoming $10,375,000
- https://a.espncdn.com/i/headshots/nba/players/full/4431811.png
- Dereon Seabron0 G 23 6' 5" 183 lbs NC State --
- https://a.espncdn.com/i/headshots/nba/players/full/6477.png
- Jonas Valanciunas17 C 31 6' 11" 265 lbs -- $15,435,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395628.png
- Zion Williamson1 PF 23 6' 6" 284 lbs Duke $34,005,250
- https://a.espncdn.com/i/headshots/nba/players/full/2579258.png
- Cody Zeller44 C 30 6' 11" 240 lbs Indiana $2,019,706
- New York Knicks Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4395625.png
- RJ Barrett9 SG 23 6' 6" 214 lbs Duke $23,883,929
- https://a.espncdn.com/i/headshots/nba/players/full/3934672.png
- Jalen Brunson11 PG 27 6' 1" 190 lbs Villanova $26,346,666
- https://a.espncdn.com/i/headshots/nba/players/full/3934673.png
- Donte DiVincenzo SG 26 6' 4" 203 lbs Villanova $10,960,000
- https://a.espncdn.com/i/headshots/nba/players/full/6588.png
- Evan Fournier13 SG 30 6' 6" 205 lbs -- $18,857,143
- https://a.espncdn.com/i/headshots/nba/players/full/4397014.png
- Quentin Grimes6 SG 23 6' 4" 210 lbs Houston $2,385,720
- https://a.espncdn.com/i/headshots/nba/players/full/3062679.png
- Josh Hart3 SG 28 6' 4" 215 lbs Villanova $12,960,000
- https://a.espncdn.com/i/headshots/nba/players/full/4222252.png
- Isaiah Hartenstein55 C 25 7' 0" 250 lbs -- $9,245,121
- https://a.espncdn.com/i/headshots/nba/players/full/3913220.png
- DaQuan Jeffries8 SG 26 6' 5" 225 lbs Western Texas Coll. (J.C.) $2,065,585
- https://a.espncdn.com/i/headshots/nba/players/full/4432645.png
- Trevor Keels3 G 20 6' 4" 221 lbs Duke --
- https://a.espncdn.com/i/headshots/nba/players/full/4067325.png
- Nathan Knight F 26 6' 8" 253 lbs William & Mary --
- https://a.espncdn.com/i/headshots/nba/players/full/5149711.png
- Jaylen Martin G 19 6' 6" 196 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4431823.png
- Miles McBride2 PG 23 6' 2" 195 lbs West Virginia $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4395724.png
- Immanuel Quickley5 SG 24 6' 3" 190 lbs Kentucky $4,171,548
- https://a.espncdn.com/i/headshots/nba/players/full/3064514.png
- Julius Randle30 PF 28 6' 8" 250 lbs Kentucky $28,226,880
- https://a.espncdn.com/i/headshots/nba/players/full/4351852.png
- Mitchell Robinson23 C 25 7' 0" 240 lbs -- $15,681,818
- https://a.espncdn.com/i/headshots/nba/players/full/4066392.png
- Isaiah Roby18 F 25 6' 8" 230 lbs Nebraska $2,066,585
- https://a.espncdn.com/i/headshots/nba/players/full/4277922.png
- Jericho Sims45 C 24 6' 9" 250 lbs Texas $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/4397235.png
- Duane Washington Jr.4 PG 23 6' 2" 197 lbs Ohio State --
- https://a.espncdn.com/i/headshots/nba/players/full/3906786.png
- Dylan Windler SG 26 6' 7" 196 lbs Belmont --
- Oklahoma City Thunder Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/6426.png
- Davis Bertans SF 30 6' 10" 225 lbs -- $17,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4997526.png
- Ousmane Dieng13 F 20 6' 10" 220 lbs -- $4,798,440
- https://a.espncdn.com/i/headshots/nba/players/full/4397020.png
- Luguentz Dort5 G 24 6' 4" 220 lbs Arizona State $15,277,778
- https://a.espncdn.com/i/headshots/nba/players/full/4871145.png
- Josh Giddey3 SG 20 6' 8" 216 lbs -- $6,587,040
- https://a.espncdn.com/i/headshots/nba/players/full/4278073.png
- Shai Gilgeous-Alexander2 PG 25 6' 6" 195 lbs Kentucky $33,386,850
- https://a.espncdn.com/i/headshots/nba/players/full/4433255.png
- Chet Holmgren7 PF 21 7' 1" 208 lbs Gonzaga $10,386,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395702.png
- Isaiah Joe11 SG 24 6' 3" 165 lbs Arkansas $1,997,238
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Keyontae Johnson18 F 23 6' 6" 235 lbs Kansas State --
- https://a.espncdn.com/i/headshots/nba/players/full/4432819.png
- Tre Mann23 PG 22 6' 3" 184 lbs Florida $3,191,280
- https://a.espncdn.com/i/headshots/nba/players/full/3102532.png
- Vasilije Micic98 G 29 6' 5" 203 lbs -- $7,723,000
- https://a.espncdn.com/i/headshots/nba/players/full/2527963.png
- Victor Oladipo4 SG 31 6' 4" 213 lbs Indiana $9,450,000
- https://a.espncdn.com/i/headshots/nba/players/full/4683018.png
- Aleksej Pokusevski17 F 21 7' 0" 190 lbs -- $5,009,633
- https://a.espncdn.com/i/headshots/nba/players/full/4432813.png
- Jeremiah Robinson-Earl50 PF 22 6' 9" 240 lbs Villanova $1,900,000
- https://a.espncdn.com/i/headshots/nba/players/full/4278046.png
- Olivier Sarr30 C 24 7' 0" 240 lbs Kentucky --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Cason Wallace22 G 19 6' 4" 193 lbs Kentucky $5,291,000
- https://a.espncdn.com/i/headshots/nba/players/full/4683749.png
- TyTy Washington Jr. G 21 6' 3" 195 lbs Kentucky $2,320,440
- https://a.espncdn.com/i/headshots/nba/players/full/4066317.png
- Lindy Waters III12 F 26 6' 6" 210 lbs Oklahoma State $1,927,986
- https://a.espncdn.com/i/headshots/nba/players/full/4065653.png
- Jack White F 26 6' 7" 225 lbs Duke $1,801,769
- https://a.espncdn.com/i/headshots/nba/players/full/4397183.png
- Aaron Wiggins21 SG 24 6' 5" 190 lbs Maryland $1,836,096
- https://a.espncdn.com/i/headshots/nba/players/full/4593803.png
- Jalen Williams8 F 22 6' 6" 218 lbs Santa Clara $4,558,680
- https://a.espncdn.com/i/headshots/nba/players/full/4432823.png
- Jaylin Williams6 F 21 6' 10" 245 lbs Arkansas $2,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3133626.png
- Kenrich Williams34 SF 28 6' 6" 210 lbs TCU $6,175,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- KJ Williams12 F 24 6' 10" 250 lbs LSU --
- Orlando Magic Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4432809.png
- Cole Anthony50 PG 23 6' 3" 185 lbs North Carolina $5,539,771
- https://a.espncdn.com/i/headshots/nba/players/full/4432573.png
- Paolo Banchero5 PF 20 6' 10" 250 lbs Duke $11,608,080
- https://a.espncdn.com/i/headshots/nba/players/full/4348700.png
- Goga Bitadze88 C 24 6' 11" 250 lbs -- $2,062,585
- https://a.espncdn.com/i/headshots/nba/players/full/4712849.png
- Anthony Black G 19 6' 7" 200 lbs Arkansas $7,245,480
- https://a.espncdn.com/i/headshots/nba/players/full/4277847.png
- Wendell Carter Jr.34 C 24 6' 10" 270 lbs Duke $13,050,000
- https://a.espncdn.com/i/headshots/nba/players/full/2596108.png
- Michael Carter-Williams11 G 31 6' 5" 190 lbs Syracuse --
- https://a.espncdn.com/i/headshots/nba/players/full/4066636.png
- Markelle Fultz20 PG 25 6' 4" 209 lbs Washington $17,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/2999547.png
- Gary Harris14 SG 29 6' 4" 210 lbs Michigan State $13,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4067858.png
- Kevon Harris7 G 26 6' 6" 220 lbs Stephen F. Austin --
- https://a.espncdn.com/i/headshots/nba/players/full/4433623.png
- Caleb Houstan2 G 20 6' 8" 210 lbs Michigan $2,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jett Howard G 20 6' 8" 215 lbs Michigan $5,026,800
- https://a.espncdn.com/i/headshots/nba/players/full/2968436.png
- Joe Ingles SG 35 6' 8" 220 lbs -- $11,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4065654.png
- Jonathan Isaac1 PF 25 6' 10" 230 lbs Florida State $17,400,000
- https://a.espncdn.com/i/headshots/nba/players/full/4397071.png
- Mac McClung9 SG 24 6' 2" 185 lbs Texas Tech --
- https://a.espncdn.com/i/headshots/nba/players/full/4278052.png
- Chuma Okeke3 PF 25 6' 8" 229 lbs Auburn $5,266,713
- https://a.espncdn.com/i/headshots/nba/players/full/4066851.png
- Miye Oni SG 26 6' 5" 206 lbs Yale --
- https://a.espncdn.com/i/headshots/nba/players/full/4409222.png
- Trevelin Queen12 SG 26 6' 6" 190 lbs New Mexico State --
- https://a.espncdn.com/i/headshots/nba/players/full/3918423.png
- Admiral Schofield25 SF 26 6' 5" 241 lbs Tennessee --
- https://a.espncdn.com/i/headshots/nba/players/full/4432165.png
- Jalen Suggs4 SG 22 6' 5" 205 lbs Gonzaga $7,252,080
- https://a.espncdn.com/i/headshots/nba/players/full/4566434.png
- Franz Wagner22 SF 22 6' 10" 220 lbs Michigan $5,508,720
- https://a.espncdn.com/i/headshots/nba/players/full/3150844.png
- Moritz Wagner21 C 26 6' 11" 245 lbs Michigan $8,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4397040.png
- Brandon Williams11 G 23 6' 2" 190 lbs Arizona --
- Philadelphia 76ers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Marcus Bagley38 F 21 6' 8" 215 lbs Arizona State --
- https://a.espncdn.com/i/headshots/nba/players/full/4277919.png
- Mo Bamba C 25 7' 0" 231 lbs Texas $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3964.png
- Patrick Beverley21 PG 35 6' 2" 180 lbs Arkansas $2,019,706
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Ricky Council IV37 G 22 6' 6" 205 lbs Arkansas --
- https://a.espncdn.com/i/headshots/nba/players/full/2580913.png
- Dewayne Dedmon14 C 34 6' 10" 245 lbs USC --
- https://a.espncdn.com/i/headshots/nba/players/full/4397107.png
- David Duke Jr. G 23 6' 4" 207 lbs Providence --
- https://a.espncdn.com/i/headshots/nba/players/full/3059318.png
- Joel Embiid21 C 29 7' 0" 280 lbs Kansas $46,900,000
- https://a.espncdn.com/i/headshots/nba/players/full/3988.png
- Danny Green F 36 6' 6" 215 lbs North Carolina --
- https://a.espncdn.com/i/headshots/nba/players/full/3992.png
- James Harden1 SG 34 6' 5" 220 lbs Arizona State $35,640,000
- https://a.espncdn.com/i/headshots/nba/players/full/2991055.png
- Montrezl Harrell5 C 29 6' 7" 240 lbs Louisville $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/6440.png
- Tobias Harris12 PF 31 6' 7" 226 lbs Tennessee $39,270,150
- https://a.espncdn.com/i/headshots/nba/players/full/2991155.png
- Danuel House Jr.25 SF 30 6' 6" 220 lbs Texas A&M $4,310,250
- https://a.espncdn.com/i/headshots/nba/players/full/4397103.png
- Louis King23 SF 24 6' 7" 205 lbs Oregon --
- https://a.espncdn.com/i/headshots/nba/players/full/3929325.png
- Furkan Korkmaz30 SG 26 6' 7" 202 lbs -- $5,370,370
- https://a.espncdn.com/i/headshots/nba/players/full/4431678.png
- Tyrese Maxey0 PG 22 6' 2" 200 lbs Kentucky $4,343,920
- https://a.espncdn.com/i/headshots/nba/players/full/4066436.png
- De'Anthony Melton8 SG 25 6' 2" 200 lbs USC $8,000,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Filip Petrusev PF 23 6' 11" 234 lbs Gonzaga $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/4278562.png
- Paul Reed44 SF 24 6' 9" 210 lbs DePaul $7,723,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Terquavion Smith26 G 20 6' 4" 165 lbs NC State --
- https://a.espncdn.com/i/headshots/nba/players/full/4432164.png
- Jaden Springer11 G 20 6' 4" 205 lbs Tennessee $2,226,240
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Azuolas Tubelis29 F 21 6' 11" 245 lbs Arizona --
- https://a.espncdn.com/i/headshots/nba/players/full/3033.png
- P.J. Tucker17 PF 38 6' 5" 245 lbs Texas $11,014,500
- Phoenix Suns Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4278129.png
- Deandre Ayton22 C 25 7' 0" 250 lbs Arizona $32,459,438
- https://a.espncdn.com/i/headshots/nba/players/full/4066299.png
- Udoka Azubuike C 24 6' 10" 280 lbs Kansas --
- https://a.espncdn.com/i/headshots/nba/players/full/3136779.png
- Keita Bates-Diop SF 27 6' 8" 229 lbs Ohio State $2,364,614
- https://a.espncdn.com/i/headshots/nba/players/full/6580.png
- Bradley Beal SG 30 6' 4" 207 lbs Florida $46,741,590
- https://a.espncdn.com/i/headshots/nba/players/full/6427.png
- Bismack Biyombo18 C 31 6' 8" 255 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4397102.png
- Bol Bol C 23 7' 2" 220 lbs Oregon $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/3136193.png
- Devin Booker1 SG 26 6' 5" 206 lbs Kentucky $36,016,200
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Toumani Camara F 23 6' 8" 220 lbs Dayton $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/3202.png
- Kevin Durant35 PF 34 6' 10" 240 lbs Texas $47,649,433
- https://a.espncdn.com/i/headshots/nba/players/full/3914285.png
- Drew Eubanks PF 26 6' 10" 245 lbs Oregon State $2,346,614
- https://a.espncdn.com/i/headshots/nba/players/full/4278402.png
- Jordan Goodwin G 24 6' 3" 200 lbs Saint Louis $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/2595209.png
- Damion Lee10 SG 30 6' 5" 210 lbs Louisville $2,528,233
- https://a.espncdn.com/i/headshots/nba/players/full/4278124.png
- Saben Lee38 G 24 6' 2" 183 lbs Vanderbilt --
- https://a.espncdn.com/i/headshots/nba/players/full/3914283.png
- Chimezie Metu7 PF 26 6' 9" 225 lbs USC $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/4065663.png
- Josh Okogie2 SF 25 6' 4" 213 lbs Georgia Tech $2,815,937
- https://a.espncdn.com/i/headshots/nba/players/full/6619.png
- Terrence Ross8 G 32 6' 7" 206 lbs Washington --
- https://a.espncdn.com/i/headshots/nba/players/full/3059307.png
- Ish Wainright12 F 29 6' 5" 250 lbs Baylor $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/2982334.png
- T.J. Warren21 SF 30 6' 8" 220 lbs NC State --
- https://a.espncdn.com/i/headshots/nba/players/full/3133838.png
- Yuta Watanabe SF 28 6' 8" 215 lbs George Washington $2,346,614
- Portland Trail Blazers Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4871140.png
- Ibou Badji41 C 20 7' 1" 240 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4397126.png
- Moses Brown C 23 7' 2" 245 lbs UCLA --
- https://a.espncdn.com/i/headshots/nba/players/full/4432588.png
- John Butler Jr.21 F 20 7' 0" 190 lbs Florida State --
- https://a.espncdn.com/i/headshots/nba/players/full/2991070.png
- Jerami Grant9 SF 29 6' 8" 210 lbs Syracuse $27,586,207
- https://a.espncdn.com/i/headshots/nba/players/full/4683678.png
- Scoot Henderson0 G 19 6' 2" 196 lbs -- $9,770,880
- https://a.espncdn.com/i/headshots/nba/players/full/4432192.png
- Keon Johnson6 SG 21 6' 3" 185 lbs Tennessee $2,808,720
- https://a.espncdn.com/i/headshots/nba/players/full/4278075.png
- Kevin Knox II11 SF 24 6' 7" 215 lbs Kentucky --
- https://a.espncdn.com/i/headshots/nba/players/full/6606.png
- Damian Lillard0 PG 33 6' 2" 195 lbs Weber State $45,640,084
- https://a.espncdn.com/i/headshots/nba/players/full/4395652.png
- Nassir Little10 SF 23 6' 5" 220 lbs North Carolina $6,250,000
- https://a.espncdn.com/i/headshots/nba/players/full/4066269.png
- Skylar Mays8 SG 26 6' 4" 205 lbs LSU --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Justin Minaya15 F 24 6' 7" 210 lbs Providence --
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Kris Murray8 F 23 6' 8" 215 lbs Iowa $2,847,480
- https://a.espncdn.com/i/headshots/nba/players/full/3102530.png
- Jusuf Nurkic27 C 29 7' 0" 290 lbs -- $16,875,000
- https://a.espncdn.com/i/headshots/nba/players/full/4914336.png
- Shaedon Sharpe17 SG 20 6' 5" 200 lbs Kentucky $6,313,800
- https://a.espncdn.com/i/headshots/nba/players/full/4351851.png
- Anfernee Simons1 SG 24 6' 3" 181 lbs -- $24,107,143
- https://a.espncdn.com/i/headshots/nba/players/full/3907498.png
- Matisse Thybulle4 SG 26 6' 5" 201 lbs Washington $10,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/4432446.png
- Jabari Walker34 F 21 6' 7" 200 lbs Colorado $1,719,864
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jeenathan Williams49 G 24 6' 5" 205 lbs Buffalo $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/3135047.png
- Justise Winslow26 PF 27 6' 6" 222 lbs Duke --
- Sacramento Kings Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/6578.png
- Harrison Barnes40 SF 31 6' 8" 225 lbs North Carolina $17,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3919454.png
- Terence Davis3 SG 26 6' 4" 201 lbs Ole Miss --
- https://a.espncdn.com/i/headshots/nba/players/full/2489716.png
- Matthew Dellavedova8 PG 33 6' 3" 200 lbs Saint Mary's --
- https://a.espncdn.com/i/headshots/nba/players/full/4592402.png
- Chris Duarte SG 26 6' 5" 190 lbs Oregon $4,124,400
- https://a.espncdn.com/i/headshots/nba/players/full/4397693.png
- Kessler Edwards17 SF 23 6' 8" 215 lbs Pepperdine $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/4702177.png
- Keon Ellis23 G 23 6' 5" 175 lbs Alabama --
- https://a.espncdn.com/i/headshots/nba/players/full/4066736.png
- Jordan Ford1 G 25 6' 1" 175 lbs Saint Mary's --
- https://a.espncdn.com/i/headshots/nba/players/full/4066259.png
- De'Aaron Fox5 PG 25 6' 3" 185 lbs Kentucky $32,600,060
- https://a.espncdn.com/i/headshots/nba/players/full/4066372.png
- Kevin Huerter9 SG 25 6' 7" 198 lbs Maryland $15,669,643
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Colby Jones G 21 6' 6" 205 lbs Xavier $2,019,706
- https://a.espncdn.com/i/headshots/nba/players/full/2596107.png
- Alex Len25 C 30 7' 1" 250 lbs Maryland $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/3136196.png
- Trey Lyles41 PF 27 6' 9" 234 lbs Kentucky $8,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/3452.png
- JaVale McGee00 C 35 7' 0" 270 lbs Nevada $5,734,280
- https://a.espncdn.com/i/headshots/nba/players/full/4278053.png
- Davion Mitchell15 PG 25 6' 0" 202 lbs Baylor $5,063,760
- https://a.espncdn.com/i/headshots/nba/players/full/4066262.png
- Malik Monk0 SG 25 6' 3" 200 lbs Kentucky $9,945,830
- https://a.espncdn.com/i/headshots/nba/players/full/4594327.png
- Keegan Murray13 SF 23 6' 8" 215 lbs Iowa $8,409,000
- https://a.espncdn.com/i/headshots/nba/players/full/3155942.png
- Domantas Sabonis10 PF 27 7' 0" 240 lbs Gonzaga $30,600,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Jalen Slawson F 23 6' 7" 218 lbs Furman --
- https://a.espncdn.com/i/headshots/nba/players/full/3893014.png
- Sasha Vezenkov F 28 6' 9" 225 lbs -- $6,341,464
- San Antonio Spurs Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4870562.png
- Dominick Barlow26 F 20 6' 10" 220 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/4397886.png
- Charles Bassey28 C 22 6' 9" 230 lbs Western Kentucky $2,600,000
- https://a.espncdn.com/i/headshots/nba/players/full/2578240.png
- Khem Birch99 C 30 6' 8" 233 lbs UNLV $6,985,000
- https://a.espncdn.com/i/headshots/nba/players/full/4565201.png
- Malaki Branham22 G 20 6' 4" 180 lbs Ohio State $3,071,880
- https://a.espncdn.com/i/headshots/nba/players/full/2528779.png
- Reggie Bullock25 SF 32 6' 6" 205 lbs North Carolina $11,014,080
- https://a.espncdn.com/i/headshots/nba/players/full/4592479.png
- Julian Champagnie30 F 22 6' 9" 210 lbs St. John's $3,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/5081727.png
- Sidy Cissoko G 19 6' 7" 225 lbs -- $1,119,563
- https://a.espncdn.com/i/headshots/nba/players/full/4066650.png
- Zach Collins23 PF 25 6' 11" 250 lbs Gonzaga $7,700,000
- https://a.espncdn.com/i/headshots/nba/players/full/2534781.png
- Gorgui Dieng41 C 33 6' 10" 265 lbs Louisville --
- https://a.espncdn.com/i/headshots/nba/players/full/3133601.png
- Devonte' Graham4 PG 28 6' 1" 195 lbs Kansas $12,100,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395723.png
- Keldon Johnson3 SF 23 6' 5" 220 lbs Kentucky $20,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4395626.png
- Tre Jones33 PG 23 6' 1" 185 lbs Duke $9,895,833
- https://a.espncdn.com/i/headshots/nba/players/full/4397008.png
- Romeo Langford35 SG 23 6' 5" 216 lbs Indiana --
- https://a.espncdn.com/i/headshots/nba/players/full/4278580.png
- Sandro Mamukelashvili54 PF 24 6' 10" 240 lbs Seton Hall $2,019,760
- https://a.espncdn.com/i/headshots/nba/players/full/2528588.png
- Doug McDermott17 SF 31 6' 6" 225 lbs Creighton $13,750,000
- https://a.espncdn.com/i/headshots/nba/players/full/3893016.png
- Cedi Osman16 SF 28 6' 7" 230 lbs -- $6,718,842
- https://a.espncdn.com/i/headshots/nba/players/full/4610139.png
- Jeremy Sochan10 F 20 6' 8" 230 lbs Baylor $5,316,960
- https://a.espncdn.com/i/headshots/nba/players/full/4395630.png
- Devin Vassell24 SG 23 6' 5" 200 lbs Florida State $5,887,899
- https://a.espncdn.com/i/headshots/nba/players/full/5104157.png
- Victor Wembanyama C 19 7' 3" 225 lbs -- $12,160,680
- https://a.espncdn.com/i/headshots/nba/players/full/4683935.png
- Blake Wesley14 G 20 6' 4" 190 lbs Notre Dame $2,504,640
- Toronto Raptors Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4431679.png
- Precious Achiuwa5 PF 24 6' 8" 225 lbs Memphis $4,379,527
- https://a.espncdn.com/i/headshots/nba/players/full/3934719.png
- O.G. Anunoby3 SF 26 6' 7" 232 lbs Indiana $18,642,857
- https://a.espncdn.com/i/headshots/nba/players/full/4433134.png
- Scottie Barnes4 SF 22 6' 8" 225 lbs Florida State $8,008,560
- https://a.espncdn.com/i/headshots/nba/players/full/6579.png
- Will Barton1 G 32 6' 5" 181 lbs Memphis --
- https://a.espncdn.com/i/headshots/nba/players/full/3948153.png
- Chris Boucher25 PF 30 6' 9" 200 lbs Oregon $11,750,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Gradey Dick G 19 6' 8" 205 lbs Kansas $4,536,720
- https://a.espncdn.com/i/headshots/nba/players/full/4066786.png
- Jeff Dowtin Jr.20 PG 26 6' 3" 177 lbs Rhode Island $2,109,706
- https://a.espncdn.com/i/headshots/nba/players/full/4066668.png
- Malachi Flynn22 PG 25 6' 1" 175 lbs San Diego State $3,873,025
- https://a.espncdn.com/i/headshots/nba/players/full/4397251.png
- Ron Harper Jr.8 F 23 6' 5" 245 lbs Rutgers --
- https://a.espncdn.com/i/headshots/nba/players/full/4431778.png
- Christian Koloko35 C 23 7' 0" 230 lbs Arizona $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/4066731.png
- Jalen McDaniels7 PF 25 6' 9" 205 lbs San Diego State $4,516,000
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Markquis Nowell G 23 5' 8" 160 lbs Kansas State --
- https://a.espncdn.com/i/headshots/nba/players/full/3134908.png
- Jakob Poeltl19 C 27 7' 1" 245 lbs Utah $19,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/2594922.png
- Otto Porter Jr.32 SF 30 6' 8" 198 lbs Georgetown $6,300,000
- https://a.espncdn.com/i/headshots/nba/players/full/3032979.png
- Dennis Schroder PG 30 6' 1" 172 lbs -- $12,405,000
- https://a.espncdn.com/i/headshots/nba/players/full/3149673.png
- Pascal Siakam43 PF 29 6' 8" 230 lbs New Mexico State $37,893,408
- https://a.espncdn.com/i/headshots/nba/players/full/4277843.png
- Gary Trent Jr.33 SG 24 6' 5" 209 lbs Duke $18,560,000
- https://a.espncdn.com/i/headshots/nba/players/full/3244.png
- Thaddeus Young21 PF 35 6' 8" 235 lbs Georgia Tech $8,000,000
- Utah Jazz Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4397018.png
- Ochai Agbaji30 G 23 6' 5" 215 lbs Kansas $4,114,200
- https://a.espncdn.com/i/headshots/nba/players/full/3912292.png
- Jarrell Brantley5 PF 27 6' 5" 250 lbs Charleston --
- https://a.espncdn.com/i/headshots/nba/players/full/2528426.png
- Jordan Clarkson00 PG 31 6' 5" 194 lbs Missouri $23,487,629
- https://a.espncdn.com/i/headshots/nba/players/full/3908845.png
- John Collins20 PF 25 6' 9" 226 lbs Wake Forest $25,340,000
- https://a.espncdn.com/i/headshots/nba/players/full/2991139.png
- Kris Dunn11 G 29 6' 3" 205 lbs Providence $2,586,665
- https://a.espncdn.com/i/headshots/nba/players/full/3899664.png
- Simone Fontecchio16 F 27 6' 7" 209 lbs -- $3,044,872
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Keyonte George G 19 6' 4" 185 lbs Baylor $3,889,800
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Taylor Hendricks F 19 6' 9" 210 lbs UCF $5,569,920
- https://a.espncdn.com/i/headshots/nba/players/full/4396991.png
- Talen Horton-Tucker0 SG 22 6' 4" 234 lbs Iowa State $11,020,000
- https://a.espncdn.com/i/headshots/nba/players/full/4432817.png
- Johnny Juzang33 G 22 6' 7" 215 lbs UCLA --
- https://a.espncdn.com/i/headshots/nba/players/full/4433136.png
- Walker Kessler24 C 22 7' 0" 245 lbs Auburn $2,831,160
- https://a.espncdn.com/i/headshots/nba/players/full/4066336.png
- Lauri Markkanen23 PF 26 7' 0" 240 lbs Arizona $17,259,999
- https://a.espncdn.com/i/headshots/nba/players/full/2489663.png
- Kelly Olynyk41 PF 32 6' 11" 240 lbs Gonzaga $12,195,122
- https://a.espncdn.com/i/headshots/nba/players/full/4423309.png
- Luka Samanic91 F 23 6' 10" 227 lbs -- $2,066,585
- https://a.espncdn.com/combiner/i?img=/i/headshots/nophoto.png&w=200&h=146
- Brice Sensabaugh F 19 6' 6" 235 lbs Ohio State $2,448,600
- https://a.espncdn.com/i/headshots/nba/players/full/4277811.png
- Collin Sexton2 PG 24 6' 2" 190 lbs Alabama $17,325,000
- https://a.espncdn.com/i/headshots/nba/players/full/4401416.png
- Juan Toscano-Anderson95 SF 30 6' 6" 209 lbs Marquette --
- https://a.espncdn.com/i/headshots/nba/players/full/3074213.png
- Omer Yurtseven77 C 25 6' 11" 275 lbs Georgetown $2,800,000
- Washington Wizards Roster 2023-24
- NAME POS AGE HT WT COLLEGE SALARY
- https://a.espncdn.com/i/headshots/nba/players/full/4683021.png
- Deni Avdija9 SF 22 6' 9" 210 lbs -- $6,263,188
- https://a.espncdn.com/i/headshots/nba/players/full/4433246.png
- Patrick Baldwin Jr.7 F 20 6' 9" 220 lbs Milwaukee $2,337,720
- https://a.espncdn.com/i/headshots/nba/players/full/4395695.png
- Jared Butler SG 23 6' 3" 193 lbs Baylor --
- https://a.espncdn.com/i/headshots/nba/players/full/3129932.png
- Xavier Cooks12 G 28 6' 8" 185 lbs Winthrop $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/5104155.png
- Bilal Coulibaly SG 19 6' 6" 195 lbs -- $6,614,280
- https://a.espncdn.com/i/headshots/nba/players/full/4432356.png
- Johnny Davis1 G 21 6' 4" 195 lbs Wisconsin $5,050,800
- https://a.espncdn.com/i/headshots/nba/players/full/4278049.png
- Daniel Gafford21 C 24 6' 10" 234 lbs Arkansas $12,402,000
- https://a.espncdn.com/i/headshots/nba/players/full/3428.png
- Danilo Gallinari PF 35 6' 10" 236 lbs -- $6,802,950
- https://a.espncdn.com/i/headshots/nba/players/full/3986.png
- Taj Gibson67 F 38 6' 9" 232 lbs USC --
- https://a.espncdn.com/i/headshots/nba/players/full/2581184.png
- Anthony Gill16 PF 30 6' 8" 230 lbs Virginia $1,997,239
- https://a.espncdn.com/i/headshots/nba/players/full/3135046.png
- Tyus Jones PG 27 6' 0" 196 lbs Duke $14,000,000
- https://a.espncdn.com/i/headshots/nba/players/full/4280151.png
- Corey Kispert24 SF 24 6' 6" 224 lbs Gonzaga $3,722,040
- https://a.espncdn.com/i/headshots/nba/players/full/3134907.png
- Kyle Kuzma33 SF 28 6' 9" 221 lbs Utah $25,568,182
- https://a.espncdn.com/i/headshots/nba/players/full/2490089.png
- Mike Muscala C 32 6' 11" 240 lbs Bucknell $3,500,000
- https://a.espncdn.com/i/headshots/nba/players/full/3056247.png
- Kendrick Nunn20 G 28 6' 3" 190 lbs Oakland --
- https://a.espncdn.com/i/headshots/nba/players/full/4066410.png
- Eugene Omoruyi F 26 6' 6" 235 lbs Oregon $1,927,896
- https://a.espncdn.com/i/headshots/nba/players/full/4277956.png
- Jordan Poole3 SG 24 6' 4" 194 lbs Michigan $27,955,357
- https://a.espncdn.com/i/headshots/nba/players/full/4591725.png
- Ryan Rollins2 G 21 6' 3" 180 lbs Toledo $1,719,864
- https://a.espncdn.com/i/headshots/nba/players/full/3914044.png
- Landry Shamet SG 26 6' 4" 190 lbs Wichita State $10,250,000
- https://a.espncdn.com/i/headshots/nba/players/full/4997537.png
- Tristan Vukcevic PF 20 7' 0" 225 lbs -- --
- https://a.espncdn.com/i/headshots/nba/players/full/3064447.png
- Delon Wright55 SG 31 6' 5" 185 lbs Utah $8,195,122
- ================================================
- FILE: end-to-end-use-cases/coding/text2sql/nba_roster.db
- ================================================
- [Non-text file]
- ================================================
- FILE: end-to-end-use-cases/coding/text2sql/requirements.txt
- ================================================
- langchain
- langchain-community
- langchain-together
- ================================================
- FILE: end-to-end-use-cases/Contextual-Chunking-RAG/README.md
- ================================================
- # Contextual keyword generation for RAG using Llama-3.1
- **Problem**: Independent chunking in traditional RAG systems leads to the loss of contextual information between chunks. This makes it difficult for LLMs to retrieve relevant data when context (e.g., the subject or entity being discussed) is not explicitly repeated within individual chunks.
- **Solution**: Generate keywords for each chunk to fill in the missing contextual information. These keywords (e.g., "BMW, X5, pricing") enrich the chunk with the necessary context, ensuring better retrieval accuracy. By embedding this enriched metadata, the system bridges gaps between related chunks, enabling effective query matching and accurate answer generation.
- [This article](https://medium.com/@ailabs/overcoming-independent-chunking-in-rag-systems-a-hybrid-approach-5d2c205b3732) explains the benefits of contextual chunking.
- **Note**: This method does not require calling the LLM for each chunk separately, which makes it efficient.
- **Getting started**
- In this cookbook, we’ll use DeepInfra for Llama inference services, so be sure to obtain an API key from https://deepinfra.com/.
- You'll also need a LlamaParse API key to parse PDF files, which can be obtained from https://www.llamaindex.ai/.
- Additionally, we will use the "jinaai/jina-embeddings-v2-base-en" model from HuggingFace to generate text embeddings locally.
- Before getting started, update the <code>config.py</code> file as follows:
- "DEEPINFRA_API_KEY"="<your_api_key>"
- "LLAMAPARSE_API_KEY"="<your_api_key>"
- ================================================
- FILE: end-to-end-use-cases/Contextual-Chunking-RAG/data/llama_article.txt
- ================================================
- Understanding the LLaMA Model: A Breakthrough in Large Language Models
- In recent years, large language models (LLMs) have revolutionized the field of natural language processing (NLP). Among them, Meta’s LLaMA (Large Language Model Meta AI) has emerged as a powerful, efficient, and open-weight model that provides high-quality text generation capabilities while being more accessible than proprietary alternatives. This article explores the architecture, capabilities, and applications of LLaMA, along with its significance in the AI landscape.
- 1. Introduction to LLaMA
- LLaMA is a family of autoregressive transformer-based models designed by Meta AI. Unlike massive models like OpenAI’s GPT-4, which require extensive computational resources and are primarily closed-source, LLaMA aims to provide powerful language modeling in a more efficient and open format. The original LLaMA release included models ranging from 7 billion to 65 billion parameters, offering different levels of computational demand and performance.
- The second iteration, LLaMA 2, introduced in 2023, further improved efficiency, accuracy, and usability. LLaMA 2 models are available in 7B, 13B, and 70B parameter variants, with optimized training methodologies and increased alignment with human preferences.
- 2. Architecture and Training
- LLaMA follows the transformer architecture, the foundation of most modern language models. Key architectural improvements and training strategies include:
- Tokenization: LLaMA uses Byte Pair Encoding (BPE) for tokenization, ensuring better handling of various languages and token efficiency.
- Efficient Training: Trained on a diverse dataset containing publicly available and licensed data, LLaMA reduces reliance on proprietary sources. The training process leverages a causal decoder-only transformer, meaning it predicts tokens autoregressively while attending to previous context.
- Scaled Attention Mechanism: LLaMA incorporates Rotary Position Embeddings (RoPE) for efficient long-context understanding. This improves its ability to handle longer sequences compared to earlier models.
- Memory Optimization: Unlike some larger models requiring thousands of GPUs for inference, LLaMA’s optimized weight distribution and efficient parameter scaling allow it to run on fewer computational resources while maintaining high performance.
- The training data includes code, technical documents, research papers, and general text, making LLaMA well-suited for various NLP tasks, from answering questions to generating detailed content.
- 3. Performance and Benchmarks
- LLaMA models have demonstrated impressive performance across multiple benchmarks. The 65B variant outperforms GPT-3 (175B) on several standard NLP tasks while using significantly fewer parameters. Key benchmarking results include:
- MMLU (Massive Multitask Language Understanding): LLaMA 2-70B achieves results comparable to GPT-4 in general knowledge and reasoning tasks.
- ARC (AI2 Reasoning Challenge): LLaMA models show strong problem-solving capabilities, particularly in logic-based questions.
- HellaSwag & PIQA: LLaMA performs well in commonsense reasoning, approaching human-level accuracy.
- Code Generation: Though not primarily designed for coding, LLaMA exhibits notable competence in generating and completing programming code snippets.
- Despite being smaller than some competing models, LLaMA's efficiency enables it to achieve state-of-the-art performance per parameter count, making it a highly cost-effective solution.
- 4. Applications of LLaMA
- The versatility of LLaMA enables a wide range of applications across industries, including:
- Chatbots and Virtual Assistants: LLaMA powers intelligent conversational AI systems, providing human-like responses with improved contextual understanding.
- Content Generation: From summarizing long documents to creating articles and reports, LLaMA is widely used for generating high-quality text.
- Programming Assistance: Developers use LLaMA to generate code snippets, debug errors, and improve software development efficiency.
- Scientific Research: The model helps researchers analyze papers, generate summaries, and assist in hypothesis generation.
- Education and Tutoring: LLaMA aids in personalized learning, answering students’ queries and explaining complex topics interactively.
- Its open-weight availability also allows organizations to fine-tune the model on proprietary data, making it adaptable for specialized use cases such as medical AI, legal document analysis, and multilingual NLP tasks.
- 5. Challenges and Limitations
- Despite its advantages, LLaMA faces several challenges:
- Ethical Concerns: Like all LLMs, LLaMA can generate biased or misleading information. Efforts are ongoing to align the model with ethical AI principles.
- Computational Costs: Although LLaMA is optimized for efficiency, larger variants still require significant GPU resources for fine-tuning and inference.
- Context Length Limitations: While improved, LLaMA still has constraints on long-context reasoning compared to specialized extended-context models.
- Security Risks: Open-weight models pose potential risks for misuse, such as generating harmful or deceptive content. Responsible deployment and monitoring are necessary.
- 6. The Future of LLaMA
- Meta continues to refine the LLaMA model family, with research focused on improving alignment, reducing biases, and extending context understanding. Future iterations may include:
- LLaMA 3 and Beyond: Expected advancements in parameter efficiency and multimodal capabilities.
- Better Fine-Tuning Techniques: Enhancing adaptability for domain-specific applications.
- Integration with Retrieval-Augmented Generation (RAG): Combining LLaMA with external knowledge sources for more accurate responses.
- Edge Deployment: Efforts to make LLaMA smaller and faster for local AI applications without cloud dependence.
- As open-source AI research progresses, LLaMA remains a key player in democratizing access to powerful language models, enabling innovation across academia, business, and technology sectors.
- 7. Conclusion
- LLaMA represents a significant step forward in making high-quality language models more accessible. By balancing efficiency, openness, and performance, it provides a compelling alternative to closed-source models like GPT-4. Whether for research, business applications, or general AI development, LLaMA offers a robust platform for advancing NLP capabilities while promoting transparency and innovation in AI.
- ================================================
- FILE: end-to-end-use-cases/customerservice_chatbots/README.md
- ================================================
- This repository contains various end-to-end use cases for building customer service chatbots using Meta's Llama 3. Below is an outline of the subfolders and their contents.
- ## Outline
- - **[RAG_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot)**
- contains resources for building a Meta Llama 3 chatbot with Retrieval Augmented Generation (RAG). It contains a [notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/RAG_Chatbot_Example.ipynb) which shows a complete example of how to build a Meta Llama 3 chatbot hosted on your browser that can answer questions based on your own data. It covers:
- - The deployment process of Meta Llama 3 8B with the [Text-generation-inference](https://github.com/huggingface/text-generation-inference) framework as an API server.
- - A chatbot example built with [Gradio](https://github.com/gradio-app/gradio) and wired to the server.
- - Adding RAG capability with Meta Llama 3 specific knowledge based on our Getting Started [guide](https://ai.meta.com/llama/get-started/).
- - **[ai_agent_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot)** contains a
- [Sales Bot with Llama3 - A Summarization and RAG Use Case notebook](https://github.com/meta-llama/llama-cookbook/blob/main/end-to-end-use-cases/customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb) that demonstrates building a sales chatbot using Llama3 for targeted product recommendations. The workflow involves:
- * Generating product review summaries using Llama3
- * Storing summaries in a vector database (Weaviate)
- * Leveraging Retrieval Augmented Generation (RAG) for intelligent sales interactions
- **[messenger_chatbot](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/customerservice_chatbots/messenger_chatbot)** provides a step-by-step guide to building a Llama-enabled Messenger chatbot, including integration details with the Messenger Platform and a [demo video](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).
- **[whatsapp_chatbot](https://github.com/Monireh2/llama-recipes/tree/main/end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot)** contains a tutorial for creating a Llama 3 enabled WhatsApp chatbot, similar to the Messenger chatbot guide. A demo video showcasing the use of iOS WhatsApp to send a question to a test phone number and receive a response generated by Llama 3 can be found [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).
- ## Additional Information
- - **RAG Architecture:** The RAG method enhances LLMs by retrieving and augmenting data, allowing for more relevant and context-aware responses.
- **Development Tools:** The repository utilizes frameworks like LangChain and LlamaIndex for building LLM applications, and Gradio for creating the chatbot UI.
- For more detailed information, please refer to the individual subdirectory documentation and examples.
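- As a quick orientation, the retrieval step shared by these chatbots can be sketched in a few lines (illustrative only; exact package paths vary across LangChain versions):
- ```
- # Illustrative RAG retrieval sketch: embed texts, index them with FAISS,
- # retrieve the best-matching chunk, and prepend it to the prompt for Llama 3.
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain_community.vectorstores import FAISS
-
- docs = ["Llama 3 comes in 8B and 70B parameter sizes.",
-         "RAG retrieves relevant context before generation."]
- db = FAISS.from_texts(docs, HuggingFaceEmbeddings())  # sentence-transformers model
-
- question = "What sizes does Llama 3 come in?"
- context = db.similarity_search(question, k=1)[0].page_content
- prompt = f"Context: {context}\n\nQuestion: {question}"
- # `prompt` is then sent to the hosted Llama 3 endpoint (e.g., the TGI server
- # used in the RAG_chatbot notebook).
- ```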
- ================================================
- FILE: end-to-end-use-cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md
- ================================================
- # Building a Llama 3 Enabled Messenger Chatbot
- This step-by-step tutorial shows the complete process of building a Llama-enabled Messenger chatbot. A demo video of using the iOS Messenger app to send a question to a Facebook business page and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1B4ijFH4X3jEHZfkGdTPmdsgpUes_RNud/view).
- If you're interested in a Llama 3 enabled WhatsApp chatbot, see [here](../whatsapp_chatbot/whatsapp_llama3.md) for a tutorial.
- ## Overview
- Messenger from Meta is a messaging service that allows a Facebook business page to respond to people who are interested in their business using the [Messenger Platform](https://developers.facebook.com/docs/messenger-platform/overview). The benefits of an intelligent and knowledgeable Llama 3 powered chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this tutorial, we'll cover the details of integrating Llama 3 with the Messenger Platform to build a basic Llama 3 enabled chatbot - for more Llama 3 application development and deployment demos, such as how to integrate your own data with Llama 3, see the recipes.
- The diagram below shows the components and overall data flow of the Llama 3 enabled Messenger chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.
- 
- ## Getting Started with Messenger Platform
- 1. A Facebook Page is required to send and receive messages using the Messenger Platform - see [here](https://www.facebook.com/business/help/461775097570076?id=939256796236247) for details about Facebook Pages and how to create a new page.
- 2. If you have followed the [Llama WhatsApp chatbot tutorial](../whatsapp_chatbot/whatsapp_llama3.md), or if you already have a Meta developer account and a business app, then you can skip this step. Otherwise, you need to first [create a Meta developer account](https://developers.facebook.com/) and then [create a business app](https://developers.facebook.com/docs/development/create-an-app/).
- 3. Add the Messenger product to your business app: go to your business app's Dashboard, click "Add Product", and then select "Messenger".
- 4. Configure a webhook to receive real time HTTP notifications: follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com and get your webhook's callback URL. You can skip the WHATSAPP_TOKEN part of the guide - the rest of the webhook setup is the same for WhatsApp and Messenger.
- 5. Open Messenger's API Settings, as shown in the screenshot below, then in "1. Configure webhooks", enter the Callback URL and Verify Token you set up in the previous step, and subscribe to all message related fields for "Webhook Fields". Finally, in "2. Generate access tokens", connect your Facebook page (see step 1) and copy your page access token for later use.
- 
- ## Writing Llama 3 Enabled Web App
- The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud, to receive the user query sent by the webhook, which will be covered in the next section, pass the query to Llama 3, and send the Llama 3 answer back to the webhook. For more information on how to use LangChain or LlamaIndex, another LLM app building framework, and other Llama 3 cloud providers or on-premise deployment options to develop Llama 3 apps, see the recipes.
- First, let's create a new conda (or you can use venv if you like) environment and install all the required packages:
- ```
- conda create -n messenger-llama python=3.8
- conda activate messenger-llama
- pip install langchain replicate flask requests uvicorn gunicorn
- ```
- Then, create a Python file named [llama_messenger.py](llama_messenger.py) that creates a Llama 3 instance and defines an HTTP method `msgrcvd_page` to:
- 1. receive the user message forwarded by the webhook;
- 2. ask Llama 3 for the answer;
- 3. send the answer back to the sender using the Facebook graph API.
- ```
- import langchain
- from langchain.llms import Replicate
- from flask import Flask
- from flask import request
- import os
- import requests
- import json
-
- os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
- llama3_8b_chat = "meta/meta-llama-3-8b-instruct"
-
- llm = Replicate(
-     model=llama3_8b_chat,
-     model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
- )
-
- app = Flask(__name__)
-
- @app.route('/msgrcvd_page', methods=['POST', 'GET'])
- def msgrcvd_page():
-     # 1. Receive the user message and the user/page ids forwarded by the webhook
-     message = request.args.get('message')
-     sender = request.args.get('sender')
-     recipient = request.args.get('recipient')
-
-     # 2. Ask Llama 3 for the answer
-     answer = llm(message)
-
-     # 3. Send the answer back to the sender via the Facebook graph API
-     url = f"https://graph.facebook.com/v18.0/{recipient}/messages"
-     params = {
-         'recipient': '{"id": ' + sender + '}',
-         'message': json.dumps({'text': answer}),
-         'messaging_type': 'RESPONSE',
-         'access_token': '<page_access_token>'
-     }
-     headers = {
-         'Content-Type': 'application/json'
-     }
-     response = requests.post(url, params=params, headers=headers)
-
-     return message + "<p/>" + answer
- ```
- Replace <page_access_token> with the access token copied in step 5 "Open Messenger's API Settings" of the previous section. If you'd rather keep the token out of the source file, you can read it from an environment variable instead, as sketched below.
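- The following optional refactor (our suggestion, not part of the original script) replaces the params construction inside `msgrcvd_page` with a version that reads the token from the environment:
- ```
- import os
-
- # Export PAGE_ACCESS_TOKEN=<your page access token> before starting the
- # server, then build the params dict with the value read at runtime.
- PAGE_ACCESS_TOKEN = os.environ["PAGE_ACCESS_TOKEN"]
-
- params = {
-     'recipient': '{"id": ' + sender + '}',
-     'message': json.dumps({'text': answer}),
-     'messaging_type': 'RESPONSE',
-     'access_token': PAGE_ACCESS_TOKEN
- }
- ```
- Now it's time to modify the webhook to complete the whole app.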
- ## Modifying the Webhook
- Open your glitch.com webhook URL created earlier, and change your `app.js` to simply forward the user message and the user and page ids sent by the Messenger Platform to the Llama 3 enabled web app `llama_messenger.py` described in the previous section:
- ```
- "use strict";
-
- // Imports dependencies and set up http server
- const request = require("request"),
-   express = require("express"),
-   body_parser = require("body-parser"),
-   axios = require("axios").default,
-   app = express().use(body_parser.json()); // creates express http server
-
- // Sets server port and logs message on success
- app.listen(process.env.PORT || 1337, () => console.log("webhook is listening"));
-
- // Accepts POST requests at /webhook endpoint
- app.post("/webhook", (req, res) => {
-   // Parse the request body from the POST
-   let body = req.body;
-   let sender = req.body["entry"][0]["messaging"][0]["sender"]["id"];
-   let recipient = req.body["entry"][0]["messaging"][0]["recipient"]["id"];
-   let message = req.body["entry"][0]["messaging"][0]["message"]["text"];
-
-   // Check if this is an event from a page subscription
-   if (body.object === "page") {
-     // Returns a '200 OK' response to all requests
-     res.status(200).send("EVENT_RECEIVED");
-
-     let url = "http://<web server public IP>:5000/msgrcvd_page?sender=" + sender + "&recipient=" + recipient + "&message=" + encodeURIComponent(message);
-     console.log(url);
-
-     axios.get(url)
-       .then(response => {
-         // Handle the response data
-         console.log(response.data);
-       })
-       .catch(error => {
-         // Handle errors
-         console.error('Axios error:', error);
-       });
-   } else {
-     // Return a '404 Not Found' if event is not from a page subscription
-     res.sendStatus(404);
-   }
- });
-
- // Accepts GET requests at the /webhook endpoint. You need this URL to setup webhook initially.
- // info on verification request payload: https://developers.facebook.com/docs/graph-api/webhooks/getting-started#verification-requests
- app.get("/webhook", (req, res) => {
-   /**
-    * UPDATE YOUR VERIFY TOKEN
-    * This will be the Verify Token value when you set up webhook
-    **/
-   const verify_token = process.env.VERIFY_TOKEN;
-
-   // Parse params from the webhook verification request
-   let mode = req.query["hub.mode"];
-   let token = req.query["hub.verify_token"];
-   let challenge = req.query["hub.challenge"];
-
-   // Check if a token and mode were sent
-   if (mode && token) {
-     // Check the mode and token sent are correct
-     if (mode === "subscribe" && token === verify_token) {
-       // Respond with 200 OK and challenge token from the request
-       console.log("WEBHOOK_VERIFIED: " + token);
-       res.status(200).send(challenge);
-     } else {
-       // Responds with '403 Forbidden' if verify tokens do not match
-       res.sendStatus(403);
-     }
-   }
- });
- ```
- Remember to change <web server public IP>, which needs to be publicly visible, to the IP of the server where your Llama 3 enabled web app in the previous section runs.
- Note: It's possible and even recommended to implement the webhook in Python and call Llama 3 directly inside it, instead of making an HTTP request, as the JavaScript code above does, to a Python app which calls Llama 3 and sends the answer to Messenger.
- ## Running the Chatbot
- On your web server, run the following command on a Terminal (see [here](https://flask.palletsprojects.com/en/3.0.x/deploying/gunicorn/) for more info on Gunicorn):
- ```
- gunicorn -b 0.0.0.0:5000 llama_messenger:app
- ```
- If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules.
- Now you can open your Messenger app, select the Facebook page you connected in Messenger's API Settings, enter a message, and receive Llama 3's answer shortly, as shown in the demo video at the beginning of this post.
- To debug any possible issues, go to your glitch.com app log and copy the URL generated there when a Messenger message is sent, which looks like this:
- ```
- http://<web server public IP>:5000/msgrcvd_page?sender=<user id>&recipient=<page id>&message=<msg>
- ```
- Then open the URL in a browser to verify your web server can receive the message and the two ids, and generate a Llama answer before sending the answer back to Messenger.
- ================================================
- FILE: end-to-end-use-cases/customerservice_chatbots/RAG_chatbot/requirements.txt
- ================================================
- gradio
- pypdf
- langchain
- sentence-transformers
- faiss-cpu
- text-generation
- ================================================
- FILE: end-to-end-use-cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md
- ================================================
- # Building a Llama 3 Enabled WhatsApp Chatbot
- This step-by-step tutorial shows the complete process of building a Llama 3 enabled WhatsApp chatbot. A demo video of using the iOS WhatsApp to send a question to a test phone number and receive the Llama 3 generated answer is [here](https://drive.google.com/file/d/1fZDaOsvyE1yrNGETV-e0SvL14BYeAI6R/view).
- If you're interested in a Llama 3 enabled Messenger chatbot, see [here](../messenger_chatbot/messenger_llama3.md) for a tutorial.
- ## Overview
- Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgeable chatbot are obvious, including cost savings and a better customer experience, such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
- The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using an Amazon EC2 instance as an example for running the web server.
- 
- ## Getting Started with WhatsApp Business Cloud API
- First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:
- 1. Add the WhatsApp product to your business app;
- 2. Add a recipient number;
- 3. Send a test message;
- 4. Configure a webhook to receive real time HTTP notifications.
- For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.
- Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page, select your WhatsApp business app, and you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run it in a Terminal to send a test message to your WhatsApp.
- 
- Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later.
- ## Writing Llama 3 Enabled Web App
- The Python-based web app we developed uses [LangChain](https://www.langchain.com/), an open source LLM development framework, and [Replicate](https://replicate.com/), a service provider hosting LLM models in the cloud, to receive the user query sent by the webhook, which will be covered in the next section, pass the query to Llama 3, and send the Llama 3 answer back to the webhook. For more information on how to use LangChain and Replicate to develop Llama 3 apps, see the recipes - you may also replace Replicate with other Llama deployment options shown in the demo apps.
- First, let's create a new conda (or you can use venv if you like) environment and install all the required packages:
- ```
- conda create -n whatsapp-llama python=3.8
- conda activate whatsapp-llama
- pip install langchain replicate flask requests uvicorn gunicorn
- ```
- Then, create a Python file named llama_chatbot.py with the following code, which defines a class `WhatsAppClient` and a method `send_text_message` to post a message (the answer generated by Llama 3 on a user query) to the WhatsApp Cloud API, which then sends the answer back to the WhatsApp user. Remember to set `WHATSAPP_API_TOKEN` and `WHATSAPP_CLOUD_NUMBER_ID` to the values you saved in the previous section.
- ```
- import langchain
- from langchain.llms import Replicate
- from flask import Flask
- from flask import request
- import os
- import requests
- import json
-
- class WhatsAppClient:
-     API_URL = "https://graph.facebook.com/v17.0/"
-     WHATSAPP_API_TOKEN = "<Temporary access token from your WhatsApp API Setup>"
-     WHATSAPP_CLOUD_NUMBER_ID = "<Phone number ID from your WhatsApp API Setup>"
-
-     def __init__(self):
-         self.headers = {
-             "Authorization": f"Bearer {self.WHATSAPP_API_TOKEN}",
-             "Content-Type": "application/json",
-         }
-         self.API_URL = self.API_URL + self.WHATSAPP_CLOUD_NUMBER_ID
-
-     def send_text_message(self, message, phone_number):
-         payload = {
-             "messaging_product": 'whatsapp',
-             "to": phone_number,
-             "type": "text",
-             "text": {
-                 "preview_url": False,
-                 "body": message
-             }
-         }
-         response = requests.post(f"{self.API_URL}/messages", json=payload, headers=self.headers)
-         return response.status_code
- ```
- Finally, add the code below to llama_chatbot.py, which creates a Llama 3 instance and defines an HTTP method `msgrcvd` to:
- 1. receive the user message forwarded by the webhook;
- 2. ask Llama 3 for the answer;
- 3. call the `WhatsAppClient`'s `send_text_message` with a recipient's phone number.
- ```
- os.environ["REPLICATE_API_TOKEN"] = "<your replicate api token>"
- llama3_8b_chat = "meta/meta-llama-3-8b-instruct"
-
- llm = Replicate(
-     model=llama3_8b_chat,
-     model_kwargs={"temperature": 0.0, "top_p": 1, "max_new_tokens": 500}
- )
-
- client = WhatsAppClient()
- app = Flask(__name__)
-
- @app.route("/")
- def hello_llama():
-     return "<p>Hello Llama 3</p>"
-
- @app.route('/msgrcvd', methods=['POST', 'GET'])
- def msgrcvd():
-     message = request.args.get('message')
-     answer = llm(message)
-     client.send_text_message(answer, "<a recipient phone number from your WhatsApp API Setup>")
-     return message + "<p/>" + answer
- ```
- The complete script of llama_chatbot.py is [here](llama_chatbot.py).
- Now it's time to modify the webhook to complete the whole app.
- ## Modifying the Webhook
- Open your glitch.com webhook URL created earlier, and after the code snippet in app.js:
- ```
- // message received!
- console.log(req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]);
- ```
- add the code below - remember to change <web server public IP>, which needs to be publicly visible, to the IP of the server where your Llama 3 enabled web app in the previous section runs:
- ```
- let url = "http://<web server public IP>:5000/msgrcvd?message=" +
-   req.body["entry"][0]["changes"][0]["value"]["messages"][0]["text"]["body"]
-
- axios.get(url)
-   .then(response => {
-     console.log(response.data);
-   })
-   .catch(error => {
-     console.error('Axios error:', error);
-   });
- ```
- The code simply forwards the user message received by the WhatsApp Cloud Platform to the Llama 3 enabled web app llama_chatbot.py described in the previous section. Because the functionality of calling the WhatsApp Cloud API to send a message is already implemented in the `send_text_message` method in Python above, you can comment out the whole following code snippet in the original app.js:
- ```
- // info on WhatsApp text message payload: https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
- if (req.body.object) {
- ...
- }
- ```
- Note: It's possible and even recommended to implement the webhook in Python and call Llama 3 directly inside it, instead of making an HTTP request, as the JavaScript code above does, to a Python app which calls Llama 3 and sends the answer to WhatsApp.
- ## Running the Chatbot
- On your web server, run the following command on a Terminal:
- ```
- gunicorn -b 0.0.0.0:5000 llama_chatbot:app
- ```
- If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules. Write down your web server's public IP, update the URL below with it, then open the URL in a browser to verify the answer is sent to your WhatsApp app and also shown in the browser:
- ```
- http://<web server public IP>:5000/msgrcvd?message=who%20wrote%20the%20book%20godfather
- ```
- Now you can open your WhatsApp app, enter a question, and receive Llama 3's answer shortly, as shown in the demo video at the beginning of this post.
- ================================================
- FILE: end-to-end-use-cases/email_agent/README.md
- ================================================
- # Building A Llama Powered Email Agent
- This app shows how to build an email agent powered by Llama 3.1 8B running locally via Ollama. We'll start by building from scratch a basic agent using the custom tool calling natively supported in Llama 3.1. The end goal is to cover all components of a production-ready agent app, acting as an assistant to your email, with a great user experience: intuitive, engaging, efficient and reliable. We'll use Gmail as an example, but any email client API can be used instead.
- Currently implemented features include:
- * search for emails and attachments
- * get email detail
- * reply to a specific email
- * forward an email
- * get summary of a PDF attachment
- * draft and send an email
- 
- # Overview
- Email is essential and one of the top killer apps people use every day. A recent [State of AI Agents](https://www.langchain.com/stateofaiagents) survey by LangChain finds that "The top use cases for agents include performing research and summarization (58%), followed by streamlining tasks for personal productivity or assistance (53.5%)."
- Andrew Ng wrote a 5-part [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) series in March 2024, predicting that "AI agent workflows will drive massive AI progress this year".
- Deloitte published in November 2024 a report on [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf) stating that "Through their ability to reason, plan, remember and act, AI agents address key limitations of typical language models." and "Executive leaders should make moves now to prepare for and embrace this next era of intelligent organizational transformation."
- During Thanksgiving week, a new startup, [/dev/agent](https://sdsa.ai/), building the next-gen OS for AI agents, was in the spotlight.
- In December, Sequoia posted [here](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) saying that 2024 has been the year of agents (an agent is an AI that can complete tasks: it not only tells you how to do something but also does it for you directly), and 2025 will be the year of networks of AI agents.
- So what exactly is an AI agent, and how do you start building an agent app?
- ## What is an agent?
- The concept of an agent is not new - in the 2010 third edition of Russell and Norvig's classic book Artificial Intelligence: A Modern Approach ("modern" as of 2010, two years before the deep learning revolution that started the truly modern AI), an agent is defined as "anything that can be viewed as perceiving its environment through sensors and acting upon that environment through actuators". These days, an AI agent basically means an LLM-powered agent - and if we treat natural language understanding as a type of sensor, an LLM agent is still a sub-category of the traditional agent.
- Lilian Weng in her popular June 2023 blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) defines LLM-powered agent system to have four key components:
- * Planning and Reflection: can break down large tasks into smaller ones; can do self-reflection over past actions and self improve;
- * Memory: can use contextual info and recall info over extended periods (for other components to use);
- * Tool Use: can understand what external APIs to use for info or action not built into LLMs;
- * Action: can actually run the tools.
- Andrew Ng describes four [agentic design patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) as:
- * Reflection
- * Planning
- * Tool calling
- * Multi-agent collaboration, where "memory" is mentioned: Each agent implements its own workflow, has its own memory (itself a rapidly evolving area in agentic technology: how can an agent remember enough of its past interactions to perform better on upcoming ones?)
- In Deloitte's [report](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf), AI agents are defined as reasoning engines that can understand context, plan workflows, connect to external tools and data, and execute actions to achieve a defined goal.
- In a November 2024 blog by Letta, [The AI agents stack](https://www.letta.com/blog/ai-agents-stack), an LLM-powered agent is described as the combination of tool use, autonomous execution, and memory.
- In addition, Harrison Chase defines an agent in the blog [What is an AI agent](https://blog.langchain.dev/what-is-an-agent/) as "a system that uses an LLM to decide the control flow of an application."
- Yet another simple [summary](https://www.felicis.com/insight/the-agentic-web) by Felicis of what an agent does is that an agent expands LLMs to go from chat to act: an agent can pair LLMs with external data, multi-step reasoning and planning, and act on the user's behalf.
- All in all (see [Resources](#resources) for even more info), agents are systems that take a high-level task, use an LLM as a reasoning and planning engine, with the help of contextual info and long-term memory if needed, to decide what actions to take, reflect and improve on the actions, and eventually execute those actions to accomplish the task.
- It's time to see an agent app in action and enjoy some coding. Below is a preview of the questions or requests one may ask the agent:
- # Example Asks to Email Agent
- * do i have any emails with attachments?
- * what's the content of the email from LangSmith
- * do i have emails with attachment larger than 1mb?
- * what kind of attachments for the email with subject papers to read?
- * give me a summary of the pdf thinking_llm.pdf
- * Draft an email to xxx@gmail.com saying working on it and will keep you updated. thanks for your patience.
- * send the draft
- * do i have any emails with attachment larger than 10mb?
- * how about 5mb
- * reply to the email saying thanks for sharing!
- * forward the email to xxx@gmail.com
- * how many emails do i have from xxx@gmail.com?
- * how about from yyy@gmail.com?
- Below are screenshots of some example interactions with the agent:
- 
- 
- 
- # Setup and Installation
- If you feel intimidated by the steps in the following Enable Gmail API section, you may want to check again the example asks (to see what you can ask the agent) and the example log (to see the whole conversation with the agent) - the devil is in the details, and the glorious description of a powerful, trendy agent may not mention the little details one has to deal with to build it.
- ## Enable Gmail API
- 1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
- 2. Create a new project by clicking the dropdown on the top left then click NEW PROJECT.
- 3. Enter a Project name then click CREATE.
- 4. Under "APIs & Services" > "Enabled APIs & services", search for "gmail" and then Enable the "Gmail API" for your project.
- 5. Under "APIs & Services" > "OAuth consent screen", click "GO TO NEW EXPERIENCE", then click "GET STARTED", enter an App name, select your gmail as the User support email, choose External under Audience, enter your gmail again as the Contact Information, and finally check "I agree to the Google API Services" under Finish, then click Continue and Create.
- 6. Again under "APIs & Services", go to Credentials. Click on + CREATE CREDENTIALS, then choose OAuth client ID (NOT API key).
- Select Desktop App (NOT Web application, because you're assumed to want to start your Gmail agent locally first) as the application type and name it. Click Create to generate your client ID and client secret.
- 7. Click Download JSON and rename the downloaded file as credentials.json. This file will be used in your Python script for authentication.
- ## Install Ollama with Llama 3.1 8B
- Download Ollama (available for macOS, Linux, and Windows) [here](https://ollama.com/). Then download and test run the Llama 3.1 8B model by running on a Terminal:
- ```
- ollama run llama3.1
- ```
- This will download a quantized version of Llama 3.1 of the size 4.7GB.
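- Optionally, you can sanity-check from Python that the local Ollama server can serve the model before running the agent - a standalone snippet we add here for convenience, using Ollama's REST API on its default port:
- ```
- import requests
-
- # One-off, non-streaming generation request against the local Ollama server.
- resp = requests.post(
-     "http://localhost:11434/api/generate",
-     json={"model": "llama3.1", "prompt": "Say hello in five words.", "stream": False},
- )
- print(resp.json()["response"])
- ```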
- ## Install required packages
- First, create a Conda or virtual env:
- ```
- conda create -n emailagent python=3.10
- conda activate emailagent
- ```
- or
- ```
- python -m venv emailagent
- source emailagent/bin/activate # on Linux, macOS:
- source emailagent\Scripts\activate # on Windows
- ```
- Then install the required Python libraries:
- ```
- git clone https://github.com/meta-llama/llama-cookbook
- cd llama-cookbook/end-to-end-use-cases/email_agent
- pip install -r requirements.txt
- ```
- # Run Email Agent
- To run the agent, first copy the `credentials.json` file downloaded and renamed above in Step 7 of Enable Gmail API to the email_agent folder, then run:
- ```
- python main.py --email <your_gmail_address>
- ```
- The first time you run it, you'll get a prompt like this:
- ```
- Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=xxxx
- Enter the authorization code:
- ```
- You need to copy the URL above and open it in a browser. If you Sign in with Google using the same Gmail you enabled the Gmail API for, you'll see "You’ve been given access to an app that’s currently being tested. You should only continue if you know the developer that invited you." If you sign in with another Gmail, you'll see "Gmail Agent App has not completed the Google verification process. The app is currently being tested, and can only be accessed by developer-approved testers. If you think you should have access, contact the developer."
- In the latter case, go to APIs & Services > OAuth consent screen > Test users and click the + ADD USERS button to add that Gmail address; you'll see this message: While publishing status is set to "Testing", only test users are able to access the app. Allowed user cap prior to app verification is 100, and is counted over the entire lifetime of the app.
- After clicking Continue, check the Select all checkbox to enable both settings required for running the agent:
- ```
- View your email messages and settings.
- Manage drafts and send emails.
- ```
- Finally, copy the Authorization code and paste it into the Terminal, hit Enter and you'll see the agent's initial greeting (which will likely differ because the default temperature value 0.8 is used here - see [Ollama's model file](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values) for details) such as:
- ```
- Hello! I'm Email Agent, here to help you manage your email account with ease.
- What would you like to do today? Do you want me to:
- Check and respond to new emails
- Compose a new email
- Organize your inbox with filters or labels
- Delete unwanted emails
- Something else?
- Let me know how I can assist you!
- Your ask:
- ```
- If you cancel here and run the command `python main.py --email <your_gmail_address>` again, you should see the agent greeting right away, without needing to enter an authorization code - unless you enter a different Gmail address for the first time. In fact, for each authorized (added as a test user) Gmail address, a file `token_xxxx@gmail.com.pickle` is created which contains the authorized token.
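- For reference, the token caching described above follows the standard google-auth pattern, sketched below; the function name and scopes are assumptions for illustration, not the exact code in `main.py`:
- ```
- # A minimal sketch of the OAuth token-caching flow described above.
- # The function name, scopes and flow details are assumptions.
- import os
- import pickle
- from google_auth_oauthlib.flow import InstalledAppFlow
- from googleapiclient.discovery import build
- 
- SCOPES = ["https://www.googleapis.com/auth/gmail.readonly",
-           "https://www.googleapis.com/auth/gmail.compose"]
- 
- def get_gmail_service(email):
-     token_file = f"token_{email}.pickle"
-     if os.path.exists(token_file):
-         with open(token_file, "rb") as f:
-             creds = pickle.load(f)  # reuse the cached authorized token
-     else:
-         flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
-         creds = flow.run_console()  # prints the auth URL and asks for the code
-         with open(token_file, "wb") as f:
-             pickle.dump(creds, f)
-     return build("gmail", "v1", credentials=creds)
- ```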
- See the example asks and interaction log above for the types of asks you may enter.
- # Implementation Notes
- Notes here mainly cover how custom functions are defined, how Gmail API based functions are implemented, and how an Agent class is defined to handle memory for contextual chat and perform pre- and post-processing on the tool calling.
- ## Available Custom Functions (Tools) Definition
- The `functions_prompt.py` defines the following six custom functions, as part of the system prompt (along with examples for each function call spec that Llama should return):
- * list_emails_function
- * get_email_function
- * send_email_function (new, reply, forward)
- * get_pdf_summary_function
- * create_draft_function
- * send_draft_function
- Below is an example function call spec in JSON format, for the user asks such as "do i have emails with attachments larger than 5mb", "any attachments larger than 5mb" or "let me know if i have large attachments over 5mb":
- ```
- {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
- ```
- Before LLMs, it'd be a REAL pain to cover all the possible natural language user inputs that can and should be translated into the same semantic representation (if you've done Amazon Alexa Skill or Google Assistant development, or any pre-LLM NLU work, you'd know that the JSON format above is the same as the intent-slots representation). Now LLMs such as Llama do most of the heavy lifting in translating an open-ended natural language input into its semantic representation.
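- To make this concrete, below is a minimal sketch of getting such a spec from Llama 3.1 via the Ollama Python library; the system prompt here is heavily abbreviated (the real one, with all function definitions and examples, lives in `functions_prompt.py`):
- ```
- # A minimal sketch; the real system prompt in functions_prompt.py is much longer.
- import ollama
- 
- system_prompt = ("You can call the tool list_emails(query). For a matching "
-                  "user ask, respond only with a JSON tool-calling spec.")
- response = ollama.chat(
-     model="llama3.1",
-     messages=[{"role": "system", "content": system_prompt},
-               {"role": "user", "content": "any attachments larger than 5mb?"}])
- print(response["message"]["content"])
- # expected, per the example above:
- # {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
- ```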
- But still, if you look at how the `list_emails_function` (which is used to search for emails based on a user query) is defined below, you'd see that a lot of work is needed to convert the user's asks into filter values the Gmail API can accept:
- ```
- list_emails_function = """
- {
-     "type": "function",
-     "function": {
-         "name": "list_emails",
-         "description": "Return a list of emails matching an optionally specified query.",
-         "parameters": {
-             "type": "dict",
-             "properties": [
-                 {
-                     "maxResults": {
-                         "type": "integer",
-                         "description": "The default maximum number of emails to return is 100; the maximum allowed value for this field is 500."
-                     }
-                 },
-                 {
-                     "query": {
-                         "type": "string",
-                         "description": "One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than; 3) Status Filters: read, unread, starred, important; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) Logical operators (or, and, not)."
-                     }
-                 }
-             ],
-             "required": []
-         }
-     }
- }
- """
- ```
- In fact, even though many hours of pre-processing work have been done to cover some test examples, not all of the examples in `functions_prompt.py` have been covered and tested.
- ## Actual Function Call Implementation
- For each defined custom function call, its implementation using the Gmail API is present in `email_agent.py`. For example, the `list_emails` is defined as follows:
- ```
- def list_emails(query='', max_results=100):
-     emails = []
-     next_page_token = None
-     while True:
-         response = service.users().messages().list(
-             userId=user_id,
-             maxResults=max_results,
-             pageToken=next_page_token,
-             q=query
-         ).execute()
- 
-         if 'messages' in response:
-             for msg in response['messages']:
-                 sender, subject, received_time = get_email_info(msg['id'])
-                 emails.append(
-                     {
-                         "message_id": msg['id'],
-                         "sender": sender,
-                         "subject": subject,
-                         "received_time": received_time
-                     }
-                 )
- 
-         next_page_token = response.get('nextPageToken')
-         if not next_page_token:
-             break
- 
-     return emails
- ```
- The function will be called by our agent after a user ask such as "do i have emails with attachments larger than 5mb" gets Llama's response below:
- ```
- {"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
- ```
- ## The Agent class
- Also implemented in `email_agent.py`, the Agent class uses 3 instance members to allow for context-aware asks to the agent, giving it short-term memory:
- 1. `messages`: this list holds all the previous user asks and the function call results based on Llama's responses, so Llama can answer follow-up questions such as "how about 5mb" (after the initial ask "attachments larger than 10mb") or "how about from yyy@gmail.com" (after the ask "any emails from xxx@gmail.com").
- 2. `emails`: this list holds a list of emails that matches the user query, so follow-up questions such as "what kind of attachments for the email with subject xxx" can be answered.
- 3. `draft_id`: this is used to handle the ask "send the draft" after an initial ask such as "draft an email to xxx".
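- A bare-bones sketch of this state (member names match the description above; the actual class in `email_agent.py` also handles the Llama call and tool dispatch):
- ```
- # A bare-bones sketch of the Agent's short-term memory; the real class
- # in email_agent.py also makes the Llama call and dispatches the tools.
- class Agent:
-     def __init__(self, system_prompt=""):
-         self.messages = []    # chat history: user asks + tool calling results
-         self.emails = []      # emails matched by the last list_emails call
-         self.draft_id = None  # set by create_draft, used by send_draft
-         if system_prompt:
-             self.messages.append({"role": "system", "content": system_prompt})
- ```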
- The `__call__` method of `Agent` calls Llama with the accumulated `messages` and parses the Llama response: if it's a tool-calling spec in JSON, the function name and parameters are returned; otherwise, Llama didn't find a custom tool for the user ask, so its response is returned directly:
- ```
- try:
-     # Llama returns the tool-calling spec after the <|python_tag|> token
-     res = json.loads(result.split("<|python_tag|>")[-1])
-     function_name = res['name']
-     parameters = res['parameters']
-     return {"function_name": function_name,
-             "parameters": parameters}
- except:
-     # not a tool-calling spec - return Llama's text response as is
-     return result
- ```
- Also implemented there are both pre-processing logic, mainly to convert some parameter values from Llama's responses to what Gmail APIs can accept to make the API calls happy, and post-processing logic to convert function call results to user-friendly natural language.
- ```
- function_name = result["function_name"]
- func = globals()[function_name]
- parameters = result["parameters"]
- ... <pre-processing>
- result = func(**parameters)
- ... <post-processing>
- ```
- When you try out the app, you'll likely find that further pre- and post-processing is still needed to make it production-ready. In a great video on [Vertical LLM Agents](https://www.youtube.com/watch?v=eBVi_sLaYsc), Jake Heller said "after passes frankly even like 100 tests the odds that it will do on any random distribution of user inputs of the next 100,000, 100% accurately is very high" and "by the time you've dealt with like all the edge cases... there might be dozens of things you build into your application to actually make it work well and then you get to the prompting piece and writing out tests and very specific prompts and the strategy for how you break down a big problem into step by step by step thinking and how you feed in the information how you format that information the right way". That's what all the business logic is about. We'll cover decomposing a complicated ask and multi-step reasoning in a future version of the app, and continue to explore the best possible way to streamline the pre- and post-processing.
- ## Debugging output
- When running the app, the detailed Llama returns, pre-processed tool call specs and the actual tool calling results are shown inside a `-------------------------` block, e.g.:
- ```
- -------------------------
- Calling Llama...
- Llama returned: {'function_name': 'list_emails', 'parameters': {'query': 'subject:papers to read has:attachment'}}.
- Calling tool to access Gmail API: list_emails, {'query': 'subject:papers to read has:attachment'}...
- Tool calling returned: [{'message_id': '1936ef72ad3f30e8', 'sender': 'xxx@gmail.com', 'subject': 'Fwd: papers to read', 'received_time': '2024-11-27 10:51:51 PST'}, {'message_id': '1936b819706a4923', 'sender': 'Jeff Tang <xxx@gmail.com>', 'subject': 'papers to read', 'received_time': '2024-11-26 18:44:19 PST'}]
- -------------------------
- ```
- # TODOs
- 1. Port the app to using [Llama Stack](https://github.com/meta-llama/llama-stack) Agents API.
- 2. Improve search, reply, forward, email draft creation, and queries about attachment types.
- 3. Improve the fallback and error handling mechanism when the user asks don't lead to a correct function calling spec or the function calling fails.
- 4. Improve the user experience by showing progress when some Gmail search API calls take long (minutes) to complete.
- 5. Implement the async behavior of the agent - schedule an email to be sent later.
- 6. Implement the agent planning - decomposing a complicated ask into sub-tasks, using ReAct and other methods.
- 7. Implement the agent long-term memory - longer context and memory across sessions (consider using Llama Stack/MemGPT/Letta).
- 8. Implement reflection - on the tool calling spec and results.
- 9. Introduce multi-agent collaboration.
- 10. Implement the agent observability.
- 11. Compare different agent frameworks using the app as the case study.
- 12. Add and implement a test plan and productionize the app.
- # Resources
- 1. Lilian Weng's blog [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/)
- 2. Andrew Ng's posts [Agentic Design Patterns](https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/) with basic [implementations from scratch](https://github.com/neural-maze/agentic_patterns).
- 3. LangChain's survey [State of AI Agents](https://www.langchain.com/stateofaiagents)
- 4. Deloitte's report [AI agents and multiagent systems](https://www2.deloitte.com/content/dam/Deloitte/us/Documents/consulting/us-ai-institute-generative-ai-agents-multiagent-systems.pdf)
- 5. Letta's blog [The AI agents stack](https://www.letta.com/blog/ai-agents-stack)
- 6. Microsoft's multi-agent system [Magentic-One](https://www.microsoft.com/en-us/research/articles/magentic-one-a-generalist-multi-agent-system-for-solving-complex-tasks)
- 7. Amazon's [Multi-Agent Orchestrator framework](https://awslabs.github.io/multi-agent-orchestrator/)
- 8. Deeplearning.ai's [agent related courses](https://www.deeplearning.ai/courses/?courses_date_desc%5Bquery%5D=agents) (Meta, AWS, Microsoft, LangChain, LlamaIndex, crewAI, AutoGen, Letta) and some [lessons ported to using Llama](https://github.com/meta-llama/llama-cookbook/tree/main/end-to-end-use-cases/agents/DeepLearningai_Course_Notebooks).
- 9. Felicis's [The Agentic Web](https://www.felicis.com/insight/the-agentic-web)
- 10. A pretty complete [list of AI agents](https://github.com/e2b-dev/awesome-ai-agents), not including [/dev/agents](https://sdsa.ai/), a very new startup building the next-gen OS for AI agents, though.
- 11. Sequoia's [post](https://www.linkedin.com/posts/konstantinebuhler_the-ai-landscape-is-shifting-from-simple-activity-7270111755710672897-ZHnr/) on 2024 being the year of AI agents and 2025 networks of AI agents.
- ================================================
- FILE: end-to-end-use-cases/email_agent/requirements.txt
- ================================================
- google-auth==2.27.0
- google-auth-oauthlib==0.4.6
- google-auth-httplib2==0.1.0
- google-api-python-client==2.34.0
- pytz
- beautifulsoup4
- ollama
- pypdf
- ================================================
- FILE: end-to-end-use-cases/github_triage/README.md
- ================================================
- # Automatic Issues Triaging with Llama
- This tool utilizes an off-the-shelf Llama model to analyze GitHub issue threads, generate insights, and create a report for a better understanding of the state of a repository. It serves as a reference implementation for using Llama to develop custom reporting and data analytics applications.
- ## Features
- The tool performs the following tasks:
- * Fetches issue threads from a specified repository
- * Analyzes issue discussions and generates annotations such as category, severity, component affected, etc.
- * Categorizes all issues by theme
- * Synthesizes key challenges faced by users, along with probable causes and remediations
- * Generates a high-level executive summary providing insights on diagnosing and improving the developer experience
- For a step-by-step look, check out the [walkthrough notebook](walkthrough.ipynb).
- ## Getting Started
- ### Installation
- ```bash
- pip install -r requirements.txt
- ```
- ### Setup
- 1. **API Keys and Model Service**: Set your GitHub token for API calls. Some privileged information may not be available if you don't have push-access to the target repository.
- 2. **Model Configuration**: Set the appropriate values in the `model` section of [config.yaml](config.yaml) for using Llama via VLLM or Groq.
- 3. **JSON Schemas**: Edit the output JSON schemas in [config.yaml](config.yaml) to ensure consistency in outputs. VLLM supports JSON-decoding via the `guided_json` generation argument, while Groq requires passing the schema in the system prompt.
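- As an illustration of the VLLM path, here's a hedged sketch of JSON-constrained decoding against a VLLM OpenAI-compatible endpoint; the endpoint URL, model name, and schema below are placeholders, not values from [config.yaml](config.yaml):
- ```python
- # Placeholder schema and endpoint; the real ones are set in config.yaml.
- from openai import OpenAI
- 
- schema = {"type": "object",
-           "properties": {"category": {"type": "string"},
-                          "severity": {"type": "string"}},
-           "required": ["category", "severity"]}
- client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
- resp = client.chat.completions.create(
-     model="meta-llama/Meta-Llama-3-8B-Instruct",
-     messages=[{"role": "user", "content": "Annotate this issue thread: ..."}],
-     extra_body={"guided_json": schema},  # VLLM's guided JSON-decoding argument
- )
- print(resp.choices[0].message.content)
- ```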
- ### Running the Tool
- ```bash
- python triage.py --repo_name='meta-llama/llama-cookbook' --start_date='2024-08-14' --end_date='2024-08-27'
- ```
- ### Output
- The tool generates:
- * CSV files with `annotations`, `challenges`, and `overview` data, which can be persisted in SQL tables for downstream analyses and reporting (see the sketch after this list).
- * Graphical matplotlib plots of repository traffic, maintenance activity, and issue attributes.
- * A PDF report for easier reading and sharing.
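- As a rough sketch of that persistence step (the file and table names here are assumptions, not part of the tool):
- ```python
- # File and table names are assumptions for illustration.
- import sqlite3
- 
- import pandas as pd
- 
- conn = sqlite3.connect("triage.db")
- for name in ["annotations", "challenges", "overview"]:
-     pd.read_csv(f"{name}.csv").to_sql(name, conn, if_exists="replace", index=False)
- ```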
- ## Config
- The tool's configuration is stored in [config.yaml](config.yaml). The following sections can be edited:
- * **Github Token**: Use a token that has push-access on the target repo.
- * **model**: Specify the model service (`vllm` or `groq`) and set the endpoints and API keys as applicable.
- * **prompts**: For each of the 3 tasks Llama does in this tool, we specify a prompt and an output JSON schema:
- * `parse_issue`: Parsing and generating annotations for the issues
- * `assign_category`: Assigns each issue to a category specified in an enum in the corresponding JSON schema
- * `get_overview`: Generates a high-level executive summary and analysis of all the parsed and generated data
- ## Troubleshooting
- * If you encounter issues with API calls, ensure that your GitHub token is set correctly and that you have the necessary permissions.
- * If you encounter issues with the model service, check the configuration values in [config.yaml](config.yaml).
- ================================================
- FILE: end-to-end-use-cases/github_triage/requirements.txt
- ================================================
- kaleido
- plotly
- openai
- groq
- fpdf
- plotly
- ================================================
- FILE: end-to-end-use-cases/long_context/H2O/README.md
- ================================================
- ## Run Llama with H2O for long context inference
- ### Overview:
- Heavy-Hitter Oracle (H2O) is an efficient inference framework for LLMs. During the generative inference of transformers, the size of the KV cache grows linearly with the sequence length (prompt length + generation length). The KV cache is usually significantly larger than the model parameters and constrains the inference throughput. H2O identifies the critical KV pairs and evicts the unnecessary ones, maintaining a small cache size and thus improving throughput.
- Besides, LLMs usually generalize poorly to long sequences during inference. H2O handles this issue by maintaining only the heavy-hitter tokens and the most recent tokens. Incorporated with the positional rolling strategy (reassigning each KV's position based on its slot in the KV cache instead of its index in the original sequence), H2O can process sequences much longer than the pretrained context window. Unlike other approaches, such as [Positional Interpolation](https://arxiv.org/abs/2306.15595), H2O is a KV cache policy and does not involve any training for long context processing.
- The current implementation supports Llama 1/2/3, from 7B to 70B. Since H2O only maintains the most important KV pairs, it might miss some important information in the middle of the content for some knowledge-intensive tasks.
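- As a toy illustration of the eviction policy (not the repo's implementation), the cache keeps the most-attended ("heavy-hitter") positions plus the most recent ones and evicts the rest:
- ```
- # Toy sketch of H2O's eviction idea, not the repo's implementation:
- # keep the heavy hitters (by accumulated attention) plus the recent tokens.
- import torch
- 
- def h2o_keep_indices(acc_attn: torch.Tensor, num_heavy: int, num_recent: int):
-     # acc_attn: [seq_len], accumulated attention each position has received
-     seq_len = acc_attn.shape[0]
-     recent = set(range(max(0, seq_len - num_recent), seq_len))
-     heavy = set(torch.topk(acc_attn, min(num_heavy, seq_len)).indices.tolist())
-     return sorted(recent | heavy)  # every other KV pair is evicted
- ```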
- For more details, please refer to the paper: **https://arxiv.org/pdf/2306.14048**
- **Note: this implementation is tested with transformers == 4.39.0**
- ### Evaluation on Summarization Tasks
- The following example runs inference of Llama-2-7b and Meta-Llama-3-8B on the XSUM summarization task. We use `--enable_h2o_generation` to enable the H2O algorithm, which only keeps the heavy-hitter and the local KV pairs. Use `--num_window_length` to decide the KV cache size; the number of local and heavy-hitter KV pairs each equals half of `--num_window_length` (optionally, the number of heavy-hitters can also be set directly with `--num_heavy_hitter_tokens`). Also, use `--enable_position_rolling` to enable positional rolling in the KV cache, which assigns positions based on the KV cache slot instead of the original sequence. Enabling positional rolling is important when the sequence length exceeds the pretrained context window, e.g., 8K in Llama-3.
- ```
- python run_summarization.py \
- --input-path data/summarization/xsum.jsonl \
- --output-path summarization_output/xsum_h2o.jsonl \
- --model-name meta-llama/Meta-Llama-3-8B \
- --enable_h2o_generation
- ```
- ##### **Results**
- Expected results on XSUM (Rouge-2 score, the higher the better) from the above script on Llama-2/3 models. The input sequence lengths are ~2k. Here we constrain the size of the KV cache, allowing only n KVs to be written/read after the prefilling stage, where n ranges from **64** to **full** (maintaining all KV pairs). With 128 KVs, the performance matches the full baseline (~2k KVs), while degradation is observed with 64 KVs. Also, maintaining a smaller KV cache reduces the I/O cost of KVs, so we can achieve better throughput.
- | KV Cache Size | 64 | 128 | 256 | 512 | 1024 | Full |
- | ------------- | ------ | ------ | ------ | ------ | ------ | ------ |
- | Llama-2-7B | 0.0439 | 0.1127 | 0.1148 | 0.1182 | 0.1170 | 0.1164 |
- | Llama-2-13B | 0.1180 | 0.1217 | 0.1243 | 0.1291 | 0.1302 | 0.1332 |
- | Llama-3-8B | 0.1107 | 0.1189 | 0.1200 | 0.1347 | 0.1290 | 0.1311 |
- ### One Demo on Streaming to "Infinite" Context Length
- The following example demonstrates generation at "infinite" sequence length. We use MT-Bench data and generate the context sample-by-sample. The KV cache keeps the KV pairs from previous samples while maintaining a fixed size.
- ```
- # run with full cache
- # expected results: 1) normal generation at the early stage; 2) performance collapse and generation slowdown at the middle stage, because the sequence length exceeds the context window and the I/O cost of the KV cache constrains the throughput; 3) OOM errors and stop.
- bash src/streaming.sh full
- # run with h2o
- # expected results: normal generation at all stages.
- # adjust the number of heavy-hitter tokens with --num_heavy_hitter_tokens and size of KV cache with --num_window_length in src/streaming.sh
- bash src/streaming.sh h2o
- ```
- ================================================
- FILE: end-to-end-use-cases/long_context/H2O/requirements.txt
- ================================================
- transformers
- rouge
- xopen
- needlehaystack
- ================================================
- FILE: end-to-end-use-cases/Multi-Modal-RAG/README.md
- ================================================
- # End to End Tutorial on using Llama models for Multi-Modal RAG
- ## Recipe Overview: Multi-Modal RAG using `Llama-3.2-11B` model:
- This is a complete workshop on how to label images using the new Llama 3.2-Vision models and perform RAG using the image-captioning capabilities of the model.
- - **Data Labeling and Preparation:** We start by downloading 5000 images of clothing items and labeling them using the `Llama-3.2-11B-Vision-Instruct` model
- - **Cleaning Labels:** With the labels based on the notebook above, we will then clean the dataset and prepare it for RAG
- - **Building Vector DB and RAG Pipeline:** With the final clean dataset, we can use descriptions and 11B model to generate recommendations
- ## Requirements:
- Before we start:
- 1. Please grab your HF CLI Token from [here](https://huggingface.co/settings/tokens)
- 2. Git clone [this dataset](https://huggingface.co/datasets/Sanyam/MM-Demo) inside the Multi-Modal-RAG folder: `git clone https://huggingface.co/datasets/Sanyam/MM-Demo` (Remember to thank the original author by upvoting the [Kaggle Dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full))
- 3. Make sure you grab a together.ai token [here](https://www.together.ai)
- ## Detailed Outline for running:
- Order of running the files: the notebooks establish the method of approaching the problem; once it is established, we use the scripts to run the method end to end.
- - Notebook 1: `Part_1_Data_Preparation.ipynb`
- - Script: `label_script.py`
- - Notebook 2: `Part_2_Cleaning_Data_and_DB.ipynb`
- - Notebook 3: `Part_3_RAG_Setup_and_Validation.ipynb`
- - Script: `final_demo.py`
- Here's the detailed outline:
- ### Step 1: Data Prep and Synthetic Labeling:
- In this step we start with an unlabeled dataset and use the image captioning capability of the model to write a description of the image and categorize it.
- [Notebook for Step 1](./notebooks/Part_1_Data_Preparation.ipynb) and [Script for Step 1](./scripts/label_script.py)
- To run the script (remember to set `--num_gpus N`):
- ```
- python scripts/label_script.py --hf_token "your_huggingface_token_here" \
- --input_path "../MM-Demo/images_compressed" \
- --output_path "../MM-Demo/output/" \
- --num_gpus N
- ```
- The dataset consists of 5000 images with some meta-data.
- The first half is preparing the dataset for labeling:
- - Clean/Remove corrupt images
- - Some exploratory analysis to understand existing distribution
- - Merging up categories of clothes to reduce complexity
- - Balancing dataset by randomly sampling images to have an equal distribution for retrieval
- The second half consists of labeling the dataset. The Llama 3.2 11B model can only process one image at a time (a single-image captioning sketch follows this list):
- - We load a few images and test captioning
- - We run this pipeline on random images and iterate on the prompt till we feel the model is giving good outputs
- - Finally, we can create a script to label all 5000 images on multi-GPU
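- Captioning a single image looks roughly like the sketch below; the prompt text and file path are assumptions, and `label_script.py` adds batching and multi-GPU on top:
- ```
- # A hedged sketch of single-image captioning with the 11B vision model;
- # the prompt text and file path are assumptions for illustration.
- import torch
- from PIL import Image
- from transformers import AutoProcessor, MllamaForConditionalGeneration
- 
- model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
- model = MllamaForConditionalGeneration.from_pretrained(
-     model_id, torch_dtype=torch.bfloat16, device_map="auto")
- processor = AutoProcessor.from_pretrained(model_id)
- 
- image = Image.open("../MM-Demo/images_compressed/example.jpg")
- messages = [{"role": "user", "content": [
-     {"type": "image"},
-     {"type": "text", "text": "Describe this clothing item and assign it a category."}]}]
- prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
- inputs = processor(image, prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
- output = model.generate(**inputs, max_new_tokens=128)
- print(processor.decode(output[0], skip_special_tokens=True))
- ```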
- After running the script on the entire dataset, we have more data cleaning to perform.
- ### Step 2: Cleaning up Synthetic Labels and preparing the dataset:
- [Notebook for Step 2](./notebooks/Part_2_Cleaning_Data_and_DB.ipynb)
- We notice that even after some fun prompt engineering, the model still hallucinates: there are some issues with the JSON formatting, and it hallucinates the label categories. Here is how we address this:
- - Re-balance the dataset by mapping correct categories. This is useful to make sure we have an equal distribution in our dataset for retrieval
- - Fix Descriptions so that we can create a CSV
- Now, we are ready to try our vector db pipeline:
- ### Step 3: MM-RAG using lance-db to validate the idea
- [Notebook for Step 3](./notebooks/Part_3_RAG_Setup_and_Validation.ipynb) and [Final Demo Script](./scripts/final_demo.py)
- With the cleaned descriptions and dataset, we can now store these in a vector DB. Here are the steps:
- - We create embeddings using the text description of our clothes
- - Use 11-B model to describe the uploaded image
- - Ask the model to suggest complementary items to the upload
- - Try to find similar or complementary images based on the upload
- We try the approach with different retrieval methods.
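- A minimal sketch of this flow, assuming a `description` column in the CSV and using the demo's default embedding model (the table name and query text are made up for illustration; `final_demo.py` does the real work):
- ```
- # Table name, CSV column name and query text are assumptions.
- import lancedb
- import pandas as pd
- from sentence_transformers import SentenceTransformer
- 
- encoder = SentenceTransformer("BAAI/bge-large-en-v1.5")  # the demo's default model
- df = pd.read_csv("../MM-Demo/final_balanced_sample_dataset.csv")
- df["vector"] = list(encoder.encode(df["description"].tolist()))
- 
- db = lancedb.connect("~/.lancedb")
- table = db.create_table("clothes", data=df, mode="overwrite")
- 
- # in the app, the 11B model's description of the uploaded image is the query
- query_vec = encoder.encode("a red floral summer dress")
- print(table.search(query_vec).limit(5).to_pandas()[["description"]])
- ```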
- Finally, we can bring this all together in a Gradio App.
- For running the script:
- ```
- python scripts/final_demo.py \
- --images_folder "../MM-Demo/compressed_images" \
- --csv_path "../MM-Demo/final_balanced_sample_dataset.csv" \
- --table_path "~/.lancedb" \
- --api_key "your_together_api_key" \
- --default_model "BAAI/bge-large-en-v1.5" \
- --use_existing_table
- ```
- Note: We can further improve the description prompt. You will notice that sometimes the description starts with the title of the clothing item, which results in retrieval of "similar" clothes instead of "complementary" items. The app flow is:
- - Upload an image
- - 11B model describes the image
- - We retrieve complementary clothes to wear based on the description
- - You can keep the loop going by chatting with the model
- ## Resources used:
- Credit and thanks to the models and resources used in this showcase:
- Firstly, thanks to the author for providing the dataset [here](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full) on which we base our exercise
- - [Llama-3.2-11B-Vision-Instruct Model](https://www.llama.com/docs/how-to-guides/vision-capabilities/)
- - [Lance-db for vector database](https://lancedb.com)
- - [This Kaggle dataset](https://www.kaggle.com/datasets/agrigorev/clothing-dataset-full)
- - [HF Dataset](https://huggingface.co/datasets/Sanyam/MM-Demo): since the output of the model can be non-deterministic every time we run it, we use the uploaded dataset to give a universal experience
- - [Together API for demo](https://www.together.ai)
- ================================================
- FILE: end-to-end-use-cases/multilingual/README.md
- ================================================
- # Extending Llama to a new language
- Authored by: Sarvam team
- In this recipe, we will see how to add a new language to the Llama family of models. The steps are quite general and can be easily adapted to other models as well. Using this recipe, you should be able to replicate the findings of [OpenHathi](https://huggingface.co/sarvamai/OpenHathi-7B-Hi-v0.1-Base).
- Please read more about OpenHathi [here](https://x.com/SarvamAI/status/1734645628288831557)
- ## Data
- The original OpenHathi model uses a combination of [Sangraha](https://huggingface.co/datasets/ai4bharat/sangraha) and Wikipedia as its primary data sources. If the reader is interested in using these sources, they would also have to preprocess the data: clean, filter, and deduplicate. See [Setu](https://github.com/AI4Bharat/setu) for an easy way to do this at scale.
- In this tutorial, we will use the [Varta](https://huggingface.co/datasets/rahular/varta) dataset which contains 40M+ news articles taken from [DailyHunt](https://m.dailyhunt.in/). Since this data is already high-quality, we can skip the pre-processing step mentioned above. We will use the Hindi subset here, but you can add any other language present in the dataset by only passing the right language code (advanced users can also tweak the code to add multiple languages at once).
- ## Tokenizer
- Our first step towards adding a new language to an LLM is creating a better tokenizer. We define 'better' in terms of fertility score, or the number of in-language tokens present in the tokenizer. Note that we should add new tokens without disturbing the original vocabulary, and therefore creating a better tokenizer usually involves 2 steps: (i) building a new, in-language-only tokenizer, and (ii) merging this new tokenizer with the original.
- ### Building the in-language tokenizer
- For this, we will first download and prepare the data for training the tokenizer:
- ```
- python prepare_data.py --split=validation --lang=hi --docs_to_sample=10000 --save_path=./data
- ```
- Here we sample 10,000 Hindi documents from the validation split (we should ideally sample from the training split, but this is much faster) and save it as a text file inside `./data`. Next, we use this text to train a Hindi-only [sentencepiece](https://github.com/google/sentencepiece) tokenizer with a vocabulary size of 16,000.
- ```
- python train_tokenizer.py --data_file=./data/hi.txt --save_path=./hi_tokenizer --vocab_size=16000
- ```
- This creates a new sentencepiece Hindi tokenizer and saves it in `./hi_tokenizer`.
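- Under the hood, `train_tokenizer.py` boils down to a sentencepiece training call roughly like the sketch below (the exact arguments, e.g. `character_coverage`, are assumptions):
- ```
- # Roughly what train_tokenizer.py does; the exact arguments are assumptions.
- import sentencepiece as spm
- 
- spm.SentencePieceTrainer.train(
-     input="./data/hi.txt",
-     model_prefix="./hi_tokenizer/tokenizer",
-     vocab_size=16000,
-     character_coverage=1.0,  # keep all Devanagari characters
- )
- ```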
- ### Merging the tokenizers
- This process can again be divided into 2 steps:
- - add new tokens to the original Llama2 tokenizer without disturbing its original vocabulary in any way
- - expand the input and output embedding matrices of Llama2 to be equal to the new vocabulary size
- We can do the first step by (i) downloading Llama2's `tokenizer.model` file, (ii) loading our Hindi `tokenizer.model` file, (iii) appending the Hindi tokens to the Llama2 tokenizer's vocabulary if they are not already present, and (iv) saving the extended tokenizer for future use. All this can be done by running
- ```
- python extend_tokenizer.py --new_tokenizer_path=./hi_tokenizer --extended_tokenizer_save_path=./extended_tokenizer
- ```
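- For reference, the core of such a merge typically manipulates the sentencepiece model protobuf directly, roughly as in this sketch (paths are illustrative; the actual `extend_tokenizer.py` may differ in details):
- ```
- # A rough sketch of the merge; paths are illustrative and the real
- # script may differ in details.
- import sentencepiece.sentencepiece_model_pb2 as sp_pb2
- 
- llama_spm, hi_spm = sp_pb2.ModelProto(), sp_pb2.ModelProto()
- llama_spm.ParseFromString(open("llama2/tokenizer.model", "rb").read())
- hi_spm.ParseFromString(open("hi_tokenizer/tokenizer.model", "rb").read())
- 
- existing = {p.piece for p in llama_spm.pieces}
- for p in hi_spm.pieces:
-     if p.piece not in existing:  # append only genuinely new tokens
-         new_p = sp_pb2.ModelProto().SentencePiece()
-         new_p.piece, new_p.score = p.piece, 0.0
-         llama_spm.pieces.append(new_p)
- 
- with open("extended_tokenizer/tokenizer.model", "wb") as f:
-     f.write(llama_spm.SerializeToString())
- # step 2: after loading the model, call model.resize_token_embeddings(len(llama_spm.pieces))
- ```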
- Now, you have a new Llama2 tokenizer which works the same way on English text but can efficiently tokenize Hindi words as well. You can also test to see if it works as intended:
- ```
- >>> from transformers import LlamaTokenizer
- >>> llama_tokenizer = LlamaTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat-hf')
- >>> our_tokenizer = LlamaTokenizer.from_pretrained('./extended_tokenizer')
- >>> for i in range(len(llama_tokenizer)):
- ... assert llama_tokenizer.convert_ids_to_tokens(i) == our_tokenizer.convert_ids_to_tokens(i), f"Token mismatch at index {i}."
- ...
- >>> text = "मैं एक अच्छा हाथी हूँ"
- >>> llama_tokenizer.tokenize(text)
- ['▁', 'म', 'ै', 'ं', '▁', '<0xE0>', '<0xA4>', '<0x8F>', 'क', '▁', 'अ', 'च', '्', '<0xE0>', '<0xA4>', '<0x9B>', 'ा', '▁', 'ह', 'ा', 'थ', 'ी', '▁', 'ह', 'ू', '<0xE0>', '<0xA4>', '<0x81>']
- >>> our_tokenizer.tokenize(text)
- ['▁मैं', '▁एक', '▁अच', '्', 'छा', '▁हाथी', '▁हूँ']
- ```
- ## Continual pre-training
- OpenHathi uses a two-stage pre-training process:
- - Phase 1: learn to translate paragraphs of text (use translated text as context and generate the original text, ~15B tokens)
- - Phase 2: bilingual next token prediction (train on text where the language changes after every sentence, ~15B tokens)
- Note: OpenHathi's final data mixture also contains monolingual data and romanized transliterations.
- We can easily create data for both phases using any translation model. OpenHathi uses [IndicTrans2](https://github.com/AI4Bharat/IndicTrans2). We provide sample code for both phases below.
- ### Phase 1
- With the assumption that we don't have source-native data, let us first get some English data to translate.
- ```
- from datasets import load_dataset
- ds = load_dataset("rahular/varta", split="train", streaming=True)
- english_paragraphs = []
- for d in ds:
- if d["langCode"] != "en": continue
- english_paragraphs.append(" ".join(d["text"].split("\n")))
- ```
- Now, our goal is to create data in the format `{translated_paragraph}\n\n{english_paragraph}`. We can use the `translate_paragraph` function ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L150)) from the IndicTrans2 codebase to do this easily.
- ```
- quantization = ""
- en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
- en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
- ip = IndicProcessor(inference=True)
- phase1_data = []
- for para in english_paragraphs:
-     trans_para = translate_paragraph(para, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
-     phase1_data.append({"text": f"{trans_para}\n\n{para}"})
- # if you want to save it for future, you can do so easily with HF datasets
- from datasets import Dataset
- phase1_ds = Dataset.from_list(phase1_data)
- phase1_ds.save_to_disk("data/phase1")
- ```
- ### Phase 2
- This is almost the same as phase 1, except that we have to replace the original sentences in an alternating manner to get the data in the required format. We can use the `split_sentences` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L60)) and `batch_translate` ([link](https://github.com/AI4Bharat/IndicTrans2/blob/main/huggingface_interface/example.py#L109)) functions to do this.
- ```
- quantization = ""
- en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"
- en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
- ip = IndicProcessor(inference=True)
- phase2_data = []
- for para in english_paragraphs:
-     en_sents = split_sentences(para, "eng_Latn")
-     trans_sents = batch_translate(en_sents, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
-     final_para = []
-     for idx, (en_sent, trans_sent) in enumerate(zip(en_sents, trans_sents)):
-         sent_to_append = en_sent if idx % 2 == 0 else trans_sent
-         final_para.append(sent_to_append)
-     phase2_data.append({"text": " ".join(final_para)})
- # if you want to save it for future, you can do so easily with HF datasets
- from datasets import Dataset
- phase2_ds = Dataset.from_list(phase2_data)
- phase2_ds.save_to_disk("data/phase2")
- ```
- ### Train
- Finally, we can start finetuning Llama2 on these datasets by following the [finetuning recipes](../../getting-started/finetuning/). Remember to pass the new tokenizer path as an argument to the script: `--tokenizer_name=./extended_tokenizer`.
- OpenHathi was trained on 64 A100 80GB GPUs. Here are the hyperparameters used and other training details:
- - maximum learning rate: 2e-4
- - minimum learning rate: 2e-6
- - optimizer: AdamW (weight decay = 0.1)
- - beta1: 0.9
- - beta2: 0.95
- - lora rank: 128
- - lora alpha: 64
- - lora trainable: q_proj, v_proj, k_proj, o_proj, gate_proj, down_proj, up_proj
- - lora dropout: 0.05
- - block size: 4096
- - global batch size: 4M tokens
- - input and output embeddings are trainable
- - lr schedule: cosine decay with warmup (warmup ratio = 0.1, number of cycles = 3)
- - deepspeed stage 2
- - dtype: bfloat16
- The resulting (partial) loss plots from the OpenHathi training are shown below:
- *(plots omitted: Phase 1 train loss, Phase 1 eval loss, Phase 2 train loss, Phase 2 eval loss)*
- ================================================
- FILE: end-to-end-use-cases/NotebookLlama/README.md
- ================================================
- ## NotebookLlama: An Open Source version of NotebookLM
- 
- [Listen to audio from the example here](./resources/_podcast.mp3)
- This is a guided series of tutorials/notebooks that can be taken as a reference or course to build a PDF to Podcast workflow.
- You will also learn from the experiments of using text-to-speech models.
- It assumes zero knowledge of LLMs, prompting, and audio models; everything is covered in the respective notebooks.
- ### Outline:
- Here is the step-by-step thought (pun intended) for the task:
- - Step 1: Pre-process PDF: Use `Llama-3.2-1B-Instruct` to pre-process the PDF and save it in a `.txt` file.
- - Step 2: Transcript Writer: Use `Llama-3.1-70B-Instruct` model to write a podcast transcript from the text
- - Step 3: Dramatic Re-Writer: Use `Llama-3.1-8B-Instruct` model to make the transcript more dramatic
- - Step 4: Text-To-Speech Workflow: Use `parler-tts/parler-tts-mini-v1` and `bark/suno` to generate a conversational podcast
- Note 1: In Step 1, we prompt the 1B model not to modify or summarize the text, but strictly to clean up extra or garbage characters that might get picked up due to PDF encoding. Please see the prompt in Notebook 1 for more details.
- Note 2: For Step 2, you can also use the `Llama-3.1-8B-Instruct` model; we recommend experimenting to see if you notice any differences. The 70B model was used here because it gave slightly more creative podcast transcripts in the tested examples.
- Note 3: For Step 4, please try to extend the approach with other models. These models were chosen based on a sample prompt and worked best, newer models might sound better. Please see [Notes](./TTS_Notes.md) for some of the sample tests.
- ### Detailed steps on running the notebook:
- Requirements: GPU server or an API provider for using 70B, 8B and 1B Llama models.
- For running the 70B model, you will need a GPU with aggregated memory around 140GB to infer in bfloat-16 precision.
- Note: For our GPU-poor friends, you can also use the 8B and lower models for the entire pipeline. There is no strong recommendation; the pipeline below is what worked best in the first few tests. You should try and see what works best for you!
- - Before getting started, please make sure to login using `huggingface-cli` and then launch your jupyter notebook server to make sure you are able to download the Llama models.
- You'll need your Hugging Face access token, which you can get at your Settings page [here](https://huggingface.co/settings/tokens). Then run `huggingface-cli login` and copy and paste your Hugging Face access token to complete the login to make sure the scripts can download Hugging Face models if needed.
- - First, please install the requirements by running inside the folder:
- ```
- git clone https://github.com/meta-llama/llama-recipes
- cd llama-recipes/end-to-end-use-cases/NotebookLlama/
- pip install -r requirements.txt
- ```
- - Notebook 1:
- This notebook is used for processing the PDF into a `.txt` file using the new featherlight `Llama-3.2-1B-Instruct` model.
- Update the first cell with a link to the PDF you would like to use; it can be any link, but remember to set it before running the notebook.
- Please try changing the prompts for the `Llama-3.2-1B-Instruct` model and see if you can improve results.
- - Notebook 2:
- This notebook will take in the processed output from Notebook 1 and creatively convert it into a podcast transcript using the `Llama-3.1-70B-Instruct` model. If you are GPU rich, please feel free to test with the 405B model!
- Please try experimenting with the System prompts for the model and see if you can improve the results and try the 8B model as well here to see if there is a huge difference!
- - Notebook 3:
- This notebook takes the transcript from earlier and prompts `Llama-3.1-8B-Instruct` to add more dramatization and interruptions in the conversations.
- There is also a key factor here: we return a tuple of conversation which makes our lives easier later. Yes, studying Data Structures 101 was actually useful for once!
- For our TTS logic, we use two different models that behave differently with certain prompts. So we prompt the model to add specifics for each speaker accordingly.
- Please again try changing the system prompt and see if you can improve the results. We encourage testing the feather light 3B and 1B models as well at this stage
- - Notebook 4:
- Finally, we take the results from last notebook and convert them into a podcast. We use the `parler-tts/parler-tts-mini-v1` and `bark/suno` models for a conversation.
- The speakers and the prompt for parler model were decided based on experimentation and suggestions from the model authors. Please try experimenting, you can find more details in the resources section.
- #### Note: Right now there is one issue: Parler needs transformers 4.43.3 or earlier, while steps 1 to 3 of the pipeline need the latest version, so we just switch versions in the last notebook.
- ### Next-Improvements/Further ideas:
- - Speech model experimentation: the TTS model is the limiting factor in how natural the podcast will sound. This can probably be improved with a better pipeline and with the help of someone more knowledgeable; PRs are welcome! :)
- - LLM vs LLM Debate: Another approach of writing the podcast would be having two agents debate the topic of interest and write the podcast outline. Right now we use a single LLM (70B) to write the podcast outline
- - Testing 405B for writing the transcripts
- - Better prompting
- - Support for ingesting a website, audio file, YouTube links and more. Again, we welcome community PRs!
- ### Resources for further learning:
- - https://betterprogramming.pub/text-to-audio-generation-with-bark-clearly-explained-4ee300a3713a
- - https://colab.research.google.com/drive/1dWWkZzvu7L9Bunq9zvD-W02RFUXoW-Pd?usp=sharing
- - https://colab.research.google.com/drive/1eJfA2XUa-mXwdMy7DoYKVYHI1iTd9Vkt?usp=sharing#scrollTo=NyYQ--3YksJY
- - https://replicate.com/suno-ai/bark?prediction=zh8j6yddxxrge0cjp9asgzd534
- - https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c
- ================================================
- FILE: end-to-end-use-cases/NotebookLlama/requirements.txt
- ================================================
- # Core dependencies
- PyPDF2>=3.0.0
- torch>=2.0.0
- transformers>=4.46.0
- accelerate>=0.27.0
- rich>=13.0.0
- ipywidgets>=8.0.0
- tqdm>=4.66.0
- # Optional but recommended
- jupyter>=1.0.0
- ipykernel>=6.0.0
- ================================================
- FILE: end-to-end-use-cases/NotebookLlama/TTS_Notes.md
- ================================================
- ### Notes from TTS Experimentation
- For the TTS Pipeline, *all* of the top models from HuggingFace and Reddit were tried.
- The goal was to use models that were easy to set up and sounded less robotic, with the ability to include sound effects like laughter, etc.
- #### Parler-TTS
- Minimal code to run their models:
- ```
- # imports and setup assumed from the parler-tts repo's docs (not in the original snippet)
- import torch
- from parler_tts import ParlerTTSForConditionalGeneration
- from transformers import AutoTokenizer
- import IPython.display as ipd
- 
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler-tts-mini-v1").to(device)
- tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler-tts-mini-v1")
- # Define text and description
- text_prompt = "This is where the actual words to be spoken go"
- description = """
- Laura's voice is expressive and dramatic in delivery, speaking at a fast pace with a very close recording that almost has no background noise.
- """
- input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
- prompt_input_ids = tokenizer(text_prompt, return_tensors="pt").input_ids.to(device)
- generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
- audio_arr = generation.cpu().numpy().squeeze()
- ipd.Audio(audio_arr, rate=model.config.sampling_rate)
- ```
- The really cool aspect of these models is the ability to prompt the `description`, which can change the speaker profile and pacing of the outputs.
- Surprisingly, Parler's mini model sounded more natural.
- In their [repo](https://github.com/huggingface/parler-tts/blob/main/INFERENCE.md#speaker-consistency) they share names of speakers that we can use in prompt.
- #### Suno/Bark
- Minimal code to run bark:
- ```
- # imports and setup assumed from the transformers Bark docs (not in the original snippet)
- import torch
- from transformers import AutoProcessor, BarkModel
- from IPython.display import Audio
- 
- device = "cuda" if torch.cuda.is_available() else "cpu"
- processor = AutoProcessor.from_pretrained("suno/bark")
- model = BarkModel.from_pretrained("suno/bark").to(device)
- 
- voice_preset = "v2/en_speaker_6"
- sampling_rate = 24000
- text_prompt = """
- Exactly! [sigh] And the distillation part is where you take a LARGE-model,and compress-it down into a smaller, more efficient model that can run on devices with limited resources.
- """
- inputs = processor(text_prompt, voice_preset=voice_preset).to(device)
- speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
- Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
- ```
- Similar to parler models, suno has a [library](https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c) of speakers.
- Voice v9 from their library sounded robotic, so we use Parler for our first speaker and the best voice from Bark for the second.
- The incredible thing about the Bark model is the ability to add sound effects: `[Laugh]`, `[Gasps]`, `[Sigh]`, `[clears throat]`. Making words capital causes the model to emphasize them, and adding `-` gives a break in the text. We utilize this knowledge when we re-write the transcript using the 8B model to add effects to our transcript.
- Note: The authors suggest using `...`. However, this didn't work as effectively as adding a hyphen during trials.
- #### Hyper-parameters:
- Bark models have two parameters we can tweak: `temperature` and `semantic_temperature`
- Below are notes from a sweep; the prompt and speaker were fixed, and this was a vibe test to see which combination gives the best results. The values below are `temperature` and `semantic_temperature`, respectively:
- First, fix `temperature` and sweep `semantic_temperature`
- - `0.7`, `0.2`: Quite bland and boring
- - `0.7`, `0.3`: An improvement over the previous one
- - `0.7`, `0.4`: Further improvement
- - `0.7`, `0.5`: This one didn't work
- - `0.7`, `0.6`: So-So, didn't stand out
- - `0.7`, `0.7`: The best so far
- - `0.7`, `0.8`: Further improvement
- - `0.7`, `0.9`: Mixed feelings on this one
- Now sweeping the `temperature`
- - `0.1`, `0.9`: Very Robotic
- - `0.2`, `0.9`: Less Robotic but not convincing
- - `0.3`, `0.9`: Slight improvement still not fun
- - `0.4`, `0.9`: Still has a robotic tinge
- - `0.5`, `0.9`: The laugh was weird on this one, but the voice modulates so much it feels like the speaker is changing
- - `0.6`, `0.9`: Most consistent voice but has a robotic after-taste
- - `0.7`, `0.9`: Very robotic and laugh was weird
- - `0.8`, `0.9`: Completely ignored the laughter, but it was more natural
- - `0.9`, `0.9`: We have a winner probably
- After this, about 30 more sweeps were done with the promising combinations. The best results were with:
- ```
- speech_output = model.generate(**inputs, temperature = 0.9, semantic_temperature = 0.8)
- Audio(speech_output[0].cpu().numpy(), rate=sampling_rate)
- ```
- ### Notes from other models that were tested:
- Promising directions to explore in future:
- - [MeloTTS](https://huggingface.co/myshell-ai/MeloTTS-English): this is the most popular (ever) on HuggingFace
- - [WhisperSpeech](https://huggingface.co/WhisperSpeech/WhisperSpeech) sounded quite natural as well
- - [F5-TTS](https://github.com/SWivid/F5-TTS) was the latest release at this time, however, it felt a bit robotic
- - E2-TTS: r/LocalLLaMA claims this to be a little better; however, it didn't pass the vibe test
- - [xTTS](https://coqui.ai/blog/tts/open_xtts) It has great documentation and also seems promising
- #### Some more models that weren't tested:
- In other words, we leave this as an exercise to readers :D
- - [Fish-Speech](https://huggingface.co/fishaudio/fish-speech-1.4)
- - [MMS-TTS-Eng](https://huggingface.co/facebook/mms-tts-eng)
- - [Metavoice](https://huggingface.co/metavoiceio/metavoice-1B-v0.1)
- - [Hifigan](https://huggingface.co/nvidia/tts_hifigan)
- - [TTS-Tacotron2](https://huggingface.co/speechbrain/tts-tacotron2-ljspeech)
- - [VALL-E X](https://github.com/Plachtaa/VALL-E-X)
- ================================================
- FILE: end-to-end-use-cases/NotebookLlama/resources/clean_extracted_text.txt
- ================================================
- ===============
- Knowledge Distillation is a methodology that transfers advanced capabilities from leading proprietary Large Language Models (LLMs) to their open-source counterparts, such as LLaMA and Mistral. This paper presents a comprehensive survey of KD's role in imparting advanced knowledge.
- Abstract —In the era of Large Language Models, Knowledge Distillation emerges as a pivotal methodology for transferring advanced capabilities from proprietary LLMs to open-source counterparts, facilitating their self-improvement by employing themselves as teachers.
- xamined through a meticulous survey that delves into the foundational pillars of algorithm, skill, and verticalization, which form the backbone of knowledge distillation and deep learning models. The survey provides a comprehensive examination of key mechanisms within the knowledge distillation framework, specifically focusing on the enhancement of cognitive abilities and their practical implications across various fields, with a particular emphasis on the interplay between data augmentation (DA) and knowledge distillation.
- en-source LLMs, this survey highlights the potential for more accessible, efficient, and powerful AI solutions.
- Most importantly, we advocate for compliance with legal terms that regulate the use of LLMs, ensuring ethical and lawful application of knowledge distillation.
- An associated Github repository is available at https://github.com/Tebmer/Awesome-Knowledge-Distillation-of-LLMs. Index Terms - Large language models, knowledge distillation, data augmentation, skill distillation, supervised fine-tuning
- sophisticated problem-solving capabilities, the core significance of these large language models (LLMs) lies in their emergent abilities, enabling them to tackle a diverse array of tasks with remarkable proficiency.
- their remarkable capabilities, have some notable limitations, particularly when considering the advantages offered by open-source models, such as GPT-4 and Gemini. These models are often expensive, with substantial usage fees and restricted access, making them inaccessible to individuals and smaller organizations.
- ng restrictions and costs. In contrast, open-source LLMs like LLaMA and Mistral bring several advantages. Accessibility and adaptability are key benefits, as they are more readily available to a broader range of users, including researchers and organizations.
- ts. One of the most significant limitations is the smaller model scale, resulting in lower performance on real-world tasks with multiple instructions (Zheng et al., 2023a). Models with fewer parameters struggle to capture the depth and breadth of knowledge embodied in larger models like GPT-4. Additionally, the pre-training investment in these open-source models is typically less substantial. This reduced investment can lead to a narrower range of pre-training data, potentially limiting their understanding and handling of diverse or specialized topics (Liang et al., 2022; Sun et al., 2024a). Fine-tuning steps are often fewer due to resource constraints, hindering model optimization for specific tasks or industries.
- ary models becomes apparent when compared to highly fine-tuned proprietary LLMs. Primarily, the disparity between proprietary and open-source LLMs becomes evident, with proprietary models excelling in complex scenarios, while open-source models excel in a wide range of scenarios. Knowledge distillation, a technique that leverages the advanced capabilities of proprietary models, is used to enhance the competencies of open-source models. This process is similar to transferring the performance of a skilled teacher to a student.
- tillation of LLMs, where a small seed of knowledge is used to prompt the LLM to generate more data with respect to a specific skill or domain (Taori et al., 2023). Furthermore, KD retains its fundamental role in compressing LLMs, making them more efficient without significant loss in performance.
- advanced context following and instruction following**
- **key aspects of knowledge distillation**
- * **contextual understanding**: in-context learning and instruction following
- * **alignment with user intents**: human values/principles and thinking patterns like chain-of-thought
- * **NLP task specialization**: semantic understanding and code generation
- **critical skills for various applications**
- * **healthcare**: accuracy and contextual knowledge
- * **law**: contextual knowledge and precision
- * **science**: contextual knowledge and precision
- ned in the era of LLMs, the benefits of knowledge distillation in the era of LLMs are multifaceted and transformative. Through a suite of distillation techniques, the gap between proprietary and open-source models narrows and is filled. This process streamlines computational requirements and enhances environmental sustainability of AI operations, as open-source models become more proficient with lower overhead.
- ch domains. The escalating need for a comprehensive survey on the knowledge distillation of LLMs stems from the rapidly evolving landscape of AI and the increasing complexity of these models. The ability to efficiently and effectively distill knowledge from proprietary LLMs to open-source ones becomes a practical necessity. This is driven by the need to bridge the knowledge gap between the proprietary and open-source LLMs.
- This need is driven by the 3 models mentioned, including Student, Vicuna, Opt, GPT, and others. These models are being used in various sectors such as law, healthcare, finance, and science, and the ability to distill knowledge from them is becoming increasingly important.
- synthesizefeedbackFeedback input outputSelf-Knowledge outputinputinput YlabelLabelingExpansion X,Y demonstrationsexpandFeature featureinput,outputextractSec.4Sec.5 Sec.3.1Sec.3.2 Fig. 2: An overview of this survey on knowledge distillation of large language models
- es emerging, but there is still much to be learned from the era of Large Language Models (LLMs). In this section, we provide a foundational overview of knowledge distillation, highlighting the role of data augmentation (DA) in this context.
- Traditional techniques, such as supervised fine-tuning, have shown promise in distilling knowledge from LLMs. However, the increasing complexity of these models requires careful consideration of the trade-offs between accuracy and computational resources. To further explore the possibilities of knowledge distillation, we examine methods involving supervised fine-tuning, such as incremental learning and transfer learning.
- Supervised fine-tuning involves training a model on a smaller dataset with the goal of adapting to a specific task or domain. This approach has shown significant improvement in various NLP tasks, but may not be scalable to large-scale applications. In contrast, transfer learning offers a more flexible approach, where a model is trained on a smaller dataset and then fine-tuned on a larger dataset. This can lead to improved performance on a variety of tasks, but requires careful selection of the target dataset.
- Another approach is divergence and similarity, which involve exploring the differences and similarities between the knowledge distillation process and traditional machine learning. Reinforcement learning and ranking optimization are also gaining attention, particularly in the context of knowledge distillation, where the goal is to optimize the distillation process itself. These methods can improve the efficiency and effectiveness of knowledge distillation, but require careful consideration of the trade-offs between exploration and exploitation.
- Skill distillation focuses on enhancing student models to improve their understanding of the task and their ability to perform well on NLP tasks. This can be achieved through various methods, including data augmentation, feature learning, and attention mechanisms. By incorporating these techniques, student models can better understand the context and intentions of the user, leading to improved performance across a variety of tasks.
- We propose several strategies for skill distillation, including:
- mmendation systems, and the evaluation of text generation. In §5, we delve into domain-specific vertical distillation, demonstrating how knowledge distillation techniques are applied in specialized fields such as law, healthcare, finance, and science, highlighting their practical implications and transformative impact. The survey reveals open problems in §6, highlighting current challenges and gaps in knowledge distillation research that present opportunities for future work.
- Knowledge distillation, in its classical sense, refers to transferring knowledge from a large, complex model to a smaller, more efficient one, mitigating the challenges of computational demands and resource constraints when deploying large-scale models in practical applications. Prior to the era of Large Language Models (LLMs), this process focused on compacting complex neural networks for deployment in resource-constrained environments, such as mobile devices or edge computing platforms, where computational efficiency was paramount.
- Fig. 3: Taxonomy of Knowledge Distillation of Large Language Models.
- In this classical setting, knowledge is transferred from a larger teacher network to a smaller student network, often through techniques like soft-target training, where the student learns from the softened softmax outputs of the teacher rather than from hard labels alone.
- The current era of knowledge distillation in large language models (LLMs) has shifted the focus from mere architecture compression to a more nuanced process of knowledge elicitation and transfer. This paradigm change is largely due to the immense knowledge that LLMs like GPT-4 and Gemini possess, and to the fact that their sheer number of parameters makes it challenging to compress them using pruning or quantization techniques.
- Rather than reducing model size, the current focus in LLM-based knowledge distillation is to extract and transfer the rich, nuanced understanding that these models have developed. The key to this modern approach lies in carefully designed prompts that elicit specific knowledge or capabilities from the LLMs, tapping into their understanding across domains ranging from natural language understanding to more complex cognitive tasks like reasoning and problem-solving.
- Such elicited knowledge can then serve as explicit training objectives. This era of knowledge distillation also emphasizes the transfer of abstract qualities such as reasoning patterns and preference alignment, in stark contrast to the earlier focus on output replication, indicating a shift towards a more holistic transfer of cognitive capabilities. Current techniques involve not just the replication of outputs, but also the emulation of the teacher model's thought processes and decision-making patterns. This involves complex strategies like chain-of-thought prompting, where the student model learns the reasoning process of the teacher, enhancing its problem-solving and decision-making capabilities.
- 2.2 Relation to Data Augmentation (DA)
- In knowledge distillation, DA plays a distinctive role. Unlike traditional techniques such as paraphrasing or back-translation, which primarily expand the training dataset in a somewhat mechanical manner, DA within the context of LLMs focuses on the generation of novel, context-rich training data tailored to specific domains and skills. This is driven by the unique ability of LLMs to generate coherent, diverse, and intricate data samples that closely mimic the nuanced understanding and cognitive abilities of human experts in various fields.
- Through DA, Large Language Models (LLMs) are prompted to create targeted, high-quality datasets that are not merely larger in volume but also rich in diversity and specificity. This approach makes the distillation process more effective, ensuring that distilled models not only replicate the teacher model's output behavior but also embody its deep-seated understanding and cognitive strategies. The significance and necessity of DA for achieving knowledge distillation (KD) in the LLM era cannot be overstated. DA acts as a force multiplier, enabling distilled models to acquire and refine capabilities that would otherwise require exponentially larger datasets and computational resources. It facilitates a more nuanced and effective transfer of knowledge, focusing on the qualitative aspects of learning rather than quantitative expansion.
- This transfer of knowledge empowers open-source models to approximate the contextual adeptness, ethical alignment, and deep semantic insights characteristic of their proprietary counterparts, thereby democratizing access to advanced AI capabilities and fostering innovation across a broader spectrum of applications and users.
- 3 Survey Scope
- Building on the discussions introduced earlier, this survey aims to comprehensively explore the landscape of knowledge distillation within the context of LLMs, following the meticulously structured taxonomy in Figure 3. The survey's scope is delineated through three primary facets, each encapsulating a range of subtopics and methodologies.
- KD Algorithms. This first facet covers the foundations and methodologies of knowledge distillation, including an in-depth exploration of the processes involved in eliciting knowledge from teacher models (e.g., proprietary LLMs) and integrating it into student models (e.g., open-source LLMs). Under the umbrella of 'knowledge', we delve into strategies such as labeling, expansion, curation, feature understanding, and feedback mechanisms, seeking to uncover the various ways in which knowledge can be identified, expanded, and curated for effective distillation. On the 'distillation' side, we examine learning approaches like supervised fine-tuning, divergence minimization, and reinforcement learning techniques.
- Together, these illustrate how algorithms enable knowledge transfer, allowing open-source models to replicate and sometimes surpass proprietary capabilities. Skill Distillation examines the specific competencies and capabilities enhanced through KD. Contextual discussions follow (Taori et al., 2023; Luo et al., 2023c), including instruction following and retrieval-augmented generation (RAG) capabilities. Alignment research investigates thinking patterns, persona/preference modeling, and value alignment. The 'agent' category focuses on skills like tool usage and planning. NLP task specialization (Dai et al., 2023a; Jung et al., 2023; Chaudhary, 2023) is examined through lenses like natural language understanding (NLU), natural language generation, information retrieval, recommendation, text generation evaluation, and code generation.
- Finally, this facet explores how Knowledge Distillation (KD) enhances Large Language Models (LLMs) in interpreting and integrating multiple forms of input, enriching their utility and applicability across various contexts.
- Verticalization Distillation
- This section examines the application of KD across diverse domains, providing insights into how distilled LLMs can be tailored for specialized fields such as Law, Medical & Healthcare (Wang et al., 2023a), Finance (Zhang and Yang, 2023), Science (Zhang et al., 2024), among others. This exploration showcases the practical implications of KD techniques and highlights their transformative impact on domain-specific AI solutions. Through detailed analysis and examples, this part aims to demonstrate the versatility and efficacy of KD in adapting LLMs to diverse domains.
- By navigating through these facets, this survey endeavors to provide an extensive and nuanced analysis of knowledge distillation in the era of LLMs. It serves as a guide for researchers, practitioners, and enthusiasts in the field, shedding light on current methodologies, challenges, and opportunities for innovation in this rapidly evolving domain, across a range of applications.
- Distillation Pipeline in LLM Era
- ================================================
- FILE: end-to-end-use-cases/RAFT-Chatbot/README.md
- ================================================
- ## Chatbot Recipe:
- As the popularity of our Meta Llama 3 models grows, we've seen a surge in demand to adapt them to specific domains, enabling businesses to better serve their customers. For example, a company might have a vast collection of plain text documents related to their custom domain and want to create a chatbot that can answer client questions.
- In response to this demand, we're exploring the possibility of building a Llama chatbot that can answer Llama-related questions using our Meta Llama 3 models. In this tutorial, we'll demonstrate how to do just that. While our Meta Llama 3 70B Instruct model is an excellent candidate, its production costs are relatively high. To reduce these costs, we'll focus on creating a Llama chatbot based on the Meta Llama 3 8B Instruct model, aiming to achieve similar accuracy while minimizing inference costs.
- One common approach to produce a model based on new domain data is **fine-tuning**. The idea is to start from a pre-trained model that already has some knowledge of language from its pre-training and adapt it to a new domain. However, a [recent paper](https://arxiv.org/pdf/2405.05904) highlights the risk of using supervised fine-tuning to update LLMs' knowledge, presenting empirical evidence that acquiring new knowledge through fine-tuning is correlated with hallucinations with respect to preexisting knowledge. Fine-tuning can also be costly if the domain knowledge has to be updated frequently.
- Another solution is to use **RAG (Retrieval-Augmented Generation)**, which combines the strengths of traditional information retrieval systems (such as databases) with the capabilities of generative large language models (LLMs). RAG operates by first retrieving relevant information from a database using a query generated by the LLM. This retrieved information is then integrated into the LLM's query input, enabling it to generate more accurate and contextually relevant text. This helps to reduce LLM hallucination, as the related documents are provided to the LLM, and it lowers the cost of updating the domain knowledge.
- In this tutorial, we'll use **Retrieval Augmented Fine Tuning (RAFT)**, a technique that combines fine-tuning with RAG to better utilize custom domain text data. RAFT is a general recipe for fine-tuning a pre-trained Large Language Model (LLM) to a domain-specific RAG setting: it teaches the LLM to ignore documents that don't help in answering the question. This approach can create a more factual model and reduce LLM hallucinations during inference.
- The process involves preparing training data with each data point containing:
- * A question (Q)
- * A set of documents (D)
- * A corresponding Chain-of-thought style answer (A*) generated from one of the documents (D*)
- RAFT tries to teach the models to differentiate between two types of documents:
- * Oracle documents (D*): documents from which the answer to the question can be deduced
- * Distractor documents (Di): documents that do not contain answer-relevant information
- The following graph illustrates the RAFT main concepts:
- 
- For more information on RAFT, please refer to their [blog post](https://gorilla.cs.berkeley.edu/blogs/9_raft.html).
- ## Fine-tuning Llama
- To build a Llama bot, we need to collect relevant text data. Ideally, we would include a vast range of Llama-related web documents, but for demo purposes, we'll focus on official documents. For example, we can use the raw text from official web pages listed in [Getting started with Meta Llama](https://llama.meta.com/get-started/), excluding the FAQ page since some evaluation questions will come from there.
- We have two options to obtain the text data: using a local folder or web crawling. For the local folder option, we can download the desired documents in PDF, Text, or Markdown format to the "data" folder specified in the [raft.yaml](./raft.yaml) file. Langchain DirectoryLoader will load the files in that folder, but it may also ask us to install more package dependencies if the file formats are not supported natively.
- Alternatively, we can create a sitemap XML file, similar to the example below, and put the file path in the [raft.yaml](./raft.yaml) file, so that a Langchain SitemapLoader can retrieve all the text from the web pages.
- ```xml
- <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
- <url>
- <loc>http://llama.meta.com/responsible-use-guide/</loc>
- </url>
- <!-- more URLs -->
- </urlset>
- ```
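- For intuition, here's a minimal sketch of what such a sitemap load looks like in Langchain code (illustrative only; raft.py drives the loader via raft.yaml, and parameter names may differ across Langchain versions):
- ```python
- # Hypothetical sketch: load every page listed in a local sitemap XML.
- from langchain_community.document_loaders.sitemap import SitemapLoader
- 
- loader = SitemapLoader(web_path="./sitemap.xml", is_local=True)
- docs = loader.load()  # one Document per <url> entry in the sitemap
- print(f"Loaded {len(docs)} pages")
- ```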
- ## Create RAFT Dataset
- To create a RAFT dataset from the prepared documents, we can use the Meta Llama 3 70B Instruct model either through APIs from LLM cloud providers or by hosting a local VLLM server.
- For this example, we'll demonstrate how to create a VLLM OpenAI-compatible server that hosts Meta Llama 3 70B Instruct locally and generates the RAFT dataset.
- **Local Server Setup**
- First, ensure VLLM is installed. Then, run the following command to start the VLLM server:
- ```bash
- CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
- ```
- **Note**: Make sure the port is available, and the server requires at least 135GB GPU memory, so we need to use multiple GPUs in a tensor parallel way.
- **Querying the Server**
- Once the server is ready, query it using the following command in another terminal:
- ```bash
- python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 4
- ```
- If you prefer to use a cloud API, replace the endpoint URL with the cloud provider's URL and set the API key using the `-k` flag or environment variables.
- **RAFT Dataset Generation**
- The [raft.py](raft.py) script reads all documents from local or web sources, depending on the settings, and splits the data into text chunks of 1000 characters using RecursiveCharacterTextSplitter.
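- As a rough sketch of this chunking step (illustrative only; raft.py reads its actual settings from raft.yaml, and the `chunk_overlap` value here is an assumption):
- ```python
- # Sketch: split raw document text into 1000-character chunks.
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- 
- raw_text = "..."  # concatenated text loaded from local files or web pages
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
- chunks = splitter.split_text(raw_text)
- ```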
- Then, it applies the `question_prompt_template` defined in [raft.yaml](raft.yaml) to each chunk to generate queries for the Meta Llama 3 70B Instruct model, which generates a question list (by default, 4 questions) for each text chunk. For each question and corresponding text chunk, we then generate a Chain-of-Thought (COT) style answer using the Meta Llama 3 70B Instruct APIs.
- Once we have the COT answers, we can create a dataset where each sample contains an "instruction" section. This section includes some unrelated chunks called distractors (by default, we add 4 distractors). In the original RAFT method, there is an oracle probability P (by default, 80%) that a related document will be included. This means that there is a 1-P (by default, 20%) chance that no related documents are provided, and the RAFT model should still try to predict the COT answer label, as stated in the blog, "By removing the oracle documents in some instances of the training data, we are compelling the model to memorize domain-knowledge."
- **Modification to Add Refusal Examples**
- In this tutorial, we made an important modification by adding additional refusal examples (by default, this refusal probability is 5%). When the related documents are not presented, we set the COT answer label to "Sorry, I don't know the answer to this question because related documents are not found. Please try again." Our hypothesis is that this will increase answer precision and reduce chatbot hallucination. In real-world production scenarios, we prefer that the chatbot refuses to answer when not enough context is provided, so that we can detect this refusal signal and mitigate the risk of producing wrong or misleading answers (e.g., we can ask a human agent to take over the conversation to better serve customers).
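- A minimal sketch of this sampling logic follows (the function and variable names are hypothetical, not the actual raft.py code):
- ```python
- import random
- 
- ORACLE_P = 0.8    # chance the oracle document is included (default 80%)
- REFUSAL_P = 0.05  # chance a sample becomes a refusal example (default 5%)
- REFUSAL = ("Sorry, I don't know the answer to this question because "
-            "related documents are not found. Please try again.")
- 
- def build_sample(oracle_doc, distractors, cot_answer):
-     """Assemble the documents and answer label for one RAFT sample."""
-     if random.random() < REFUSAL_P:
-         # Refusal example: no oracle document, and the label is a refusal.
-         return list(distractors), REFUSAL
-     docs = list(distractors)  # by default, 4 distractor chunks
-     if random.random() < ORACLE_P:
-         # Insert the oracle document at a random position among distractors.
-         docs.insert(random.randrange(len(docs) + 1), oracle_doc)
-     # Otherwise (1 - P), keep only distractors but still use the COT label,
-     # compelling the model to memorize domain knowledge.
-     return docs, cot_answer
- ```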
- **RAFT Format JSON Example**
- Here is a RAFT format JSON example from our saved `raft.jsonl` file:
- ```json
- {
- "id":"seed_task_228",
- "type":"general",
- "question":"What is the context length supported by Llama 3 models?",
- "context":{
- "sentences":[
- [
- "DISTRACT_DOCS 1"
- "DISTRACT_DOCS 2"
- "We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. 
Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
- "DISTRACT_DOCS 3"
- "DISTRACT_DOCS 4"
- ]
- ],
- "title":[
- [
- "placeholder_title",
- "placeholder_title",
- "placeholder_title",
- "placeholder_title",
- "placeholder_title",
- ]
- ]
- },
- "oracle_context":"We hope that Code Llama will inspire others to leverage Llama 2 to create new innovative tools for research and commercial products. Download the model Explore more on Code Llama Discover more about Code Llama here \u2014 visit our resources, ranging from our research paper, getting started guide and more. Code Llama GitHub repository Research paper Download the model Getting started guide Meta Llama 3 Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Build the future of AI with Meta Llama 3 Now available with both 8B and 70B pretrained and instruction-tuned versions to support a wide range of applications Get Started Experience Llama 3 on Meta AI Experience Llama 3 with Meta AI We\u2019ve integrated Llama 3 into Meta AI, our intelligent assistant, that expands the ways people can get things done, create and connect with Meta AI. You can see first-hand the performance of Llama 3 by using Meta AI for coding tasks and problem solving. Whether you're developing agents, or other AI-powered applications, Llama 3 in both 8B and 70B will offer the capabilities and flexibility you need to develop your ideas. Experience Llama 3 on Meta AI Enhanced performance Experience the state-of-the-art performance of Llama 3, an openly accessible model that excels at language nuances, contextual understanding, and complex tasks like translation and dialogue generation. With enhanced scalability and performance, Llama 3 can handle multi-step tasks effortlessly, while our refined post-training processes significantly lower false refusal rates, improve response alignment, and boost diversity in model answers. Additionally, it drastically elevates capabilities like reasoning, code generation, and instruction following. Build the future of AI with Llama 3. Download Llama 3 Getting Started Guide With each Meta Llama request, you will receive: Meta Llama Guard 2 Getting started guide Responsible Use Guide Acceptable use policy Model card Community license agreement Benchmarks Llama 3 models take data and scale to new heights. It\u2019s been trained on our two recently announced custom-built 24K GPU clusters on over 15T token of data \u2013 a training dataset 7x larger than that used for Llama 2, including 4x more code. This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. Model card Trust & safety A comprehensive approach to responsibility With the release of Llama 3, we\u2019ve updated the Responsible Use Guide (RUG) to provide the most comprehensive information on responsible development with LLMs. Our system-centric approach includes updates to our trust and safety tools with Llama Guard 2, optimized to support the newly announced taxonomy published by MLCommons expanding its coverage to a more comprehensive set of safety categories, Code Shield, and Cybersec Eval 2. In line with the principles outlined in our RUG , we recommend thorough checking and filtering of all inputs to and outputs from LLMs based on your unique content guidelines for your intended use case and audience. 
Meta Llama Guard 2 Explore more on Meta Llama 3 Introducing Meta Llama 3: The most capable openly available LLM to date Read the blog Meet Your New Assistant: Meta AI, Built With Llama 3 Learn more Meta Llama 3 repository View repository Model card Explore Meta Llama 3 License META LLAMA 3 COMMUNITY LICENSE AGREEMENT Meta Llama 3 Version Release Date: April 18, 2024 \u201c Agreement \u201d means the terms and conditions for use, reproduction, distribution and modification of the Llama Materials set forth herein. \u201c Documentation \u201d means the specifications, manuals and documentation accompanying Meta Llama 3 distributed by Meta at https:\/\/llama.meta.com\/get-started\/ .",
- "cot_answer":"Here's the step-by-step reasoning to answer the question:\n\n1. The question asks about the context length supported by Llama 3 models.\n2. In the context, we need to find the relevant information about Llama 3 models and their context length.\n3. The relevant sentence is: \"This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2.\"\n##begin_quote## This results in the most capable Llama model yet, which supports a 8K context length that doubles the capacity of Llama 2. ##end_quote##\n4. From this sentence, we can see that Llama 3 models support a context length of 8K.\n\n<ANSWER>: 8K",
- "instruction":"<DOCUMENT> DISTRACT_DOCS 1 <\/DOCUMENT>...<DOCUMENT> DISTRACT_DOCS 4 <\/DOCUMENT>\nWhat is the context length supported by Llama 3 models?"
- }
- ```
- As shown in the above example, we have a "question" section for the generated question, a "cot_answer" section for the generated COT answers (where the final answer will be added after the "<ANSWER>" token), and an "instruction" section that has all the documents included (each document split by `<DOCUMENT>` and `</DOCUMENT>` tags) and finally the generated question appended at the end. This "instruction" section will be the input during fine-tuning, and the "cot_answer" will be the output label that the loss will be calculated on.
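- Conceptually, the fine-tuning data loader does something like the following (a sketch, not the actual raft_dataset.py code):
- ```python
- import json
- 
- # Sketch: read raft.jsonl and form (input, label) pairs for fine-tuning.
- with open("raft.jsonl") as f:
-     for line in f:
-         sample = json.loads(line)
-         model_input = sample["instruction"]  # <DOCUMENT> blocks + question
-         label = sample["cot_answer"]         # loss is computed on this text
- ```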
- ## Creating an Evaluation Set
- To create a reliable evaluation set, it's ideal to use human-annotated question and answer pairs. This ensures that the questions are relevant and the answers are accurate. However, human annotation is time-consuming and costly. For demonstration purposes, we'll use a subset of the validation set, which will never be used in the fine-tuning. We only need to keep the "question" section and the final answer section, marked by the `<ANSWER>` tag in "cot_answer". We'll manually check each example and select only the good ones. We want to ensure that the questions are general enough to be used for web search engine queries and are related to Llama. We'll also use some QA pairs from our FAQ page, with modifications. This will result in 72 question and answer pairs as our evaluation set, saved as `eval_llama.json`.
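- For example, extracting the final answer from a `cot_answer` field is a simple split on the `<ANSWER>` tag (a sketch; the field names follow the JSON example above):
- ```python
- import json
- 
- # Sketch: build one evaluation pair from a RAFT sample.
- sample = json.loads(open("raft.jsonl").readline())
- final_answer = sample["cot_answer"].split("<ANSWER>:")[-1].strip()
- eval_pair = {"question": sample["question"], "answer": final_answer}
- ```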
- ## Fine-Tuning Steps
- Once the RAFT dataset is ready in JSON format, we can start fine-tuning. Unfortunately, the LoRA method didn't produce good results, so we'll use the full fine-tuning method. We can use the following commands as an example in the llama-cookbook main folder:
- ```bash
- export PATH_TO_ROOT_FOLDER=./raft-8b
- export PATH_TO_RAFT_JSON=recipes/use_cases/end2end-recipes/raft/output/raft.jsonl
- torchrun --nnodes 1 --nproc_per_node 4 recipes/quickstart/finetuning/finetuning.py --enable_fsdp --lr 1e-5 --context_length 8192 --num_epochs 1 --batch_size_training 1 --model_name meta-Llama/Meta-Llama-3-8B-Instruct --dist_checkpoint_root_folder $PATH_TO_ROOT_FOLDER --dist_checkpoint_folder fine-tuned --use_fast_kernels --dataset "custom_dataset" --custom_dataset.test_split "test" --custom_dataset.file "recipes/finetuning/datasets/raft_dataset.py" --use-wandb --run_validation True --custom_dataset.data_path $PATH_TO_RAFT_JSON
- ```
- For more details on multi-GPU fine-tuning, please refer to the [multigpu_finetuning.md](../../getting-started/finetuning/multigpu_finetuning.md) in the finetuning recipe.
- Next, we need to convert the FSDP checkpoint to a HuggingFace checkpoint using the following command:
- ```bash
- python src/llama_cookbook/inference/checkpoint_converter_fsdp_hf.py --fsdp_checkpoint_path "$PATH_TO_ROOT_FOLDER/fine-tuned-meta-Llama/Meta-Llama-3-8B-Instruct" --consolidated_model_path "$PATH_TO_ROOT_FOLDER"
- ```
- For more details on FSDP to HuggingFace checkpoint conversion, please refer to the [readme](../../getting-started/finetuning/multigpu_finetuning.md) in the inference/local_inference recipe.
- ## Evaluation Steps
- Once we have the RAFT model, we need to evaluate its performance. In this tutorial, we'll not only use traditional evaluation methods (e.g., calculating exact match rate or ROUGE score) but also use LLM as a judge to score model-generated answers.
- We'll launch a VLLM server to host our converted model from `PATH_TO_ROOT_FOLDER`. To make things easier, we can rename the model folder to `raft-8b`.
- ```bash
- CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model raft-8b --port 8000 --disable-log-requests
- ```
- Similarly, if we want to get the 8B Instruct baseline, we can launch an 8B model VLLM server instead:
- ```bash
- CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-8B-Instruct --port 8000 --disable-log-requests
- ```
- On another terminal, we can use another Meta Llama 3 70B Instruct model as a judge to compare the answers from the RAFT 8B model with the ground truth and produce a score. To do this, we need to host a second Meta Llama 3 70B Instruct VLLM server locally with the following command, making sure the port is not already in use:
- ```bash
- CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server --model meta-Llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size 2 --disable-log-requests --port 8001
- ```
- Then, we can pass the ports to the eval script to evaluate our RAFT model once our `raft-8b` VLLM server is running:
- ```bash
- CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m raft-8b -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
- ```
- To evaluate the 8B baseline, we can use the following command once our 8B VLLM server is running:
- ```bash
- CUDA_VISIBLE_DEVICES=4 python raft_eval.py -m meta-Llama/Meta-Llama-3-8B-Instruct -u "http://localhost:8000/v1" -j "http://localhost:8001/v1" -r 5
- ```
- **NOTE**: Please ensure that the `--model` in VLLM server creation matches the `-m` in raft_eval.py. Otherwise, VLLM will raise a `model not found` error. By default, the RAFT model is called "raft-8b". Here, `-u` specifies the RAFT model endpoint URL, `-j` specifies the judge model endpoint URL, and `-r` defines how many top-k documents the RAG should retrieve.
- This [raft_eval.py](./raft_eval.py) script will load questions from the evaluation set, generate answers from models and models+RAG, and compare the generated answers with the ground truth to get the evaluation metrics, such as ROUGE score or LLM-as-judge score. It will then save those metrics and evaluation details to eval logs.
- ## Experiment Results
- **Overview**
- During our experiments, we encountered issues using only the Llama website data, which consisted of 1980+ RAFT examples generated from 327K characters of text. Believing this initial data was insufficient, we created an additional PyTorch RAFT dataset using text from official [Pytorch blogs](https://pytorch.org/blog/) and [Pytorch tutorials](https://pytorch.org/tutorials/). This new dataset contains 20K+ RAFT examples generated from 4.7 million characters. We combined both datasets into an `all_data` dataset, then fine-tuned the 8B model on each dataset separately for 1 epoch with a learning rate of 1e-5, resulting in three RAFT models: `llama_only`, `pytorch_only`, and `all_data`.
- **Evaluation on non-RAG baseline**
- First, we run a non-RAG baseline using just the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct models, to see if they can already answer some questions without any fine-tuning or external knowledge base. The LLM score, the percentage of answers marked correct by LLM_as_judge, is 47.9% for 8B and 59.2% for 70B. Clearly, some of this information has been pretrained into our Meta Llama 3 models.
- **Evaluation on RAG baseline**
- Then we tested these 3 RAFT models with Langchain RAG, along with the Meta Llama 3 8B Instruct and Meta Llama 3 70B Instruct RAG baselines, using RAG document top-k retrieval parameters of 3, 5, and 7. We deployed a Meta Llama 3 70B Instruct model as the judge to score our model-generated answers against the ground truth in our evaluation set. The LLM scores are shown below:
- 
- Our results showed that the RAFT models performed similarly to the 8B RAG baseline, but noticeably worse than the 70B RAG baseline, when context documents were limited (top_k <= 5). However, at top_k = 7, RAFT model performance increases sharply, with the `all_data` 8B model achieving a score of 76.06%, which beats the 70B baseline's 74.65%.
- **Refusal Examples**
- We also analyzed the number of refusal examples, where the model responded with the refusal message ("Sorry, I don't know..."). The `all_data` model was more cautious and tended to refuse to answer, whereas the `llama_only` RAFT model did not learn to refuse at all, likely due to its limited dataset size.
- 
- **Precision Analysis**
- We calculated the precision of our model answers, which represents the likelihood of producing correct answers when the model decides to respond. The formula used was $\frac{LLMScore}{1-\frac{numRefusal}{totalQA}}$.
- 
- Note that the 8B and 70B RAG baselines never refused to answer, so their precision was equivalent to their LLM_score. Our `all_data` and `pytorch_only` models tended to refuse to answer when provided documents were limited (top_k < 5), but when they did generate an answer, the likelihood of it being correct was higher. Specifically, when top_k = 7, the `all_data` RAFT model had an 82.97% likelihood of producing a correct answer when it decided to respond, outperforming the 70B baseline.
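- As a quick sanity check of the formula (the refusal count below is back-solved for illustration, not taken from our eval logs):
- ```python
- # Sketch: precision = LLM score / fraction of questions actually answered.
- llm_score = 0.7606             # all_data RAFT model at top_k = 7
- num_refusal, total_qa = 6, 72  # hypothetical counts over the 72-pair eval set
- precision = llm_score / (1 - num_refusal / total_qa)
- print(f"{precision:.2%}")      # -> 82.97%
- ```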
- **Example Comparisons**
- Here are some examples where our `all_data` RAFT model correctly answered questions that the 70B baseline failed to answer:
- ```
- Comparing interested question: What tokenizer is used as the basis for the special tokens in Meta Llama
- ground_truth: tiktoken
- True all_data_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is tiktoken.
- False 70B_RAG_answers: <ANSWER>: The tokenizer used as the basis for the special tokens in Meta Llama is SentencePiece.
- ```
- ```
- Comparing interested question: What is the license under which the Llama Guard model and its weights are released?
- ground_truth: The license is the same as Llama 3, which can be found in the LICENSE file and is accompanied by the Acceptable Use Policy.
- True all_data_RAG_answers: <ANSWER>: The license under which the Llama Guard model and its weights are released is the same as Llama 3, and the [LICENSE](../LICENSE) file contains more information about the license.
- False 70B_RAG_answers: <ANSWER>: The Llama Guard model and its weights are licensed under the Llama 2 Community license.
- ```
- **Key Takeaways**
- From our experiments, we learned:
- 1. A few thousand RAFT examples are insufficient; at least 10K examples are recommended.
- 2. The LLM_as_judge is not always reliable; we noticed that answers were sometimes scored incorrectly.
- 3. The chunk_size for RAFT documents and RAG documents should be the same.
- 4. The RAFT method appears to help the LLM differentiate related documents from distractors rather than forcing it to memorize the training data, given that adding PyTorch data as extra training data helped our Llama chatbot answer Llama questions. More experiments are needed to understand this better.
- ## Local Inference Steps
- Once we have evaluated and refined our RAFT model, we can deploy it locally and interact with it by asking questions manually. To do this, run the following command:
- ```bash
- python recipes/inference/local_inference/inference.py --model_name raft-8b
- ```
- For more details, please check the [local_inference recipe](../../getting-started/inference/local_inference/README.md)
- ## Acknowledgement
- Finally, we would like to extend special thanks to Tianjun Zhang, the first author of the [RAFT paper](https://arxiv.org/pdf/2403.10131), for collaborating with us on this tutorial and providing valuable guidance throughout our experiments. Our code is also partially inspired by the [RAFT section in Gorilla github](https://github.com/ShishirPatil/gorilla/tree/main/raft).
- ================================================
- FILE: end-to-end-use-cases/research_paper_analyzer/README.md
- ================================================
- # Research Paper Analyzer with Llama 4 Maverick
- This example leverages the **Llama 4 Maverick** model to retrieve the references of an arXiv paper and ingest all of their content for question answering, without using any RAG to store this information.
- ## Features
- ### Leverage Long Context Length
- | Model | Meta Llama4 Maverick | Meta Llama4 Scout | OpenAI GPT-4.5 | Claude Sonnet 3.7 |
- | ----- | -------------- | -------------- | -------------- | -------------- |
- | Context Window | 1M tokens | 10M tokens | 128K tokens | 200K tokens |
- Because of the long context length, the analyzer can process all the reference paper content at once, so you can ask questions about the paper without worrying about the context length.
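- Under the hood, ingesting a reference amounts to downloading its PDF and extracting the text (a sketch under the listed dependencies; this is not the actual research_analyzer.py code):
- ```python
- import io
- 
- import requests
- from PyPDF2 import PdfReader
- 
- # Sketch: fetch an arXiv PDF and extract its text for long-context prompting.
- pdf_bytes = requests.get("https://arxiv.org/pdf/2305.11135").content
- reader = PdfReader(io.BytesIO(pdf_bytes))
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
- print(f"Extracted {len(text)} characters")
- ```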
- ## Getting Started
- 1. Install dependencies:
- ```
- pip install -r requirements.txt
- ```
- 2. Run the application:
- ```
- python research_analyzer.py
- ```
- 3. Open the gradio interface on localhost in the browser.
- 4. Provide a paper url such as https://arxiv.org/abs/2305.11135
- 5. Press "Ingest", wait for the paper to be processed, and ask questions about it
- ================================================
- FILE: end-to-end-use-cases/research_paper_analyzer/requirements.txt
- ================================================
- gradio
- together
- requests
- pypdf2
- ================================================
- FILE: end-to-end-use-cases/whatsapp_llama_4_bot/README.md
- ================================================
- # WhatsApp and Llama 4 APIs: Build your own multi-modal chatbot
- Welcome to the WhatsApp Llama 4 Bot! This bot leverages the power of the Llama 4 APIs to provide intelligent and interactive responses to users via WhatsApp. It supports text, image, and audio interactions, making it a versatile tool for various use cases.
- ## Key Features
- - **Text Interaction**: Users can send text messages to the bot, which are processed using the Llama4 APIs to generate accurate and contextually relevant responses.
- - **Image Reasoning**: The bot can analyze images sent by users, providing insights, descriptions, or answers related to the image content.
- **Audio-to-Audio Interaction**: Users can send audio messages, which are transcribed to text, processed by Llama 4, and converted back to audio for a seamless voice-based interaction.
- ## Technical Overview
- ### Architecture
- - **FastAPI**: The bot is built using FastAPI, a modern web framework for building APIs with Python.
- - **Asynchronous Processing**: Utilizes `httpx` for making asynchronous HTTP requests to external APIs, ensuring efficient handling of media files.
- - **Environment Configuration**: Uses `dotenv` to manage environment variables, keeping sensitive information like API keys secure.
- Please refer to the high-level architecture below, which explains the integrations:
- 
- ### Important Integrations
- - **WhatsApp API**: Facilitates sending and receiving messages, images, and audio files.
- - **Llama4 Model**: Provides advanced natural language processing capabilities for generating responses.
- - **Groq API**: Handles speech-to-text (STT) and text-to-speech (TTS) conversions, enabling the audio-to-audio feature.
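- To make the flow concrete, here is a minimal sketch of the webhook pattern described above (the endpoint path, payload handling, and reply text are assumptions, not the actual bot code; the send-message JSON shape follows the WhatsApp Cloud API):
- ```python
- import os
- 
- import httpx
- from dotenv import load_dotenv
- from fastapi import FastAPI, Request
- 
- load_dotenv()  # reads ACCESS_TOKEN, WHATSAPP_API_URL, etc. from .env
- app = FastAPI()
- 
- @app.post("/webhook")  # hypothetical endpoint path
- async def webhook(request: Request):
-     payload = await request.json()
-     # ... extract the sender and message text from the WhatsApp payload ...
-     reply = "placeholder reply"  # here the bot would call the Llama 4 API
-     async with httpx.AsyncClient() as client:
-         await client.post(
-             os.environ["WHATSAPP_API_URL"],
-             headers={"Authorization": f"Bearer {os.environ['ACCESS_TOKEN']}"},
-             json={"messaging_product": "whatsapp", "to": "<recipient>",
-                   "type": "text", "text": {"body": reply}},
-         )
-     return {"status": "ok"}
- ```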
- ## Here are the steps to set up with the WhatsApp Business Cloud API
- First, open the [WhatsApp Business Platform Cloud API Get Started Guide](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started#set-up-developer-assets) and follow the first four steps to:
- 1. Add the WhatsApp product to your business app;
- 2. Add a recipient number;
- 3. Send a test message;
- 4. Configure a webhook to receive real time HTTP notifications.
- For the last step, you need to further follow the [Sample Callback URL for Webhooks Testing Guide](https://developers.facebook.com/docs/whatsapp/sample-app-endpoints) to create a free account on glitch.com to get your webhook's callback URL.
- Now open the [Meta for Developers Apps](https://developers.facebook.com/apps/) page and select your WhatsApp business app; you should be able to copy the curl command (as shown in the App Dashboard - WhatsApp - API Setup - Step 2 below) and run it in a Terminal to send a test message to your WhatsApp.
- 
- Note down the "Temporary access token", "Phone number ID", and "a recipient phone number" in the API Setup page above, which will be used later.
- ## Setup and Installation
- ### Step 1: Clone the Repository
- ```bash
- git clone https://github.com/meta-llama/llama-cookbook.git
- cd llama-cookbook/end-to-end-use-cases/whatsapp_llama_4_bot
- ```
- ### Step 2: Install Dependencies
- Ensure you have Python installed, then run the following command to install the required packages:
- ```bash
- pip install -r requirements.txt
- ```
- ### Step 3: Configure Environment Variables
- Create a `.env` file in the project directory and add your API keys and other configuration details as follows:
- ```plaintext
- ACCESS_TOKEN=your_whatsapp_access_token
- WHATSAPP_API_URL=your_whatsapp_api_url
- TOGETHER_API_KEY=your_llama4_api_key
- GROQ_API_KEY=your_groq_api_key
- PHONE_NUMBER_ID=your_phone_number_id
- ```
- ### Step 4: Run the Application
- On your EC2 instance, run the following command in a Terminal to start the FastAPI server:
- ```bash
- uvicorn ec2_endpoints:app --host 0.0.0.0 --port 5000
- ```
- Note: If you use Amazon EC2 as your web server, make sure you have port 5000 added to your EC2 instance's security group's inbound rules.
- ## License
- This project is licensed under the MIT License.
- ## Contributing
- We welcome contributions to enhance the capabilities of this bot. Please feel free to submit issues or pull requests.
- ================================================
- FILE: end-to-end-use-cases/whatsapp_llama_4_bot/requirements.txt
- ================================================
- aiohappyeyeballs==2.6.1
- aiohttp==3.11.16
- aiosignal==1.3.2
- annotated-types==0.7.0
- anyio==4.9.0
- async-timeout==5.0.1
- attrs==25.3.0
- certifi==2025.1.31
- charset-normalizer==3.4.1
- click==8.1.8
- colorama==0.4.6
- distro==1.9.0
- dotenv==0.9.9
- eval_type_backport==0.2.2
- exceptiongroup==1.2.2
- fastapi==0.115.12
- filelock==3.18.0
- frozenlist==1.5.0
- groq==0.22.0
- h11==0.14.0
- httpcore==1.0.8
- httpx==0.28.1
- idna==3.10
- markdown-it-py==3.0.0
- mdurl==0.1.2
- multidict==6.4.3
- numpy==2.2.4
- pillow==11.2.1
- propcache==0.3.1
- pyarrow==19.0.1
- pydantic==2.11.3
- pydantic_core==2.33.1
- Pygments==2.19.1
- python-dotenv==1.1.0
- requests==2.32.3
- rich==13.9.4
- shellingham==1.5.4
- sniffio==1.3.1
- starlette==0.46.2
- tabulate==0.9.0
- together==1.5.5
- tqdm==4.67.1
- typer==0.15.2
- typing-inspection==0.4.0
- typing_extensions==4.13.2
- urllib3==2.4.0
- uvicorn==0.34.1
- yarl==1.19.0
- ================================================
- FILE: end-to-end-use-cases/whatsapp_llama_4_bot/.env
- ================================================
- # WhatsApp Business Phone Number ID (NOT the phone number itself)
- PHONE_NUMBER_ID="place your whatsapp phone number id"
- # Full URL to send WhatsApp messages (use correct version and phone number ID)
- WHATSAPP_API_URL="place the Graph API request URL, i.e. https://graph.facebook.com/v{version}/{phone_number_id}/messages"
- # Your custom backend/agent endpoint (e.g., for LLM-based processing)
- AGENT_URL=https://your-agent-url.com/api
- LLAMA_API_KEY="place your LLAMA API Key"
- TOGETHER_API_KEY="place your Together API Key, In case you want to use Together, instead of Llama APIs"
- GROQ_API_KEY="place your Groq API Key - this is for STT and TTS"
- OPENAI_API_KEY="place your OpenAI Key to run the client"
- META_ACCESS_TOKEN="place your WhatsApp generated Access token from the app"
|