From c6e447f28c7da33e48aa6fe2eea3ab855ffb20c5 Mon Sep 17 00:00:00 2001 From: hande-k Date: Wed, 20 Nov 2024 08:47:02 +0100 Subject: [PATCH 01/17] docs: add print statements to the simple example, update README --- README.md | 24 +++++++++++++++++------- examples/python/simple_example.py | 21 +++++++++++++++++---- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 28d5858a0..8dc8c5c66 100644 --- a/README.md +++ b/README.md @@ -105,37 +105,47 @@ import asyncio from cognee.api.v1.search import SearchType async def main(): - # Reset cognee data + # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") await cognee.prune.prune_data() - # Reset cognee system state await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") + # cognee knowledge graph will be created based on this text text = """ Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. """ - - # Add text to cognee + print("Adding text to cognee:") + print(text.strip()) await cognee.add(text) + print("Text added successfully.\n") # Use LLMs and cognee to create knowledge graph + print("Running cognify to create knowledge graph...") await cognee.cognify() + print("Cognify process complete.\n") - # Search cognee for insights + # Query cognee for insights on the added text + query_text = 'Tell me about NLP' + print(f"Searching cognee for insights with query: '{query_text}'") search_results = await cognee.search( SearchType.INSIGHTS, - "Tell me about NLP", + query_text=query_text, ) - # Display results + # Display search results + print("Search results:") for result_text in search_results: print(result_text) + # Expected output: # natural_language_processing is_a field # natural_language_processing is_subfield_of computer_science # natural_language_processing is_subfield_of information_retrieval asyncio.run(main()) ``` +When you run this script, you will see step-by-step messages in the console that help you trace the execution flow and understand what the script is doing at each stage. A version of this example is here: `examples/python/simple_example.py` ### Create your own memory store diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index 47940ca6e..e0b212746 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -11,29 +11,42 @@ from cognee.api.v1.search import SearchType async def main(): # Create a clean slate for cognee -- reset data and system state + print("Resetting cognee data...") await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) + print("Data reset complete.\n") # cognee knowledge graph will be created based on this text text = """ Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. 
""" - + + print("Adding text to cognee:") + print(text.strip()) # Add the text, and make it available for cognify await cognee.add(text) + print("Text added successfully.\n") + + print("Running cognify to create knowledge graph...") # Use LLMs and cognee to create knowledge graph await cognee.cognify() + print("Cognify process complete.\n") + + query_text = 'Tell me about NLP' + print(f"Searching cognee for insights with query: '{query_text}'") # Query cognee for insights on the added text search_results = await cognee.search( - SearchType.INSIGHTS, query_text='Tell me about NLP' + SearchType.INSIGHTS, query_text=query_text ) - - # Display search results + + print("Search results:") + # Display results for result_text in search_results: print(result_text) + if __name__ == '__main__': asyncio.run(main()) From 57783a979a021b676081d8017eecc8d2945dbf3a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 14:03:14 +0100 Subject: [PATCH 02/17] feat: Add support for multiple audio and image formats Added support for multiple audio and image formats with example Feature COG-507 --- .data/multimedia/example.png | Bin 0 -> 10784 bytes .data/multimedia/text_to_speech.mp3 | Bin 0 -> 28173 bytes cognee/tasks/documents/classify_documents.py | 47 +++++++++++++++--- examples/python/multimedia_example.py | 48 +++++++++++++++++++ 4 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 .data/multimedia/example.png create mode 100644 .data/multimedia/text_to_speech.mp3 create mode 100644 examples/python/multimedia_example.py diff --git a/.data/multimedia/example.png b/.data/multimedia/example.png new file mode 100644 index 0000000000000000000000000000000000000000..4d406cafdeb7e6d778ff3fa37d99d42088673bf9 GIT binary patch literal 10784 zcmeHsXEdB&*S<&)(UOQnlnA0561@|m_cDXgqMOm8cM{Qi^e%cCLoh@1XwkbN$|ylF zdKtaE@_U~3U(b3z{nz{XJ)iDh$#V`1lx$#h#s=ot&IpUS6J`pLce4nwgpL^Yb$^ zGlRk4xVX5wx;i5xBRf00+1c5FfdOxCZv+DI=g*(`_;?Z$5=BMD$jHdKxw*W&y!Y?l zKYjYNudgpGEX>5jgolU6(9p1}s|$rf<>%*HSy@FyL<9u|g@lA`Z*Px{jm^x=1Ox;$ zH#b9}Ph5dOG_XS zI66A|>C>l-jEud#Jq!k8Zf>5Km}qZrpOcd#Dk=&Bfrf^LDl01)7#Os*wLgCRC@(Kx zR#vvQwx+DC?C0kPfk5DJxUaA8*47pc4UL0?Lw$Yy&dyGHdivqvVP+{{PE*QNlA&Pr|00{;K9K`XlQ6!T3U2;bZu>IZfU3+@$r?Fm6)__IvgC93B`ABbv!e-vo_41bj@@{D&E(*HxL(ek5}q5 zE^R&W!-r2u;=jImX=_2^6!h`sqZOb&-`AI{->dQ$y@3;vJ#=T2hZAF)oNcV_nG2yR z#f`-cMdz+=KsT&Q(5*N9@5g&S%7k7EsPxU06_5F@fmPX;1{JHhtD%m&?VfCwe(M3) zUB(jtFRJGr8_WEznAl4dB`9|EA~_9Oa0d*3VuO{q3kMM+ymLN*IPk&E!%{E`Z7^)Q z_ax53FV5K9PP?{Lt}}dBJL2c*R7Opkv$pT zV`4K&8)tqp?nL`KEAM+26{>7t9%U6e@wK)(_FPs>+ zjd<0FKJ1L~-`J(m*l1FcXKSj3325$2yEOtsinn|4!Gm1r(&L?9uPL{X#g2h}J@XVk zTM<>2Cl_P83w3<{^*VQDk>EN#DreWBV;G+Jo`^1>xlNg1Vk~~$Lp1cr(YFuk(1 zXBLx_93xm?I}ccGgIK9{b0WEi$^6Uh#kcY&_~of1&PPXKa5 zdl>Dpoz93BNr0y0;~BfC(ENI6@#m(`(unrr)UL+Nv6jo4^e9_mNEdiRWx}AzDN0 z%@4LSC3H>2k6|1^BV)@n{WX_(0AM}`44A-6ug4lJ02mtL(H{;vtVh025jkzC>>5w9 z4j+xMs@o(GF+huu6EGG^)PIg-tUR%RPagb!Sa~4%r}^h*-coDMNJOz`3UB6kt&ive z8BYPqo*Q2wALXY*vlc{I6Zya`>LKRdP60`@p{wCdY5Iw;5MaLgQhQ%tlDk=0p60^P zkE_73^Z=j{{OjlyB<$D+q*mqmS#zsazTYFO}f0qie&VnvfhMW0g|3HHORMJwsa zEL}g?Tz$;VrK9XBzg1P7Soyih;Wqw@>To%h3}12C7o(aS%vgh;x)LzWTCtZr)?YK1 z!aZh5QtZ;Y2x}eSu zCPqI2Ip#jps;uD9MV^sZResi5F!0Dwm(2imy+HnfoJ)#YB%1dM8AWEa%%>sBX5C(O z>q<+qbnv^j>hl5$P#)N_ETaS48Z3!+Z%U81+|1Ke8lS+( z3O{RZnLY|Mi`t&vk^U|6^BKRITXvYb82o!RDX z18=`J(W#s5IudSU4Xp_DT1w>IK@jb;dK4?bWW0mR;(ccUFY?_lV@tFH>>a(|D!?4# z6()6N3_$5RN)}l$SD&u&prz`L)PF?V0%W=$;kAq?z^IQRa@O-C(iLN*HG9th%q)15 z?0)tc=u7;Y_MFeQX#sH!_X0HOtdn;Xne}K3PT=4`mtrWM-&$jv8wvo7(Q%gC2l}(e 
zJ_!YxZeCtTPEnd|&5-&7&pToA&S!Bw>h^h_I<46g`v0XHKC2~P(+8|tIV~&3}-h4VZqW00NVwE*$tZ1B2|M%EWqFj{DbAR^bvAO#tonT9o z{qH_B;0j%Ei#6;99OsYN*zo5PPE=c_89Dp62sW=|T^JtnqIr!L(Y%%uKKYg1%t(fi zm4#hA8D{>X_&f%n{SU2&7mhzz&{b0$C-o+qF*H}(k*K=Ny!Qav}4)pV=1Hi8>-#oekbdcM!d!Ad;drsxWnza3dUSqnm6I+mjHJz3B z^$$!JgnyC|BJ6Gp>fE5E+usF`?*bK8iu}x!()9&o0Vq9KQ{*@Y3Q4M*V&>7Scc0-^ zwQG{-p2091juk#VSMSSEkzopdG<(}4Q@)_PJ#rDN`Y&H4Wh_#xTk9R@^Wu6dXa&ms z6*#E*U$E){i!(D%=SL*=^+n|?AimWB##RU>fIia#e-MQX-7clo9Pxnxmsc^tb$<6b z`7o8(Ny1L~Qq!f$KKi0RSxXVOFXRMn;dmw5M^^He;m>kVqf>{ncO14hkm>o{w8Ki$ zC%Lcb&g-(NOrNr3Ih(f1`qZ5s;zI#FH6G9F1$~IO&}dm~=3&D^QEDJcQkwJ$A+u=;#tSJo ze>}dHrB8YeZ?0cO=3tS>$WlEvZDkL6qCV>VVQQ@Bnyhq`$1W`+%GYz+-Sy@@ud`&c zvAASlta{J`4=WU3uJ6uwlyB6qWwmO=be*Ug14Sz%aZ`Gjx*}4E3&F5R>pCxTvbH3h zPu{oXbTHC=hu$%VfrSQ(+nV#QMSZ5c1`#bZ9(*<@$!x@=a^c=Bf97!$tjxhfA_W2b zI;!-FnN5;b$!@3-Odgnggt?BIEXE=YGO~{zGLSQX#5w`GQdN4u%*P`ictq$(8`SQf z7y9jODV&53HWAc+lvN$S9D#msaQ%mY(lvrj!`0>%ZxrU98Q06fs~iJ^CvSyoJmfKX zUnbsEQV_gVPm62Jv|2B$aj3dncl{5SGAf9}pJhN&!*fu>~}7$E=v z5LtC2FN1#T1wNUp*5_>Y@htov3E6a++uF(c{gmbPv#&LDjEb;ZXS z=~FcW%{v}#2?~g3f*^4?(7|lk+a-5;l_gXw)MU$u^fA&(t=Jnw$+9MAOMLW zkdtCbcOQI_e5tOThx;oq}j0IDXqT5ghBrgOBhKJe2rByBEy$>*| zdSRROE6+=KZmcL_Q>G^lTi|Q%Yzb$sF`;zkQEHNS(+`hsvpq2o1v?40SNX8=`u#O| z&*Kc`jPf*FP;^DB2U`oNHh)<8{RJ*b5Q_k$SWwDN!xELNU_mDn;z0h@wldHx&7V%d-^s3d8N!*uPh9(P)@>tu~9Ol_L8~Bl@gYwY!|TsBmb47LFT`0%(}3 zE~8UoPbN%#?;BX?qklfGe5#e!hrC5!>!G9aGk z%AEDVov?p?iClUkpli@5wx{DBnSJd`c=On3nxanQB%@25c`vY`vMW^qtwUta*v^rbcWQU%*Z-p{JsUDMV#nGX9Nan^8 zVNwW>jBU|(I5P$HqbXgy(yHiAsArjtaCWPdinfWuT1mFKMq6dvYxAla!H4rx3C(_@ z2LFP6(iK$6w_}g`@hqRlK2VIWFhh9&j}0=aJvR78q@rS2bt1JewRSgkwqx<`YsuVa zVJ{*A?ntoTP6L_hlS-lmU8CD@3Vu7(LWYh)@;J&`;G}M7BiZbrmAZ~hAHa;ue$es! zMfmo_UOpGwV_D|bxbPG?k+8T;@tIG?ZWD<@q&g#SA9y}6q&z|3uHc_|Rv+wGY7PAG zkub=y9Lgb&YW6#OOi^~N(;T4P_#ye`J>%t0{$=NLUJg@jlopQ%u|)B`3^CYP^&UMk zLdlqlFR-=QFXt4%n32I_;*Y9Vw%1;Mu_HY~dDeAb13E(g<`O#s)X^H1kgmfjLy}c^ zfHcDq-I|{4yiyd&{p(1nP45Su9uE9XIACk;_Z)@7n*&L>X$a?DqKHs;CE{x}ug9tN zR*@4p9V1-cstAQe1_$CX3&|4|_VNsk2fU}d6P?GH)OSFsb5pUjdnUB~g3DFHx>@wY z8djLiD*wB5%s%of>SVdLUiM!8>v`Cs95nI%PWrY{Xs;nO1Kpr>^?M$hf(^17Cq6kW zs;5W$z*E8&CkF?ND4xo_u~CpCuV5JEMxhq0g-aZvnY@bn6`XQmZjo3ZV(+b?u)b%1 zadn64ARNf4QxgkdRMPOzaO(50yfA35{?{!Og% Z|FRx list[Document]: documents = [ - EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location, name=data_item.name) + EXTENSION_TO_DOCUMENT_CLASS[data_item.extension]( + id=data_item.id, + title=f"{data_item.name}.{data_item.extension}", + raw_data_location=data_item.raw_data_location, + name=data_item.name, + ) for data_item in data_documents ] return documents diff --git a/examples/python/multimedia_example.py b/examples/python/multimedia_example.py new file mode 100644 index 000000000..6c8bc5995 --- /dev/null +++ b/examples/python/multimedia_example.py @@ -0,0 +1,48 @@ +import os +import asyncio +import pathlib + +import cognee +from cognee.api.v1.search import SearchType + +# Prerequisites: +# 1. Copy `.env.template` and rename it to `.env`. +# 2. 
Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field: +# LLM_API_KEY = "your_key_here" + + +async def main(): + # Create a clean slate for cognee -- reset data and system state + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + + # cognee knowledge graph will be created based on the text + # and description of these files + mp3_file_path = os.path.join( + pathlib.Path(__file__).parent.parent.parent, + ".data/multimedia/text_to_speech.mp3", + ) + png_file_path = os.path.join( + pathlib.Path(__file__).parent.parent.parent, + ".data/multimedia/example.png", + ) + + # Add the files, and make it available for cognify + await cognee.add([mp3_file_path, png_file_path]) + + # Use LLMs and cognee to create knowledge graph + await cognee.cognify() + + # Query cognee for summaries of the data in the multimedia files + search_results = await cognee.search( + SearchType.SUMMARIES, + query_text="What is in the multimedia files?", + ) + + # Display search results + for result_text in search_results: + print(result_text) + + +if __name__ == "__main__": + asyncio.run(main()) From 61ed516d120b9082c3b8736b9749b5dbdc0bb101 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 16:21:29 +0100 Subject: [PATCH 03/17] docs: Add multimedia notebook Added multimedia notebook for cognee Docs COG-507 --- notebooks/cognee_demo.ipynb | 40 +++--- notebooks/cognee_llama_index.ipynb | 25 ++-- notebooks/cognee_multimedia_demo.ipynb | 169 +++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 31 deletions(-) create mode 100644 notebooks/cognee_multimedia_demo.ipynb diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb index 45f5a618c..33ea91a35 100644 --- a/notebooks/cognee_demo.ipynb +++ b/notebooks/cognee_demo.ipynb @@ -265,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "df16431d0f48b006", "metadata": { "ExecuteTime": { @@ -304,7 +304,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "9086abf3af077ab4", "metadata": { "ExecuteTime": { @@ -349,7 +349,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "a9de0cc07f798b7f", "metadata": { "ExecuteTime": { @@ -393,7 +393,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "185ff1c102d06111", "metadata": { "ExecuteTime": { @@ -437,7 +437,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "d55ce4c58f8efb67", "metadata": { "ExecuteTime": { @@ -479,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "ca4ecc32721ad332", "metadata": { "ExecuteTime": { @@ -529,14 +529,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "bce39dc6", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", - "# # Setting environment variables\n", + "# Setting environment variables\n", "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n", " os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n", "\n", @@ -546,24 +546,26 @@ "if \"LLM_API_KEY\" not in os.environ:\n", " os.environ[\"LLM_API_KEY\"] = \"\"\n", "\n", - "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" # \"neo4j\" or \"networkx\"\n", + "# \"neo4j\" or \"networkx\"\n", + "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n", "# Not needed if using networkx\n", - "#GRAPH_DATABASE_URL=\"\"\n", - "#GRAPH_DATABASE_USERNAME=\"\"\n", - "#GRAPH_DATABASE_PASSWORD=\"\"\n", + 
"#os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n", "\n", - "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" # \"qdrant\", \"weaviate\" or \"lancedb\"\n", - "# Not needed if using \"lancedb\"\n", + "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n", + "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n", + "# Not needed if using \"lancedb\" or \"pgvector\"\n", "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", "\n", - "# Database provider\n", - "os.environ[\"DB_PROVIDER\"]=\"sqlite\" # or \"postgres\"\n", + "# Relational Database provider \"sqlite\" or \"postgres\"\n", + "os.environ[\"DB_PROVIDER\"]=\"sqlite\"\n", "\n", "# Database name\n", "os.environ[\"DB_NAME\"]=\"cognee_db\"\n", "\n", - "# Postgres specific parameters (Only if Postgres is run)\n", + "# Postgres specific parameters (Only if Postgres or PGVector is used)\n", "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n", "# os.environ[\"DB_PORT\"]=\"5432\"\n", "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n", @@ -620,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "7c431fdef4921ae0", "metadata": { "ExecuteTime": { @@ -881,7 +883,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/notebooks/cognee_llama_index.ipynb b/notebooks/cognee_llama_index.ipynb index 742c2f51c..ec899aaea 100644 --- a/notebooks/cognee_llama_index.ipynb +++ b/notebooks/cognee_llama_index.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -90,23 +90,23 @@ "# \"neo4j\" or \"networkx\"\n", "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n", "# Not needed if using networkx\n", - "#GRAPH_DATABASE_URL=\"\"\n", - "#GRAPH_DATABASE_USERNAME=\"\"\n", - "#GRAPH_DATABASE_PASSWORD=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n", "\n", - "# \"qdrant\", \"weaviate\" or \"lancedb\"\n", + "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n", "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n", - "# Not needed if using \"lancedb\"\n", + "# Not needed if using \"lancedb\" or \"pgvector\"\n", "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", "\n", - "# Database provider\n", - "os.environ[\"DB_PROVIDER\"]=\"sqlite\" # or \"postgres\"\n", + "# Relational Database provider \"sqlite\" or \"postgres\"\n", + "os.environ[\"DB_PROVIDER\"]=\"sqlite\"\n", "\n", "# Database name\n", "os.environ[\"DB_NAME\"]=\"cognee_db\"\n", "\n", - "# Postgres specific parameters (Only if Postgres is run)\n", + "# Postgres specific parameters (Only if Postgres or PGVector is used)\n", "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n", "# os.environ[\"DB_PORT\"]=\"5432\"\n", "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n", @@ -130,8 +130,6 @@ "\n", "from cognee.infrastructure.databases.vector.pgvector import create_db_and_tables as create_pgvector_db_and_tables\n", "from cognee.infrastructure.databases.relational import create_db_and_tables as create_relational_db_and_tables\n", - "from cognee.infrastructure.databases.graph import get_graph_engine\n", - 
"from cognee.shared.utils import render_graph\n", "from cognee.modules.users.models import User\n", "from cognee.modules.users.methods import get_default_user\n", "from cognee.tasks.ingestion.ingest_data_with_metadata import ingest_data_with_metadata\n", @@ -196,6 +194,9 @@ "source": [ "import graphistry\n", "\n", + "from cognee.infrastructure.databases.graph import get_graph_engine\n", + "from cognee.shared.utils import render_graph\n", + "\n", "# Get graph\n", "graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n", "graph_engine = await get_graph_engine()\n", diff --git a/notebooks/cognee_multimedia_demo.ipynb b/notebooks/cognee_multimedia_demo.ipynb new file mode 100644 index 000000000..2d35132f6 --- /dev/null +++ b/notebooks/cognee_multimedia_demo.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cognee GraphRAG with Multimedia files" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## Load Data\n", + "\n", + "We will use a few sample multimedia files which we have on GitHub for easy access." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# cognee knowledge graph will be created based on the text\n", + "# and description of these files\n", + "mp3_file_path = os.path.join(\n", + " os.path.abspath(''), \"../\",\n", + " \".data/multimedia/text_to_speech.mp3\",\n", + ")\n", + "png_file_path = os.path.join(\n", + " os.path.abspath(''), \"../\",\n", + " \".data/multimedia/example.png\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Setting environment variables\n", + "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n", + " os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n", + "\n", + "if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n", + " os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n", + "\n", + "if \"LLM_API_KEY\" not in os.environ:\n", + " os.environ[\"LLM_API_KEY\"] = \"\"\n", + "\n", + "# \"neo4j\" or \"networkx\"\n", + "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n", + "# Not needed if using networkx\n", + "#os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n", + "#os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n", + "\n", + "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n", + "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n", + "# Not needed if using \"lancedb\" or \"pgvector\"\n", + "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", + "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", + "\n", + "# Relational Database provider \"sqlite\" or \"postgres\"\n", + "os.environ[\"DB_PROVIDER\"]=\"sqlite\"\n", + "\n", + "# Database name\n", + "os.environ[\"DB_NAME\"]=\"cognee_db\"\n", + "\n", + "# Postgres specific parameters (Only if Postgres or PGVector is used)\n", + "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n", + "# os.environ[\"DB_PORT\"]=\"5432\"\n", + "# os.environ[\"DB_USERNAME\"]=\"cognee\"\n", + "# os.environ[\"DB_PASSWORD\"]=\"cognee\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run Cognee with multimedia files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "import cognee\n", + "\n", + "# Create a clean slate for cognee -- reset data and system state\n", + "await cognee.prune.prune_data()\n", + "await cognee.prune.prune_system(metadata=True)\n", + "\n", + "# Add multimedia files and make them available for cognify\n", + "await cognee.add([mp3_file_path, png_file_path])\n", + "\n", + "# Create knowledge graph with cognee\n", + "await cognee.cognify()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query Cognee for summaries related to multimedia files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cognee.api.v1.search import SearchType\n", + "\n", + "# Query cognee for summaries of the data in the multimedia files\n", + "search_results = await cognee.search(\n", + " SearchType.SUMMARIES,\n", + " query_text=\"What is in the multimedia files?\",\n", + ")\n", + "\n", + "# Display search results\n", + "for result_text in search_results:\n", + " print(result_text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 38ef3d465f411d0d5e9b948879845b0ee5ddab88 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 16:25:23 +0100 Subject: [PATCH 04/17] test: Add github action for multimedia notebook Added github action for multimedia notebook Test COG-507 --- .../test_cognee_multimedia_notebook.yml | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 .github/workflows/test_cognee_multimedia_notebook.yml diff --git a/.github/workflows/test_cognee_multimedia_notebook.yml b/.github/workflows/test_cognee_multimedia_notebook.yml new file mode 100644 index 000000000..dd14fa5e6 --- /dev/null +++ b/.github/workflows/test_cognee_multimedia_notebook.yml @@ -0,0 +1,63 @@ +name: test | multimedia notebook + +on: + workflow_dispatch: + pull_request: + branches: + - main + types: [labeled, synchronize] + + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + RUNTIME__LOG_LEVEL: ERROR + +jobs: + get_docs_changes: + name: docs changes + uses: ./.github/workflows/get_docs_changes.yml + + run_notebook_test: + name: test + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' && ${{ github.event.label.name == 'run-checks' }} + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Check out + uses: actions/checkout@master + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11.x' + + - name: Install Poetry + uses: snok/install-poetry@v1.3.2 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Install dependencies + run: | + poetry install --no-interaction + poetry add jupyter --no-interaction + + - name: Execute Jupyter Notebook + env: + ENV: 'dev' + LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }} + GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }} + run: | + poetry run jupyter nbconvert \ + --to notebook \ + --execute notebooks/cognee_multimedia_demo.ipynb \ + --output 
executed_notebook.ipynb \ + --ExecutePreprocessor.timeout=1200 \ No newline at end of file From b60f2603f4632c1d678d5677a07ba8c8eab4ddc7 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 17:11:23 +0100 Subject: [PATCH 05/17] test: Add test for pgvector to confirm database deletion is working Added assert to verify all tables in database have been cleared. Added method to SqlAlchemyAdapter to get all table names in database. Test COG-488 --- .../sqlalchemy/SqlAlchemyAdapter.py | 23 +++++++++++++++++++ cognee/tests/test_pgvector.py | 6 ++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index febfe1931..aa2a022d3 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -130,6 +130,29 @@ class SQLAlchemyAdapter(): return metadata.tables[full_table_name] raise ValueError(f"Table '{full_table_name}' not found.") + async def get_table_names(self) -> List[str]: + """ + Return a list of all tables names in database + """ + table_names = [] + async with self.engine.begin() as connection: + if self.engine.dialect.name == "sqlite": + await connection.run_sync(Base.metadata.reflect) + for table in Base.metadata.tables: + table_names.append(str(table)) + else: + schema_list = await self.get_schema_list() + # Create a MetaData instance to load table information + metadata = MetaData() + # Drop all tables from all schemas + for schema_name in schema_list: + # Load the schema information into the MetaData object + await connection.run_sync(metadata.reflect, schema=schema_name) + for table in metadata.sorted_tables: + table_names.append(str(table)) + metadata.clear() + return table_names + async def get_data(self, table_name: str, filters: dict = None): async with self.engine.begin() as connection: diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index 1466e195f..18d9e2c91 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -87,9 +87,13 @@ async def main(): print(f"{result}\n") history = await cognee.get_search_history() - assert len(history) == 6, "Search history is not correct." + await cognee.prune.prune_data() + await cognee.prune.prune_system(metadata=True) + tables_in_database = await vector_engine.get_table_names() + assert len(tables_in_database) == 0, "The database is not empty" + if __name__ == "__main__": import asyncio From 796bbadb43a977b97afdc2514ccdd9185962033a Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 17:27:12 +0100 Subject: [PATCH 06/17] test: Add verification of deletion of local files Verify local files used in end to end test are deleted upon completion Test COG-488 --- cognee/tests/test_pgvector.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index 18d9e2c91..7e52f7740 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -90,6 +90,8 @@ async def main(): assert len(history) == 6, "Search history is not correct." 
await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local files are not deleted" + await cognee.prune.prune_system(metadata=True) tables_in_database = await vector_engine.get_table_names() assert len(tables_in_database) == 0, "The database is not empty" From 27aade2a4394ee4f7fcbfaf8629b3faf9e99637d Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 10:25:26 +0100 Subject: [PATCH 07/17] refactor: Expand on assert in test Expand on assert fail message in pgvector test Refactor COG-488 --- cognee/tests/test_pgvector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index 7e52f7740..c9c841d31 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -90,7 +90,7 @@ async def main(): assert len(history) == 6, "Search history is not correct." await cognee.prune.prune_data() - assert not os.path.isdir(data_directory_path), "Local files are not deleted" + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" await cognee.prune.prune_system(metadata=True) tables_in_database = await vector_engine.get_table_names() From b815aeaa2afa5ccd13ff4d06194a154a0c787317 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 13:44:09 +0100 Subject: [PATCH 08/17] test: Add test for database deletion for LanceDB and SQLite Added database deletion test for LanceDB and SQLite Test COG-488 --- cognee/tests/test_library.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 66d218c3b..9a49206a1 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -57,6 +57,16 @@ async def main(): assert len(history) == 6, "Search history is not correct." + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + connection = await vector_engine.get_connection() + collection_names = await connection.table_names() + assert len(collection_names) == 0, "The vector database is not empty" + from cognee.infrastructure.databases.relational import get_relational_engine + assert not os.path.exists(get_relational_engine().db_path), "The relational database is not empty" + if __name__ == "__main__": import asyncio asyncio.run(main(), debug=True) From 157d7d217d32432724bd216fc1ceb0a00933366d Mon Sep 17 00:00:00 2001 From: hande-k Date: Thu, 21 Nov 2024 13:57:42 +0100 Subject: [PATCH 09/17] docs: added cognify steps in the print statement and commented example output --- README.md | 42 ++++++++++++++++++++++--------- examples/python/simple_example.py | 18 +++++++++++-- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 8dc8c5c66..ed4489fbf 100644 --- a/README.md +++ b/README.md @@ -116,34 +116,52 @@ async def main(): Natural language processing (NLP) is an interdisciplinary subfield of computer science and information retrieval. """ + print("Adding text to cognee:") - print(text.strip()) + print(text.strip()) + # Add the text, and make it available for cognify await cognee.add(text) print("Text added successfully.\n") + + print("Running cognify to create knowledge graph...\n") + print("Cognify process steps:") + print("1. Classifying the document: Determining the type and category of the input text.") + print("2. Checking permissions: Ensuring the user has the necessary rights to process the text.") + print("3. 
Extracting text chunks: Breaking down the text into sentences or phrases for analysis.") + print("4. Adding data points: Storing the extracted chunks for processing.") + print("5. Generating knowledge graph: Extracting entities and relationships to form a knowledge graph.") + print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n") + # Use LLMs and cognee to create knowledge graph - print("Running cognify to create knowledge graph...") await cognee.cognify() print("Cognify process complete.\n") - # Query cognee for insights on the added text + query_text = 'Tell me about NLP' print(f"Searching cognee for insights with query: '{query_text}'") + # Query cognee for insights on the added text search_results = await cognee.search( - SearchType.INSIGHTS, - query_text=query_text, + SearchType.INSIGHTS, query_text=query_text ) - - # Display search results + print("Search results:") + # Display results for result_text in search_results: print(result_text) - # Expected output: - # natural_language_processing is_a field - # natural_language_processing is_subfield_of computer_science - # natural_language_processing is_subfield_of information_retrieval -asyncio.run(main()) + # Example output: + # ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'computer science', 'description': 'The study of computation and information processing.'}) + # (...) + # + # It represents nodes and relationships in the knowledge graph: + # - The first element is the source node (e.g., 'natural language processing'). + # - The second element is the relationship between nodes (e.g., 'is_a_subfield_of'). + # - The third element is the target node (e.g., 'computer science'). + +if __name__ == '__main__': + asyncio.run(main()) + ``` When you run this script, you will see step-by-step messages in the console that help you trace the execution flow and understand what the script is doing at each stage. A version of this example is here: `examples/python/simple_example.py` diff --git a/examples/python/simple_example.py b/examples/python/simple_example.py index e0b212746..55b07c4c3 100644 --- a/examples/python/simple_example.py +++ b/examples/python/simple_example.py @@ -1,5 +1,4 @@ import asyncio - import cognee from cognee.api.v1.search import SearchType @@ -29,7 +28,15 @@ async def main(): print("Text added successfully.\n") - print("Running cognify to create knowledge graph...") + print("Running cognify to create knowledge graph...\n") + print("Cognify process steps:") + print("1. Classifying the document: Determining the type and category of the input text.") + print("2. Checking permissions: Ensuring the user has the necessary rights to process the text.") + print("3. Extracting text chunks: Breaking down the text into sentences or phrases for analysis.") + print("4. Adding data points: Storing the extracted chunks for processing.") + print("5. 
Generating knowledge graph: Extracting entities and relationships to form a knowledge graph.") + print("6. Summarizing text: Creating concise summaries of the content for quick insights.\n") + # Use LLMs and cognee to create knowledge graph await cognee.cognify() print("Cognify process complete.\n") @@ -47,6 +54,13 @@ async def main(): for result_text in search_results: print(result_text) + # Example output: + # ({'id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'natural language processing', 'description': 'An interdisciplinary subfield of computer science and information retrieval.'}, {'relationship_name': 'is_a_subfield_of', 'source_node_id': UUID('bc338a39-64d6-549a-acec-da60846dd90d'), 'target_node_id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 15, 473137, tzinfo=datetime.timezone.utc)}, {'id': UUID('6218dbab-eb6a-5759-a864-b3419755ffe0'), 'updated_at': datetime.datetime(2024, 11, 21, 12, 23, 1, 211808, tzinfo=datetime.timezone.utc), 'name': 'computer science', 'description': 'The study of computation and information processing.'}) + # (...) + # It represents nodes and relationships in the knowledge graph: + # - The first element is the source node (e.g., 'natural language processing'). + # - The second element is the relationship between nodes (e.g., 'is_a_subfield_of'). + # - The third element is the target node (e.g., 'computer science'). if __name__ == '__main__': asyncio.run(main()) From ac3f9882787367aeb78306182e24fd0c0f0607f0 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 16:09:50 +0100 Subject: [PATCH 10/17] test: Add checking of Networkx database deletion Add test to check if networkx database has been cleaned properly Test COG-488 --- cognee/tests/test_library.py | 12 ++++++++++-- cognee/tests/test_pgvector.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 9a49206a1..6c9d41800 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -57,15 +57,23 @@ async def main(): assert len(history) == 6, "Search history is not correct." 
+ # Assert local data files are cleaned properly await cognee.prune.prune_data() assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + # Assert relational, vector and graph databases have been cleaned properly await cognee.prune.prune_system(metadata=True) + connection = await vector_engine.get_connection() collection_names = await connection.table_names() - assert len(collection_names) == 0, "The vector database is not empty" + assert len(collection_names) == 0, "LanceDB vector database is not empty" + from cognee.infrastructure.databases.relational import get_relational_engine - assert not os.path.exists(get_relational_engine().db_path), "The relational database is not empty" + assert not os.path.exists(get_relational_engine().db_path), "SQLite relational database is not empty" + + from cognee.infrastructure.databases.graph import get_graph_config + graph_config = get_graph_config() + assert not os.path.exists(graph_config.graph_file_path), "Networkx graph database is not empty" if __name__ == "__main__": import asyncio diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index c9c841d31..bd6584cbc 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -94,7 +94,7 @@ async def main(): await cognee.prune.prune_system(metadata=True) tables_in_database = await vector_engine.get_table_names() - assert len(tables_in_database) == 0, "The database is not empty" + assert len(tables_in_database) == 0, "PostgreSQL database is not empty" if __name__ == "__main__": import asyncio From 54daa6986cc3d1a884696958ac2fc83a1cbd0c24 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 16:39:38 +0100 Subject: [PATCH 11/17] test: Add test for deletion of neo4j graph database Added test to verify all nodes and edges from neo4j database have been cleaned Test COG-488 --- cognee/tests/test_neo4j.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 756b29cc4..02f3eaccd 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -61,6 +61,15 @@ async def main(): assert len(history) == 6, "Search history is not correct." + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + from cognee.infrastructure.databases.graph import get_graph_engine + graph_engine = await get_graph_engine() + nodes, edges = await graph_engine.get_graph_data() + assert len(nodes) == 0 and len(edges) == 0, "Neo4j graph database is not empty" + if __name__ == "__main__": import asyncio asyncio.run(main()) From 0f8baeeb0d25a7015a2d313d683863e3c69645e6 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 18:19:23 +0100 Subject: [PATCH 12/17] test: Add database deletion test for qdrant Added testing of database deletion for qdrant Test COG-488 --- cognee/tests/test_qdrant.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cognee/tests/test_qdrant.py b/cognee/tests/test_qdrant.py index 680399e60..4c2462c3b 100644 --- a/cognee/tests/test_qdrant.py +++ b/cognee/tests/test_qdrant.py @@ -59,9 +59,16 @@ async def main(): print(f"{result}\n") history = await cognee.get_search_history() - assert len(history) == 6, "Search history is not correct." 
+ await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + qdrant_client = get_vector_engine().get_qdrant_client() + collections_response = await qdrant_client.get_collections() + assert len(collections_response.collections) == 0, "QDrant vector database is not empty" + if __name__ == "__main__": import asyncio asyncio.run(main()) From 443133ffbbfa97c2c58f252f41aab8d936af62ce Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Thu, 21 Nov 2024 18:37:09 +0100 Subject: [PATCH 13/17] test: Add database deletion test for Weaviate vector database Added database deletion test for Weaviate vector database Test COG-488 --- cognee/tests/test_weaviate.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cognee/tests/test_weaviate.py b/cognee/tests/test_weaviate.py index c93dc036a..c352df13e 100644 --- a/cognee/tests/test_weaviate.py +++ b/cognee/tests/test_weaviate.py @@ -57,9 +57,15 @@ async def main(): print(f"{result}\n") history = await cognee.get_search_history() - assert len(history) == 6, "Search history is not correct." + await cognee.prune.prune_data() + assert not os.path.isdir(data_directory_path), "Local data files are not deleted" + + await cognee.prune.prune_system(metadata=True) + collections = get_vector_engine().client.collections.list_all() + assert len(collections) == 0, "Weaviate vector database is not empty" + if __name__ == "__main__": import asyncio asyncio.run(main()) From d1f82173206d7d9b415ba371da335fafba11a7e7 Mon Sep 17 00:00:00 2001 From: Boris Date: Fri, 22 Nov 2024 10:26:21 +0100 Subject: [PATCH 14/17] feat: COG-585 enable custom llm and embeding models --- .../databases/graph/get_graph_engine.py | 21 +- .../databases/vector/create_vector_engine.py | 41 ++-- .../embeddings/LiteLLMEmbeddingEngine.py | 29 ++- .../databases/vector/embeddings/config.py | 19 +- .../vector/embeddings/get_embedding_engine.py | 14 +- cognee/infrastructure/llm/config.py | 2 + cognee/infrastructure/llm/get_llm_client.py | 14 +- cognee/infrastructure/llm/openai/adapter.py | 189 +++++++----------- .../modules/pipelines/operations/run_tasks.py | 11 +- cognee/modules/settings/__init__.py | 1 + .../modules/settings/get_current_settings.py | 54 +++++ 11 files changed, 222 insertions(+), 173 deletions(-) create mode 100644 cognee/modules/settings/get_current_settings.py diff --git a/cognee/infrastructure/databases/graph/get_graph_engine.py b/cognee/infrastructure/databases/graph/get_graph_engine.py index 038e878c0..5770bcda4 100644 --- a/cognee/infrastructure/databases/graph/get_graph_engine.py +++ b/cognee/infrastructure/databases/graph/get_graph_engine.py @@ -9,18 +9,21 @@ async def get_graph_engine() -> GraphDBInterface : config = get_graph_config() if config.graph_database_provider == "neo4j": - try: - from .neo4j_driver.adapter import Neo4jAdapter + if not (config.graph_database_url and config.graph_database_username and config.graph_database_password): + raise EnvironmentError("Missing required Neo4j credentials.") + + from .neo4j_driver.adapter import Neo4jAdapter - return Neo4jAdapter( - graph_database_url = config.graph_database_url, - graph_database_username = config.graph_database_username, - graph_database_password = config.graph_database_password - ) - except: - pass + return Neo4jAdapter( + graph_database_url = config.graph_database_url, + graph_database_username = config.graph_database_username, + graph_database_password = config.graph_database_password + ) 
elif config.graph_database_provider == "falkordb": + if not (config.graph_database_url and config.graph_database_username and config.graph_database_password): + raise EnvironmentError("Missing required FalkorDB credentials.") + from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index db5ef3129..4b4799ee7 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -10,26 +10,29 @@ def create_vector_engine(config: VectorConfig, embedding_engine): if config["vector_db_provider"] == "weaviate": from .weaviate_db import WeaviateAdapter - if config["vector_db_url"] is None and config["vector_db_key"] is None: - raise EnvironmentError("Weaviate is not configured!") + if not (config["vector_db_url"] and config["vector_db_key"]): + raise EnvironmentError("Missing requred Weaviate credentials!") return WeaviateAdapter( config["vector_db_url"], config["vector_db_key"], embedding_engine = embedding_engine ) - elif config["vector_db_provider"] == "qdrant": - if config["vector_db_url"] and config["vector_db_key"]: - from .qdrant.QDrantAdapter import QDrantAdapter - return QDrantAdapter( - url = config["vector_db_url"], - api_key = config["vector_db_key"], - embedding_engine = embedding_engine - ) + elif config["vector_db_provider"] == "qdrant": + if not (config["vector_db_url"] and config["vector_db_key"]): + raise EnvironmentError("Missing requred Qdrant credentials!") + + from .qdrant.QDrantAdapter import QDrantAdapter + + return QDrantAdapter( + url = config["vector_db_url"], + api_key = config["vector_db_key"], + embedding_engine = embedding_engine + ) + elif config["vector_db_provider"] == "pgvector": from cognee.infrastructure.databases.relational import get_relational_config - from .pgvector.PGVectorAdapter import PGVectorAdapter # Get configuration for postgres database relational_config = get_relational_config() @@ -39,16 +42,25 @@ def create_vector_engine(config: VectorConfig, embedding_engine): db_port = relational_config.db_port db_name = relational_config.db_name + if not (db_host and db_port and db_name and db_username and db_password): + raise EnvironmentError("Missing requred pgvector credentials!") + connection_string: str = ( - f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}" + f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}" ) - + + from .pgvector.PGVectorAdapter import PGVectorAdapter + return PGVectorAdapter( connection_string, config["vector_db_key"], embedding_engine, ) + elif config["vector_db_provider"] == "falkordb": + if not (config["vector_db_url"] and config["vector_db_key"]): + raise EnvironmentError("Missing requred FalkorDB credentials!") + from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter return FalkorDBAdapter( @@ -56,6 +68,7 @@ def create_vector_engine(config: VectorConfig, embedding_engine): database_port = config["vector_db_port"], embedding_engine = embedding_engine, ) + else: from .lancedb.LanceDBAdapter import LanceDBAdapter @@ -64,5 +77,3 @@ def create_vector_engine(config: VectorConfig, embedding_engine): api_key = config["vector_db_key"], embedding_engine = embedding_engine, ) - - raise EnvironmentError(f"Vector provider not configured 
correctly: {config['vector_db_provider']}") diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py index a41618f18..617698fd1 100644 --- a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -1,32 +1,39 @@ import asyncio from typing import List, Optional import litellm -from litellm import aembedding from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine litellm.set_verbose = False class LiteLLMEmbeddingEngine(EmbeddingEngine): api_key: str - embedding_model: str - embedding_dimensions: int + endpoint: str + api_version: str + model: str + dimensions: int def __init__( self, - embedding_model: Optional[str] = "text-embedding-3-large", - embedding_dimensions: Optional[int] = 3072, + model: Optional[str] = "text-embedding-3-large", + dimensions: Optional[int] = 3072, api_key: str = None, + endpoint: str = None, + api_version: str = None, ): self.api_key = api_key - self.embedding_model = embedding_model - self.embedding_dimensions = embedding_dimensions + self.endpoint = endpoint + self.api_version = api_version + self.model = model + self.dimensions = dimensions async def embed_text(self, text: List[str]) -> List[List[float]]: async def get_embedding(text_): - response = await aembedding( - self.embedding_model, + response = await litellm.aembedding( + self.model, input = text_, - api_key = self.api_key + api_key = self.api_key, + api_base = self.endpoint, + api_version = self.api_version ) return response.data[0]["embedding"] @@ -36,4 +43,4 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine): return result def get_vector_size(self) -> int: - return self.embedding_dimensions + return self.dimensions diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 8c03d389b..ecfb37204 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -1,23 +1,16 @@ +from typing import Optional from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict class EmbeddingConfig(BaseSettings): - openai_embedding_model: str = "text-embedding-3-large" - openai_embedding_dimensions: int = 3072 - litellm_embedding_model: str = "BAAI/bge-large-en-v1.5" - litellm_embedding_dimensions: int = 1024 - # embedding_engine:object = DefaultEmbeddingEngine(embedding_model=litellm_embedding_model, embedding_dimensions=litellm_embedding_dimensions) + embedding_model: Optional[str] = "text-embedding-3-large" + embedding_dimensions: Optional[int] = 3072 + embedding_endpoint: Optional[str] = None + embedding_api_key: Optional[str] = None + embedding_api_version: Optional[str] = None model_config = SettingsConfigDict(env_file = ".env", extra = "allow") - def to_dict(self) -> dict: - return { - "openai_embedding_model": self.openai_embedding_model, - "openai_embedding_dimensions": self.openai_embedding_dimensions, - "litellm_embedding_model": self.litellm_embedding_model, - "litellm_embedding_dimensions": self.litellm_embedding_dimensions, - } - @lru_cache def get_embedding_config(): return EmbeddingConfig() diff --git a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py index 
a82876ef8..d2582fbf0 100644 --- a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +++ b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py @@ -1,7 +1,17 @@ -from cognee.infrastructure.llm import get_llm_config +from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config +from cognee.infrastructure.llm.config import get_llm_config from .EmbeddingEngine import EmbeddingEngine from .LiteLLMEmbeddingEngine import LiteLLMEmbeddingEngine def get_embedding_engine() -> EmbeddingEngine: + config = get_embedding_config() llm_config = get_llm_config() - return LiteLLMEmbeddingEngine(api_key = llm_config.llm_api_key) + + return LiteLLMEmbeddingEngine( + # If OpenAI API is used for embeddings, litellm needs only the api_key. + api_key = config.embedding_api_key or llm_config.llm_api_key, + endpoint = config.embedding_endpoint, + api_version = config.embedding_api_version, + model = config.embedding_model, + dimensions = config.embedding_dimensions, + ) diff --git a/cognee/infrastructure/llm/config.py b/cognee/infrastructure/llm/config.py index 37541adf2..d148042be 100644 --- a/cognee/infrastructure/llm/config.py +++ b/cognee/infrastructure/llm/config.py @@ -7,6 +7,7 @@ class LLMConfig(BaseSettings): llm_model: str = "gpt-4o-mini" llm_endpoint: str = "" llm_api_key: Optional[str] = None + llm_api_version: Optional[str] = None llm_temperature: float = 0.0 llm_streaming: bool = False transcription_model: str = "whisper-1" @@ -19,6 +20,7 @@ class LLMConfig(BaseSettings): "model": self.llm_model, "endpoint": self.llm_endpoint, "api_key": self.llm_api_key, + "api_version": self.llm_api_version, "temperature": self.llm_temperature, "streaming": self.llm_streaming, "transcription_model": self.transcription_model diff --git a/cognee/infrastructure/llm/get_llm_client.py b/cognee/infrastructure/llm/get_llm_client.py index 16ff5b320..1449d33b3 100644 --- a/cognee/infrastructure/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/get_llm_client.py @@ -20,21 +20,33 @@ def get_llm_client(): raise ValueError("LLM API key is not set.") from .openai.adapter import OpenAIAdapter - return OpenAIAdapter(api_key=llm_config.llm_api_key, model=llm_config.llm_model, transcription_model=llm_config.transcription_model, streaming=llm_config.llm_streaming) + + return OpenAIAdapter( + api_key = llm_config.llm_api_key, + endpoint = llm_config.llm_endpoint, + api_version = llm_config.llm_api_version, + model = llm_config.llm_model, + transcription_model = llm_config.transcription_model, + streaming = llm_config.llm_streaming, + ) + elif provider == LLMProvider.OLLAMA: if llm_config.llm_api_key is None: raise ValueError("LLM API key is not set.") from .generic_llm_api.adapter import GenericAPIAdapter return GenericAPIAdapter(llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, "Ollama") + elif provider == LLMProvider.ANTHROPIC: from .anthropic.adapter import AnthropicAdapter return AnthropicAdapter(llm_config.llm_model) + elif provider == LLMProvider.CUSTOM: if llm_config.llm_api_key is None: raise ValueError("LLM API key is not set.") from .generic_llm_api.adapter import GenericAPIAdapter return GenericAPIAdapter(llm_config.llm_endpoint, llm_config.llm_api_key, llm_config.llm_model, "Custom") + else: raise ValueError(f"Unsupported LLM provider: {provider}") diff --git a/cognee/infrastructure/llm/openai/adapter.py b/cognee/infrastructure/llm/openai/adapter.py index 2ad275e22..28cdfff4e 100644 --- 
a/cognee/infrastructure/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/openai/adapter.py @@ -1,174 +1,121 @@ -import asyncio -import base64 import os +import base64 from pathlib import Path -from typing import List, Type +from typing import Type -import openai +import litellm import instructor from pydantic import BaseModel -from tenacity import retry, stop_after_attempt -from cognee.base_config import get_base_config from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.prompts import read_query_prompt -# from cognee.shared.data_models import MonitoringTool class OpenAIAdapter(LLMInterface): name = "OpenAI" model: str api_key: str + api_version: str """Adapter for OpenAI's GPT-3, GPT=4 API""" - def __init__(self, api_key: str, model: str, transcription_model:str, streaming: bool = False): - base_config = get_base_config() - - # if base_config.monitoring_tool == MonitoringTool.LANGFUSE: - # from langfuse.openai import AsyncOpenAI, OpenAI - # elif base_config.monitoring_tool == MonitoringTool.LANGSMITH: - # from langsmith import wrappers - # from openai import AsyncOpenAI - # AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI()) - # else: - from openai import AsyncOpenAI, OpenAI - - self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key)) - self.client = instructor.from_openai(OpenAI(api_key = api_key)) - self.base_openai_client = OpenAI(api_key = api_key) - self.transcription_model = "whisper-1" + def __init__( + self, + api_key: str, + endpoint: str, + api_version: str, + model: str, + transcription_model: str, + streaming: bool = False, + ): + self.aclient = instructor.from_litellm(litellm.acompletion) + self.client = instructor.from_litellm(litellm.completion) + self.transcription_model = transcription_model self.model = model self.api_key = api_key + self.endpoint = endpoint + self.api_version = api_version self.streaming = streaming - @retry(stop = stop_after_attempt(5)) - def completions_with_backoff(self, **kwargs): - """Wrapper around ChatCompletion.create w/ backoff""" - return openai.chat.completions.create(**kwargs) - @retry(stop = stop_after_attempt(5)) - async def acompletions_with_backoff(self,**kwargs): - """Wrapper around ChatCompletion.acreate w/ backoff""" - return await openai.chat.completions.acreate(**kwargs) - - @retry(stop = stop_after_attempt(5)) - async def acreate_embedding_with_backoff(self, input: List[str], model: str = "text-embedding-3-large"): - """Wrapper around Embedding.acreate w/ backoff""" - - return await self.aclient.embeddings.create(input = input, model = model) - - async def async_get_embedding_with_backoff(self, text, model="text-embedding-3-large"): - """To get text embeddings, import/call this function - It specifies defaults + handles rate-limiting + is async""" - text = text.replace("\n", " ") - response = await self.aclient.embeddings.create(input = text, model = model) - embedding = response.data[0].embedding - return embedding - - @retry(stop = stop_after_attempt(5)) - def create_embedding_with_backoff(self, **kwargs): - """Wrapper around Embedding.create w/ backoff""" - return openai.embeddings.create(**kwargs) - - def get_embedding_with_backoff(self, text: str, model: str = "text-embedding-3-large"): - """To get text embeddings, import/call this function - It specifies defaults + handles rate-limiting - :param text: str - :param model: str - """ - text = text.replace("\n", " ") - response = self.create_embedding_with_backoff(input=[text], model=model) - embedding = 
response.data[0].embedding - return embedding - - async def async_get_batch_embeddings_with_backoff(self, texts: List[str], models: List[str]): - """To get multiple text embeddings in parallel, import/call this function - It specifies defaults + handles rate-limiting + is async""" - # Collect all coroutines - coroutines = (self.async_get_embedding_with_backoff(text, model) - for text, model in zip(texts, models)) - - # Run the coroutines in parallel and gather the results - embeddings = await asyncio.gather(*coroutines) - - return embeddings - - @retry(stop = stop_after_attempt(5)) async def acreate_structured_output(self, text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel: """Generate a response from a user query.""" return await self.aclient.chat.completions.create( model = self.model, - messages = [ - { - "role": "user", - "content": f"""Use the given format to - extract information from the following input: {text_input}. """, - }, - {"role": "system", "content": system_prompt}, - ], + messages = [{ + "role": "user", + "content": f"""Use the given format to + extract information from the following input: {text_input}. """, + }, { + "role": "system", + "content": system_prompt, + }], + api_key = self.api_key, + api_base = self.endpoint, + api_version = self.api_version, response_model = response_model, + max_retries = 5, ) - - @retry(stop = stop_after_attempt(5)) def create_structured_output(self, text_input: str, system_prompt: str, response_model: Type[BaseModel]) -> BaseModel: """Generate a response from a user query.""" return self.client.chat.completions.create( model = self.model, - messages = [ - { - "role": "user", - "content": f"""Use the given format to - extract information from the following input: {text_input}. """, - }, - {"role": "system", "content": system_prompt}, - ], + messages = [{ + "role": "user", + "content": f"""Use the given format to + extract information from the following input: {text_input}. 
""", + }, { + "role": "system", + "content": system_prompt, + }], + api_key = self.api_key, + api_base = self.endpoint, + api_version = self.api_version, response_model = response_model, + max_retries = 5, ) - @retry(stop = stop_after_attempt(5)) def create_transcript(self, input): """Generate a audio transcript from a user query.""" if not os.path.isfile(input): raise FileNotFoundError(f"The file {input} does not exist.") - with open(input, 'rb') as audio_file: - audio_data = audio_file.read() + # with open(input, 'rb') as audio_file: + # audio_data = audio_file.read() - - - transcription = self.base_openai_client.audio.transcriptions.create( - model=self.transcription_model , - file=Path(input), - ) + transcription = litellm.transcription( + model = self.transcription_model, + file = Path(input), + max_retries = 5, + ) return transcription - - @retry(stop = stop_after_attempt(5)) def transcribe_image(self, input) -> BaseModel: with open(input, "rb") as image_file: encoded_image = base64.b64encode(image_file.read()).decode('utf-8') - return self.base_openai_client.chat.completions.create( - model=self.model, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What’s in this image?"}, - { - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{encoded_image}", - }, - }, - ], - } - ], - max_tokens=300, + return litellm.completion( + model = self.model, + messages = [{ + "role": "user", + "content": [ + { + "type": "text", + "text": "What’s in this image?", + }, { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{encoded_image}", + }, + }, + ], + }], + max_tokens = 300, + max_retries = 5, ) + def show_prompt(self, text_input: str, system_prompt: str) -> str: """Format and display the prompt for a user query.""" if not text_input: diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 7058bdb69..205670b90 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -1,5 +1,7 @@ +import json import inspect import logging +from cognee.modules.settings import get_current_settings from cognee.shared.utils import send_telemetry from cognee.modules.users.models import User from cognee.modules.users.methods import get_default_user @@ -157,7 +159,7 @@ async def run_tasks_base(tasks: list[Task], data = None, user: User = None): }) raise error -async def run_tasks(tasks: [Task], data = None, pipeline_name: str = "default_pipeline"): +async def run_tasks_with_telemetry(tasks: list[Task], data, pipeline_name: str): user = await get_default_user() try: @@ -185,3 +187,10 @@ async def run_tasks(tasks: [Task], data = None, pipeline_name: str = "default_pi }) raise error + +async def run_tasks(tasks: list[Task], data = None, pipeline_name: str = "default_pipeline"): + config = get_current_settings() + logger.debug("\nRunning pipeline with configuration:\n%s\n", json.dumps(config, indent = 1)) + + async for result in run_tasks_with_telemetry(tasks, data, pipeline_name): + yield result diff --git a/cognee/modules/settings/__init__.py b/cognee/modules/settings/__init__.py index e705f8767..d7e67e73b 100644 --- a/cognee/modules/settings/__init__.py +++ b/cognee/modules/settings/__init__.py @@ -1,3 +1,4 @@ +from .get_current_settings import get_current_settings from .get_settings import get_settings, SettingsDict from .save_llm_config import save_llm_config from .save_vector_db_config import save_vector_db_config diff --git 
a/cognee/modules/settings/get_current_settings.py b/cognee/modules/settings/get_current_settings.py
new file mode 100644
index 000000000..3d6bad896
--- /dev/null
+++ b/cognee/modules/settings/get_current_settings.py
@@ -0,0 +1,54 @@
+from typing import TypedDict
+from cognee.infrastructure.llm import get_llm_config
+from cognee.infrastructure.databases.graph import get_graph_config
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.relational.config import get_relational_config
+
+class LLMConfig(TypedDict):
+    model: str
+    provider: str
+
+class VectorDBConfig(TypedDict):
+    url: str
+    provider: str
+
+class GraphDBConfig(TypedDict):
+    url: str
+    provider: str
+
+class RelationalConfig(TypedDict):
+    url: str
+    provider: str
+
+class SettingsDict(TypedDict):
+    llm: LLMConfig
+    graph: GraphDBConfig
+    vector: VectorDBConfig
+    relational: RelationalConfig
+
+def get_current_settings() -> SettingsDict:
+    llm_config = get_llm_config()
+    graph_config = get_graph_config()
+    vector_config = get_vectordb_config()
+    relational_config = get_relational_config()
+
+    return dict(
+        llm = {
+            "provider": llm_config.llm_provider,
+            "model": llm_config.llm_model,
+        },
+        graph = {
+            "provider": graph_config.graph_database_provider,
+            "url": graph_config.graph_database_url or graph_config.graph_file_path,
+        },
+        vector = {
+            "provider": vector_config.vector_db_provider,
+            "url": vector_config.vector_db_url,
+        },
+        relational = {
+            "provider": relational_config.db_provider,
+            "url": f"{relational_config.db_host}:{relational_config.db_port}" \
+                if relational_config.db_host \
+                else f"{relational_config.db_path}/{relational_config.db_name}",
+        },
+    )

From 9193eca08b0f7462bef727ddc18177ebd3fd7717 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 25 Nov 2024 15:00:02 +0100
Subject: [PATCH 15/17] Trigger GitHub Actions

---
 .../databases/vector/pgvector/create_db_and_tables.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
index ef27e2889..f40299939 100644
--- a/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
+++ b/cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py
@@ -10,5 +10,3 @@ async def create_db_and_tables():
     await vector_engine.create_database()
     async with vector_engine.engine.begin() as connection:
         await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
-
-

From 97dbede4c4377a7ebc6b99c89e80d5721121525b Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 25 Nov 2024 15:31:32 +0100
Subject: [PATCH 16/17] test: Add fix for telemetry issue in gh actions

Set environment variable of ENV to dev for all jobs in workflows in GH actions

---
 .github/workflows/test_python_3_10.yml | 1 +
 .github/workflows/test_python_3_11.yml | 1 +
 .github/workflows/test_python_3_9.yml  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/.github/workflows/test_python_3_10.yml b/.github/workflows/test_python_3_10.yml
index 7f762d778..5a4523853 100644
--- a/.github/workflows/test_python_3_10.yml
+++ b/.github/workflows/test_python_3_10.yml
@@ -13,6 +13,7 @@ concurrency:
 
 env:
   RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
 
 jobs:
   get_docs_changes:
diff --git a/.github/workflows/test_python_3_11.yml b/.github/workflows/test_python_3_11.yml
index b05d901dc..9c79fb0ff 100644
--- a/.github/workflows/test_python_3_11.yml
+++ b/.github/workflows/test_python_3_11.yml
@@ -13,6 +13,7 @@ concurrency:
 
 env:
   RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
 
 jobs:
   get_docs_changes:
diff --git a/.github/workflows/test_python_3_9.yml b/.github/workflows/test_python_3_9.yml
index 47c5ddc41..9c8456536 100644
--- a/.github/workflows/test_python_3_9.yml
+++ b/.github/workflows/test_python_3_9.yml
@@ -13,6 +13,7 @@ concurrency:
 
 env:
   RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
 
 jobs:
   get_docs_changes:

From 66c321f206a10e1191a7cdedc191cbecd22cc0f5 Mon Sep 17 00:00:00 2001
From: Igor Ilic
Date: Mon, 25 Nov 2024 17:32:11 +0100
Subject: [PATCH 17/17] fix: Add fix for getting transcription of audio and image from LLMs

Enable getting of text from audio and image files from LLMs

Fix
---
 cognee/infrastructure/llm/openai/adapter.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cognee/infrastructure/llm/openai/adapter.py b/cognee/infrastructure/llm/openai/adapter.py
index 28cdfff4e..1dc9b70f5 100644
--- a/cognee/infrastructure/llm/openai/adapter.py
+++ b/cognee/infrastructure/llm/openai/adapter.py
@@ -87,6 +87,9 @@ class OpenAIAdapter(LLMInterface):
         transcription = litellm.transcription(
             model = self.transcription_model,
             file = Path(input),
+            api_key=self.api_key,
+            api_base=self.endpoint,
+            api_version=self.api_version,
             max_retries = 5,
         )
 
@@ -112,6 +115,9 @@ class OpenAIAdapter(LLMInterface):
                     },
                 ],
             }],
+            api_key=self.api_key,
+            api_base=self.endpoint,
+            api_version=self.api_version,
             max_tokens = 300,
             max_retries = 5,
         )
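
The patches above make the LLM client and the embedding engine configurable independently of each other and route the OpenAI calls through LiteLLM, so a custom or Azure-hosted deployment can be selected purely through settings. Below is a minimal sketch of how those settings might be exercised; it is illustrative only and not part of the patch series. It assumes pydantic-settings' default mapping of field names to environment variables (e.g. the new `embedding_endpoint` field is read from `EMBEDDING_ENDPOINT`), uses placeholder endpoint, key, and deployment names, and relies on LiteLLM's `azure/<deployment>` model-naming convention.

```python
# Illustrative sketch only -- endpoint, key, and deployment names are placeholders.
import os

# Point the chat/completion model at a custom deployment.
os.environ["LLM_PROVIDER"] = "openai"
os.environ["LLM_MODEL"] = "azure/gpt-4o-mini"                      # LiteLLM-style model name (assumed)
os.environ["LLM_ENDPOINT"] = "https://example.openai.azure.com"    # placeholder
os.environ["LLM_API_KEY"] = "<llm-api-key>"                        # placeholder
os.environ["LLM_API_VERSION"] = "2024-02-01"                       # placeholder

# Point the embedding engine at its own model and endpoint (new settings from the patch above).
os.environ["EMBEDDING_MODEL"] = "azure/text-embedding-3-large"
os.environ["EMBEDDING_DIMENSIONS"] = "3072"
os.environ["EMBEDDING_ENDPOINT"] = "https://example.openai.azure.com"  # placeholder
os.environ["EMBEDDING_API_KEY"] = "<embedding-api-key>"                # placeholder
os.environ["EMBEDDING_API_VERSION"] = "2024-02-01"                     # placeholder

# The config objects are cached with lru_cache, so set the variables before
# cognee builds its LLM or embedding clients.
from cognee.modules.settings import get_current_settings

# get_current_settings() returns the active llm/graph/vector/relational configuration;
# run_tasks logs the same dictionary at the start of every pipeline run.
print(get_current_settings())
```

Because the settings classes declare `env_file = ".env"`, the same keys could equally live in a `.env` file instead of being exported in code.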