From 0728ec31e83752a730bd976d1eb2c821ca36c12c Mon Sep 17 00:00:00 2001 From: shahab00x Date: Mon, 19 Feb 2024 23:44:01 +0330 Subject: [PATCH] produces the correct html code and summary. Next objective is to be able to push it to wordpress directly from this gradio UI. --- requirements.txt | Bin 0 -> 20608 bytes scrape_amazon.py | 8 ++++++-- webui.py | 32 +++++++++++++++++++++----------- 3 files changed, 27 insertions(+), 13 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..b053218f4cd2c28018869393b51807a716072776 GIT binary patch literal 20608 zcmd6vS#Mp*b%oD!fc%F5c?>Ad6d53Z<8HUzMmH?GZ76;sIFsTik`hHezsQ@Q}9bbaL_%wcI|npI{MrH?G5|G-Ec814L8H%@L&2`9o`HZ`d%6Sb@-QIZ#dA` zksywSTm9=^zkym2#9D(5+~IIIoC)^6Jztw(IU25pPs3$Fy(onOo^R`Wy?f67Mc-RJ-Rhp+YVAQ=%kHcX?d*T2{ZB;Yo~Vc% zOsVB?q1AhWxouB3+v=k3QqP`*n&IP0IIi1X z$ofz#@3kIWmJG049qm_oexdy~W;(ndP8%KT`uDB&C*Bh;WWP&&e)x62HOT>42IfpK=XzRv+MD;q6?%_t$F_DTe%Tq@#V?5q<{z8y`LGl=wG(`A>T65i z8*QJd6p$pV&zjb3Q}KoUCTBlQj=+AUer);H|yf~NOInkj-EuvjlO$1@^_Ed5<_3u3$~60 z?TEK^t&81ESD8NI!Rsm85Y{VU{j5Ds<<+O+Zc3Rk#+Fw^g{fL?_Cf3|noPr~q{s!C(uORW$}8XuS4O%7odB$$u;rolPJ8<^Ibv5 z((xW5I+()2^VFEPZE-}r!{_16(PwHY_!>FkW+oS;9o<|1mH|GCmFzYBzf>diez&Jx zuVqmmCFP4I>9xi~#cL$fV~&+$;lxH_D76Ken$n(?$b@X`m;%-Q9#f9Y9a#)V&FPGu z7!`bmLJCw@72TlqQeWs2FQB%GT~kRF9x?%O8Jb+Vp-I;XST|N4FNkHlR;DZR0xX7% za3ksLNK(^?5uBDLRT-ABA&$2MO*~s9O#jPmyeob7T1xjt+k>#J zOYXOl4mI_2l>Xyg!wVI}O0x}quZWZ9dD)%tvMZ@k4-$7Szmc0Q`5rX{(Ry3!_l6JO zz|H)=;fE?_TVGVosh{U9{i?+cq5yUG)mOW7tc;~o;p5M@;=6h{TbyBT_4!SMIuR6| z30dd08eJ9djwDELM}ECCCxgX1m|J4fx@?heG)FZ@MU#wb8u1$ZhFFv?5+3ks*BN{Y z_2i&qaZL6ie^;&P2#{R#*y3g?ROgspDvxcwds6R$-$TuJl1}<4d_CQPeVvF-#ZnOn zpI?$?P|cT?h&0AWP9}~(J-VtM5_*D_3~@8#2_IxC-#6T@8r{V^^G3p}f`@Zi=PgEjYB-F$xN+}D&WId{w&v!r*#OqwYY7UiDRV-ey>50721j2cmZxZgZk@Pr zE7;SvlkEgv1Bpz}C7JzaJm&k>CDZpD%Bp*+mh?%jjyJfb#Pf;FuFcCEmYao;mB{=( zwi{7#(x``G`eEv2*X&SDM2UQD(ju63}NCHX1X zkksW&0-3|yHO`ks2fgR1sM!|<&X~(u^+h|7SLuoIjAi}hJi|mJ{iQi&FIRnN1m4|e zT{R5!>0rE47PKcF=`T}8e7_Vvbnoo`dPp+RqgJ{erm|a-Y?*Yt5%yE<{qMp~<-FFk zg&plkYxD#}Gb*iZaZP{9RD_JXr)Mkrd7u?5qMOgq#etshH>hKMcA#~@(+h0LlU4;m zY$O|C7d!d{+}nD#p=W%WKKeNg5g(8LpX$i8y-rK>u-~pHvW`2s6-f^H(d`{do^<2* z3f(rnH9zUg$x?WhYiKlxhM8+p(eCSuYHDAffp?=pqIqx~N;gbO(Hi*O$Dk*y)$u=` z*8AbT?Cp!Jm#FUfXYZ|!cKDGq9~lt8xs%5Qn`c1Y?^ra!Q6QzNvs0ajA2{k}P z4(y8P>WD{Nztr<1?P9t2Q8uyOv<2362hZ5caU>iO0f?Yb;>0MwZ_#8*(*G` z&ZlAtKc<05cC6Lpvwg)LXmLHTxU#9v>|86(1beS8h1!%k$~e~2oi4)c7``s;(?Ozb z=3-m=J*{Y{P^iqtvk&;9laGvk5EOOHZ#qkXOCaDfk|Yn(qZ19C8G*u*kf?L&u{2A3 zrPeSG+kZbVrgk`e2^QZPmy!x$Z zz8~Ic-`aaw7wXU8GBZh{lg)Ww7rg3GE+Nf`&X89 z24R=qisHYB1}rojn={iu{Xm9wg~AhR`P!+Tle3@*{Z;LTbb4s&KlGdVO>7|2!3LP} zGE407E%s4xyE3HaztM_NibR+TX75wcz1r!#cs!M6;M}|w{HE}IOaJgzwzH>c7d6j` z^6F#rx|#=^iXJegJEAgj6keBhh;VefeZMfCL@oCBaa}fEky?!C&Gd)<25V=p=}zg4 zBZ<;l>Y!94K=71GAb#^ZO^N?3OPYGP;jpA&vCZ}q`m*@IjH3xS*vYmXm@i*-$vh== zP@SFV%iVl31v5RWwopzq>sK)GpbNd7SufL+*K3~Im<<|kL6qu8MA}ea)#wuOZ*ZVvBj+Wark-oZ&}WF;-!xrh6@yUIKg4o zWDBA~Achc~+^H`151;P!lRLD}Jm^Ulu_m2u$!Ev&=H=#Dbe~kR$I{gD*9k^pvo48`bSj*OkgchBfbS!6QF7g+ z;+n2xhP(p92|4FNcmNR7+_?yj{?oOMPl5XRglz=69PxbS=p8v_Jrmc}>7qktM$RuE zn?2w?P=XayX3TTy3Qkr6K|V@-dM@k_`o?B2+G(AqJLs|^1!u-Yk+T`c<9(wS7%J3UFhyOIkMqzfuY*Q}0Mco?VLp~RI@c;{XQxee(dg(YdzRT=aX8B4cS ze}S13YOIB61F)`?*aaIfzgUTH9bk=AYTQ&{hCHrT4Go8(3vJW6fw^jC@>J>VQe2mB z+s?hCPdmAx5w0tfU(9;K*T!vT)ccYQ)9Q11=9;J)$L(NDZ1xO;tV-p>zozr_3uS?g zXQIh`<{%<$h?gzN@J=h4sZ1#VpZ90-L03-oh3zs+neP9yFkxqz0wE==xO@tI^$?46 zY&|3K;N$${7p`eE18yxR!Tk@4miY47nt%b<+vFm5*4#?Imy~R|hjO z$5{i5DO<&2FCt1X0V%?l&GB29;FQLjZ|18|pITGJWnu<^-mAs*d{NpJeaG8eNn}gB%W&#am>CejJkzueJD%F?!JtBfL6ElQ+ zc?I-0+7;iku=hk%Qp0?2mj{V>3zGWV9EFAP>OhDK57q^5K(c+1Oy?a-1$j+bno*yR= z(8J__`6+)hqsLdKH_aAJrqB&`ZmIua_?>c8ByN3CQ8JsZ$uPR&Rs@mXPh;n$OLA*8 z&fNv(xXgu#Aa!ez-EDJxekGqC-xhr-SM-|YCKA2Ui8v>VgJ~?y#rUIPR{)kCl`OYb zO2f;I>Urvxv>QCyvxyp&fxtZs`xQjmBv7{-9+&WZKVb$x4Zm)Qde2z;fyU zRgOfl>)_-jGC9=I3d-3xc7FS{Hw*?Uc}Mu`Sy#mjii~VaCn8mzOn$%)$@;&@Quy7`^nSmnKqC} zJ#lYUv*(KA?}U|l&J_dl9^*u7bz0&Yz)^-duzN!)8sDUkF04^*X%nlFV)`{YYieI| z5a*EQYz+5C6h*u4#GUu4-U{B;tYGQs3 zw++R6*qh2HcSp$nxjkZ=El*3;eIhx&6bXI9u!pv7Q2F?jkV!TbAUsZOgUTLR@H4!k z<>-o8Yy2U7C$m26rRtHYHgc1jlP*uT9OVn6r)$h!#`oTRio?wh>N|e&)&YAvx|9s* z)rnF*aRUM`~JqAb=!o`EO#>QgIX9g!rnn8o(`x(T4E=&_j+=Ru#uY)ad7G}C(CYpVvQ zbEo=funD1IWcyOwQiO`F|g$Q zvTC5mXC-}HdenaAHP-2z`cAZ;G&ykgS9BP6@#jex?~UXQC+v=BP!g*-m8Vppv-iEn z;SaI~s(@eg_0x;dtG7`4wPR^@yG?J%ck_AhQ5rRFPMR;i*1tqL@&dCj=EU>;F8Y~f zJK}alH9yCOt+AdrPacuvJIx?|&|a}AWK=SA?qb?Ymjeuvnc5v3P-D5~0t!kh?s}iI zmhpMY9>nv!jllCiN=ut<2it&S9Py!^E-i#|Z117qIGNa1$Is+|N72fy6i(iplhB0Ct}!B?Ut~vE z*vsz#CAU5bF7T#=nu0r4oL;yhq34>XB-JbT@QBt&qRDybpN)~vV>X3!GIW9TYQEvc z&zC8L$2z;k-g@uocO(Pj;dx7q7}3Xxf@Ri2)#_^BmW*xLqtJE_o4JVNf1QZ1F4bYb zf|=lbN3)}4`D1l6OU-)6b9{U&5xJ-5>DBg4)S)IgmL$ljtD>d1Z)$hweVRoovM3!U zIh9<>9D16?@XYs;8t+~CE=E=EjwY!BIQ6-bmJ^NA*MZ@$nwb8q=nIeOnJkx&vO#in zN!Rho*}}f(+JiSD=^he;ss;gyTOgbrx;LS7qH>ng9?e9w%D8ICyAVH`$j#xpQ-2LJ<-0`nqsy$(O;8Q$TNOp zXMPSmUHSZNAlLe6ku^kLZdUQOY)+aYm9b2aFmJ#{Zb%ng(Hz}9Q!1<~(yuBL>{Yc} zm&sk+&tV$p9zDBLM?@nz9m4`Wd;F8;?mx5%ec*d)u`z0|3&B&}a!&PnEv;xJ6N`_s zFyaGuZ^v3%gg;q=$cMhT#ftUtHI95tn&#=_=CG@v#gvZk^ZX+mriR-GRIS(zGaP#M zY0d%@^W(_TIT-su>iCSKEm!fWh5VICWIaeG2O_Ec51h(qN`G3zkV$;V@~IqoIT)$ zs>NK=15;1ipPBJj502MHbH1a3#K>Xv)xHx_SpnRhalji>6`*s60X~tg&kmzGCMAol z{B?aAJAIjDHNS^XEyP_m?woTAV(f98_B-nk7hS_c8M&#~J(GFtjc1Nsr5hqPeuDlM z)9#4xx{ZFi;O&jnjr0a&UuqC&omNG78M8vBOXImWUYv+&DBfQ{%jD|VQ{`x2m}>I` z9l@uon0gC=XM7H?v2c9vK=zO4VIRD~Ml7xV4Q$n&7B(y-)ApL&*!JeJV|}(cAY3Kb zHj9GiEiU$1urK)vwvY5|r@yE(`AHSZh4}P6objxOsBKE{wM?N{k91wDk@>MGIHu1o zKVw#aw2c9%L;$P@|3F63S3HMqfvF>~b&kt^f#7{D;?hgD*LSp33dh3AyA@BO`bu<- z^BC{Y7-w?Vwxr~_9Z`?-=jnMf{PcIWVm+3apHfG=--zvbCV)h^tIpddsWp1(`aEMr zYg2X6l}8(ni`ZsO=iIeM+I^nl|3pAP)QRqX^0w3MEE0#dI@@DCr=HBAusr(?daoEk zJwVKKN8v67ue5#hl&Xq;)ssqWjN1nfvokj zFm6eT2Q6AoYcF<1JLzetYTRXFE6kaRujA;?nmSc-PKV0X;Xky?C;i6OZJmMSCVky* z59iTGpn$-u=?5YKU`u-57YP%es01o=nv(Ip=%Hc>xJsYK#checim*m)6K~pT{$B-W zL1aSa4WFDzc)yQ=wA~P6n6#E&eU}&eb$w9uIF1^FZ+=jP!51feWDMRNi;q;4!`>p{ ztedZBpaktD7a=h!@x+{>qn8VlGyFF4?_&+mT;Wh5Qbz%sI}hxH2ge_K=uAqU>+b4p z8lr{Ixc=Gy=fMBdk?Gx){o&o56*xN6?fXtK9kcK4PrLUqhUe~FAm^U%C)on=dheOs z()T`OAoL5*bltVqRMM+*vkki=Dst{SpF^I+*R}LOg@N8uYuP%vuS_40w{WVlB&}6F zUiLwqL>{JtL8pFur{b7xIylMI*z>+@j6Ja-?AhAqf0|)GJp1YyIm~zTycI_#-ru`j`x6c_AsPKq42O%1w{$N&NaOIl12G-&so~@}L#wtLMs*>Y=S9 zWA`WK@h_hG)^u`EpCihT9>g2do!65EZ@vc3`bqc^b@~MEDW6E z=KJQ)yf4ATavVimeLd(?*0Tq(CESvm?cmKj(eWfRv~$;Sidg1-yotTTXbedujkf zBFD06L*r>kBACGDIo5m4=zeUUvj!iuuRO>H=z^*a^;;i#iiC3x8ZF~Zmg^UJ16~A8 z=~XRt^kf{|mB@{9ugu>6qbGOrqW5}^mn5>IWvH~K{mv*7@9(g&i|?kJ5+*-4q9&Zv zePth?nbs`Al_Z&s_(simF_T$xDJ!9s3 tags img_tag_pattern = r'' # Function to replace tag - def replacer(match): + def replacer(match, url=url): img_url = match.group(1) srcset = ', '.join(f'{url} {w}w' for url, (w, h) in img_dict.items()) replacement = f'''
- Image from Amazon.com + Image from Amazon.com
Image from Amazon.com
''' @@ -39,15 +39,25 @@ def write_article(url): text = scraper.get_product_info_and_reviews(url) image = list(scraper.images)[0] - # prompt_for_ai = "Write a summary of the following product and an overview of people's experiences based on the provided reviews of it as follows. Format it nicely and professionally in HTML:\n\n" + text - prompt_for_ai = f"Write an HTML code that includes summary of the following product and an overview of people's experiences based on the provided reviews of it as follows. Underneath the title in small letters write 'This page includes paid Amazon affiliate links' and Include a link to the product {url} at the very end. Also include this image {image} after the second paragraph. Format it nicely and professionally in HTML. :\n\n" + text - ai_response = aii.ask_ai(prompt_for_ai, model=llms[1]) + # prompt_for_ai = ("Write a summary of the following product and an overview of people's experiences based on the " + # "provided reviews of it as follows. Format it nicely and professionally in HTML:\n\n") + text - print(ai_response) - html_content = ai_response - # prompt_for_ai = f"Take the following HTML code and slightly modify it by converting the names of this product to links to {url}. Also include this image {image} after the first or second paragraph. Return a nice and professional HTML code:\n" + ai_response - # html_content = aii.ask_ai(prompt_for_ai, model=llms[1]) - html_content = replace_img_tag(html_content, scraper.images) + prompt_for_ai = "write a succinct summary article about this product. Format it nicely in HTML:\n\n" + text + + # prompt_for_ai = (f"Write an HTML code that includes a professionally authored article summary of the following " + # f"product and an overview of people's experiences based on the provided reviews of it as " + # f"follows. Underneath the title add this tag

This page includes " + # f"paid Amazon affiliate links

and Include a link to the product {url} at the very end. Also " + # f"include this image {image} after the second paragraph. Format it nicely and professionally in " + # f"HTML. :\n\n") + text + html_content = aii.ask_ai(prompt_for_ai, model=llms[1]) + + prompt_for_ai = (f"Take the following HTML code and slightly modify it. Underneath the title add this tag '

This page includes paid Amazon affiliate links

'. Include a " + f"link {url} to the product at the end. Also include this image {image} after the first or " + f"second paragraph. Return a nice and professional HTML code:\n") + html_content + html_content = aii.ask_ai(prompt_for_ai, model=llms[1]) + html_content = replace_img_tag(url, html_content, scraper.images) print(html_content) return html_content