From 00d8bdf3cebf131b5079beee2d2a53a718f3f819 Mon Sep 17 00:00:00 2001 From: retoor Date: Sun, 1 Dec 2024 22:02:32 +0100 Subject: [PATCH] Performance upgrade C version. --- Makefile | 9 +- isspam | Bin 33312 -> 33136 bytes retoor_c/isspam.c | 282 +++++++++++++++++++--------------------------- retoor_c/rstr.h | 4 +- 4 files changed, 124 insertions(+), 171 deletions(-) diff --git a/Makefile b/Makefile index 078b003..22f42fc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CC = gcc CFLAGS = -Wall -Werror -Wextra -Ofast -std=c2x -all: build run valgrind build_risspam run_risspam +all: build run valgrind build_risspam run_risspam benchmark build: @echo "Compiling retoor_c project.". @@ -39,3 +39,10 @@ run_not_spam_risspam: valgrind: build valgrind ./isspam ./spam/*.txt + +benchmark: + -@rm -rf books + echo "Extracting books." + tar -xzf books.tar.gz books/ + echo "Extracted books." + python bench.py diff --git a/isspam b/isspam index 4e85c07d49ad11fa871713ef503b6014d23bf852..adb20b9a2006c43520bc06695e3109bfe0c0503e 100755 GIT binary patch literal 33136 zcmeHwdw5jU+3%W31|nc)5YT`iBMurb2?-Dk0yPN$l$bUGI9=yDod}&EDUsb1$=6EKD7F>|2ad-7bMt$&4Kv)M}!^ZI(X(zdhgcah`qNCyNC=0bX1O*gXQs76j44WG(6RFUNb`TCyap*(wNmIY zQ@5<2qN=>hAjkb8#Qx>4bSJx9mY1s?%<(6SXLCB*n-xy$c~1fyMD{*b35=;I7t-+IdTw0PS)0Ca`g1CbUk(Gv3e8GgLYtR>jx4?t_E z14LbxZ*31p{GrAbOFM!Af1|ev#(F%hoxy;tvPZCv$cXl~Vl>Ui$eG9gvq_qVaWpTD ziWDJZBxZ#irRhH!cWrDXVqDEHk*Vz`&`7*iVw2n3(IsuHU8eu{UH4*`&Sznnj!q-~ zJa&^zPvZPmb{o=ksC9yh({-~2enhSx(zr|D$4t0W@)vZ8e8sQQP7^-qa=}+@!lz5T z%!HpI@k$e3Eb%H6UMg|TgfEbIg9%?G@vBVuw1p zP52cO-)6$sO1#g6H%t6J6W$^5?It`b@kdPf4HDmB!f%uK6DIsFiT9iE`y~FH3ICDA z_nGjACBEN;KPvGd6aE{C>n41c#E+Qp0f`?o;V(#>na1PSBwk>`-UKNqm1$^cz}(sWl%VLk67IbL!9yxLtvuM-2E>1Afea)7nlQ zY96C?r&EETY91r}bOT->_0T*hhZvsgFyO}ZWsw0VUFvWeaK7%88N~*i%1}p{0as%X zU%7BXtB9WyT@bPJG#u^Ox1OtAR0jIT*I@THRLIr}h8E}UI?=s*M4frMl zev$$2HsF&C_%;Jh>wk6h8Sp6z1ijCIpKQRl8}L&M_#+0q$bj!K;I!6P#}fwpR0V?e z8*t-A!*d3FhJk;d0YA-vkDiWtVAKPn9vJn&s0T(pFzSI(4~%+X)B~d)81=xY2Sz>c z|DOjwwx9Ne)^n&pi`!nmlre2X@|H}RHkOj1Oqw>7 zl1-U3Z6qZx&!lMsDOr z_+|8$)a%RpSFf#JQ@ytO+G@{$7GE+Qk)VH$okG0{y%N%IC}}PB3`+hTgog*U*aygZ zKxF}ci153`_+Wo@jqCW$ZrAauZrJ{4qwDwpg0mNwOZ^Qt>?WX4UKi> z{pHVVi6w7CMQX~tiy8Z*rBv|)kHL9b!gdRhg~uXOFm#;^CaUfUsUiDJ6vEhmq~O1r z^H*F>Us}HT-{4ny74@amYCrJwfEl3%Q~pv`&BAMj2qNMT{=Urn;tKC{hMC<=~Fq^8$m z`$gN6{qWlZ7qfBt7RY303jdUU2Bm|xhrnae{{u|PMpRO?_ew4HC35u!l0~ab)hsj4 zLkq3edTfh8EuiNTr2u=A*Ja9%5YH`j|+RV^lwc?7C}RtYgjm=gG6I` za3uo+5i$RU0xRC=YHQZcoy-+R zi(pCD>VdvNR%+nhJ-ko92!>`1|+~dB?TH z{jPrD{Us=8u-4T;E*|XVDAPB?A@-g32%gw}c>L6if5_iHg9pjqT4If>L9anTh*pgq ztdAw@WBqX67iZG^!wc6>hhA!(snj>(_i$%r8fd0=^B(didL+epc00v}@6#XrT#X;q zSE3~zhGu%m2y@<&4LDCvkvWwOA-$~HKy_aOXbvKI@7OJ-vL=%vOGB0vOZNC>-&%o5HCe}(54{jVZ#Ir^_N_Lok&;vJ zqsB{9egzf|$vXVx$nT{58}Xx`@d+{U>jggX(4hac6D8^~%>Fi7l=%Rof!2F^IacOh0l|=|gt9Iiu4_KkDE>3CCetc|TuWuTE}9 z`T7eGGE9(Z`{EbUG(Sb_DJe#+XkK*?)mKv?JtdPk7&}O7rq8R9k*py_Vt&=)Gr@9% zU}Wzns-Hn}Siu=*W7(e6l)Efwou}!-QEw+sw95?0{!;3=U(#Mss!IW_3aXGmYrKu; zPgIN_hV1aKP4(gNQvRk`CS1rO=b&6IUgyx_i}%A$P3RsVThF6&edFI@!J@S+^tu+m z(V_g{6nwS<&a;wudP>G~F!m8-k5uvcQmiB^kej}~6;nff>=O~V@hc15@t{Lnbc($< zfeNokbiB$|Y<}L3~m5IxVbwY~Mv1>#DbK z8lh9S_5g8OZ2t~2{0Pc=0w({CMQG>dHN<*V^yt$D)|V1N+W83k<2;Q@sY@fPLqf z;lCQfU9N|}Q@|p1)s61NYzLepaAF_6Fx{chA3L0=F$z*pdM zo-)==jYqgJ#03;Ot$x#okV&*5Vm{KXB##)N5#iG%XhJQvTiZoJgrgDQehyknc7xh? zPQs9jrQMmb95SK%i*IclquGD*m(U5@aK*RVGH4-2tg7G648a#e8ZmJQm;5Rwj=_rL zX5Q$0exUt|`qwufe%HmeLO#B9S$&0xhAzk{4)+coo#tqjAU= zcX4B0r}-}qE_dwzLg50b@JvoEC)z=)?K{Wz_NUvc9*g|NXs&wuPxhj@tTb*6S|wNX z?fRIG1<1dlk=)oeNv}cNF*hvf*=y17p{m5UP^#qFMdZ95+pmb~UksDQ2lZKGc^qzf zio7YJ)v^rrs5l>Lv4!(C7~JtCw?PX0^@-&Wvu&YN+~x;?*DSccyauQGPocDaQJ}zn z%m3tK2h>u1iy-xNea{|v=5q*NNK&!c#Uv3XGkW$E)0~yral%#YIOGn$fZ=EN1t=;p zr3t&6DiQ-QiEnfw`$3Vt)R28CWj}`146=Jfw#$%RM%fJ_yIEx04A~PYdoj#6_xJMt zs_xWz8z*W{;6ZO@!R{|Q(J}ko{R3h?<{g19*3oB+%!>%oH|n_4X$>W2A+$z0Ea2Va z2d6?|k8LWYK`o#o^o>07Fgg)w=%?WAv3;}<5*UNNi%KB5pOWYxzXzoz70a&SGBGaE zT4r0VTFT7+08YZv2;)?FeNfb$rD^Z4G>fI;*<|{_TJmTTWkN#l)2N$MPJvIKVN=$_zbl^ zuIqh1V&8Uh^^-IsX+49M!7)NGQjxgm^X`9G?Dy{O{=!*}$$^H{+JfED7rVbGjlKx& zEw!4v}Rxhk7C#&==I;_TwN4}zcHk5q;?t1ln?urfA`B(0`1@%i# zqRBinn;YVd?-gtD8LJmSa_gIsHq3Z)5QK<_20)qm9F9cv7U-&teW1mXTD)@jWRz>) zc^QIjH>Ss>*j9;DV%T%rcjaM*d}djE!gt-V!P?(K3`v1A4|#WcfOU^a|qqO(dE#8^dHooVsmR2 zJfi}<2j0g*%uZhEv87NS+T7DY>l4c?)q{1wW40qmi(%8=yPU?^_>>hDG{PiT#ExTx zbYl`IdbZXz0ZobPw!aKdZ|Pg?jGVE}0;znLoB?~$p`HXoqU&=Ssj9lawrqU8e&HYN zz5fQZx`$bMpu)0Z>u2`fEkNq*JF&V;lzcFsih>#{=+oNRm-Vq{Ti}l<>wVMS`)hFO zAEA1*7T{jT>{ClYsQg|>e&2+75q_`3h?4F2*U0g{`uH471n~Re=)>-KzKhqK8xN&0 zeCu+=hZ}=9I^RcFlfH%O!(oFU7JcI1QYm!P322#O!w@UhSFn?}+YhNHJ4iPwI{afa z*{#ZB!@odaE+&nMDSyQ+EESkZ=uKXPeyr9j>4iXneb>z1HzVZ;Lp=|5cgKrSxp$X- zR_kX4kwQKAKAJnuY8Fo49zD~(Yb_#huci0R=o{wX$wYisv>|RqJf$~-FyN{+=XbRD za)+|L{IG_mDev#o;*(wO_~K`(g=y#O>tQ|YQvCz=eGL1e+`gCIQ}*qMo&*Vxg{EN` z_&N^(tR$(q77m4eF8x|$@T)3%_+3MXofm0AVd|TJag>+5EeWKwP_;`?Rjo*>a-LYeax*-te!3G4vE4bIG0$~XH zlMAy)DBeEeZWh&6|J}QpnpS~d+?TTV{v0($4=*9DSPz0w4<7?6dN|(}wiO~x_QKK) z`eP8UL&L^l+|cl;B96D=YPH`2$AC?SQ=}N3>Cd0lUS=tX4%IK*6M0YNRnu-WgsVl^ z?Z=uAo#y9rxea%LSo8z$q*AMEW4gL>LxIElQT_SEl$i1=CVy4^w^6mEE4%uly;qfg z_7s+{jNj6eOZk=H#^ugNT6&#=b8=<$nq%z180$$@fi^dS(66iQn3IWT^(mZp9htkcsm^-~mEepo*Z4B2_8+SQ~g zG35)~GAwN8V$n8mNhE(5NhU^= zR&opBu^H=mcD7S~hI)}xP_4|Mk-Q6XeJ%Enp2wfq662ToNA2gl0IM&sGj_Dve$L=g z`$Sx{9JOD&`>6eD+e2`c-U_2g=&0R6*E5NVb0za$Znsn;1i~b+fNF6#GB&pV2cLh1 z3rOFeA#^$(<6oP;gF5Nkn;Yu$v%kD_$W5Y)Dt!o6r;ze z2SzVZ)YjCx?y1EU@o^}whHMm;dzxh08 zB(xc@)7y%-36+k^*pue>@wWlx84NW$eZgoT;>1gxoMC?;;t%-9gpQ8rI5zt%5O#45 zTT#Q7*RaMK)=m=EwNo3>YW@gk^M}|*e+RhyY-5`@ z683g=F+bi-)gfhk(NG8;-;CEz1?lBj)U?<8*>%x&6yXa+eeEbO67u>GhAiy!c6P!u z%)bdW3$wQNke^ZIqhxN#8wh)Sk@jGK`NPzbB--f>cw0%<7le;|czuwe)mFmWM-yc({M?klO z_JdaKOQoDA;I7`EQmHP`D$q9IWiO{v`#~LlPNj;m&9x4+7PKF<3$)>tRO)`vA<$Pq z9j~TRhd_6LI`WYJS}Iiqy6=rtYCCA(o2k@`p#7jaXxV{OYC=9^U7*FF+d*qV*u|mhP!lH}p<2M#`vkRv! zJZGM37Ems~29(-}3lfNf^hWTvANY2Xmj@oLXSd_80d4mdVao~~32RMZ(d~J)h0a^^ zs|$;JY_)}D*4xGv76Yj+EUGDV)D#xfj&Cp_%f~~!2!99gHw1ln5FEFW3LQPxtHu=; zTsEF!=NSGbBQCC$dAC|?3yXU4mKQpM;|hza3mw&k1=UbQyx6*D8hFV*(v#u6AZt(sNyi^ZS zzb4ciF^oUzi~Er0$GONu^H{4;^-Bv2P#co(2ha5Vsnoa9JRMw~w?Fc#kT)xxwJ8*PRs8BaaQWg$>pl zM>GqyBwcHus|&i`|EIcS9jmNA$+!7Mf>AqDr{t^?y^)*9BCFa#e%*&r-$Mr~KD7D|1k=t+C z_&gisd~+~ehCnGpIonarA6jph81Qk9 z`$PQbS}N#NnO5)V#IsxcAiL(=eGIpMISR(#Z~tk944`@PpP%bRaDF zF(mWVCD-fHKI*S@(0oG26hYFzQj8r52#CUg4q8vrFoGxFDeTwhPl&BxfpLRTA4moGXDXckES~${@WThCZ|W^` z3am=f21(aR+9heXq?^jjtQ*FLnl&{{Gs@{*P-iNB*f2!Vhs@`v^-e;=bU#i|$s@_kk-bbq5 zKdRn0s@^ZE-Y2TwAFAFLs@@N(-Uq7Q|EavReCg5)oW-k}uz?VDmd`7lTUu5!cV3jI z=btSRzN!%&P=vV}g;eiT+7$KT#KtU~*+MNkgL3HFGd(^LfWtZoKbDmivqsDPYmvc6 z1Ix7zIzI`2Tsl90EFWbo6D_j~CymdWdFsqnr%bdsV2riYl2>@a_(kIvjz4SaM9WEI ztBS6lYC#cs^O?%1pJYAdH#K0!vozz!e`|bRAryP?XG4!C6D=0YmxU)1p;*EnPG(vu zhkCXZ=XR@;a`=zU!cMR%>+}B1EVjJ7aTnQ&#^7Yk)sx2DVzXRhwLnMbCSN#&OZISk zYrx;^#9OxMoz-~lc9`CE9cg2DWxjK1`T5f4&XP{NCAzhwIT*!zqDv~v0EFAZk@cXF zXu!Xu)gSPO+I>iR@xJRN;syWCl2-NqrE{Gn>aWGuQqmO)Vnc*>7nZa{J35>tEltt( z4%#`uCjy#UebCer2$oaXN8H8s1=Ci-2c*5{Q(~1Q-x66soDf6g=k@#so zw$Lmc!aotkLI0$Wj#>B{iN6fE6{`-jk#v1G5`Vqq-ywZ~BtI_3FX2VI9Q+}U zJ6P5Ef{|aJ;q+$UPAO|7cEppDVYMgcMV0qM@Kd>u$aax|&mIFllKlgcA9kvP$KA^k zSMPIHdfwxD9Bjv2!OpLrae4^21NJC>rR-zQuTmL02Gb4Mx!q*vRNyB;QhewE75rBct(9D(uu44l#)%t-zTO8zQYU%ynfH3$EF zl7Bm0Y~$e9%AgNR+*w5^u8onV<57-_bK`HnPkS{6RP5$6T(*!OG@|^kouX0NepRWnnR1Nti8?{7yM8 zohCCTNjtk;0+7wdsw98aVu7n+_Zp5Xtr^;p1Mkj({{T4IX&z^OnuGth9CxtpGentc zJn5HspS+N}Q<_Bg8AkH++rTFx{wH<`M&-|=QV%{h#*f>jo-aB7G*j?BMEe20vahorzEi902~OX5crzZ?f< zN&Iu*6#v^yem)tO5F@o?1#l%RRdCWk{8n6lIE^S{SLDzW1y1ealf$N`^{%(Sre}{7Lvz1s^mzM_d!5ME;y9ak$S$jC#EdaowGLX@Wf5d&YCcK9>28jVIFS!8ddQ{xFm_2R*GF!6t8qr#TV~g+1QrCc3rS zh1;n9W@sOggTCI=?(v2~-pwA|?!-ONmXNp8?`e*9c5a4$<;=6(eQ8a#+jHr%Wh?6%J&n~h?m9?(^Lt2D z=@0zy+qmf@ea^=diugObd>;9MBmS8nkB7buWcXfDS_0n!;vY`(MA|z#JpN$IKm7<$ zNIbOQ;m-aEz1$r zV)$y5hu_d=yiw>QO1PVyElm0Yt*A4uSOflz@F2g$OMg8o%{`ytu5^Z%-?45Fgyol+ z(ri>IDpx(U5N5d7-LV;W&-q8JviL&cISm%>z|C#R221G+N~%&_A^!%Cd`!a=Z2C48 z=k>|YJ!L6FFv3Bi-H3~S*$SS^V)AwPy#Y9h9;k>gSqO1AqvDxQUZrK|qfp8e_=WDo z(-)}H`Me2DUwiU2hl8Fr+`p%K)L#mVa6Rt9^B~RH9;Eh4D;M#R{b4Kd-Kwmzlr0?{ z&7PJZQV~2$KwrMf;)(?6c?cEU)XDJB0lufzOrAz01)4EVYz*VaL%Rm}lvw5o5FRk% zBUxEGWgBE+#!r!@GstBgPg6LonnHcUE6qVp;~xT}fNG8+whbRBOLKA8i|z^EiI0yl zJeI+$oAw*8s;6ml#E;>L96?Xzh%cL^HBot4vFmMy7sN9j5#^6riL@$lS}82Us-8*b0;>XKqDbRv5h9qunFf6b+OU_ zw#G`aX;g|$GCY_Q*=&IL22@FVvxKT^>aoSu$~b!)_O@7Q^X34G5Hu1JId}jm%y+v; zt4w&17xH&_NkE=;bwpSxcV#I~OIs0G{3Pu6u~K|O4@sIM!5!pdda1un&Wdf#(56yC zhnOCPggQr2-p+O?6XNJ=tQ7NdCyi;P&HkonD>%J@R!rhD*&b*Ks&j8s69!e4!k0w- z>Kq4w zqVn<}eIL$jzZX(;4^GLe`*ezy;ku0uvXOkJK8ZYI`Rck`(IYbr!Z=lO3Vqrn zulDs6Jy9`AYVJRKrMzl?wNIlc?K2qr(1M5 zj>!7c_(6w~Q+E9V7_lgMb)WypwZx6~Hu+D?@GM)~x9<2o+Rs!Z}u0jGZ_AfI%p z^3}dZl~=I9e!jo01f1$b(ut6#BFap-l0Vag6E7W#PLuKi&HT1_#kpCpl2e>#=a9cw z%KH_gq-Obw9P(dEdH27NUz$UHd6N+I{R{a^rM$VG|1RYVkZ(LxQqgO2$UiO%R{L~H zhmuqD+frWTD_Y$w&MFijsghIh^*Q7>NcmF~V-9)JmmYfzynRysJ{f(pkw?$QsJK`2 n?+L(E>>9*baZsebNKY&xZMJ`64*6Ld1i$)@uSjL$R;B+RgVX@3 literal 33312 zcmeHwdw5jU)%Tf61|z|lAVdO+jF@0h5)vVpaM4Ui;0z9k0u>dQOeT|yCX+Z9E?$EZ zEaNbiw${?GwlCGz;%i?^t)C!x&mTJVuw z{->9$=Ica$M*GWnjwrW;B70?OTWiCN=~LSpOWIof;k6}eD`%9mi&FrF;6$SKW$&(0tR=pp4_n1SAr0pFHE&QCyZN1E}Pl0nX|K{!-- zH)X(|lL7yE2K-eS@YiO*Kb!&oSO)x48OocRp}gxd$oXpq{HhH22QuIXfS-t;@j45L zk;*uw+QU<1Kh`hs^#;7s)a(m+m0+l`)vvhS!BD{4xz6ovxdjn#XkDkz9qI^ocKQPD zwhk{N@OW>bba#`dwGD{QK&wC0#5kG)KA+Om(dqL;UZ4#?%4iVW?Y?#?zBAMk@Oc{D z?ctDbEhXQgAl@c&(McthJRyi`(jnU=CAuLmwDtr-?sgA~fP(k}fn>&^j<$|9N#(t3 zJ?^Gfzo)JBHlGsog^-!24I$*v5>V1LYW25<6eN>EP2RSSpwB=M_A{l4xxG^%J*fAa zq0DzVYv#HyFP&C8JsD1grlo?HmsTVRrYUac(uHp5?rUxhhJ1mg3+J|V_5zof7iM9cGvRL*`d<1=|Aq++h4?RpGlPe)^pE;Yi=x7(<@gX9+j=(j zq}K`D=+;&=O^dQh#Gm@+gXm_nl@<{Xk0bglB`D&f7`<7!8*#dl;|h-|dFP3M7|*4@ z=85@Ljsf2<@%5Zf(&vb{-GH~w<#fdc{1|~RGvF^2_(}u*Vu7zV;3o^bYQUEZ{9*%s zhQKd3;Hw0_-hiJi@GS;>wZL~8@HGOz)_`{ke3t=#t-xp4frPoUNhjI z5%?1Z{7!*CX~6dhyy8*VCH;24z~>n7ZwP#z0soG`7Z~vG3%uQcKPm9V2K=yEu1A>x ze~!Q(aPxGULuK(G`+Hc2r}>z?G##GCZh4*1;m1fM;7J{x=4SGe;}*>u<$R6OE$b$E1y$!oC= z&lfa2X}J!co1{j0b@&lFe2We*mR2maQ->EzJceJZ!^=LM61sHwk&Kf6*{H+o=WCmF z_)$9gEjm2S-{rMchZkKoOWLNx%dVdiw(IZ(QV8JFI{a82zDI|rxw^de>G0=EBw)V| zpK;Q!qnE*d*e?8BjU3BSW0u#hLG^Cv37H4`)ySTlUPdxlab%IA3>F>5U)zLgh3H!- zC2?eMaIk4Ji<4s}_NU_Hkcr)?I5n`ulc_j0ti;2qI5}$KTd6oXXyTq!oE$R|NX5w^ z6Ah_2Ib!0vRGb_zQIm?3<0URn#mV6k6H{?&(1~+XadNzbITa^|OMLWIvb^MIi6f~v zIap$UDo&1-*qw@#LnWR}#mSKp52xbfK#6aq;^a7qds1=gSQ3F`e6Z+B)JBtS0{Swt z|6tLgh3WBmhWM3+_zXk5)DWL!h+k-kpJ#|$4e?=y_*brU{XR9sKQP4KHN+1Y;;$Ow ze>BAV4Dp?Y_zpw-2}Ar*L;Qz^_=9P2$Mue{IhHw=E?Yt^DKcoX-9H&kB0m2sXY^w= zzADER5B24_;w2T>WOBvlH~VHIC^cRYL_&Fw8tKh>mYR@egA1y$ z+fO3t7f9mq=yPgxuXYrRBQ<(ly8vFHtw93;%}mfRMm4&-yhnQ(Nx(i_DzHH91j<$Q z4_o(kh4RJoL|A9S{}-zJ&wi?cE8k` zm&Jm-s8cHMxoM?ig=3{-mBZZ&ve_b+Fwb}lP?rc)14G$1{D@fN`O~H_xu*b_jbM%K zpcr_psnj;18=9s1H`@~{3Qw(f6`rc@0`iHag{OMa*f&kn||DeX_d$^HwJ;o2tfq$hIwlz$4LeU^_wW70kYQDP|y$;YoHYV-@FYW2j6S{bXF#@_); z-K0h=F+daG?F4&a!pjhVJH1$}C_AZn-iM&$;M{ zs_o}*LfjN4)-*n!q(v-6EQ&JSw371J%j}iEMVH5P%Hv6{Z<_oKgP~cFa1bmewQBtD zh&8H(u27@g>^%SPi2Tcd{vMI=yqY$Hm1@T{@LR6GdYR=`N;g?<0~mM)+Eh_Io`1Pc zn@1sT;Ky=4phAacbbo5}%O%?P|4Gelxrs_`qgxGUy+pP_)k^T0vdd({gwBSfgS}OD$TznRSBa=rTf@?gw1Cn zCH)|xs(YHC_V!6Q6Z%$6Y?@NdlhH46Mo<2xkz6-^&Tor_NC@d{RAbpiKtSu-!WM8x z-|ovUMC>i?V<>3bF}kpOe`q`$5ys>FL)Yh1LYLWgEJnsXrtbaW<3#*xGzT@dw6LX7 zy=*iS7%qS&oj3Jv?o)Vl`ywVR=6Nt#r$=+&wMd4zt z1}%oyYV<8mpCH*^T zjoU$$ZjG^|AM9Z3{y91%oh=5abhFSdm>H5%AFIRIAAL^LKvM9;(*zfwUuAOF3?cVz zG=OxuH!!(-jB=lcozfLf%6$v=GU)4yS1c+*2hh#Bk#jCb7{EiyoY&n(#AVKG1}q+(N1{%!F>$&;J&;XuDUS# zValD!mrJ3#Ft+A{VC08E_Oe=xg7ZM{SoRw_tbps&=uHe;CX)Hyl04X(Cf>FeDQOb@lfp_4Sx&cCr=U*-PO z3zywXV~vZ#+M17`!K{@^;59XNXC4iY`Jk)jbe3X9vxI0OC6_T|^f-9)csfl25*0{I zuJ3qPg0%!2Vb$36Ij&eoo;quctvikaFO0YSMOnDvsIB`DVy@_W+8B~8#^4x**%)l= zPEdNRVylDv0E6&rG!ctfeu_B8;X?>MKtY%3AFgP{Z3t>5jC;JFIAY5%o7t;IcAK2h zJ0V~h6ya36 z@qGJ)R0SVHPKl$mBtZT1GXh%-aS||(y@5y=Er0|K1Va6m{&^LO2bd7>Wk9>Sw>k&3$hf5dzdTx>eUtdWG*69Y^-5Fx6Y;I0pwpmP3$-&tCK5K5C8lQrK-GcV} zoQWK3E(-GYmE^EhC3^sA-g^~XXFCgjKr(+o*}?{uK>rA6=37|<@#K6BBDRqTG~zd)(0D31BBuGEi0?7%~E{wvo0SVTmQ73k()wS}l+SG=)sSoOwG;V`sJ zj0R^Z+S(5?+M-g}qJs3+*&+s6x(@kXs_Mv|0^Ry2gME7EaQpliP}*lR3p4xNNvgm; zb6}tKKy&*fR#O`jernlBB6Rk-me@^}8vzD>q5bxK(sa&fLdNsY!QP`BU)DqRz;|_h zsk9gKi`Z4NUo0m1sE0g)+ANY6DH+9((YI&~_StMCBqkz3uGeA{Kyrf6NIpPN%OXCk z2=$Fgj1NmN%H4V)TGF402HPCp=?zg=roDA+aL~X){;Njr&QU^jj-{@6(Lof5b-U#U zxjm@gkG_s-;yW#|5eL|gjY(@D;j>oe1m0jP7BQ};tHbwCdBdPq;fDfhyahexaqS3& zQ596;d{(sRE_Ek0YFtZ!JUNTXYbyC0pl!z}G-*tQCy9KpX=Lj9{K_@MRNEu32F~7y z74j}qpNR;u3i(^AiQfWlRMZG;D$9yGfSeQSSf|IvQ`*mRM!!mKX%#$chYFg56vVVY zhfLVL)C@Th3yCu|**Yw*jA!vBOv#9479jOX1yvTBU{&WlO9Ac60g^#|nXUUD=oDSC zIhT?`5z9EnxZW~2Q&EW28M!M*amMGH`m*4-mTaP;R-i@;FzkWV`Fk=9uBGMgr}`gf zY#w&nX1`Af7@svQ7dg7(^`mKAyjz$1*GL42k@ZiJB{|Y}2`-Awu5_7-?m*ZT9n@a@ zJJpbi8v2tO9Z{&wCK0g_g(!hDy35wx1~Fi5Bvx1Sc?7Rz(oE;f1VkM^lNW*RU1%z`TXH)m9HVP7eJj&~g7Cg*A{WKF%Eb|=ezHX%LmK_gPyt`YHO*jIIQlkLn>GXUO;a!jKcB!pJL-$ zI$013Cf4wQ_q|G%yiu3Dl#&l%Vh8ShJh@euTtUgS$%o{}c=Cc&GOL+$nFU4^Cay&$ z$^M<%b9qm7*Wi>jBh{yI(65-VdyhtP()O*MUa_AdH*2&*8gdZr@j5Cf+G8{7bOW*^ z4+%49Dp>`XY=T#zl}fqMT?1J8()f;M7#wNau>@?r*a!%X#SDLSf+D%t%tkm?~ws( ztwrQs5z9{~4j2aBHa;DGx&m)HpJObec7%7tj?w5n2sb|2YePO6~U{5=}+uBJ!x^ zAtX65i-od@j4Y2KfgoWW(!*XaZLrEGrEVFae!

p=Oiqe!Wntv05Y|sha?TY>ld*cHWXxy(uA86Y+(!plJ`mrgrFdq1< zy6Y=bwe7*auFvh&wg<4r=rvX6d=-Aa>+{m^^G{<#A61Jj|I5g08SQgBw&Ho2wG|H1Ei&>r(N9M z7E&T&`D7Zm_aQ*CcV8V~H2*zBxy`Rtqlp2&zhV!azY!hKC}C`LL6hS(n2h(DW}DU= zR;ykL*|1EZVQ~uUqX}5^udw2`DGOMU8c5W#fyFHC048nA+uVw-=;4$VM-fFl z|0$e>N-K7u9EoGxb3eYEC4Wbk?4jfl@O#}9QEyL7kZe0AbRP_rV`Ethl7V8yF5Awx z=YCr2Q*uI9ZSV*bi77Qz7iNsu9m0B+%CAsbr^Fw}qr|VC3 zn+YSRtjS5wkEu2ymenW>Rbi8WivZCv-HBP_t$S+=vq6PjM%E8HW6Q8lFl9HiQ&5x{ z+T#%Dj2?Hz%6LD?%9Gth#VOMjXPm>f(`};J0xJ(6*+0m9{lRD7{MEN0` zm7hV=_-g=dJ62Nnk22P4zXr>~=qa3dwj+BKH8np|Z999>r#NEM@6|R_E*N1irfPF+ zyr9q-uPro@AP2?;42}mwqtSi_UO+toD{J?6RJ*R|N!|1V&Gg%4ti020sq!jR-m3pe zdEY&pD({WZO19bxKFPfovZ=hQ88nS|1M17W7c2{-?_lrpPGs+jRT|6t8Or;0Z7b!H zD(^~6Z|g*H>sV>e!}4q3CFU3U@=_bE)oJ)Ng+>2B&jG;fm}O>T9eo7t7kd5?2f-%Y zti&|_zkuoP37Nq)7 z%rX%~WK?8kB|e&@BK#40{{l=tvw(v`G;24r{EoSzAHpjK#*6aMOm*W}(j2C^ppbMw z7@FpcT>*!C<=rsf zj*VXr;EI0cj2ts-KRLt-4n1zC#7M6hr&IL^YB5S-4k?Bd*#ON%7|(xXM9LvMQ0l}M zK8t<3ge7o%MA5Z=XPoAesK9`c;gm7JVD#j-|n)_r44rDK> z>pqGFX7mefG^o{B-hkYHMxqzy#wI3wlBgz;huL<96{l_Ii?P}F;T*bZ_LHH%Ix6?v zw9@uK4-VyMT(|A0Goc|??s2RDve&j_*ua}Q1$cj+c7KH2Lu(UzXm#Z{mrKsq7sf)* zQKejn>bqO>Gqe9)jg<|Iq6~lDGl~| z4)&Uddi^xySEGFxM9)LD_RWor*zAh_)loH~EA*B4{Md~IcJ@=OiW0k-)v?Y9+UeG74z8I|`$^S2_X zy>M`FkgtcsZ=ud6VQ9NV(f&-URrEs-y-sVQ@L@HU#irvW%~E|SD%<`AHAS!R z>Z2gR(T8!9#EhYV^F_AKu41V=jUd}x21svbPK;zOg z*ozEqWf@$_Gx!+{MC&lO+@sb;dQRff7xp{1U_W|qr7Q{y2oSLRB z+TX$Ln&rj5bK*vD$@x(1YdKA}<2@g#wo7-b8}{2aHlT)p`2%70QQ0;Y1M`t>)h;HH zFU;c=*CW4=Y&Td&p~AKMA=^o2l%jeVdMU_XfP&<-2& z+$w5xDaM%Ch<>=wD}Nympg88V&RI(fa{{j{vl7kN|&5V$$#YBck^VsF8SCiNS+C9&5RIo zxQWx8LOYiV>?4}t5CMOn^)a0vm7_YeZx9RZPwv$kakmWmubUFz-#<7AUZSI=GtDD= z5&!??GKr^)ulEG}t^VejcK+m{ot}R5HBPaI0_%Xbdz$g^Vd?Nxsgm?w_6T7{`Xrrr z_%RdVLSK7FV4Xc2^fdcs+KYly@Us^Oe3O}YPl$8@ChWhpHSDv~lbDxqOsgMEKCHF) zRaSoawCNQyuBfak_cVALeND|RtvBD&*6#1l)4(_#v0{@8U+R{3MxLod)boGWlQIk zR4TKnm@|ulR}NP$E<$<996?$5xw=BId}P8BVWlMHBCe)PpC{F`r?Khf!Ps6%;kDgV zR5V;kF`b@PQew2KQc{v`PKjVS%7`!@qC=UD(7v`#Wge;(st1ST30-f;YF}WT67qRl zT761ODAdXR1eM0tW~x!81F7bF)W)^?BI(@ebH4p`cY z$9*{@RM?Maaq(QSAJ0WMD<1HKf=UyxLEesxe`y7fx^Ad8ou4&}-qC+RNyE0r#QntTbcoi*A*Ulh*=v0p_3! zr}^X-z@vcs08at7VC1x84wZws<6^*#SUPM6)Bs-uY{ByEV~S(>I34}<@;4z5u;38n z0p?)OLj$bD4pJe?vlg%#a2wzXzypA50V}bcw~gX}f2MS7J=-yl*#|fsa4oj~>jAf5 z^7%8s94rDfz~xxe;re6Y26N!%aSK#+F>WncG5*Oh^_-zNi3;D>)g}^5KP9ncUgq&y1 zi<@h#1)H*Jt@eAf9oFKArPf+zzHhj-7#N4OpvIb4W6i0}U2MS2#|mC4!0!$GHo_Mc z04jCXyoh=EaBI%Bxy6W{#P4@VTP4!&HP>1TB3bjT_KxA!0*5uvVa;(s5Yb|bSUCY& z(vRe%Xm3x_&IK*A3uu?Z@4i8_S6lNonW5>uRE9{lc?pvYr5E(}y*QUp_hxM}Gn;G# z-_O8D_El7C-c8mV#g5=s;P(LkAmMo#tX(E^{cxsXP3{z+_kgAvKK?^MuHSW3mK&}8 z=B&l0q>6Q97pjwwLAM=k;7<53+4&phW!8fGvudpNcy^7oc$39pExUJ^!&(_RYrge_ zIV)tgo-{A8c4gfH=m2KbaBC&%0(Al&)Df=~$(B^MPNa=S zySkmyQns9DwVQp??z54!1$1qoqjpR>-cP;|CmlCs*IJA3C0~dPGjBrOJ9Ii+O**Lf z(ffdYhLxQUVVy65Hswoe!HkTPt~5Uvl2I*51Xx-9!%qE>wS#1(bVi+;Z%JAg1;k;! zLj6S}mfXjPb|@cSU@cq2l}=_;3#Ey76L<^I$6O@JEc`#E^L%Tyx&3rz^7Vb-9f5x7 zO3Ih{NgF(f*T+$$l_70nGHpdNjc%f^MOslZ&CSxtuP#8^a->~LX$#4h)ub=CNmo|k z%19-+XUQWghmM9<{dBxdJIEvfc8MLX#C2?c=pZ^vQm9b zE%i0E@a{*3G_8jX}BC=OnKfQo^NS!sOF1Icj zre>lWm33VBFB($_(%tgMf& zwbo~yO+I;gC0w0Lb#N52?3g=X2SMoTOx9;gmXp^(XV%l%L&{p1OZ`k4WGwwq-(S#3 z1zXY&f#uJ^F2aBRrOwq^D@o_m4FdzM-m=*Jf2 zprbtiy7D0bMsK9Cg z7YkS~V5fjx0&W&?tAN`D>=CeEz{3Kb5D+gjW>=nob^*%-tQK&wfb{})3fLv!W&yVf zXe`gai|5pH3X@6J1dm(iawy*?E8ib0-xn+24=djXE8qWGCHUm~UFG{+YXn}tuT{RE zRlbi^zJFD|Z&kiuRlZMEzCTsIFIB!D^?O3DeE+F@-|3SA|BQh0{iX7KrTYb5zK>MC ze^kD2RK8zSzE4!XKa^Vq?`fYqcc#5~Sp#k$2;0l2m0n(2R&x2YFpE#0EHDb~+Vcj- z-CFiKvb9NR5_%A@C?-a1x(Jc+ltMQ-B>N*m;4+WGAJY<(QKIRgl}N}U64NT?HcRcZUQtYztgPV+ECs_57K<^O9gx0lFYleej)Gthy%0_cc+PE)w8&0f;f5N>Uw<8yo>prP3d zNlpHa5}c`fZ;`|u0U$dmlc6Peg6sTvHFt-HcxGxRykv;HxMG5kV#6t%2cN0R1lSvPbxZ)Q7 z3>o9@DkxBX_l)V{YX{B#un_(4vQx@YlT`EUVl_T&tQdqaM3}!B%2smD{$}Wb?BUg2 z7yU)N$f0W@enZih0dH3F6r+-Ko;MV|Q_$BN2I8v)ezO6;O5pnqcsl3N>SaQI6A6J`Z+ej-yd>A#=Rk5+0INy-!)s=S98{g~wUN+flH z1x6|I`yLX1iqT`XBIY9!e=h2QdMM*nmI3bop1uRngP@oB&J^_3C7h6*jUre}c=Jd_&X0u{WdWm?u@qb>_~H^*SViHHz z;9_UkfFDY{SonT}r7FrlGsrpDLiIIPIk%n@O1oXm@Z*$jVP}ai1D@)AyYTZ(LO$IG zN%g)((93$ip3w^;9cUGDwhDh^_e~-FwhZ)fCV#9lvYrc(`aCS;bQWBROanMmxXE=ox9+p8+9f@kEY3CQ?rTZ%?D-k>OZEkbSlae`fc%A!&lZ zD^oe$CxW*^;QK{=6$<<;;K`o%3VX`(E)ewF1))$&xkk`$6!g;menu~Z>OdTL$*V&v z+lbzb^$?n*{(2&VoS(D)VXR`W=hSwg=t~*s2QuKZu%SZbtvBd@9`Hl;1IiN2=gUhw z@QoZ#L#rD%Oe?KHx*HvH`+zT4T80~i_4EDoB@xna&pV!A^(v`P@3=$lZhY{^?+Zd` zV~4xBt)s!y=57pi1cGi)crD%6+=+XbeT|SlBn5qBsMYNW1U&29xYrmrAU6d(?LK#7 zxV?QHSadNreP<|*)a{;konv90yKYgf8{hh=y^Ig}} zI9%>)=FMADx75AVQRAuu$LZf~s!o2Bh}}b-jM5i~+<}mO!pJQU*G;+J?b4I$) zDkVAa`6Aqfjn6iPTHD&(zK*7U`Q4%be^|oJo>6eKrw9~!M!@Y`i;o<^ydv)gQ~_=V zXCI+TW<|>3<4UdcbtSr$-JQIt-_7q~cl#PWA&>YtQyQDhmu?sr+47vYAkL2OUP(bz zU6dDYGEbL7`TLt;4XmO4zBRBdTXZJBf0d-3uHa_z6fL`vywx8hp;*5r$*4?}Z}K4r z1-GELt;0?2>@!?xbolBO`}kC_4fmW&fuwq?WP+Q~$Dw3tI|IJeZt)n2yQAS|%G~1> z->^y(4X1_Wc>NL;`(799oJQnr^LhNR9zF08QbZ<1-3T+JKIE0;q3>NuRbVBm-t^B_ zQNd5ELw94a!`*`0?5QNqYoHT$4BU8Pr7_*2RF%B;ys4)yNSoHqHNj-Q+{MyA|kg?OKLLKxxgY;2qr+8=pUk+yqiyNILrUMaS? zO0lJfXK6y~bSSo6R?^xiP>vcWHsqQWM&5#*IHk04ogZ0n7z*$dJTnwzdvhdJMBGRV z_}V(ZQx8|#=xYc!gVN(~#?UXKt^TGC8TK?Zpo^6;eD~BR!?2(X z5*n>I74PuU@R1Z4@Hg|gepdCr57Y9Hrdnnp!uY*ld7dV75YsXlRW3#dQ>!F@rtjiP z205RUaoQfx^V9m5E(zs4mSbs8F#U2Os^^#cloHz0$xq|I9_gRr3`~|^o)bzarv1rt zLZkjP&CJD5@*fuZOQ?uA=}5Md{PH}p6*wYE@?&00mpq@8=a3xBWymnWTk)eaKFKf7 z^(3q&B3wdjG9O8Q7t-j=P{!rCp@j9sjLWD$$Kq8W2vD6!etC|WgrdS^{>JiuSMb*e zf%05b!fGLy_{nBQ{vQEHc9rsz>kYB)kmto9VVBW=9s*X+Zx?(L(lcTDOD<<5{HcL| z`$$eLp`?>CWL(1S2L2wwFJafe$^RJxzg)*lXqh5&=1}GEa!g;{v4$1FBz5a#ti&Vij3vHpOhi_B)nPh%XA4HjXWf_lO_5zTi=#} zf3@Hj+k+YTNsnaTlcd6PN`gO+24`GF_|cZD^m{q}o(-JzT?CWY)(##llMPR#$Z|_U T>aK_4?_bR+?lUkLqRM{*8!#6= diff --git a/retoor_c/isspam.c b/retoor_c/isspam.c index 2907e89..97584fa 100644 --- a/retoor_c/isspam.c +++ b/retoor_c/isspam.c @@ -4,8 +4,8 @@ #include #include -#include "rstring_list.h" #include "rstr.h" +#include "rstring_list.h" #include #define sl rstring_list_t @@ -25,24 +25,16 @@ char *forbidden_words[] = { "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds", NULL}; -bool show_capitalized = false; -bool show_sentences = false; -bool show_words = false; -bool show_numbers = false; -bool show_forbidden_words = true; - - - - -bool file_exists(char * path){ - FILE * f = fopen(path, "r"); - bool result = f != NULL; - if(f){ - fclose(f); +bool stricmp(char *word1, char *word2) { + while (*word1 && tolower(*word1) == tolower(*word2)) { + word1++; + word2++; } - return result; + return *word1 == *word2; } + + void sld(sl *lst) { for (ulonglong i = 0; i < lst->count; i++) { printf("<%llu:%s>\n", i, lst->strings[i]); @@ -65,6 +57,7 @@ char *remove_preserved_chars(char *content) { } return cc; } +//Memory usage: 29 TB, 213.322.618 (re)allocated, 106.670.251 unqiue free'd, 0 in use. char *slds(sl *lst) { str_t *buffer = strn(1337); @@ -81,20 +74,6 @@ char *slds(sl *lst) { bool isws(char c) { return c == '\t' || c == '\n' || c == ' ' || c == ','; } -char *stripws(char *content) { - char *cc = (char *)malloc(strlen(content) + 1); - *cc = 0; - char *ccp = cc; - while (*content) { - if (!isws(*content)) { - *ccp = *content; - ccp++; - *ccp = 0; - } - content++; - } - return cc; -} char *fread_till_eof(FILE *f) { char c; @@ -106,12 +85,10 @@ char *fread_till_eof(FILE *f) { return content; } -rstring_list_t *get_sentences(char *content) { - - rstring_list_t *sentences = rstring_list_new(); +int get_sentences(char *content) { + int count = 0; char *sentence_buffer = (char *)malloc(strlen(content) + 1); char *sentence_buffer_p = sentence_buffer; - // rbuffer_t * buffer = rbuffer_new(NULL,0); bool in_line = false; while (*content) { if ((*content == ' ' || *content == '\t' || *content == '\n') && !in_line) { @@ -124,7 +101,7 @@ rstring_list_t *get_sentences(char *content) { *sentence_buffer_p = *content; sentence_buffer_p++; *sentence_buffer_p = 0; - rstring_list_add(sentences, sentence_buffer); + count++; sentence_buffer_p = sentence_buffer; *sentence_buffer = 0; content++; @@ -137,32 +114,55 @@ rstring_list_t *get_sentences(char *content) { content++; } free(sentence_buffer); - return sentences; + return count; } -rstring_list_t *get_words(char *content) { - rstring_list_t *words = rstring_list_new(); + +bool is_forbidden_word(char *word) { + + for (int j = 0; forbidden_words[j] != NULL; j++) { + if (stricmp(word, forbidden_words[j])) { + return true; + } + } + return false; +} + +int get_words(char *content, int * count_caps, int *fw_count) { + int count = 0; char *word_buffer = (char *)malloc(strlen(content) + 1); char *word_buffer_p = word_buffer; *word_buffer_p = 0; + bool has_lcase = false; // rbuffer_t * buffer = rbuffer_new(NULL,0); while (*content) { if (*content == ' ' || *content == '\t' || *content == '\n') { if (word_buffer_p != word_buffer) { - rstring_list_add(words, word_buffer); + if(!has_lcase) + { + (*count_caps)++; + } + count++; + if(is_forbidden_word(word_buffer)){ + (*fw_count)++; + } word_buffer_p = word_buffer; *word_buffer = 0; + } + has_lcase = false; content++; continue; } *word_buffer_p = *content; + if(islower(*content) == *content) + has_lcase = true; word_buffer_p++; *word_buffer_p = 0; content++; } free(word_buffer); - return words; + return count; } bool is_fully_capitalized_word(char *word) { @@ -174,23 +174,24 @@ bool is_fully_capitalized_word(char *word) { return true; } -sl *get_capitalized_words(sl *all_words) { - sl *capitalized_words = sln(); - for (uint i = 0; i < all_words->count; i++) { - if (is_fully_capitalized_word(all_words->strings[i])) { - rstring_list_add(capitalized_words, all_words->strings[i]); - } +int get_capitalized_words(sl *all_words) { + int count = 0; + for (uint i = 0; i < all_words->count; i++) { + if (is_fully_capitalized_word(all_words->strings[i])) { + count++; } - - return capitalized_words; + } + + return count; } char *clean_content(char *content) { - char *allowed_ichars = "01234567891abcdefghijklmnopqrstuvwxyz \n.,!?"; + char *allowed_ichars = "01234567891abcdefghijklmnopqrstuvwxyz.,!?"; char *clean_content = (char *)malloc(strlen(content) + 1); char *clean_content_p = clean_content; *clean_content_p = 0; while (*content) { + if (strchr(allowed_ichars, tolower(*content))) { *clean_content_p = *content; clean_content_p++; @@ -201,176 +202,123 @@ char *clean_content(char *content) { return clean_content; } -sl *get_numbers(char *content) { - char *cc = clean_content(content); - char *ccc = stripws(cc); +int get_numbers(char *cc) { + int count = 0; + char *ccc = cc; char *cccp = ccc; - free(cc); char *number_buffer = (char *)malloc(strlen(ccc) + 1); *number_buffer = 0; char *number_buffer_p = number_buffer; - sl *numbers = sln(); while (*cccp) { if (isdigit((*cccp))) { *number_buffer_p = *cccp; number_buffer_p++; *number_buffer_p = 0; } else if (number_buffer != number_buffer_p) { - sla(numbers, number_buffer); + count++; *number_buffer = 0; number_buffer_p = number_buffer; } cccp++; } free(number_buffer); - free(ccc); - return numbers; + return count; } -bool stricmp(char *word1, char *word2) { - while (*word1 && tolower(*word1) == tolower(*word2)) { - word1++; - word2++; - } - return *word1 == *word2; -} -bool containswordi(sl *words, char *word) { - for (uint i = 0; i < words->count; i++) { - if (stricmp(words->strings[i], word)) - return true; - } - return false; -} - -sl *get_forbidden_words(sl *words) { - sl *found = sln(); - for (int j = 0; forbidden_words[j] != NULL; j++) { - if (containswordi(words, forbidden_words[j])) { - rstring_list_add(found, forbidden_words[j]); - } - } - return found; -} unsigned int total = 0; +char *readall(FILE *f) { + if (fseek(f, 0, SEEK_END) != 0) { + fclose(f); + return NULL; + } + size_t file_size = ftell(f); + if (file_size == (size_t)-1L) { + fclose(f); + return NULL; + } + if (fseek(f, 0, SEEK_SET) != 0) { + fclose(f); + return NULL; + } + char *buffer = (char *)malloc(file_size + 1); + if (!buffer) { + fclose(f); + return NULL; + } + size_t bytes_read = fread(buffer, 1, file_size, f); + buffer[bytes_read] = 0; + return buffer; +} + void analyze(FILE *f) { + if(!f){ + // File doesn't exist + return; + } total = total + 1; printf("#%u\n", total); - char *data = fread_till_eof(f); - - str_t *all = strn(1337); - char *sbuf = NULL; + char *data = readall(f); + if(!data) + return; char *clean_data = clean_content(data); - - free(clean_data); - - sl *words = get_words(data); + int capitalized_words = 0; + int fw = 0; + int words = get_words(data,&capitalized_words,&fw); + int sentences = get_sentences(data); + int numbers = get_numbers(clean_data); // All words - printf("Words: %llu\n", words->count); - if(show_words) - sld(words); - sbuf = slds(words); - stra(all, sbuf); - free(sbuf); + printf("Words: %d\n", words); // All capitalized words - sl *capitalized_words = get_capitalized_words(words); - ulonglong capitalized_words_count = capitalized_words->count; - printf("Capitalized words: %llu\n", capitalized_words_count); - if(show_capitalized) - sld(capitalized_words); - sbuf = slds(capitalized_words); - stra(all, sbuf); - free(sbuf); - - sl *sentences = get_sentences(data); - + printf("Capitalized words: %d\n", capitalized_words); + // All sentences - printf("Sentences: %llu\n", sentences->count); - if(show_sentences) - sld(sentences); - sbuf = slds(sentences); - stra(all, sbuf); - free(sbuf); - - + printf("Sentences: %i\n", sentences); // Numbers - sl *numbers = get_numbers(data); - printf("Numbers: %llu\n", numbers->count); - if(show_numbers) - sld(numbers); - sbuf = slds(numbers); - stra(all, sbuf); - free(sbuf); + printf("Numbers: %d\n", numbers); // Forbidden words - sl *fw = get_forbidden_words(words); - printf("Forbidden words: %llu\n", fw->count); - if(show_forbidden_words) - sld(fw); - sbuf = slds(fw); - stra(all, sbuf); - free(sbuf); - strd(all); - if(words->count){ - double capitalized_word_percentage = 100 * ((double)capitalized_words->count / (double)words->count); - - printf("Capitalized percentage: %f%%\n",capitalized_word_percentage); - double forbidden_word_percentage = 100 * ((double)fw->count / (double)words->count); - printf("Forbidden percentage: %f%%\n",forbidden_word_percentage); - ulonglong word_count_per_sentence = words->count / (sentences->count ? sentences->count : 1); - printf("Word count per sentence: %llu\n", word_count_per_sentence); - } - slf(capitalized_words); - slf(sentences); - slf(words); - slf(numbers); - slf(fw); + printf("Forbidden words: %d\n", fw); + + if (words) { + double capitalized_word_percentage = 100 * ((double)capitalized_words / (double)words); + printf("Capitalized percentage: %f%%\n", capitalized_word_percentage); + double forbidden_word_percentage = 100 * ((double)fw / (double)words); + printf("Forbidden percentage: %f%%\n", forbidden_word_percentage); + ulonglong word_count_per_sentence = words / (sentences ? sentences : 1); + printf("Word count per sentence: %llu\n", word_count_per_sentence); + } + free(clean_data); free(data); } void analyze_file(char *path) { FILE *f = fopen(path, "r"); + if(f){ analyze(f); fclose(f); + }else{ + printf("File doesn't exist: %s\n",path); + } } int main(int argc, char *argv[]) { - + if (argc > 1) { for (int i = 1; i < argc; i++) { - if(!strcmp(argv[1],"--hide-capitalized")){ - show_capitalized=false; - }else if(!strcmp(argv[1],"--show-sentences")){ - show_sentences=true; - }else if(!strcmp(argv[1],"--show-words")){ - show_words=true; - }else if(!strcmp(argv[1],"--show-numbers")){ - show_words=true; - }else if(!strcmp(argv[1],"--hide-forbidden-words")){ - show_forbidden_words=false; - }else if(!strcmp(argv[1],"help") || !strcmp(argv[1],"--help")){ - printf("%s", - "Usage: spam [file] [file] [file]\n" - "Flag defaults:\n" - " hide-capitalized = true\n" - " show-sentences = false\n" - " show-words = false\n" - " show-numbers = false\n" - " hide-forbidden-words = false\n"); - return 0; - } - + printf("File: %s\n", argv[i]); + analyze_file(argv[i]); - printf("%s\n", rmalloc_stats()); printf("\n"); + } return 0; diff --git a/retoor_c/rstr.h b/retoor_c/rstr.h index 9ae9c72..8ad3cb6 100644 --- a/retoor_c/rstr.h +++ b/retoor_c/rstr.h @@ -26,8 +26,6 @@ void stra(str_t *str, const char *to_append) { if (required_new_length > str->size) { str->size += required_new_length + str->buffer_size; str->content = (char *)realloc(str->content, str->size + 1); - } else { - // printf("NO NDEED\n"); } strcat(str->content, to_append); str->content[str->length] = 0; @@ -49,4 +47,4 @@ char *strc(str_t *str) { return content; } -#endif \ No newline at end of file +#endif