From f3f022a83e9616dffa1cf1df3cdc434747e8be3a Mon Sep 17 00:00:00 2001 From: Girjesh Rajoria Date: Wed, 24 Mar 2021 07:58:12 -0400 Subject: [PATCH] Test for checking results aws Added query generator which generates equivalent queries for aws and ceph. After running these queries, their results are matched. Signed-off-by: Girjesh Rajoria --- test/queries_generator/aws_cmds.sh | 15 + test/queries_generator/aws_queries.txt | 10 + .../queries_generator/aws_results/output1.csv | 10 + .../aws_results/output10.csv | 10 + .../queries_generator/aws_results/output2.csv | 10 + .../queries_generator/aws_results/output3.csv | 10 + .../queries_generator/aws_results/output4.csv | 1 + .../queries_generator/aws_results/output5.csv | 10 + .../queries_generator/aws_results/output6.csv | 10 + .../queries_generator/aws_results/output7.csv | 10 + .../queries_generator/aws_results/output8.csv | 10 + .../queries_generator/aws_results/output9.csv | 10 + test/queries_generator/generate_aws_cmds | Bin 0 -> 32664 bytes test/queries_generator/generate_aws_cmds.cpp | 26 ++ test/queries_generator/queries.txt | 10 + test/queries_generator/queries_generator.cpp | 325 ++++++++++++++++++ test/queries_generator/run.sh | 13 + test/s3select_test.cpp | 45 +++ 18 files changed, 535 insertions(+) create mode 100755 test/queries_generator/aws_cmds.sh create mode 100644 test/queries_generator/aws_queries.txt create mode 100644 test/queries_generator/aws_results/output1.csv create mode 100644 test/queries_generator/aws_results/output10.csv create mode 100644 test/queries_generator/aws_results/output2.csv create mode 100644 test/queries_generator/aws_results/output3.csv create mode 100644 test/queries_generator/aws_results/output4.csv create mode 100644 test/queries_generator/aws_results/output5.csv create mode 100644 test/queries_generator/aws_results/output6.csv create mode 100644 test/queries_generator/aws_results/output7.csv create mode 100644 test/queries_generator/aws_results/output8.csv create mode 
100644 test/queries_generator/aws_results/output9.csv create mode 100755 test/queries_generator/generate_aws_cmds create mode 100644 test/queries_generator/generate_aws_cmds.cpp create mode 100644 test/queries_generator/queries.txt create mode 100644 test/queries_generator/queries_generator.cpp create mode 100755 test/queries_generator/run.sh diff --git a/test/queries_generator/aws_cmds.sh b/test/queries_generator/aws_cmds.sh new file mode 100755 index 00000000..dce1579a --- /dev/null +++ b/test/queries_generator/aws_cmds.sh @@ -0,0 +1,15 @@ +#!/bin/sh +set -x +set -e + +mkdir -p aws_results +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select lower(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object;" "aws_results/output1.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select to_timestamp('1999-11-04T20:27:03.479340Z') from s3object;" "aws_results/output2.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select date_add(day, 8, to_timestamp('1982-02-03T00:20:12.42831Z')) from s3object;" "aws_results/output3.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select substring(upper(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## '), cast((avg(cast(_3 as int)-cast(_2 as int)) - 3) as int), min(cast(_1 as 
int)-cast(_1 as int)) + 7) from s3object;" "aws_results/output4.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select 10-cast(_2 as int) from s3object;" "aws_results/output5.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select character_length(to_string(to_timestamp('1963-06-22T05:35:39.954350Z'), 'SSSSS HH ')) from s3object;" "aws_results/output6.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select cast(_2 as int)-2 from s3object;" "aws_results/output7.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select char_length(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object;" "aws_results/output8.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev --expression "select to_string(date_add(second, 5, to_timestamp('1938-09-24T10:57:42.142042Z')), 'hh ') from s3object;" "aws_results/output9.csv" +aws s3api select-object-content --bucket girjesh-bucket --key test_data.csv --expression-type 'SQL' --input-serialization '{"CSV": {}, "CompressionType": "NONE"}' --output-serialization '{"CSV": {}}' --profile openshift-dev 
--expression "select to_timestamp('1931-12-23T02:54:25.451925Z') from s3object;" "aws_results/output10.csv" diff --git a/test/queries_generator/aws_queries.txt b/test/queries_generator/aws_queries.txt new file mode 100644 index 00000000..c3249158 --- /dev/null +++ b/test/queries_generator/aws_queries.txt @@ -0,0 +1,10 @@ +select lower(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object; +select to_timestamp('1999-11-04T20:27:03.479340Z') from s3object; +select date_add(day, 8, to_timestamp('1982-02-03T00:20:12.42831Z')) from s3object; +select substring(upper(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## '), cast((avg(cast(_3 as int)-cast(_2 as int)) - 3) as int), min(cast(_1 as int)-cast(_1 as int)) + 7) from s3object; +select 10-cast(_2 as int) from s3object; +select character_length(to_string(to_timestamp('1963-06-22T05:35:39.954350Z'), 'SSSSS HH ')) from s3object; +select cast(_2 as int)-2 from s3object; +select char_length(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object; +select to_string(date_add(second, 5, to_timestamp('1938-09-24T10:57:42.142042Z')), 'hh ') from s3object; +select to_timestamp('1931-12-23T02:54:25.451925Z') from s3object; diff --git a/test/queries_generator/aws_results/output1.csv b/test/queries_generator/aws_results/output1.csv new file mode 100644 index 00000000..2da8a1fc --- /dev/null +++ b/test/queries_generator/aws_results/output1.csv @@ -0,0 +1,10 @@ + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## diff --git a/test/queries_generator/aws_results/output10.csv b/test/queries_generator/aws_results/output10.csv new file mode 100644 index 00000000..b0a6a8a9 --- /dev/null +++ b/test/queries_generator/aws_results/output10.csv @@ -0,0 +1,10 @@ 
+1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z diff --git a/test/queries_generator/aws_results/output2.csv b/test/queries_generator/aws_results/output2.csv new file mode 100644 index 00000000..ceb48596 --- /dev/null +++ b/test/queries_generator/aws_results/output2.csv @@ -0,0 +1,10 @@ +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z diff --git a/test/queries_generator/aws_results/output3.csv b/test/queries_generator/aws_results/output3.csv new file mode 100644 index 00000000..ce1a7527 --- /dev/null +++ b/test/queries_generator/aws_results/output3.csv @@ -0,0 +1,10 @@ +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z diff --git a/test/queries_generator/aws_results/output4.csv b/test/queries_generator/aws_results/output4.csv new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/test/queries_generator/aws_results/output4.csv @@ -0,0 +1 @@ + diff --git a/test/queries_generator/aws_results/output5.csv b/test/queries_generator/aws_results/output5.csv new file mode 100644 index 00000000..72c01e00 --- /dev/null +++ b/test/queries_generator/aws_results/output5.csv @@ -0,0 +1,10 @@ +-42916 +-21159 +-35571 +-38378 +-42792 +-45572 +-8538 +-22623 +-38429 +-6601 diff --git a/test/queries_generator/aws_results/output6.csv 
b/test/queries_generator/aws_results/output6.csv new file mode 100644 index 00000000..f9e7242e --- /dev/null +++ b/test/queries_generator/aws_results/output6.csv @@ -0,0 +1,10 @@ +9 +9 +9 +9 +9 +9 +9 +9 +9 +9 diff --git a/test/queries_generator/aws_results/output7.csv b/test/queries_generator/aws_results/output7.csv new file mode 100644 index 00000000..738f6b83 --- /dev/null +++ b/test/queries_generator/aws_results/output7.csv @@ -0,0 +1,10 @@ +42924 +21167 +35579 +38386 +42800 +45580 +8546 +22631 +38437 +6609 diff --git a/test/queries_generator/aws_results/output8.csv b/test/queries_generator/aws_results/output8.csv new file mode 100644 index 00000000..6d3ca06f --- /dev/null +++ b/test/queries_generator/aws_results/output8.csv @@ -0,0 +1,10 @@ +34 +34 +34 +34 +34 +34 +34 +34 +34 +34 diff --git a/test/queries_generator/aws_results/output9.csv b/test/queries_generator/aws_results/output9.csv new file mode 100644 index 00000000..786a90cc --- /dev/null +++ b/test/queries_generator/aws_results/output9.csv @@ -0,0 +1,10 @@ +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/test/queries_generator/generate_aws_cmds b/test/queries_generator/generate_aws_cmds new file mode 100755 index 0000000000000000000000000000000000000000..b4c7bb02f3f379059e39c35eb712f9b8b772cb72 GIT binary patch literal 32664 zcmeHw3wT`Bb?zBGkn9*qen1QcJjfU<+j`mvnO8=(#~u;c7M5%r66bg{8cBmk8fE54 zmQCD<2vbiKQHKw6`MA9|YCfP&Uvcv#E^%5TBNw^!5kpgOliS4Q1~S8|3J}nB0NsDB zz4y#HqjM;pK)<%1wyjxb{cG*D_g?!k`|Pu4pRafuH{`fnnhH7Etr~F?r5=XFr>33V z!>q*Wv`=XsZLxNhb`g*~{CQYHR9q?AnIi;>7D~NbP>Od%%8%rDz!C2uqFuqCX{vHa zl;Wj>M_vlrxlpi*Y9)fFc=>ecrVj+Shl!$(6$@5Tb39J3O31Y;8BfwPEs^biD$9B&TxC`+9ZG)$U*Az^gJ?PhyTJszXO@H z)BgzyYKM=aq1fSn;$UaM!A=qmv$NAyV2}TA2mGrJ`8hJn-p&UO`W^74U4GWjw#UDM z3bxa~*1`XVgMU^!_~)-3^8bIG(si)^cMkbk=g=Nra`1DDL%Lpvd`>v%Z+EbN-XZ_r zc4#L-2mB3(`d#5rk1sg*VV*-iKks0N#ss%bf4s}V|F<~Q`w@qB_yF`vwTrbAD?OY_ zzf9|OP{K#b1wI#kr|`!}cdmAsHnP>j{6dK@mwr3y7B~y>al0(nxeR<2{7LqQRtWt^ 
zWrW3`cKIoTKBYTwyU;I@={_a>+*2xWMCb55>7R3@9s#INjMFirEwEw*jm@>1u6J}r zy7idhj~cqJ>342!Hg3@MK!1OAb#+avKNbo=7!7rIGz6NB>gqtJKdKv1f5?b61ia0D ze0bMYH#7zMyxtbQ`38OKXPZ}LY3Bxgliubx{NAQOb{VOzhJ}E?#~%n8d%ay*8mS>8 zk#3Z_H(;P-UT+_ltl6lkYL7(s_@ixl$OuOLMg*?#cTh5)Z8p{jXN4kg%ydtA1AVRD zKBzVuVOs^^%B_gGN*IbD13`aRLyO*0rF+@4&UMqgT5rByZ>iS3UM@A&%lc|g&^eM# zHdTentga4rw}owsjU9PEGT0{!ZB~thONz!W?i(b3mzR*kz}=EXz}Z|Yn`1%Gp@Wnrwi&=IySHK?H; z^^X32y(burMY{cA40L*56+4moe7dGDGznErXn8zvW|7O(_$?Y6k3ltZB*CC(Gbq}O zB7qhRcGv5o12|WRO=lDpjBePB;k#$APG41Mya8Clf4g3ky|>oTcLauKBk0}Q1%Gj? zjs}g+Xk<@zEjrz!FSO=R;!S!iXz1Z!cZboL?bugc1v71-SdZ%b&Bp3Lq}M=knvEXY zZvo+P=St$QF8)$#`YfsdBS0P3Ll$jobu6?`jY%JA6chgBdclDD+l)2r-*6=04~yA} z_!f_Tv~Th32N`siHM-6%N)JamFb@kxqmgL#9iv9C52F}*XQ(Y0MbmEzh`AbOgk54X zh^vkO=6l3dYx=hCJt4SmYcST^71SUMs5v8z;b{B)Aw%nnb@yO;Xtbjfkcym2cKPwe zwQ=1#eN{!3wz08c?K-`tqNd_{ZQYI?^=ljS>WWnY-mwEJtIgLHHN4i5gZuyVUmmW- zKSFb`Rzx!Tm)21ld5q>@-Qgm>!W5a~dVcxR$TP$D|3r&V;M`04iIJ30L;TN0mLnge)ggEiGJTT*d84t{O z;6INC-YUN8yl?QeLf=q+67yBxfl(u8>WpvjRN)wBa_agQ0Z%Rc1wM-xdvHa3C&`lM zrVy6?gm}6&NuFW;IpS%mpB!cW8RBV*pFGO^Q^eEMJ~_htapGx8pFG6;cZsK|d~$&K zKO&x{XUQJszePMv-IKeRe}s6NnkSo>e}H(Jk|#aPe~oyWawn^p|0?k`)lRyZ-$y)6 zv6CgtM~SDYby8#gF5-)cpZEa4(pKVWDxEyX{GG(p6gqi^`K`pyBYu?m+lZe}{88rD z5>Hd*Hd)a)9~e#M6{G*~5G(@iY}q?qdFO;%N$;Y+`;M@ig^K zdYGS0JWY9%Rm|rQPgC8boB8v&-RYz$ZnA{=H;JdIZBk?Y72;`1o1A!`%KswqZsN}| z{}bYA3Y$E`{By+9)HONE{4>PU)HHdN`KO4dDQR+q`QyaXR5W>r`R@`>x2VYh=KqNJ z>xl1R{#(S;6g9bv`A3MSscEu_`3H!nDQVKf{MU%5$N0%A@YLR`UvG#%`$b>;Mc?2r zCYo9r#zzlTdNkkoNjH=6=a-5sKn6feFP7Bub969bsw`fj05WBwBa zQJ0zZBmqG6 zfE_t6*ipff^f+~>6AP+PHt20k>m8?-e&QM_s7MQZgQF!Usw94G27a5wiv&I(6a1!N zcS)A&8`<$l_8!5yB^w2MViydtXA)ynhT#Lk)YDJWIz-|#q++C0MudtmFIBF9%DDG6 z8lQZ_hdC+bkLRhV;ordQP>FBwm4&{cc|O+}0$+fkdq<1!eG<`-CXE9_+g^)vgp(M` ze8b*RSfjk0I4FyHD~2rKBfjBHqsPC?;6IS9X9PX~ea_hMrV(G9CGWE2z%E!vq=Dn3 z=}3o#$ipI;>tR+DQU6lN-7Fs_wd28R59*E$-yx*u(0!4rCX`rL$bW><7sVQ7`7o&+ zzdIfKGLC&%NDsKVHn`+}K(3O}P_PF`GJJqj0XryIk<56)5G-Ylybu)ZF3E0Z);mC= 
z+r%}zGM>0qT#wRqjfkSjC`$y(`58VuLfqg}CBFF6zW57?`@o){N8=Rn+h7J!<;hQR z0aMNW_W#EjR-Iuj*dL-E+czNPxM zM@fADd-2}M!Ts;Kdat`@KeRE-UG?M-n$fu*K+%pDqEDP4_Q`Xl!0v~}p}*nSCrFn% zsFwH+nus(R-#_Ua@=jtb2Zp4d5(8}fg*X4q7eDQL7c-j#hE0{c5Bl-mw<*fdwzuo! zXMQjj35qd~c2pqaY#@O?p^=tIn}*gxuGqgSno+&Qdf6g~_fK3cH6+S@&FXa|v z(huqSIQ}%>yKnh+kCyn#PWj^JFa#I-PA2kvC(q^i;s@$reEh&de7d1Ie&7mRRZ*N_ zFZ#&=ZfmGupx~Af86GBW-_T*ouy5%5oc0hKY8+CnrGDy6{J?5fKd_1;k0-XH9)<)M zID<@6(!SxFb1>Yn%Fy>289E@`^Qo}jQfz+{X0SGtrY zhIUHVQ=_3;<64Hv-~V;0MKnWJ2N{}MFNX1Lr=bHno>=$}*&G9Vn%G&yx}fDvD2;1j z#kaMLxm>iHyr&IIV})e)lLzs|p1AI< zRGq^GTubBZR_*}_-{53%-z}Jfkju`BaYne~ci462R`uSSY>G9dsko>Q_R&sCDGpz& z_K$}Cqr`R7BN>2RT}jn*ISZ+Mjwd!FdLqs?#u6u)GX6YWrh5suzQO&ITJeLUDfd&G z6m4Kw)H~f%an%lOL%ZNo(`}y^!pVf`JA;}sHQ^Orpy?TWJJ0{??`Ryp<&SYoRKL9; z{(SwmhWM}RTk7NQZSxJ6KMJ_HvFv@+Xkp?z6UgSt_j8TK)jus>Jb=Ws#`v3!@i*4T ze_21Z;OD-qp6kK>lXbDT( zH2aL7+&c==acX_`c!P&F5uH9{+sJpz! zP0_JWsyA%JW^3MV>|}Gd_6BxCz1!avywNSO_E0#e5RKAs$5#1!Lhe{F91Ixck=DD2 zV56K7>_$rE(uRi1-NC(XnQ1rJV1EzNi-jWH<;LEgp!>S!+Z(R~9_sGtHOjGJD};S4 z`}_tJ+}GV*x~_S9>5cBY_pfx9u8VZ3s4a*IxODTD&EC@e6dijLJ~Tcn^+Y3F5Zb2| z>kPFUPbnK%WBb2c3oMo3QaHXx>3nTBP@g(4({t!qy_Rg^jpQ zrIoWbm6s!suXT`9I&R4Cg1jHsd51I7=}xCZ`^=9+ugIKELt0*fIy?e77H`t|2&LuJ zHjg>TDIb*2Q;^fX>bxV?eB>6SeM@cnPtn%rnbX;nPKVn1HzB8qLCPQDwEPLkk3!CS z090G@rsZcLe*|)V)~)2Xr{$B7zlFYftJ(fuA$J2?h`urx-+ufIUBzz_a*DDZe>X#C z2hs=s)qNNFis}XnHWqohi|Xo&s_KiDtu1n| zEn2v?sAO$XA@{R;VP^n#RG%VqQ&X;{O93p+KVPWby7}co?K#)5>#;)Z&7Ayi6l!0~ z%^xV#9?i}Fw*u|S-29&vXg|-*f2KftKR5qaf%d=h^1oG}{V*^8{sQga^4B2N1G8WY zHcNixLSjG6$$vISOXdh{*5=BuXb-vsGv^vCVK(L8p@qRyK6013E?*zev_Es@|IFoj zfz)1i&3@OV9iZr_<(V+!ff*0Xcwoi@Gai`nz?mf;cEb0U2|6Nq^+lOU=ko+6{Y6T9 zRVgSv+M7yYjv$!mdH720O?()S&M2t8cdd;p%WKLs>I&T7(0Mo3U*lTUS$9Jnu!&L+KGXEm%;7`m-lExIf4o6o>*JrMR|*+`pjku49j>sUE4u)>2y8S4CM z9Fg`mY}wk%K!&am# zWa#ZttV`)V5&X&^7)89)7{`8Dei7Xp&ILLblmp{ei}K+|I^j@9H{mn>T=B8`b6PtN z%(UZ;^)v1GzuLGktsTc^+VM;~{;&7_Hmw~8XWH>hJN|FbjveO5)7rbk{9~G)!}vZ; z&!PX&MAb5X_gAOp?u(arXsXHg%b7Y#rs%ZvdPIL!az$t6t)K9`Rr=j6>u#x}Gk*Vg 
z{jTmI={UoU^~-JVA%)ET9@4Jgsk&0sPDibUT`Jc!>~z%n*oa&wv(u@P?Sl57JN0qe zqE6xPhuBdvMeWj3GDYolRCyMCNV!yb9+Uau%sG6}FW8Udc&16|H}^)Ujjd?5} zh6bFP@GD+dG7D}YCg);JeJ3i}CI{RvakZwV^m`rfVF&zi2Yk!{|EUB1t^+=Y3XF1P z=5wjU)prs_r*MnJ)p#dRP5YdKon8n0umgU~0spB3{u>8;aSn23Q?8pG@SPIREbqM% z&nz#UUuf4(z6U%9tCec3R{l9A@m)(i3S#;<5+BLHe-7M*_WVS%(5GTjc!Tlj{p%7e zXi~cBxsK4*w5x%;Z2GAOxLy9YvwpEwBF7Dt?w1(9OryQ_6cj!HoYLL1OyImv1@fa3 zSI?K!a~3DRonrfQwJU7K8M}Nw2Ys3aXO3epv7PDT%p0sff7-Y-CGpI0Y#yHXxM;;T zWBgeLd>$p85%xLYKLAeWDZX9d;TSaMrcj8*XFEH0I^c!_{yK27U%Ar5b~o~;rrm@E zZ#(;az)SG->B|}8=Y9wMp8_ZQndei^o2zMV*vTA+mlV)ic=~$>#ph51+|JHEiD#a3 z`LDq3@;@c@|6KZ0={$*goQHg>xe@KHqp%DU7t+Z*7qbWW+?-;Ken4Ul`=!1aqw8lJ z@CA6FNA}OTJ*1WjDSGr9o}QON$7%ultne-OBx z{ePDFef~9 z{2p*S``>oJyP!NT2TsapKj)!8H)mn$yj78VJjX)Uva?_BfZy+czvzHpiA4iD`-gzr zo&WlWz{wBm%RQXM3YpP&B>v7af!`$Y64VQgKbhkOokLuL@4L}vp)b|7E4dyOm!=)S zDcxvBy3aW1|3d2btrYe*N>eo`ETx+{9^Ojzl1?sl6<{3wu2R?uN=eK?f6M{@G#+@` z*F07n%7!@Q+cb)1WtQ`Dg3>IotGT&DF?in#u?bBUpiAz<4FLx-Fd@%S)Wxc z9}fekd_E)VeWjHC0Jz(tDVWm^b}qw&!OlNn2mGYO&y{+tsZ{pHqI5WXC7xba;#7FO zc6E6)*w*Pc#7Xg$Ybq-$(*L1}W8?!-qq-uZRrtHRBltOk9>S5Q6@g4%$H9J?SVd>V z$izs`4zK9#4u^IJ!+UKbI0-BGtmNJv+ei+w3O*_?qd1%`s{qjr9P}3r;+#Hnd9Ap$ z+*X;joL1R%KUn29{;(>rf$4s+Doyu~RnA5LMGdnLtvVV0vI^ON)o*%RSU3%b64C*L zqj?m;{MlVhW2xvGS5$Ds!u-QO2y0Aea(jAQ9bi=lY-Vu(p(lL-Y z`8lThd;7HjexMQ#8o{=T8&<8Uu~*XD(R_7(H0s}rtQ*n2n%L0L*4x#!7g0gA#9hx@r?SNtE+Y85<=3y zwbA`J+_5bv&bh`m54|cB()5juTh`V$B1&(o-k|d_j=H{KYyBp#?%lkejtJMWBS!c7 zB-*Y-q-j;Vt!R8@<_{3TiqR*W*$9PH>zg z9U*za=N(%tupj9F&eTcYIITF?5sB`l6T#^u>2Evbqu{q`rtZowI`d|K>BuRuEL)sh!Y57IgE!or0* z-RS^fOA4w2F8Cy8yR-5ewxENy;cKop7DSiH>Ii^ZeVMtWD!(^BVeEP9MrF6H2wKPyC$|pGe6pXNET$ zb-zu`idOeg-hSZxMh8nFvuwteGq~lE?-=;HB7MPztyzrMUN5%S8r8MYAeq1%Shi}s zHN?A`TQ;>19$m9Ifq#05&98%)F1_Gy!)l8^(r1!eOlLj#TCJ}auOisYht^LUm#IFp z7$#G_+d2W^EbA?s)~Px3M}7AOy^+9D`0U!5eA|S@Quc1NycodFCLH%s-hN{fwb}S& z&(XI{l3#0Jk!0s(v@g>+0H!wuSi`V2J>zi+;6(1Sn_YMSN zy?pnXombOu4ze_itbJ>Jo6&Te))!M)-D&fJ7psD0QjJ= 
zYQl@Ay6~-w8!*hb^gcfv8#Cw?SKhAXwfeq{H$tFL^8F6fj@8>|EJe+(F03zCu^1Es zYpT9!Wd&(88}yzei=t{3+>Uq|LyA+ABKqFUc%50Te1i(VW8-6ZF`DWA-IBTNTa|o0 z6ne_Xb%&Di&NTacTD#!%FHfx^wztdhw}Ki`K|9qI|7r)Tr8praSb;miiq_r`UK5Iw zQUrvzft1tinq{5zW2B0E}us;y&q2}I-TSvAb7P5p5b&aI_T_LPRchEvJ zJ}Nj#VICtFj*W?Qbzz-PsL9k>g=mLT1R#T>2w3{(6XTBuYf( zU;Uo6koNsL-V%7eL}3?yPWPWG|Gkh?{Iya~?T_=}c@PCR2@xp&v=3ILuj1bi zjC?}*=9g;5ddOf-8!j%4U@Z972^K-zA|bDE}$w2rkU= z)jk^ap64GcMM*Q$Kb8?+?HfE;BO*L3`++i(nf_=-e6_#OO&uHIs357&%=DjwqzHd1 z8MS{$y*E13-zt4Ye}=et1fGho_A4fMqhGqcL4|{gr|2s{Ro@^5wXadVms?egYM&~e z%GXuDWr{odw`W@+J%lMvD!(>=<3zGVzeTVq3nrjMI@pq&UaY4Ei6ukr&H2$lg_Hq8W zULar2aG+gzGP1}Ko3@JhJ=C}m=-_i7{*>R;_=D{?sj}lT6-IZ7%a_%LHz`s1R*Kk; RU>pDOKB2K9BSHqN{eNAK29f{( literal 0 HcmV?d00001 diff --git a/test/queries_generator/generate_aws_cmds.cpp b/test/queries_generator/generate_aws_cmds.cpp new file mode 100644 index 00000000..b2cb69d0 --- /dev/null +++ b/test/queries_generator/generate_aws_cmds.cpp @@ -0,0 +1,26 @@ +#include +#include + +using namespace std; + +int main() +{ + fstream query_file, cmd_file; + query_file.open("aws_queries.txt", ios::in); + cmd_file.open("aws_cmds.sh", ios::out); + cmd_file << "#!/bin/sh\nset -x\nset -e\n\n"; + cmd_file << "mkdir -p aws_results\n"; + string bucket, csv_file, query, aws_cmd; + cout << "Enter bucket name: "; + cin >> bucket; + cout << "Enter file name: "; + cin >> csv_file; + for(int i = 1; getline(query_file, query); i++) + { + aws_cmd = "aws s3api select-object-content --bucket " + bucket + " --key " + csv_file + " --expression-type \'SQL\' --input-serialization \'{\"CSV\": {}, \"CompressionType\": \"NONE\"}\' --output-serialization \'{\"CSV\": {}}\' --profile openshift-dev --expression \"" + query + "\" \"aws_results/output" + to_string(i) + ".csv\""; + cmd_file << aws_cmd << endl; + } + cmd_file.close(); + query_file.close(); + return 0; +} diff --git a/test/queries_generator/queries.txt b/test/queries_generator/queries.txt new file mode 100644 index 00000000..123b124a --- 
/dev/null +++ b/test/queries_generator/queries.txt @@ -0,0 +1,10 @@ +select lower(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from stdin; +select to_timestamp('1999-11-04T20:27:03.479340Z') from stdin; +select date_add(day, int(8), to_timestamp('1982-02-03T00:20:12.42831Z')) from stdin; +select substring(upper(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## '), int(avg(int(_3)-int(_2)) - int(3)), min(int(_1)-int(_1)) + int(7)) from stdin; +select int(10)-int(_2) from stdin; +select character_length(to_string(to_timestamp('1963-06-22T05:35:39.954350Z'), 'SSSSS HH ')) from stdin; +select int(_2)-int(2) from stdin; +select char_length(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from stdin; +select to_string(date_add(second, int(5), to_timestamp('1938-09-24T10:57:42.142042Z')), 'hh ') from stdin; +select to_timestamp('1931-12-23T02:54:25.451925Z') from stdin; diff --git a/test/queries_generator/queries_generator.cpp b/test/queries_generator/queries_generator.cpp new file mode 100644 index 00000000..0e57ff24 --- /dev/null +++ b/test/queries_generator/queries_generator.cpp @@ -0,0 +1,325 @@ +#include +#include +#include +#include +#define NUM_COLUMN 3 + +using namespace std; + +enum Return_type { INTEGER = 0, + STRING = 1, + TIMESTAMP = 2, + MIX_COL_NUM = 3, + COLUMN = 4, + NUMBER = 5}; + +auto random_arth_op = [](){std::string op="+-*/";return op[rand()%op.size()];}; + +auto random_compare_op = []() +{vector op={">", "<", ">=", "<=", "==", "!="}; + return op[ rand() % op.size() ]; +}; + +auto random_date_part = []() +{vector op={"year", "month", "day", "hour", "minute", "second"}; + return op[ rand() % op.size() ]; +}; + +/*auto random_date_part_extract = []() +{vector op={"year", "month", "day", "hour", "minute", "second", + "timezone_hour", "timezone_minute"}; + return op[ rand() % op.size() ]; +};*/ + +string random_timestamp_string(string& aws_expr) +{ + auto year = [](){return rand()%100 + 1900;}; + auto month = [](){return 1 + rand()%12;}; + auto day = [](){return 1 + rand()%28;}; 
+ auto hours = [](){return rand()%24;}; + auto minutes = [](){return rand()%60;}; + auto seconds = [](){return rand()%60;}; + auto fraction_sec = [](){return rand()%1000000;}; + stringstream timestamp_str; + + timestamp_str << year() << "-" << std::setw(2) << std::setfill('0') << month() << "-" << std::setw(2) << std::setfill('0') << day() << "T" < op={"yyyyy ", "yyyy ", "yyy ", "yy ", "y ", "MMMMM ", "MMMM ", "MMM ", "MM ", "M ", "dd ", "d ", "a ", "hh ", "h ", "HH ", "H ", "mm ", "m ", "ss ", "s ", "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ", "n ", ": ", "- ", " "}; + return op[ rand() % op.size() ]; + }; + int loop = rand() % 10; + string frmt; + while(loop) + { + frmt += random_format(); + loop--; + } + return frmt; +} + +string random_col(string& aws_expr) +{ + int num = 1 + (rand() % NUM_COLUMN); + aws_expr = "cast(_" + to_string(num) + " as int)"; + return "int(_" + to_string(num) + ")"; +} + +string random_number(string& aws_expr) +{ + int num = rand() % 10 + 1; + aws_expr = to_string(num); + return "int(" + to_string(num) + ")"; +} + +string random_num_expr(int depth, string& aws_expr) +{ + string aws_expr1, aws_expr2, ceph_expr, op; + if (depth == 0) + { + ceph_expr = random_number(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + op = random_arth_op(); + ceph_expr = random_num_expr(depth-1, aws_expr1) + op + + random_num_expr(depth-1, aws_expr2); + aws_expr = aws_expr1 + op + aws_expr2; + return ceph_expr; +} + +string random_num_col_expr(int depth, string& aws_expr) +{ + string aws_expr1, aws_expr2, ceph_expr, op; + if (depth == 0) + { + if ((rand() % 2) == 0) + { + ceph_expr = random_col(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + else + { + ceph_expr = random_number(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + } + op = random_arth_op(); + ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op + + random_num_col_expr(depth-1, aws_expr2); + aws_expr = aws_expr1 + op + aws_expr2; + return 
ceph_expr; +} + +string random_query_expr(int depth, string& input_str, int type, string& aws_expr) +{ + string ceph_expr; + if (depth == 0) + { + switch (type) + { + case INTEGER: + ceph_expr = random_number(aws_expr); + break; + case STRING: + ceph_expr = "\'" + input_str + "\'"; + aws_expr = "\'" + input_str + "\'"; + break; + case MIX_COL_NUM: + ceph_expr = random_num_col_expr(depth, aws_expr); + break; + case TIMESTAMP: + ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')"; + aws_expr = "to_timestamp(\'" + aws_expr + "\')"; + break; + } + return ceph_expr; + } + + int option; + if (type == INTEGER) //return type is int + { + string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2; + switch (option = rand() % 9) + { + case 0: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")"; + aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 + + ") as int)"; + break; + case 1: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 2: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " + + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 3: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " + + op2 + " " + random_num_expr(depth-1, aws_expr2); 
+ aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 4: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 5: + ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING, + aws_expr1) + ")"; + aws_expr = "char_length(" + aws_expr1 + ")"; + break; + case 6: + ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING, + aws_expr1) + ")"; + aws_expr = "character_length(" + aws_expr1 + ")"; + break; + case 7: + op1 = random_date_part(); + ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr1) + ")"; + aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")"; + break; + case 8: + op1 = random_date_part(); + ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr2) + ")"; + aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")"; + break; + } + } + else if (type == STRING) // return type is string + { + string aws_expr1, aws_expr2, aws_expr3; + switch (option = rand() % 4) + { + case 0: + ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ")"; + aws_expr = "lower(" + aws_expr1 + ")"; + break; + case 1: + ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ")"; + aws_expr = "upper(" + aws_expr1 + ")"; + break; + case 2: + ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " + + random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")"; + aws_expr = "substring(" + aws_expr1 + 
", " + aws_expr2 + ", " + aws_expr3 + ")"; + break; + case 3: + aws_expr2 = random_tm_format_string(); + ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1) + + ", \'" + aws_expr2 + "\')"; + aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')"; + break; + } + } + else if (type == TIMESTAMP) // return type is TIMESTAMP + { + string aws_expr1, aws_expr2, date_part; + switch (option = rand() % 2) + { + case 0: + date_part = random_date_part(); + ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " + + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")"; + aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")"; + break; + case 1: + ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')"; + aws_expr = "to_timestamp(\'" + aws_expr1 + "\')"; + break; + } + } + else if (type == MIX_COL_NUM) + { + ceph_expr = random_num_col_expr(depth-1, aws_expr); + } + else if (type == COLUMN) // return type integer column number + { + ceph_expr = random_col(aws_expr); + } + else if (type == NUMBER) // return type randon number + { + ceph_expr = random_number(aws_expr); + } + else + { + aws_expr = "error"; + ceph_expr = "error"; + } + return ceph_expr; +} + +int main() +{ + srand(time(0)); + int reps, depth; + fstream query_file, aws_query_file; + query_file.open("queries.txt", ios::out); + aws_query_file.open("aws_queries.txt", ios::out); + string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## "; + cout << "Enter number of quries to be generated: "; + cin >> reps; + cout << "Enter depth of queries to be generated: "; + cin >> depth; + if(query_file.is_open() && aws_query_file.is_open()) //checking whether the file is open + { + while (reps) + { + string aws_expr; + int type; + string ceph_query = "select "; + string aws_query = "select "; + /*int projection = rand() % 4; + while (projection > 1) + { + type = rand() % 4; + ceph_query = ceph_query + 
random_query_expr(depth, input_str, + type, aws_expr) + ", "; + aws_query = aws_query + aws_expr + ", "; + projection--; + }*/ + type = rand() % 4; + ceph_query = ceph_query + random_query_expr(depth, input_str, type, + aws_expr)+ " from stdin;"; + aws_query = aws_query + aws_expr + " from s3object;"; + query_file << ceph_query << endl; + aws_query_file << aws_query <