From f8cbd8f6012836a223d3a583de6e5904593408e7 Mon Sep 17 00:00:00 2001 From: Koeng101 Date: Sat, 5 Jun 2021 09:03:19 -0700 Subject: [PATCH] Fixed EOF error call (#167) --- .../uniprot/data/uniprot_sprot_mini.xml.gz | Bin 9269 -> 9272 bytes parsers/uniprot/uniprot.go | 20 ++++++------- parsers/uniprot/uniprot_test.go | 27 ++++++++++++------ 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/parsers/uniprot/data/uniprot_sprot_mini.xml.gz b/parsers/uniprot/data/uniprot_sprot_mini.xml.gz index 6bad9a4144bf81a10eb2c9df8d21398f96a1ade2..fc6c8f18d7ac4d5514d8982cffd9b26248ca62bb 100644 GIT binary patch delta 3864 zcmV+z59jc;NVrG`ABzYGG8enC2O$-I+eX6g`4y^t+1;w0iS8p0slo^#2m_=+0)fJ( z6y;$?j5VTGhqL2fzYXe|!-S5Bojt~7(-PGL8Ylbs(0z)w{6Yza$F@3fx7&Mi2Hm@< zw}%Z(8aJegZwBZj71$G8g`BaFN*yFP@;u{dq^S-=8k>+J&7}^dMKNiE1q(xePljHg zJ(Rpkiv@1lSB?JBm|C4nPK!tFp9VoysNpvMMf(JWm#eW*M(ra`Yp_2oV2a;o-!CS` z+%`L%l?ftG(q%2jQ!Ch&fWa@58Q7WRe5PF9RHs`uN?&MVYnJ;`zq{qony)SZ6s z*jaJ1{+KQvD%D4oE-H>(Pd{aU#fGad{yfh&2;qNK5qtnXL7QCWw^fJ7L6n+57|K$Y}E$`d~53faBa1!ML~;8oq(izwLNqRdd#oI z;@(15Ed~}gxvchbrj+RH)Sk%uJzq^~`q~nIN-vNLrOm4BlCk1!maeCNw>94>(1C(# zWk**c$zZ*hPad3EA`{rxmLOZY_!(vSqBe2~g&FK3i&?RDHVZAM*=prv_y%OlV(v_= zdQ+@FOmdJ@U6|WpVNcIEs~Q&Tsr?pT2TtjG2>axb@d7ExIQ7F>|&}SP99+c2M@9tCNEd92R!EaJDEYpwSQRa*Bn;cDvdW+KkFBc=-Fj zhOm)1Ig#bTC)aF_)+v+`IM-;Lj>q||fW2w?Vd6x#$La5|Ro^XtVHmKBY`wC^I~V6F zU4vAxu=Bz}lx25E4uq&lYBy8|!9Ulq^DdqH!j8tpum-W?!8yA|>GWf{KwVXhO4BCH zQFgH7(EB4Z!A}1&1cP%_U<;itLDzS5 zR+_JzeRTtyf_;O3-dwx*_U&P=$1DC4OF(BZXm>hiqz!H~yWcN2!UB8&QM9r5{r$nl z09ybEE*}mOfIW%@(kz;R6bM4n_UpFp1Fbud#Bx2h;>p_XV>=aVCpSI%O*~iPcC?TD zqaX?m?Ks_M_hS%tvZp#*`P8bImuK0YJ|*&betVUIE?HQAiFHeJvh{DSmN8-Vt$st3_2j?4lcFD{2cr; zX!k*Dz0?-d>UUf9oyPC9#jMiV4_hUzlG>29Oji?pv$bOE6p|F*9DO#sy9?iM>w?8d zJN0Iqw5sxd8Lu{KTmGR|uGM{q?6tFV4x30=%2v19f>%l~*Tkt}tc|Dw7_+G-eGai{idmT5YI(#*}!!sgYXDL#xq*ajNL| zY;k|6>xa!=P@-D4j949Pw-;5drrlO>7_}|dqEoMbT!HZQx9r2Aoj=WT+gtqRijOXA zXT8(!R|The*3Qi={>BYc>#^P1&ooS)6?78@`^Edi0Zpp6?8Knc2Uv5P+K-U8x=-I3 zF5=R|18=3%ini$hehf}w`ZQDQ^YXjBo_%Nz8nm0R*8)aVem6_qu&#Qq-7k;KFN)7a zzH(ZBf|q4+wRtwTJO5vvhHU!oi72ptVV{B08e9ONq z7`#qEGXRx%0YbxKoxw|qd%_6eg-i^ECz+rK6JY`+nbaDaNOGe+%tB?4fl?_F5lTY> zL=8_wB%(Ny)I`ST#t@2`VIvX}5=eU_6ft`~l^B@>C^19`f+ZHr^8`rhI2s|Sgy1Lv z-Bv`Ah@*)4nkdgxA{5Gtd=e%+Apz8X3||!Efia$zd@IbF1oM!^%(L;zLXsG*45OYG zQ^5uN37KaY#xV^T4SC{4LSW1js8yMWCI)FHx!~ACL?*suOkk!k!-)yKL{N-$;$ulY z&Wz+*Mnk5dm6$?{SVEKvr7~n_66m1>Sm7a2frtebp|7M=iB?I1cuc4n0c$#cVOS8t z86hIVeylVR*aTjnFo#Esv1xW_C`jT31{x{-7&FFE7$pgzf`q;>Lp@^9SVEDE zh4DpfIF6Z&LXBAx817M$j7-cF*HL0ow44k0r#SpC71U2K)ag?yC8v@|si~yCQi>=} ze4BCAevt5)D!6JGEMhe0l1c=BH@Q@tNt0+EOX5jeFp$A#xRxJ`l2Lix-G_daRW$Mb z=Kimd9+uqWgo^~LL|~2(2QIYHLP{f~#xO~=uO*{IYD6N5F(VRMri{mckTRhOF6fdocg2Rw#<26`9^txZgz z0|JT&i{UqeF{YXzj0v`-0mnX+VxYuW#VMS;PG z^ciqANmmE3#|u?I0*M`5=&lKR0vzF}@A7nTz%X5u>;K)g%d!8$pZ1&} zhR&109xH!2oY!|;p znvZkmNa4a_n~Gyj?R1&`UO1h`!$mq?Ewaqbbg@`%`o>k8pQdi_Gn(Dy>tdS2NVr}y zLy*mv53m4TSShh~!YkBz;_l;o`YDC>xXSp+f+c^(_Hc-Hw%g05x8v!<`X1<9?UMGe zLh4*wXgbZ@^CyH&#}38F$-1^e$|Qx&NnE@36_rjukF&)cjEHMik0usB_ukzL`Remz zeK)E3?iF1uKBxAj;My8YW+TBJ8J;cfYUtdBUPv^TBNfgG(fAyx(7H%+cnwnFTq)wK zAr*g`7aD zPI7k`PSof(8@+Qe4?oS)rFBnbx97ng9>oqtl-C=jXZsSy2?|?{UVrdygL6d;HQ&VL*TqwfF~T>=^T3LuQ?neqh? z#)(t=GJv^D#KoAyno(?;D!zgH*Ef^zSDuSc;J?0~H>0wEmw z)KD2wq^QA?ZJ-rlq^U%n(CC1yFp+;xRcJ!y$HEXAni5%ol(xtUYLOLTi0KwtVFbl8 z6p`mKt|&$$qlF?qNd)2|A}o%O@tO6GD-j0Biv+Pj!-*lBgFuCG;zho($OweHrZ_SpA<7G-VE&MW1h`Ic9EMaeHWa=HlM-3MQ6dbK zujz;&t$isD$ciXNLyN2kJys$s5`&P?A;R#mw#bUuA}b7wje3l%z@aD15Lsje+&4&- z$O={>D-d)w+!vfeaU&fw8?1lKE0Gm2I9p@|g3;U}D=?9gP-zEQNXe0;7$kZ{sL!cX zoGPpo+^~oWMK`#Lc!#TifrC4WN{%J>Weis$;}T;LYow%(W2qz;G6F&&xzg|hbc%#u zq*9W^P&M$!xK>1fHi=G?p|D5XaDKT-xdL2zNoCdkOTUic8c5*`h9B`T`pyj<_Zp)P=O2 zXZs4S_A@V`7YUQlLd1WBc|G*vjNoxL(Enu#3|m#uim#Wzu>Exs7`ERM7;Y_pZ;|8P z5*VJv_FDqOw*-c72@Ky77&;F=za=nyOJMkx!0;`Bp*w-$w>d5TB!OYK>KJfE0>k~e z?h6wb_WslahK=3XxKk1sx`@Ae0>eK$MBt#`KRHBT`;Q0_*g04!p#C>B>Nc9yyf#na z^@S}Hnz%J(_UiWY!I0VUyiS7*u!rk)(g&INl=_+OS@%RTu#TVSqG9z|it( zi}G+sTx&!{hd1M2p9Xc!VM52Oy*b8Z(-PGL8jXY9=nvheSj#VrV0dh619!WT&tE^byrhV1uAC0Nix#YBX)c!CCszMF7`On%XD7@T^g)(ZNa9V@)VF5$@KKo%Y zDdx7>>8wl;d6F$_Ii6a@t^^Ezy{0)-^|G+9Ja)2StWmw+ZgyT_7VAltgX)*RMyKxd zd&ka-ll7-`@la_#s&r9tHc}z7Cw)_Yl^}BmD(hkjcR&hI^pOO9!O~!wXn< z73LMoPe2knNNBpaodJ!1t9i5-gI?&Wh%WM9eu1I9ym|+3f@MuPX#EsM+Y(fI7i5~J zt1rNn%P;#1~qo3I*^i|wG4pi9WXM-CG^T{v456wv4gcR9tvV!K`K32jDY7d-s^ zUqV<&oSew=;FD`KN9z>I2%K{?PRHYXR>0b{{5WwU+vD^%SgP-Tme38@MYdkq%{v$8 zDqVwCu(0F8L6l{8M-GIjNop5V2f;tru<|aQ>%xx4#jpmo|{@Ew(_Y}F)z=uJ$*{#^ZfQI1zWOzuo~-@=Ey@-vWiEb@l}j} zB~AvLeZ@%G>c5Wo7OuURVLWKpTb(MFmd?kyc4<$XceW3BK9xV(bi(6yF?uHoLnE-*4-J z$w)i(W}LKts`44HHfCG?u~yF2y+HQX**S+zBur&%TW!HBrIB9A=lcL{kNHra4S7&a zXrn9bMA2vun(aY5T{U~1>ar^~vj&w(k6&(P4JV7@zF1mosC~wec)zKUTFgVM(S&}g zkbAbcKh*W(W-TaDEn7yc4Ypg0s%Fz}DLC}n7HiResaMWG`1))1@zBl>v)oFH-(2z0 zh3%|&`u(clG|$?(8O7hZVQM|LOZ%A{lV=6pgwB5P{%}B(>Mc7k==1^R+@|&;9nG4I)Hx!r!almR_ycgyS<)$Xbu*%o3PgcMpS+`OWm-pdavCtkIXNM zFGaq8a_$5#%i?PDY;ITnKR*rG^rt7P!2X4O21;vi0Z8lR-gL9o?7qm;cXSK#Wd57= z$_Dg%t@B$@PP?rkkO+ z#QU53zeajka*q=(608z|IYJz`&_)X>jgT6{Akn^-j1s94i6q91NNAZd9s@$kgeJsr zO)&End?%6+TP;>nbLbroZPvb20k9v(vBFd!pdE1u56E-u}7XZuNc_ zKXK{fJBA7*X&Jo9UN^yiZ>@n!{-E7`Y*`))8`kLl0ROsI-F~O>*FpVn z15h)2SC4e{)BO$S`WS-s-TLkfAH%C7jh%|bXdjRmy$U48;{bQhM)lueQjCkwHk$Z6 zwE^fe;B1nv4q%NeWo`O!KcCpc8W#l;JGjtY1M~zq!cpJl>EM82x+v%WyK9$Y|Ajy8 zIYA7+ zzeSplb7xQC!eX0>V@~aKnf_KdoyNmOI$kZZ%*}MMSZ!qEs?ASRxAz&%?(%gp&7mh; zFPS0e=F10|fG(_*SUcerYCUnwIG=t_p*^lLK3K5Cf7l)l(av^z+4OcieOTWEovU5a z9#%-5a|=zUnS1<%u;|#X7&%$jR!NzpusDfp*Sez8>6dY~xPu;Xjq1_F;^*GGrI4?_ zOxAakn(v;`#o|k9Uka|J!DKcP+>!3t;;x3yRp^CAb2(DsoDhxAkqWJgG>6w970%Tn zz8O-Xe|Zs7p?L{Xq4^r5!lmGZr-Hi$TX9;2=%nUh@^GkY1-h=nc+6g^<}2eI#-uHSGTfq+43L_B0 zp-&B!5k-m`EZGKH5k{Iy2pOMQ@3<0SfV@Z$8#J64qJ6|eio(EXkI8tCtnhhg zVwP~EX^6E&R+tU4A{Iih5DH=#hcpOO7$;uj8;h(klBtM?iFLpmi>yevj}l)Re=IQL z7FnSK&ljjfRuDP#IAb!@)U(J6;bDp+BNC##PzvS`SxA8E1jk`W6=Or;i!dpX6&xkP zK>3=E2-4b@;()A(Vl=eKiqK;vvLZ1E2^}H~4{M98h%K_hu-K@_$O;^K!VHl`R=|CO zREex$C9(p6sNufg6p9<^nAu=ue_n~KfX>+>D-iVN7FmIbl!Qv#$wEqwB*mc7D?)uv zrQ%d!rQn7|R4BT^Rm3}71#}$TQB-m)xi4e55*e2mi&!HibsS42xsVYM3dxm*A3!J) zevwK^5<}I%ALCkaX-TDpQz#3k@Dhv@#Sv1mvbYYdjS&`0K@<~kLr9_}f7Q~{R!T-% z#6+wuKEs0+0Yg|xTc0AHr=f|8$2#_WcnDz_ml7L|h=7_lpEU4{2?}f}_MwDec$nfu zDk0%6jBFCafYMr%fM_gf93YNCYFygk^9XlAK)eJbsp1lKLAIz17{0&;i6gGb26Z8= z@T{!hYCrQ5dXX>*EksP1f7e4V&Ilf71N~o?z_3*Xt@w5c4BOu(fnobCf#G)N?=5oN zTLQzg*nUf3_?E!%ErH=%0z>D)=eGogZwU z*L`6E!`>g7z_7798+S?qLl^NkPhj{*hX@?>`zMD8Z2ul10y`%KQ`G;Co4So=HLuN6 zczv;#2~FH?W%la!^I;>i<9VG18DJ0B>!c4l@hRB_rJopMxRQb0Q?X}s?8OWd`_Z7` X?#`o&V#a(umf^nvQ83`xU_b!?3(|If diff --git a/parsers/uniprot/uniprot.go b/parsers/uniprot/uniprot.go index 7e3fdd95..bb888d99 100644 --- a/parsers/uniprot/uniprot.go +++ b/parsers/uniprot/uniprot.go @@ -60,21 +60,21 @@ func ParseUniprot(r io.Reader, entries chan<- Entry, errors chan<- error) { decoder := xml.NewDecoder(r) for { decoderToken, err := decoder.Token() + if err != nil { + if err.Error() == "EOF" { + break + } errors <- err } - if decoderToken == nil { - break - } - // type assertion startElement, ok := decoderToken.(xml.StartElement) if ok && startElement.Name.Local == "entry" { - var e Entry - err = decoder.DecodeElement(&e, &startElement) - if err != nil { - errors <- err - } - entries <- e + var e Entry + err = decoder.DecodeElement(&e, &startElement) + if err != nil { + errors <- err + } + entries <- e } } close(entries) diff --git a/parsers/uniprot/uniprot_test.go b/parsers/uniprot/uniprot_test.go index fca220a3..30593bfd 100644 --- a/parsers/uniprot/uniprot_test.go +++ b/parsers/uniprot/uniprot_test.go @@ -2,8 +2,8 @@ package uniprot import ( "compress/gzip" - "os" "fmt" + "os" "testing" ) @@ -20,18 +20,18 @@ func ExampleReadUniprot() { func ExampleParseUniprot() { xmlFile, _ := os.Open("data/uniprot_sprot_mini.xml.gz") - unzippedBytes, _ := gzip.NewReader(xmlFile) + unzippedBytes, _ := gzip.NewReader(xmlFile) entries := make(chan Entry, 100) // if you don't have a buffered channel, nothing will be read in loops on the channel. - decoderErrors := make(chan error, 100) + decoderErrors := make(chan error, 100) go ParseUniprot(unzippedBytes, entries, decoderErrors) var entry Entry - for singleEntry := range entries { - entry = singleEntry - } - fmt.Println(entry.Accession[0]) - // Output: O55723 + for singleEntry := range entries { + entry = singleEntry + } + fmt.Println(entry.Accession[0]) + // Output: O55723 } func TestReadUniprot(t *testing.T) { @@ -44,4 +44,15 @@ func TestReadUniprot(t *testing.T) { if err == nil { t.Errorf("Failed to fail on empty file") } + + _, errors, err := ReadUniprot("data/uniprot_sprot_mini.xml.gz") + if err != nil { + t.Errorf("Failed on real file with error: %v", err) + } + + for err := range errors { + if err != nil { + t.Errorf("Failed during parsing with error: %v", err) + } + } }