From 66b93dc2af71eb0576af7a414e68493026c258c0 Mon Sep 17 00:00:00 2001 From: Marcel Bargull Date: Wed, 8 Feb 2023 10:46:05 +0100 Subject: [PATCH] package-downloads: Store version-aggregated counts --- .github/workflows/packages-anaconda-org.yml | 25 ++- package-downloads.md | 182 ------------------ .../__pycache__/__init__.cpython-311.pyc | Bin 162 -> 0 bytes .../__pycache__/exceptions.cpython-311.pyc | Bin 199 -> 0 bytes .../__pycache__/__init__.cpython-311.pyc | Bin 169 -> 0 bytes .../__pycache__/version.cpython-311.pyc | Bin 33730 -> 0 bytes .../stats_from_anaconda_org.py | 108 ++++++----- 7 files changed, 72 insertions(+), 243 deletions(-) delete mode 100644 package-downloads.md delete mode 100644 src/package_downloads/_vendor/conda/__pycache__/__init__.cpython-311.pyc delete mode 100644 src/package_downloads/_vendor/conda/__pycache__/exceptions.cpython-311.pyc delete mode 100644 src/package_downloads/_vendor/conda/models/__pycache__/__init__.cpython-311.pyc delete mode 100644 src/package_downloads/_vendor/conda/models/__pycache__/version.cpython-311.pyc diff --git a/.github/workflows/packages-anaconda-org.yml b/.github/workflows/packages-anaconda-org.yml index c9d483a19d2b3..ae6e936230b03 100644 --- a/.github/workflows/packages-anaconda-org.yml +++ b/.github/workflows/packages-anaconda-org.yml @@ -22,29 +22,28 @@ jobs: export PYTHONPATH="$( pwd )/src" python -m package_downloads.ntp_time )" - git fetch --depth=1 origin data:data - last_commit_message="$( git --no-pager log -1 --pretty=%s data )" - set -x - case "${last_commit_message}" in - *"${time}" ) + + if git ls-remote --exit-code --quiet --tags -- origin "${time}" ; then echo "skip=1" >> "${GITHUB_OUTPUT}" - ;; - * ) + else echo "skip=0" >> "${GITHUB_OUTPUT}" - esac + fi - if: ${{ github.ref != 'refs/heads/main' || steps.check.outputs.skip == '0' }} name: Fetch id: fetch run: | + set -x + ref="$( git show-ref --head --hash HEAD )" pip install \ ntplib \ aiohttp requests urllib3 \ pandas + git fetch --depth=1 origin data:data + git checkout --quiet data + git checkout --quiet "${ref}" -- ./src time="$( export PYTHONPATH="$( pwd )/src" - mkdir tmp - cd tmp python -m package_downloads.stats_from_anaconda_org )" echo "time=${time}" >> "${GITHUB_OUTPUT}" @@ -52,13 +51,11 @@ jobs: - if: ${{ github.ref != 'refs/heads/main' || steps.check.outputs.skip == '0' }} name: Add changes, commit run: | + set -x git config user.name github-actions git config user.email github-actions@github.com - git checkout data - rm -rf package-downloads - mv tmp/package-downloads ./ - rmdir tmp git add -A ./package-downloads + git rm -rf ./src git status --short | grep -o ^. | sort | uniq -c git commit -qm 'Update package download stats, ${{ steps.fetch.outputs.time }}' git tag '${{ steps.fetch.outputs.time }}' diff --git a/package-downloads.md b/package-downloads.md deleted file mode 100644 index 317aec5fa420b..0000000000000 --- a/package-downloads.md +++ /dev/null @@ -1,182 +0,0 @@ ----- -# Directory structure -``` -package-downloads/ - anaconda.org/ - .json - / - .json - / - .json - / - .json - / - .json -``` - ----- -# JSON structure -``` -// package-downloads/anaconda.org/channel.json -{ - "channel": "", - "download_per_package": [ - {"package": "", "total": }, - ... - ] - "downloads_per_date": [ - {"date": "YYYY-mm-dd", "total": }, - ... - ] -] -``` -``` -// package-downloads/anaconda.org/channel/package.json -{ - "channel": "", - "package": "", - "downloads_per_version": [ - {"version": "", "total": }, - ... - ] - "downloads_per_date": [ - {"date": "YYYY-mm-dd", "total": }, - ... - ] -} -``` -``` -// package-downloads/anaconda.org/channel/package/version.json -{ - "channel": "", - "package": "", - "version": "", - "downloads_per_subdir": [ - {"subdir": "", "total": }, - ... - ] - "downloads_per_date": [ - {"date": "YYYY-mm-dd", "total": }, - ... - ] -} -``` -``` -// package-downloads/anaconda.org/channel/package/version/subdir.json -{ - "channel": "", - "package": "", - "version": "", - "subdir": "", - "downloads_per_basename": [ - {"basename": "", "total": }, - ... - ] - "downloads_per_date": [ - {"date": "YYYY-mm-dd", "total": }, - ... - ] -} -``` -``` -// package-downloads/anaconda.org/channel/package/version/subdir/basename.json -{ - "channel": "", - "package": "", - "version": "", - "subdir": "", - "basename": "", - "downloads_per_date": [ - {"date": "YYYY-mm-dd", "total": }, - ... - ] -} -``` - ----- -# Example - -``` -// package-downloads/anaconda.org/bioconda.json -{ -"channel":"bioconda", -"downloads_per_package":[ -// Limited to top N packages to avoid bloating the channel.json -{"package":"example","total":23456} -], -"downloads_per_date":[ -{"date":"2021-01-01","total":21234}, -// For stretches without download count changes, only record start and end points: -{"date":"2021-01-02","total":22345}, -// {"date":"2021-01-03","total":22345}, // Same count as previous and next date => exclude from .json -{"date":"2021-01-04","total":22345}, // Same count as previous but not next date => include as end point -{"date":"2021-01-05","total":23456} -] -} -``` -``` -// package-downloads/anaconda.org/bioconda/example.json -{ -"channel":"bioconda", -"name":"example-package", -"downloads_per_date":[ -{"date":"2021-01-01","total":21234}, -// For stretches without download count changes, only record start and end points: -{"date":"2021-01-02","total":22345}, -// {"date":"2021-01-03","total":22345}, // Same count as previous and next date => exclude from .json -{"date":"2021-01-04","total":22345}, // Same count as previous but not next date => include as end point -{"date":"2021-01-05","total":23456} -] -} -``` -``` -// package-downloads/anaconda.org/bioconda/example/1.0.0.json -{ -"channel":"bioconda", -"name":"example-package", -"version":"1.0.0", -"downloads_per_date":[ -{"date":"2021-01-01","total":21234}, -// For stretches without download count changes, only record start and end points: -{"date":"2021-01-02","total":22345}, -// {"date":"2021-01-03","total":22345}, // Same count as previous and next date => exclude from .json -// {"date":"2021-01-04","total":22345}, // Same count as previous and next date => exclude from .json -{"date":"2021-01-05","total":23456} -] -} -``` -``` -// package-downloads/anaconda.org/bioconda/example/1.0.0/linux-64.json -{ -"channel":"bioconda", -"name":"example-package", -"version":"1.0.0", -"subdir":"linux-64", -"downloads_per_date":[ -{"date":"2021-01-01","total":11234}, -// For stretches without download count changes, only record start and end points: -{"date":"2021-01-02","total":12345}, -// {"date":"2021-01-03","total":12345}, // Same count as previous and next date => exclude from .json -// {"date":"2021-01-04","total":12345}, // Same count as previous and next date => exclude from .json -{"date":"2021-01-05","total":13456} -] -} -``` -``` -// package-downloads/anaconda.org/bioconda/example/1.0.0/linux-64/example-1.0.0-h1234567_1.tar.bz2.json -{ -"channel":"bioconda", -"name":"example-package", -"version":"1.0.0", -"subdir":"linux-64", -"basename":"example-1.0.0-h1234567_1.tar.bz2", -"downloads_per_date":[ -{"date":"2021-01-01","total":1234}, -// For stretches without download count changes, only record start and end points: -{"date":"2021-01-02","total":2345}, -// {"date":"2021-01-03","total":2345}, // Same count as previous and next date => exclude from .json -// {"date":"2021-01-04","total":2345}, // Same count as previous and next date => exclude from .json -{"date":"2021-01-05","total":3456} -] -} -``` diff --git a/src/package_downloads/_vendor/conda/__pycache__/__init__.cpython-311.pyc b/src/package_downloads/_vendor/conda/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 88378b2b7d9eac3610f89066ab33a2b08618e68a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 162 zcmZ3^%ge<81XE@lPX^JCK?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCU#9v+sRjA^ z#YM^b1&PVoiRr2FDf#7jIr)hx#rpAOsd*{+Mf%D4c`1qd@$s2?nI-Y@dIgogIBatB gQ%ZAE?TT1|rhx1z<_8iVm>C%vKQO?EB4(f%0E$>9m;e9( diff --git a/src/package_downloads/_vendor/conda/__pycache__/exceptions.cpython-311.pyc b/src/package_downloads/_vendor/conda/__pycache__/exceptions.cpython-311.pyc deleted file mode 100644 index 8cce4bf2806f5434c00a71c792267d43835adae9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 199 zcmZ3^%ge<81lwjDPgVfZk3k$5V1zP0gOp5XNM(p(OkoIS&}8(}WV*!_mY7qT>RMEk zUvx{zGp{T$Co?51wWv5VKQFi-HF+h&XOQk+=K4jc1^N2LMalXFiOJcC>8bH4`Q>>z z`H3mT`tfC{c`5lt`pNluDT(^270IauB|!bfdIgogIBatBQ%ZAE?TXle=75}1EC?h% UFf%eT-Vl|)z#s%cMXW$k05u9XN&o-= diff --git a/src/package_downloads/_vendor/conda/models/__pycache__/__init__.cpython-311.pyc b/src/package_downloads/_vendor/conda/models/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index 1336961b8d8481223800e8ffb0bc3c5c88e92550..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 169 zcmZ3^%ge<81hZ!xPX^JCK?DpiLK&agfQ;!3DGb33nv8xc8H$*I{LdiCUpD$hsRjA^ z#YM^b1&PVoiRr2FDf#7jIr)hx#rpAOsd*{+Mf%D4c`1qdx%nxnIY7bq%)HE!_;|g7 l%3mBdK*7?SRJ$TppkW~Eiur-W2WCb_#t#fIqKFwN1_0-@DOmsj diff --git a/src/package_downloads/_vendor/conda/models/__pycache__/version.cpython-311.pyc b/src/package_downloads/_vendor/conda/models/__pycache__/version.cpython-311.pyc deleted file mode 100644 index 93e8fdff0280c67127204640c702d2748125c2f3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 33730 zcmdsg3vgT4dFH)%5g-AA1o#jsiUJ=J35kM8iPX!YY*7zevL2R5*)jECAY7254FU88 zs0Rbu+Qd7CwCn~_)CzXx22GTNj-9DyC*7)9@6=7**4=G)0T#33V#*zLTX&mwJ4>&f z)~kJVzyI6^?gdCiNhUKpdkOxy_uTXR&wu{^KmYm9!FL>vat_x_>JI~Z&T`ye(1UWZ zHyp=*T5jODTb#fNe26>G5A!_T8$yQj#$hA7H-=2ZW`r3-mSGFNO`)>$)?q8&nFUM8 zcD{VLoaYQ&&={=vHqUYI;$OOk?Lyh>+^|Ei0#+i1D%bFckwS>!)w@^Rd~}JT*KOL!h8J7@7D&`4tKCPs}aYuWSmaHILLWh ze?&>Ve7?dT4o71C*mxuy#iMO37&{ah8w-j@=;hS-#7HO-9t%ceKcZ?N%yl4q!5Eo=2`zL1E;AlyRrm zGamJXgFzuEc+Ot-1R`O(c8eJC@SGkt)kpcybkA{tZ*7*SAmAb)Lk->GkoG>j<2|18%$lBv3CI5B#T8(i>=fkQ?R&ZrG6E zMtOmM-8yVcaF_UD69~y(! zHmkRqn|M)|U*e|?`Vw~{cH9s!yvPl57r9Hu7rBf4&s)kMClbwiE&1~CD2+FNI1tPm zqcJgGHWC1)gClwCv%$;zL@^@f&C$t;pqRIg;8iFzGU7Gmje$@!Z}f{}c_Sz&Z^NsQ zKN=kwiBf-nJzQZ#CKBJB@+{MjJ|)98#EO0$;B%tB+GHTLIa{$hSFw7Lv)R1!?z+Um z1xJ0#`1WAd(UNntEOKQw@1nuvSUF$cc=Jo?&AIxHOjXC{3+|PRoWY?13trzl_3zc+ z_imr_Zok)-_3p}fcV((}C5_3z&6m?{*}ApJB3VvFe0_?>$+yEoW-=eA9>|Nv@53qsCveFGe zDhn6?=@$SC!%yIcjgaFe!2sE77L0%v!30<)m;tSV1<)pt)=k-3`^=m{V_7L++6!>C|15P@v+dG?*3 zh+SqmMT28f8#>2*L7yjfk)%;L8VpRv#v#Edd6&oU2|{X6O?FKcToXNgB6p-|+aBx^fbIuLd*c*##B z%nI`NP*FX;O+HYrf3sSy7}%^9y%*X`C^A9hQfLA7yMk|~D-i)p)b8mJf)|!85$}38 zspa_a*t1D3>bXcXR@%%*mNkJS%N9m2Sqm95ee_r;Y{(CtQz$g)oF9ZwT{}8ToqEn6 z^N`9hK01!x^?N`XvGKrU$S={Zhv*bkAG=IFPko3H7Ay2&kB>$tbi+`G&x4|fpu*q} z)L}tE?hX*GKg=>9^i7DtK#+zTx@l6B$O^+L>KXNm)SppM9K9vtj6mFp;h>;&Y>Y}E zM8(7);|Zbhmz55|FriUZN=HiB4-)@MgO7_bz?j4moM7|?~^GX~ZH>`R(DGzz(cFD%vbUWvZeK@c$BvQeptJobqk!;x_5nAO!(u0|w)$&&}e zO06>nBx7E3cMJrPU!9M$yFXv8S6f1#09#b zEF|3c$7rlF42hFrCdAP6AQ%^EHv;QhC4ix3oo8_Jtfn|wk`W!S0j)%4y?~dXVA4>~ z1B#ea+BGPtMx_x+B@mhT?ebh4kDZgaFpWB5ZA=W27-LdK?k?6rOcE%$zd#s*9-s^b za5CTy423czy@tqH>aWr9PzY0i$)Km35JMuL>MUoi40VYjB&;aJ2-HDWiw3)hGA2V3 za}JX{?eRgfdiwi4blZ)4rC!vVj1QqC%;P0qP0c}<>M4;49C={ulSH;c=)^A|v&Wdw z?&0GZ9RQT0Ca-Wr{yLhvzARh{c?9@?kl$bUnyugsG>LmL;a<6xtpFYm=BW;T=O2gDZGxJP;WZ{S)V8-bOr~ zH>_{T+(ve05_2torRdW&iaQPs5+GLz{_t_CAZ zsRFF5b*d6h4CPU^Emi}UNFIe5DMZX1az`a|XabDs%JCVsD&tscr7jtAC~BU}``MUe z0!~s0l>8XA>oXiS*EinbD^UhaC*`PDRX#y$FgK7Vn*+08{U_VloN{WQh&>9g4G+fo#;rF;7( zT{&PU+9<#!snAq#o60DOW_A1#Z+8+a8j&i9s2E~QbJI-dB&h%`1gw_jKx_#rZa5kZ zZD8u8LXt%l&|`*iJgH8PtdYzd<=VIDnJ*M&rV2=$wx~St*5Z(EF;E(&Mv{tBA0=G) zx@idYl%S0bEH$oNODQVi>O{mNYbhuB3Hei9wr}6!>)z_?*`$_Ap)HJXrH++Vif{lU z9IB5#-e!<@kDhQgJtDD($$V+d4Uz#`)=i_H&htPB&E}y4V^SjZyjM~)$HRTbk zF_Z6MX2+N0+X2#am1$C!k_F=eBJ^tr2W5^5-k>PAfygAyo*)nX5DF6eVlC88SujaJ zL70k(Fk|d55T?YC6#I+RDiV&}z8==i`1VWkY(I3GAR8r;5{;=?OoY(aP`Q-+TqGo5 zGS0*ls)1@wsuyPs6nl^yDTU?w^BcSvuvIEODIDNdLII6{28%rMtc4C9TzszEaRWYiIe8qXe zptoaxnU7hOw;Z>OCEB20e6VxV#<)@3E*Rs63!K;|nDEVuJ#jM z&s=J8d(8Trzs$#BB_C1SELiZQwh%ScSm8A_EYr6T^{XwsjTg0Fu*$eK3AQnQ+7y1_ zk+I9=*uCf#OJS8DO=~SBm%b;X#jULR-589_9fCdH1${-Bb7fBMyz946&azG zFLs5$@K~7X17@t68ZAvlCc&w1$ufQE)Yn{BpJLop>2oCN(c*L2QB?gC(rNH3nB@L{ zrYLVtaQsulFRInk=D1mKTrynJlpvU&oX28XT&UuoK>1fBI?^XTNUheeYOqa#f^?Pu#Dy2;Acia*$ zD_~ro$FwzWEg5gF;(lJXepY>M`X|&cIBkntP(+L zmf)7D&mCJ+$VsSKCiPHJ>RLIqS*TM>7}aA@J6;~EO%~*YeqgozGf;%eoyYjNl~IiH z_HUE?j9tL$rnmk`o|~#+_8V1-OFHaBenXxgnW`C=Tp0>0JDpLlNFwkd-|{08ta-j$ zB*rqeLel!qPeLgpyRkaAoN5*(F+rxK4mez}6&Be*OJ5&qUq= z(;R)9nW0A{5ty%%iyeXGH1P6>7!<21xC|zekRJ=XcJf zpiJ(uyqTdo?~rnkE1c&qWiud(PFKC|jtg zOkBLWb7p6%XRe}&mc|eB4{BE=_a*SR;HXO0Up+N*Ds^nm(Y(l6=%u?c)pK*lefOF< z_nNG`Bj@fw!yV7^3w52dHJObEvULY@bq6z52j^>>Qpau%Cl66pA4_O?HtRZ+a~;aq z53#zWT5qnu?`oTK(So?EGw13ot&!Dsf`8DkGO>5&VB+A%MKv<9m!5R>?Z*2J>*pHQ z&+fVF&NlSr8v5=xJTces#JxS)hNp53Psz!pS`21gLpj$_#y+$N&l-2KXJ$vrHCKTp ze%|I+dT=D``eM%Y#f<%n)W3+5JUg>1H8@w{Ay0w5Ji~5!_w9q}=&T_fy?yk4)8@IR z&3E_Qb7!0Sb4~sCoA%5#?fGC&w&|H%(=&;~^Y(^RYsS6`pPYSF+MTtxOHWxA_MoYa za=YK;n``pTM(-lswp`P;`%Sy%ns(ibW}Eipn)V=<`li%$w!RDfX-5^@^*6iHraR@C z>h+6Ujjj6=0*T!VwT*9YPQCKxmfKsdpT2o|k#jq`8A$RAHSPCny5?%SW{=Ij^4?Ik zW>cbYH$u6v8f zsCLcAtJ~8j-i+Ul7e}al3k4W!*MK}cYowQ-&%0}q$8Pqf_?ugAJ$d8Fth+tuZeQfg z)!p+eo07Ye{_A^_d#O8GZXe04TtDm2uH2{uf6_rMHv%aPJfolUn$>SMaa zy@YS+QXE=%@d*ULvtmEKm}iCT?NkQWl$Z!UQd7i7Sm@tVY|_-PaPv-g&gscmJ*DNF z5P_Ay7B~9xUs5M^zY2%VI0tBW+XRtjJThhIY@IT+dc6j*1?j|A0>=Qf)dQacv!?nL z^EoN_e^7kMJ!{@ochi@1c{9#Fe9-^wo1nE%sx#`SUL*@yonOaIiF1@;jC+sQyJSL% z={g08I&GXb>0AMgQ1!S%JC{i3Wa-S)mbf`?d70HhH1Ql9QVoOLOx4T==E!I|g6<); zVE+(MLQYZa$y>(yEN_VQ;D1xzZ~@;J@EyQ+U{jO?olZ0uM}n_RRq1;(_{#r6v}hOr zoDKhwy91fJt=YP*xw@?&R~wv%0Iu#&3?vN;jcd|pXIt-?-mlCwKAmlRI@kC# zIS?TZz;#Q~kQ|tI*QQ)Iw$XrVnP2Hi_0iv=5l@RoifQ4hY7#LC@oX-RvguNZ<;!>= zVo6hI$)!bUoG+kPuaQPpKO((aD$=X>`t-{1Fp8&9116cpr*QX$C}!LGT@y?2)oc0 z%p)~*FT>11`)rMAtSE77Or243#onFkbTP}8j~S_nm!(zvkx~=-ju*}GKtW}3*%rhY z`hIZ(qIs=s1Zf7;E`%~xCXzF)NbSj&OADv8ZtAtMu_k_zUcNwpeDLyya7=uWo`wmW zByb8KO7gAOF-D`91 zwHf!$*-gLE|K0w3uI&1qx%E3U_MP+2>g4fjmlDoWUPZit2~$M@iOcp^J7%xUqxKz6 z9HHK$n2Mt{IB6ITp(UJ@0-LGV$g32bv}Q$W-;Ki=XKTjVT3R;IswBvfq)3o!#Ily3 zjS;s)Z0XYS$+05)FogcEFyU<|;(CU-aY+^7RTkScZPsxa*n+gS8`(d`Hcscu1;8xbbmQiApN$xL`G8s0$c)xbRE=P-I@4xg{$y z;`4YvwBK8nH)7vKL97<7-rGB8Yu;xt`SVb z?m!FxN%>i5_RhA>p1rsC{Ue#?XS2=E=9-^f*2rxC&Pj@8L#4!S-_=zC`q`!+31XvG|F`116=2p%?H-e%$fGg_Xq9Meat&1Fk z!$!_oedXyz(-3dQbW#ID2Hp()UIiB3yoDZRU@@C(Fke+bC2u~-(`yx4>dcsZtDu`V zV=$<|Qf01P1*-KjGt&$ORj zce4F-98V`Zz45No*lfeT*Xw4>G_zs?;}XWf8{8YFH_Upmag|$s<4e?Vki()Oh1kHb>WyPhJAis?Q-c^cW z%a4Ll77{H6Y=Xf8BZ%-_{7VOQ=-YRR&#~*vJZhpE+LciH z)?*g+|M045Y0Sk-F_*t;7TrZ9mtQrP_B+Zq2^9rJ9_q5)e+1QI4haP}JVm(e+O><^ z5`*KTmpfa@jec{hv@e1pFu(fE9V{t5mqzZ|)$8l?ZPBFZ?C64@o}3alQiP5|rb}eP z@`&&3-|E}u+pfzij=YLaXWiNUi65}mTM`i zh*}RM@9Obwr#k$iYVgq1xhCq>F*_4z+)bVszXs;;%f?&WRg3uhS1p2Ozh0b!_rkx{ znzXJK4Nt)!Nl6SdmDzD%F@mvVm|4>5pj&>JO=CDbRy^|L(9Cz0S%p}RaLmNDR)jF( zj$)7CW9AH2yG+hth9x;0k+TswYg%7uU7R1~<3_e}XCPR@iY;PQDaP;qHm&Sw|GkzY z;zp#Gmg!buUkB}gdMN_W9de7r{(;~n_%6XmG7@G>d(w2X&>C)+PApkW_@YDm0tMcSj7w~@o_<&nrl zbgEJ2M!ubF4J^9Lr@k8oVMS?@{W&JsmF`8(Y3oRtQ)h46W@|GmeC&C_=}K<7`lXpK zrH);D4T9b=#FkoCWH!qhc&2J_zN+???S}1o#m$Pu?)j>^Tje*(uiJ0h6T26yxW<*t z^j`f|L#C=L$tPQqt;yD#=A?PHde(&K?uJ`CZtS?e^XAUP-uczE2ytJ`0Up;*7aH=QOo{GKcP49kleb&AZ)ROe)vZ4-gRmh9rr+*{R7 zXCh&TXHAs#KFlTsbBX~=n|r96?avQBw{OqL(9vi29T^n25IJ2WK;$7_0PtGGIKJ|h zSmb5+c8Rajg9#pI=fy8koN0h46c_G`5G_wh+b{EFa`%gG;Kd&5@L!@1{~TTZ<95zo zmpu0^W459_SJ9p^$)5#>b7uH$dwO*CQg-dutYcfwvF*x!7&!p!l~?vHnySrMb$cXO z%T?Et(umb6fP{%GEmPN~uEuBLc&KnDH($fdtkUM0x7Q{PBm-BE%p6G^nYUMe_0Y9L zDJ)LWB9*j0HSefS4qiKn>6sE+xO#NvXyWLjV_PZqqq4Io*(rs)p#<}e6MSl4y7l&f zS@&EMCVLb>Yl|TG!ED`7u5Kt(HAER7xOU*`vop^o@b^F_%l52&b!pRBPeS?Sqn4}W zxkoaK6LD4txGE?S*68g**<+S#{prhHy39CPI>`6J=Fb9sk5JU$W-6;{vbs{xs^kEj zs#b_ztU@VDS#f$Q%NOvN8d*sJMOxnS1<1WyOp1KBjp|wU8v}YoIipzC;r{rZ08jrsUKF?kWyrAQ}H&k)4p)za@Fh=gk6k z#hrTxQ&hE+-ZA9)OX4-8lq{fJ8?R@@=R{!nXGFh=Nh5xYLc&9yR6S=-swt|qn1~mW&Nykw5G)n z#^V19H+lxIF>u4orDwg7Q&^r`Qc>43a)D1XR*bB5m(FNhbWF$6b7!F_mVlLSonSy8 zy^DY8(hCOtlQht1iIwS8P;w2>aG6OgK!Jo1@oNM~e2M)4Sj+wA_{#H_b*m363p)?s zV!*b*L)FC|#Td4Oy@docV~U=@4V}H=nAx5@mg28IIrC(u;$-TTugBktfA7#eH!bG= ze%lBB-(CCR+CS|2Vb@2|?7(1dU@$ZELgvH^fVqJ$WVgSV+x}vmzqYGA+GwZM?RJhW`GChL-** z!%lqf>VL@Ze8}&5$oD_AxAYg*1RolJW3%{8D(}|`e1iaC@FD*y(UE$|gM|O|4uZ7j zg5XRKW00K-@@>2+80Q}Eln|uEIUz+U?FhxWAw{7@r--23DxngvT5tlogepKcJ1MM2 zaAA13^VR|E=An(>0h8Jej-!?^LLn2eWb~&MIqAY0IX43I%1^Y1KxGd!N0m;+F-lVh z+sJrygzbQ5?v!B1`;kt1SH_N4qC;_%ILg5{GG8Wp_f)k~L)BmLx2SJPDq#I!1yubR z$9zKz`TM~G4`1a8{7LLjttBGUJ!=yYC{Y}l%B4%GGtcT(>{rAD$dp5^2F-=vrz(^q zHRiES_sn$0+VnG)_+4aDQsR3E)HGZ{&j*E4F}<02HDg^>%I2vbSvzUAc8kN=OImLY zLwFrdTvGds<8b2ADxN%RG+{_6U&SXY6>v6)F0Zk$o|i9Y{xc(l-Y6OX`7D!x9}iPb75|b?&F>K|p2M3f+@ir~>z!|C zzTKFB%jD+^PPo+Zw%!7mZ&>y9=C_(>&DjQDjy{_yV&Wh?kZiq}nNk;CP926kdZ-OR56C#pOJ5Qd6Ak^vS7wmZ*+~MmF<5+jjTs}P&&L2 zp1AQuYBK9ylXI`hIM-yXYf5QOt=~GN(hGU?5wer6VnHd5Swr509k=Hsjp-O-7J1jv zODt?@N2C<=Csa=wR}g39TVLsT|f2%M>Uy{%Vj^xlj3l0ViRrFH+w2orycs#AonyiN399ASqFOu1B~ zsbT-2fHp?`OQMYkpY_#F+$=Ao^+HG_wBdLBs=r|JOwMU_xO@eRGr}bDR002*&_Ab^d8C%HF@ErV zP5WF;d-@C68egu)mvwH;IX7mk8%x_t21Q0kA=Xw18+?~u+64pcs%fzh(zh1aBBsYK z{mez7J5%1FDC;Amli0z`6o1nB^X0M%GIAjz{vHB!IH^=qNoc88yITL^F9|c9NKO6> z?)%P`IcLkay1o~^yX~Fn_olP0{khiud$IRl%Q}zdoJTX(qoo*98Pr~6^G4ADj0r3n z(7BFUy4YK!`sr~3)y{B4H>sxrHnUx}xcO!NS*SxapiehU^ULkHy;|fs`8;=v$D(JW zI-tR*tC8C~#=ODTo#&$xRZ(Ynhm#}>3;jEkuJ{HqjqsT{XM4ulURoJMlo*{DoB$_D zRVH8k6lILSZxSf@3;lo|-zV@p1U@A25rID?Fi#*);Liw*5}^K+$YGZ52>d2hZwJ5? z^>5K+G}A5?1(b7^%0+kxks5AZ^@$OWuYW-bfVU2LkSYL|L^TdWV30tu~z%mDt?OV(QWPkRxhJ=Yh9isD3H`E1`Mju&O8{aQhSW|5R? z-F!t=Gp^tb8FQBtzQscJlo6{b@MOgX7ThV9US(QFIf1`x+@v=R(^>2C@PxRiz;WBs zKnRK=sdS8(-?6aCAP!rmDPZ2lzG%@|B#VipJGS7CtvF(Gs0bOBSlyCDwhYx$@DHHV zPoG2@F!lM4_4hlr&UI|PXU=x)%606@TH6!0 zrZpH-awvB8VrK+s@g}>UMeR5{Kyx00V3Sur4o2= z7CKFPGC+}29c*zl`Cu3N(5c)@_(&SUQ~G+vJW33XyMS{A#FkiVA!Hf)EGm;u1GrLL z|I*VptZ7#v=fa(yqcG2Q+jM!nT)=L2t=V62$e6tpw+WRvOQiyF@LLJmIMe0falCcP zF&#_A5UQqa;pY*nT8>p*^U`w+))s1;t|{CX?gV#Qa|TKwJWlN>s-wPN^>hq7TuJ=t zavK-x(&VW*7XiJkQcbu8c0Dq0$QSH-H8@FKeGk537HZ+6QH z8r65)OJsg!7d9%|1o`ZP>9QmrK7exSWZYJO28qARF=wEaOmSrR|qTP70j=oVwv$*^s69` z9)IC~1>9)4g@Pv$;idF0aRvw8ae`G#=YoTHAn~Nf!{Rcr>h(Q5T^`$1K=Mngm3u9f z{cIFM{T}WYn&M`74|Z#*S*R_rwPnjG;y24vqOc0(*g$h0rS9N+2rI||Z}X957V}9g zvk5Y3U+VPOT`XSpD3hneHL7w0^De1iUW522_!9pefYziSvRTzSdG{GpRWz#55>qQ9 zXqK54)LUnwyuIE|1Tj#y5!cd zV?>xOjLoFvw&eNP&(#tWwS-ESTe9P5X2;PF{P%j^--4T>4{OE#hx{3~aQl#Nf7s0| zQS5lZXsJWo8R@iH949Dy$hCJotWu6SQH(V|`ad|6WXj-MhZcPk6%o?}zE0o{fo}m! zH5R9mGxV+NnEIC|*pZXIbyApe;v3E?REl8d7t-%MjD{v7lhR48aPYvP+U%edIwSTX zj(VlDRK(zU9KXyCi1vF%Cu4LhvieCZ9m(fc4 zIF2g)Aq9O%L6t|J+jo5T(9z=~gG0NI59Qr58fc}R{iFdMm0}sgVRjSvxr|U`EMIof zFNV>?yagxcVd|c@t63c0{has*lr;$`$May0_{3#80yIPv!t!Pv%@tCiX0>Xi4vw_222wv~K#%6VfRCXfwXE zJC5c6`;O-Nj%HhrW>-9yTk%}t0DcDqCyzWcU)PL-TI!n;&&<1#rMyXG zADxXdU)PwcTPs)c>NE3>y5uV}r`|r3ajgG@>$f#6RJGl&^3GLxXT8~~-dt5L9EY7% z$jtE!^UiF@^zP5rJ)Nt2I#c!Zd|Uf>wtZvUukX0CBWw304yD$@*C%;48At};#w@Tp zI~@28meuJeGH$pz+pE3vt8fMk?fkucGPyswf4;UQJv3KKKTT5fZoaK2c_?|P5TUHv zOJ|n6nAtXzZ9I`{Jdvq60nFZd_6D{U8d_fD|G~nwc7CI7)^gW!H+s)-H~RgGY|GAE z%g#^AjJ3Ny!*L_px|e#ROVb-w$piB0!eF+>o2&6=96*f+F{>NzSGUepx288^Ik(!EtM=Wm z-ZEFc1ufl|tscl#58SUlI9GiTRQlYBZ1wZG>gQ=Qc2i2sR<~e3HY0?AH=A!amyiLd z6=WdJoK6Mi==h38iSAq}cM6N`>6$qw8Fn0-=9|`}18FfGxP4@HDBIMNv@NdS>Q>*c zT{BmUH`&^Cx!QI2YkTHud+xSnYq#WTwlrWlsM~eG$o-MA`9O<) zMO(FG{0s1=VoL^eDKI7J((A_3<~CT9)b~m}`i46gsT?Jj)@2NNgV#wH)DZQ4v}oNy zuX5?1nBA4RFv9pnmSJgq_G`GyTj<9sf`a%13LXK_=`X*7fT?P&CdSt9j+2^l0PkoO zpdqy;U2~^7>+H@syE9h#L!RZYUVAka`1-lG&VB2`Z0x;P?@hk{TE=%Y+wokkBjuFZ)u zSZ}M?nF()OQWxKJ-gai4ojGS`#@fjci=7RvRIfGy?F1SL93}AQ1gM|IGSpgJLw7Wn zXI>3YNdlR)b*8d^hA7cj@br1XusvT{McZEuW*mC61hAgq&2ZS)z_7t!9t5M*K&zR4 zZ|YG%QrW2@q>!kWQM!|UW$LFf2M4V}xls1HNpD%fFHg}gRhF~goT^|ZV>?DqMZRia zay%p)VZV=bn11i>|8e`V_yC9z|1-eShTw+c_Ndn2AZSngm{QZahOU_w1OVaurv^ZB zn$jCp^-Kq|CJkGpVMiMhaFmRZOpP$ZsH}{+R~dOu>nf#Vfkmd?v?e8LBwzZD#Bn)T z1JD>)h@G$tH8D;bVPIJqUuIuw@!ntYt(a;qgjs|{LCFPEK88lY5b9uDoF;E*jqQC7ZbPHL=7kk2G;<%uey5ql{6OYA{6) z(}-jjjOG?)zcDtLgwS5C5l1%p!n6yQ%z`;XsF3^U*^>BdXfF|;g~JrAZ&Asi#M*;S zUveq6Ebb??M$HGUQ4N<8l`u02`c=AQ8ph$&5)FGDSj=3+yZvz3{3gDOJYU7Xqp<%* zfTod>N#$?oZXPzp=9V-t_xg-|{pUZf;Hny-Jld)YVBTJp z*h?;m)%1Ibm0azLWhO@{BRi|HX$d{K?!Kc1x@x9%OV+V9=YX7Q__^mUzxa8;19cuC zsoa=(;h;$ew(i+6_d76q_RitEgLCazX~pv|cIuaSX0cq~Z#vDHRLUrr&nVIjt^8U)?>YQKsj5cLcg zCvR)wLx$sc#MT4aA4eb?3;Xpg57TLPU{Bcw&y<;c!T9w9R8h2ph?mid>q12+I2-iD?UA_(C_o0=01gptO0JvtwBM;rUL*6Gn4cZt)5gD~fXAC}`(Ur*d~1YMys#+r~9 zYvmE#lqee-)k5FzOQ;rT*^6vtU0K1ATo{cK*Ddt(F%-nKUb(sN<9Vu-(P_Lb-=;=y zM>H@xlhL+ z4hL#auVXmSsauc00Bhen25G|%iML|6SIJ9wMF|DVCzswqvpyxk&YZlGy*IaCeN{t? zT8jF4YACvqQ%Ti^Q0~-H>Z#--{2kPd*6b8FD$z;zRRCVcV^4Ax2q)CN8o!?35Lgr_g5PW5t$t6eY zBE-V@EKqOQxA!BRX1Be6)sEpJr}wc;&8&Q~bRhMnwU z3?8ykcGg(FT>7~zOkAUq3erkZ|A7GMpzJ5UC3S-ahByF_cPPec_JhxP6a5e?`^8|% z_WU|Ue1ib9h`&X5Owal6bmu29Mu4P-v@?w)fpmb=L)_t#Yi zy;*l(&fS;cs^;CRq@XQX_tuR)AGpeM|D}&6hGQhq85t zb9IL^oO`}{Wzu!CF?Bdwy)IY1?n>o@a{G*Tz6jVVW*X)tuqI<$Ghfr2sp*|BU$f{g zuQI?gTxCF|>NwZRWJ6|UZ??KGSKW7|a?!MtuUX_u!6y{eU2{1{KTTN)-iGi&>$ qm!GgGpJ^jLZ{(_0qIAoCp2zT98YVAg)(=Q8KVeZmTP7kK!T%SLbysiz diff --git a/src/package_downloads/stats_from_anaconda_org.py b/src/package_downloads/stats_from_anaconda_org.py index 8bff8a1d4820b..70763773163e2 100644 --- a/src/package_downloads/stats_from_anaconda_org.py +++ b/src/package_downloads/stats_from_anaconda_org.py @@ -1,12 +1,13 @@ #! /usr/bin/env python from asyncio import run +from collections import defaultdict from functools import partial from itertools import islice from logging import INFO, basicConfig, getLogger from pathlib import Path from time import sleep -from typing import Any, Dict, Iterable, List, Tuple +from typing import Any, Dict, Iterable, List import re from aiohttp import ClientSession @@ -64,40 +65,35 @@ async def fetch_package_download_counts( continue downloads.append( { - "top": TOP_DIR, - "channel": channel, "package": package, "version": package_file_info["version"], "subdir": package_file_info["attrs"]["subdir"], - "build": package_file_info["attrs"]["build"], - "extension": PACKAGE_EXTENSION_RE.search(package_file_info["basename"])[0], + # "build": package_file_info["attrs"]["build"], + # "extension": PACKAGE_EXTENSION_RE.search(package_file_info["basename"])[0], "total": max(0, package_file_info["ndownloads"]), } ) - df = pd.DataFrame( + return pd.DataFrame( sorted( downloads, key=lambda e: ( - e["top"], - e["channel"], e["package"], VersionOrder(e["version"]), # VersionOrder can be ambiguous (e.g., "1.1" == "1.01"), so compare by str, too. e["version"], e["subdir"], - e["build"], - e["extension"], + # e["build"], + # e["extension"], ), ) ) - return df.set_index(df.loc[:, :"package"].columns.tolist()) async def get_batch_package_download_counts( date: str, channel_name: str, package_names: List[str] ) -> Iterable[pd.DataFrame]: - retries_per_chunk = 5 - retry_delay = 15 + retries_per_chunk = 2 + retry_delay = 60 retry = 0 while True: try: @@ -118,42 +114,66 @@ async def get_batch_package_download_counts( sleep(retry_delay) -async def save_counts(counts: Tuple[Tuple[str, ...], pd.DataFrame]) -> None: - index, totals = counts - path = Path(BASE_DIR).joinpath(*index[:-1], index[-1] + ".tsv") - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(totals.to_csv(sep="\t", lineterminator="\n", index=False)) - - -async def save_channel_stats( +async def get_channel_stats( date: str, channel_name: str, package_names: List[str] ) -> pd.DataFrame: - fetch_count = 0 - totals_list: List[pd.DataFrame] = [] chunk_size = 500 + fetch_count = 0 + stats_list: List[pd.DataFrame] = [] for chunk_package_names in chunked_lists(package_names, chunk_size): - chunk_totals = pd.concat( + stats_list.extend( await get_batch_package_download_counts(date, channel_name, chunk_package_names) ) - fetch_count += len(chunk_package_names) - log("save_counts: %s: %d of %d", channel_name, fetch_count, len(package_names)) + log("get_channel_stats: %s: %d of %d", channel_name, fetch_count, len(package_names)) + return pd.concat(stats_list) - grouped = chunk_totals.groupby(chunk_totals.index.names) - await gather_map(save_counts, grouped) - totals_list.append(grouped.sum("total")) - totals = pd.concat(totals_list) - while True: - names = totals.index.droplevel(-1).names - totals.reset_index(inplace=True) - totals.set_index(names, inplace=True) - grouped = totals.groupby(totals.index.names) - if len(totals.index.names) > 1: - await gather_map(save_counts, grouped) - totals = grouped.sum("total") - continue - return totals +def read_tsv(path: Path, **kwargs: Any) -> pd.DataFrame: + return pd.read_csv(path, sep="\t", dtype=defaultdict(lambda: str, total=int)) + + +def write_tsv(path: Path, data_frame: pd.DataFrame) -> None: + data_frame.to_csv(path, sep="\t", lineterminator="\n", index=True) + + +async def save_packages_stats(channel_dir: Path, totals: pd.DataFrame) -> None: + log("save_packages_stats: %s", channel_dir.name) + packages_totals = totals.groupby("package", sort=True) + write_tsv(channel_dir / "packages.tsv", packages_totals.sum("total")) + + versions_dir = channel_dir / "versions" + versions_dir.mkdir(parents=True, exist_ok=True) + for package, package_totals in packages_totals: + version_totals = package_totals.groupby("version", sort=False) + write_tsv(versions_dir / f"{package}.tsv", version_totals.sum("total")) + + +async def save_historic_channel_stats( + date: str, channel_dir: Path, totals: pd.DataFrame +) -> None: + channel_totals = pd.DataFrame([{"date": date, "total": totals["total"].sum()}]) + channel_tsv = channel_dir / "channel.tsv" + if channel_tsv.exists(): + channel_totals = pd.concat([read_tsv(channel_tsv), channel_totals]) + channel_totals.set_index("date", inplace=True) + write_tsv(channel_tsv, channel_totals) + + +async def save_channel_stats(date: str, channel_name: str, package_names: List[str]) -> None: + totals = await get_channel_stats(date, channel_name, package_names) + + log("save_channel_stats: %s: entries %d", channel_name, len(totals)) + + channel_dir = Path(BASE_DIR) / TOP_DIR / channel_name + channel_dir.mkdir(parents=True, exist_ok=True) + + await save_historic_channel_stats(date, channel_dir, totals) + + subdirs_totals = totals.groupby("subdir", sort=True) + write_tsv(channel_dir / "subdirs.tsv", subdirs_totals.sum("total")) + + await save_packages_stats(channel_dir, totals) async def main() -> str: @@ -164,14 +184,8 @@ async def main() -> str: for channel_name, channel_url in channels.items() } date = session.date - totals = pd.DataFrame() for channel_name, package_names in channel_package_names.items(): - channel_totals = await save_channel_stats(date, channel_name, package_names) - totals = pd.concat((totals, channel_totals)) - totals.insert(0, "date", date) - for index, entry in totals.groupby(totals.index.names[0]): - path = Path(BASE_DIR).joinpath(index + ".tsv") - path.write_text(entry.to_csv(sep="\t", lineterminator="\n", index=False)) + await save_channel_stats(date, channel_name, package_names) return date