-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
147 changed files
with
25,848 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Sphinx build info version 1 | ||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. | ||
config: 942e2f0a72ac90acb86d49d7346ee8e9 | ||
tags: 645f666f9bcd5a90fca523b33c5a78b7 |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
387 changes: 387 additions & 0 deletions
387
_modules/data_juicer/analysis/column_wise_analysis.html
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
<!DOCTYPE html> | ||
<html class="writer-html5" lang="en" > | ||
<head> | ||
<meta charset="utf-8" /> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> | ||
<title>data_juicer.analysis.overall_analysis — data_juicer 0.0.1 documentation</title> | ||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" /> | ||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" /> | ||
<!--[if lt IE 9]> | ||
<script src="../../../_static/js/html5shiv.min.js"></script> | ||
<![endif]--> | ||
|
||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script> | ||
<script src="../../../_static/doctools.js"></script> | ||
<script src="../../../_static/sphinx_highlight.js"></script> | ||
<script src="../../../_static/js/theme.js"></script> | ||
<link rel="index" title="Index" href="../../../genindex.html" /> | ||
<link rel="search" title="Search" href="../../../search.html" /> | ||
</head> | ||
|
||
<body class="wy-body-for-nav"> | ||
<div class="wy-grid-for-nav"> | ||
<nav data-toggle="wy-nav-shift" class="wy-nav-side"> | ||
<div class="wy-side-scroll"> | ||
<div class="wy-side-nav-search" > | ||
|
||
|
||
|
||
<a href="../../../index.html" class="icon icon-home"> | ||
data_juicer | ||
</a> | ||
<div class="version"> | ||
0.0.1 | ||
</div> | ||
<div role="search"> | ||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get"> | ||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" /> | ||
<input type="hidden" name="check_keywords" value="yes" /> | ||
<input type="hidden" name="area" value="default" /> | ||
</form> | ||
</div> | ||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu"> | ||
<ul> | ||
<li class="toctree-l1"><a class="reference internal" href="../../../data_juicer.html">data_juicer package</a></li> | ||
</ul> | ||
|
||
</div> | ||
</div> | ||
</nav> | ||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" > | ||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i> | ||
<a href="../../../index.html">data_juicer</a> | ||
</nav> | ||
|
||
<div class="wy-nav-content"> | ||
<div class="rst-content"> | ||
<div role="navigation" aria-label="Page navigation"> | ||
<ul class="wy-breadcrumbs"> | ||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li> | ||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li> | ||
<li class="breadcrumb-item active">data_juicer.analysis.overall_analysis</li> | ||
<li class="wy-breadcrumbs-aside"> | ||
</li> | ||
</ul> | ||
<hr/> | ||
</div> | ||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article"> | ||
<div itemprop="articleBody"> | ||
|
||
<h1>Source code for data_juicer.analysis.overall_analysis</h1><div class="highlight"><pre> | ||
<span></span><span class="kn">import</span> <span class="nn">os</span> | ||
|
||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> | ||
|
||
|
||
<div class="viewcode-block" id="OverallAnalysis"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.overall_analysis.OverallAnalysis">[docs]</a><span class="k">class</span> <span class="nc">OverallAnalysis</span><span class="p">:</span> | ||
<span class="w"> </span><span class="sd">"""Apply analysis on the overall stats, including mean, std, quantiles,</span> | ||
<span class="sd"> etc."""</span> | ||
|
||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">output_path</span><span class="p">):</span> | ||
<span class="w"> </span><span class="sd">"""</span> | ||
<span class="sd"> Initialization method.</span> | ||
|
||
<span class="sd"> :param dataset: the dataset to be analysed</span> | ||
<span class="sd"> :param output_path: path to store the analysis results.</span> | ||
<span class="sd"> """</span> | ||
<span class="bp">self</span><span class="o">.</span><span class="n">stats</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="s1">'stats'</span><span class="p">])</span> | ||
<span class="bp">self</span><span class="o">.</span><span class="n">output_path</span> <span class="o">=</span> <span class="n">output_path</span> | ||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">):</span> | ||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">)</span> | ||
|
||
<span class="c1"># default percentiles to analyse</span> | ||
<span class="bp">self</span><span class="o">.</span><span class="n">default_percentiles</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">]</span> | ||
|
||
<div class="viewcode-block" id="OverallAnalysis.analyse"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.overall_analysis.OverallAnalysis.analyse">[docs]</a> <span class="k">def</span> <span class="nf">analyse</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">percentiles</span><span class="o">=</span><span class="p">[]):</span> | ||
<span class="w"> </span><span class="sd">"""</span> | ||
<span class="sd"> Apply overall analysis on the whole dataset based on the describe</span> | ||
<span class="sd"> method of pandas.</span> | ||
|
||
<span class="sd"> :param percentiles: percentiles to analyse</span> | ||
<span class="sd"> :return: the overall analysis result.</span> | ||
<span class="sd"> """</span> | ||
<span class="c1"># merge default and customized percentiles and get overall information</span> | ||
<span class="n">percentiles</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">percentiles</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">default_percentiles</span><span class="p">))</span> | ||
<span class="n">overall</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">stats</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">percentiles</span><span class="o">=</span><span class="n">percentiles</span><span class="p">,</span> <span class="n">include</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span> | ||
|
||
<span class="c1"># export to result report file</span> | ||
<span class="n">overall</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">,</span> <span class="s1">'overall.csv'</span><span class="p">))</span> | ||
<span class="n">overall</span><span class="o">.</span><span class="n">to_markdown</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">,</span> <span class="s1">'overall.md'</span><span class="p">))</span> | ||
|
||
<span class="k">return</span> <span class="n">overall</span></div></div> | ||
</pre></div> | ||
|
||
</div> | ||
</div> | ||
<footer> | ||
|
||
<hr/> | ||
|
||
<div role="contentinfo"> | ||
<p>© Copyright 2023, SysML team.</p> | ||
</div> | ||
|
||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a | ||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a> | ||
provided by <a href="https://readthedocs.org">Read the Docs</a>. | ||
|
||
|
||
</footer> | ||
</div> | ||
</div> | ||
</section> | ||
</div> | ||
<script> | ||
jQuery(function () { | ||
SphinxRtdTheme.Navigation.enable(true); | ||
}); | ||
</script> | ||
|
||
</body> | ||
</html> |
Oops, something went wrong.