Prepare for v0.5.2 release (#526)

* Use "option(type)" instead of type | nil for df callback * Add changelog for version v0.5.2 * Add "Unreleased" section to changelog The idea is to make it easier to annotate changes. * Final steps * Make always rustler optional * Revert "Make always rustler optional" This reverts commit bc38b18. * Add another condition to make "rustler" required
elixir-explorer · Mar 1, 2023 · b371361 · b371361
1 parent 2805b14
commit b371361
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 10 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,57 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+## [v0.5.2] - 2023-02-28
+
+### Added
+
+- Add `across` and comprehensions to `Explorer.Query`. These features allow a
+  more flexible and elegant way to work with multiple columns at once. Example:
+
+  ```elixir
+  iris = Explorer.Datasets.iris()
+  Explorer.DataFrame.mutate(iris,
+   for col <- across(["sepal_width", "sepal_length", "petal_length", "petal_width"]) do
+     {col.name, (col - mean(col)) / variance(col)}
+   end
+  )
+  ```
+
+  See the `Explorer.Query` documentation for further details.
+
+- Add support for regexes to select columns of a dataframe. Example:
+
+  ```elixir
+  df = Explorer.Datasets.wine()
+  df[~r/(class|hue)/]
+  ```
+
+- Add the `:max_rows` and `:columns` options to `Explorer.DataFrame.from_parquet/2`. This mirrors
+  the `from_csv/2` function.
+
+- Allow `Explorer.Series` functions that accept floats to work with `:nan`, `:infinity`
+  and `:neg_infinity` values.
+
+- Add `Explorer.DataFrame.shuffle/2` and `Explorer.Series.shuffle/2`.
+
+- Add support for a list of filters in `Explorer.DataFrame.filter/2`. These filters are
+  joined as `and` expressions.
+
+### Fixed
+
+- Add `is_integer/1` guard to `Explorer.Series.shift/2`.
+- Raise if series sizes do not match for binary operations.
+
+### Changed
+
+- Rename the option `:replacement` to `:replace` for `Explorer.DataFrame.sample/3` and
+  `Explorer.Series.sample/3`.
+
+- Change sampling to no longer shuffle by default. A new option named `:shuffle`
+  was added to control that.
+
 ## [v0.5.1] - 2023-02-17
 
 ### Added
@@ -293,6 +344,8 @@ properly compare floats.
 
 First release.
 
+[Unreleased]: https://github.com/elixir-nx/explorer/compare/v0.5.2...HEAD
+[v0.5.2]: https://github.com/elixir-nx/explorer/compare/v0.5.1...v0.5.2
 [v0.5.1]: https://github.com/elixir-nx/explorer/compare/v0.5.0...v0.5.1
 [v0.5.0]: https://github.com/elixir-nx/explorer/compare/v0.4.0...v0.5.0
 [v0.4.0]: https://github.com/elixir-nx/explorer/compare/v0.3.1...v0.4.0

diff --git a/lib/explorer/backend/data_frame.ex b/lib/explorer/backend/data_frame.ex
@@ -6,8 +6,11 @@ defmodule Explorer.Backend.DataFrame do
   @type t :: struct()
 
   @type df :: Explorer.DataFrame.t()
+
+  @type option(type) :: type | nil
   @type ok_result :: :ok | {:error, term()}
   @type result(t) :: {:ok, t} | {:error, term()}
+
   @type series :: Explorer.Series.t()
   @type column_name :: String.t()
   @type dtype :: Explorer.Series.dtype()
@@ -23,9 +26,8 @@ defmodule Explorer.Backend.DataFrame do
   @type lazy_frame :: Explorer.Backend.LazyFrame.t()
   @type lazy_series :: Explorer.Backend.LazySeries.t()
 
-  @type compression :: {algorithm :: atom() | nil, level :: integer() | nil}
+  @type compression :: {algorithm :: option(atom()), level :: option(integer())}
   @type columns_for_io :: list(column_name()) | list(pos_integer()) | nil
-  @type option(type) :: type | nil
 
   # IO: CSV
   @callback from_csv(
@@ -36,9 +38,9 @@ defmodule Explorer.Backend.DataFrame do
               skip_rows :: integer(),
               header? :: boolean(),
               encoding :: String.t(),
-              max_rows :: integer() | nil,
+              max_rows :: option(integer()),
               columns :: columns_for_io(),
-              infer_schema_length :: integer() | nil,
+              infer_schema_length :: option(integer()),
               parse_dates :: boolean()
             ) :: result(df)
   @callback to_csv(df, filename :: String.t(), header? :: boolean(), delimiter :: String.t()) ::
@@ -53,16 +55,16 @@ defmodule Explorer.Backend.DataFrame do
               skip_rows :: integer(),
               header? :: boolean(),
               encoding :: String.t(),
-              max_rows :: integer() | nil,
+              max_rows :: option(integer()),
               columns :: columns_for_io(),
-              infer_schema_length :: integer() | nil,
+              infer_schema_length :: option(integer()),
               parse_dates :: boolean()
             ) :: result(df)
 
   # IO: Parquet
   @callback from_parquet(
               filename :: String.t(),
-              max_rows :: integer() | nil,
+              max_rows :: option(integer()),
               columns :: columns_for_io()
             ) :: result(df)
   @callback to_parquet(
@@ -174,7 +176,7 @@ defmodule Explorer.Backend.DataFrame do
               values_to :: column_name()
             ) :: df
   @callback put(df, out_df :: df(), column_name(), series()) :: df
-  @callback describe(df, out_df :: df(), percentiles :: list(float()) | nil) :: df()
+  @callback describe(df, out_df :: df(), percentiles :: option(list(float()))) :: df()
 
   # Two or more table verbs
 

diff --git a/mix.exs b/mix.exs
@@ -2,8 +2,9 @@ defmodule Explorer.MixProject do
   use Mix.Project
 
   @source_url "https://github.com/elixir-nx/explorer"
-  @version "0.6.0-dev"
+  @version "0.5.2"
   @dev? String.ends_with?(@version, "-dev")
+  @force_build? System.get_env("EXPLORER_BUILD") in ["1", "true"]
 
   def project do
     [
@@ -39,7 +40,7 @@ defmodule Explorer.MixProject do
       {:table_rex, "~> 3.1.1"},
 
       ## Optional
-      {:rustler, "~> 0.27.0", optional: not @dev?},
+      {:rustler, "~> 0.27.0", optional: not (@dev? or @force_build?)},
       {:nx, "~> 0.4.0 or ~> 0.5.0", optional: true},
 
       ## Dev