Skip to content

Commit

Permalink
Prepare for v0.5.2 release (#526)
Browse files Browse the repository at this point in the history
* Use "option(type)" instead of type | nil for df callback

* Add changelog for version v0.5.2

* Add "Unreleased" section to changelog

The idea is to make it easier to annotate changes.

* Final steps

* Make always rustler optional

* Revert "Make always rustler optional"

This reverts commit bc38b18.

* Add another condition to make "rustler" required
  • Loading branch information
philss authored Mar 1, 2023
1 parent 2805b14 commit b371361
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 10 deletions.
53 changes: 53 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,57 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [v0.5.2] - 2023-02-28

### Added

- Add `across` and comprehensions to `Explorer.Query`. These features allow a
more flexible and elegant way to work with multiple columns at once. Example:

```elixir
iris = Explorer.Datasets.iris()
Explorer.DataFrame.mutate(iris,
  for col <- across(["sepal_width", "sepal_length", "petal_length", "petal_width"]) do
    {col.name, (col - mean(col)) / variance(col)}
  end
)
```

See the `Explorer.Query` documentation for further details.

- Add support for regexes to select columns of a dataframe. Example:

```elixir
df = Explorer.Datasets.wine()
df[~r/(class|hue)/]
```

- Add the `:max_rows` and `:columns` options to `Explorer.DataFrame.from_parquet/2`. This mirrors
the `from_csv/2` function.

- Allow `Explorer.Series` functions that accept floats to work with `:nan`, `:infinity`
and `:neg_infinity` values.

- Add `Explorer.DataFrame.shuffle/2` and `Explorer.Series.shuffle/2`.

- Add support for a list of filters in `Explorer.DataFrame.filter/2`. These filters are
joined as `and` expressions.

### Fixed

- Add `is_integer/1` guard to `Explorer.Series.shift/2`.
- Raise if series sizes do not match for binary operations.

### Changed

- Rename the option `:replacement` to `:replace` for `Explorer.DataFrame.sample/3` and
`Explorer.Series.sample/3`.

- Change sampling to no longer shuffle by default. A new option
named `:shuffle` was added to control that.

## [v0.5.1] - 2023-02-17

### Added
Expand Down Expand Up @@ -293,6 +344,8 @@ properly compare floats.

First release.

[Unreleased]: https://github.com/elixir-nx/explorer/compare/v0.5.2...HEAD
[v0.5.2]: https://github.com/elixir-nx/explorer/compare/v0.5.1...v0.5.2
[v0.5.1]: https://github.com/elixir-nx/explorer/compare/v0.5.0...v0.5.1
[v0.5.0]: https://github.com/elixir-nx/explorer/compare/v0.4.0...v0.5.0
[v0.4.0]: https://github.com/elixir-nx/explorer/compare/v0.3.1...v0.4.0
Expand Down
18 changes: 10 additions & 8 deletions lib/explorer/backend/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ defmodule Explorer.Backend.DataFrame do
@type t :: struct()

@type df :: Explorer.DataFrame.t()

@type option(type) :: type | nil
@type ok_result :: :ok | {:error, term()}
@type result(t) :: {:ok, t} | {:error, term()}

@type series :: Explorer.Series.t()
@type column_name :: String.t()
@type dtype :: Explorer.Series.dtype()
Expand All @@ -23,9 +26,8 @@ defmodule Explorer.Backend.DataFrame do
@type lazy_frame :: Explorer.Backend.LazyFrame.t()
@type lazy_series :: Explorer.Backend.LazySeries.t()

@type compression :: {algorithm :: atom() | nil, level :: integer() | nil}
@type compression :: {algorithm :: option(atom()), level :: option(integer())}
@type columns_for_io :: list(column_name()) | list(pos_integer()) | nil
@type option(type) :: type | nil

# IO: CSV
@callback from_csv(
Expand All @@ -36,9 +38,9 @@ defmodule Explorer.Backend.DataFrame do
skip_rows :: integer(),
header? :: boolean(),
encoding :: String.t(),
max_rows :: integer() | nil,
max_rows :: option(integer()),
columns :: columns_for_io(),
infer_schema_length :: integer() | nil,
infer_schema_length :: option(integer()),
parse_dates :: boolean()
) :: result(df)
@callback to_csv(df, filename :: String.t(), header? :: boolean(), delimiter :: String.t()) ::
Expand All @@ -53,16 +55,16 @@ defmodule Explorer.Backend.DataFrame do
skip_rows :: integer(),
header? :: boolean(),
encoding :: String.t(),
max_rows :: integer() | nil,
max_rows :: option(integer()),
columns :: columns_for_io(),
infer_schema_length :: integer() | nil,
infer_schema_length :: option(integer()),
parse_dates :: boolean()
) :: result(df)

# IO: Parquet
@callback from_parquet(
filename :: String.t(),
max_rows :: integer() | nil,
max_rows :: option(integer()),
columns :: columns_for_io()
) :: result(df)
@callback to_parquet(
Expand Down Expand Up @@ -174,7 +176,7 @@ defmodule Explorer.Backend.DataFrame do
values_to :: column_name()
) :: df
@callback put(df, out_df :: df(), column_name(), series()) :: df
@callback describe(df, out_df :: df(), percentiles :: list(float()) | nil) :: df()
@callback describe(df, out_df :: df(), percentiles :: option(list(float()))) :: df()

# Two or more table verbs

Expand Down
5 changes: 3 additions & 2 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ defmodule Explorer.MixProject do
use Mix.Project

@source_url "https://github.com/elixir-nx/explorer"
@version "0.6.0-dev"
@version "0.5.2"
@dev? String.ends_with?(@version, "-dev")
@force_build? System.get_env("EXPLORER_BUILD") in ["1", "true"]

def project do
[
Expand Down Expand Up @@ -39,7 +40,7 @@ defmodule Explorer.MixProject do
{:table_rex, "~> 3.1.1"},

## Optional
{:rustler, "~> 0.27.0", optional: not @dev?},
{:rustler, "~> 0.27.0", optional: not (@dev? or @force_build?)},
{:nx, "~> 0.4.0 or ~> 0.5.0", optional: true},

## Dev
Expand Down

0 comments on commit b371361

Please sign in to comment.