From 2efd34ab45b273aa083ad369e17657181b1cef9d Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:19:15 +0200
Subject: [PATCH 01/22] Sort output in 'ixmp report' CLI command

---
 ixmp/cli.py                            |  2 +-
 ixmp/tests/reporting/test_reporting.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ixmp/cli.py b/ixmp/cli.py
index 293df0690..1262deb0b 100644
--- a/ixmp/cli.py
+++ b/ixmp/cli.py
@@ -92,7 +92,7 @@ def report(context, config, key):
     r.configure(config)
 
     # Print the target
-    print(r.get(key))
+    print(r.get(key).to_series().sort_index())
 
 
 @main.command('show-versions')
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index 5aa04d1f2..6e9b3550b 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -620,12 +620,12 @@ def test_cli(ixmp_cli, test_mp, test_data_path):
     assert result.output.endswith(
         "i          j       "  # Trailing whitespace
         """
-seattle    new-york    2.5
-           chicago     1.7
-           topeka      1.8
-san-diego  new-york    2.5
-           chicago     1.8
+san-diego  chicago     1.8
+           new-york    2.5
            topeka      1.4
+seattle    chicago     1.7
+           new-york    2.5
+           topeka      1.8
 Name: value, dtype: float64
 """)
 

From 263ccbdacac521e409f97355244a0a876bfad8d0 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:28:08 +0200
Subject: [PATCH 02/22] Split reporting.quantity and reporting.attrseries

- Make Quantity() a factory method rather than class/type.
- Quantity.CLASS is a string used to check the current implementation.
- Remove reporting.as_quantity and all uses of this function;
  Quantity(...) now does the same.
- Move AttrSeries.align_levels from computations.product.
- Update testing.assert_qty_equal and .assert_qty_allclose.
---
 ixmp/reporting/__init__.py                |   3 +-
 ixmp/reporting/attrseries.py              | 163 ++++++++++++++++++
 ixmp/reporting/computations.py            |  37 ++--
 ixmp/reporting/quantity.py                | 196 +++-------------------
 ixmp/testing.py                           |  41 ++---
 ixmp/tests/reporting/__init__.py          |   1 +
 ixmp/tests/reporting/test_computations.py |  12 +-
 ixmp/tests/reporting/test_reporting.py    |  18 +-
 8 files changed, 235 insertions(+), 236 deletions(-)
 create mode 100644 ixmp/reporting/attrseries.py

diff --git a/ixmp/reporting/__init__.py b/ixmp/reporting/__init__.py
index 8e0341765..e1cd124e4 100644
--- a/ixmp/reporting/__init__.py
+++ b/ixmp/reporting/__init__.py
@@ -44,7 +44,7 @@
 from .describe import describe_recursive
 from .exceptions import ComputationError
 from .key import Key
-from .quantity import Quantity, as_quantity
+from .quantity import Quantity
 from .utils import (
     REPLACE_UNITS,
     RENAME_DIMS,
@@ -56,7 +56,6 @@
     'Key',
     'Quantity',
     'Reporter',
-    'as_quantity',
     'configure',
 ]
 
diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
new file mode 100644
index 000000000..4cbed5f0d
--- /dev/null
+++ b/ixmp/reporting/attrseries.py
@@ -0,0 +1,163 @@
+from collections.abc import Collection
+
+import pandas as pd
+import pandas.core.indexes.base as ibase
+import pint
+import xarray as xr
+
+
+class AttrSeries(pd.Series):
+    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.
+
+    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
+    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
+    matrix support, ixmp quantities may be too large for available memory.
+
+    The AttrSeries class provides similar methods and behaviour to
+    :class:`xarray.DataArray`, so that :mod:`ixmp.reporting.computations`
+    methods can use xarray-like syntax.
+
+    Parameters
+    ----------
+    units : str or pint.Unit, optional
+        Set the units attribute. The value is converted to :class:`pint.Unit`
+        and added to `attrs`.
+    attrs : :class:`~collections.abc.Mapping`, optional
+        Set the :attr:`~pandas.Series.attrs` of the AttrSeries. This attribute
+        was added in `pandas 1.0
+        <https://pandas.pydata.org/docs/whatsnew/v1.0.0.html>`_, but is not
+        currently supported by the Series constructor.
+    """
+
+    # See https://pandas.pydata.org/docs/development/extending.html
+    @property
+    def _constructor(self):
+        return AttrSeries
+
+    def __init__(self, data=None, *args, name=None, units=None, attrs=None,
+                 **kwargs):
+        attrs = attrs or dict()
+        if units:
+            # Insert the units into the attrs
+            attrs['_unit'] = pint.Unit(units)
+
+        if isinstance(data, (AttrSeries, xr.DataArray)):
+            # Use attrs from an existing object
+            new_attrs = data.attrs.copy()
+
+            # Overwrite with explicit attrs argument
+            new_attrs.update(attrs)
+            attrs = new_attrs
+
+            # Pre-convert to pd.Series from xr.DataArray to preserve names and
+            # labels. For AttrSeries, this is a no-op (see below).
+            name = ibase.maybe_extract_name(name, data, type(self))
+            data = data.to_series()
+
+        # Don't pass attrs to pd.Series constructor; it currently does not
+        # accept them
+        super().__init__(data, *args, name=name, **kwargs)
+
+        # Update the attrs after initialization
+        self.attrs.update(attrs)
+
+    @classmethod
+    def from_series(cls, series, sparse=None):
+        return cls(series)
+
+    def assign_attrs(self, d):
+        self.attrs.update(d)
+        return self
+
+    def assign_coords(self, **kwargs):
+        return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
+
+    @property
+    def coords(self):
+        """Read-only."""
+        result = dict()
+        for name, levels in zip(self.index.names, self.index.levels):
+            result[name] = xr.Dataset(None, coords={name: levels})[name]
+        return result
+
+    @property
+    def dims(self):
+        return tuple(self.index.names)
+
+    def drop(self, label):
+        return self.droplevel(label)
+
+    def rename(self, new_name_or_name_dict):
+        if isinstance(new_name_or_name_dict, dict):
+            return self.rename_axis(index=new_name_or_name_dict)
+        else:
+            return super().rename(new_name_or_name_dict)
+
+    def sel(self, indexers=None, drop=False, **indexers_kwargs):
+        indexers = indexers or {}
+        indexers.update(indexers_kwargs)
+        if len(indexers) == 1:
+            level, key = list(indexers.items())[0]
+            if not isinstance(key, Collection) and not drop:
+                # When using .loc[] to select 1 label on 1 level, pandas drops
+                # the level. Use .xs() to avoid this behaviour unless drop=True
+                return AttrSeries(self.xs(key, level=level, drop_level=False))
+
+        idx = tuple(indexers.get(l, slice(None)) for l in self.index.names)
+        return AttrSeries(self.loc[idx])
+
+    def sum(self, *args, **kwargs):
+        obj = super(AttrSeries, self)
+        attrs = None
+
+        try:
+            dim = kwargs.pop('dim')
+        except KeyError:
+            dim = list(args)
+            args = tuple()
+
+        if isinstance(self.index, pd.MultiIndex):
+            if len(dim) == len(self.index.names):
+                # assume dimensions = full multi index, do simple sum
+                kwargs = {}
+            else:
+                # pivot and sum across columns
+                obj = self.unstack(dim)
+                kwargs['axis'] = 1
+                attrs = self.attrs
+        else:
+            if dim != [self.index.name]:
+                raise ValueError(dim, self.index.name, self)
+            kwargs['level'] = dim
+
+        return AttrSeries(obj.sum(*args, **kwargs), attrs=attrs)
+
+    def squeeze(self, *args, **kwargs):
+        kwargs.pop('drop')
+        return super().squeeze(*args, **kwargs) if len(self) > 1 else self
+
+    def as_xarray(self):
+        return xr.DataArray.from_series(self)
+
+    def transpose(self, *dims):
+        return self.reorder_levels(dims)
+
+    def to_dataframe(self):
+        return self.to_frame()
+
+    def to_series(self):
+        return self
+
+    def align_levels(self, other):
+        """Work around https://github.com/pandas-dev/pandas/issues/25760.
+
+        Return a copy of *self* with common levels ordered as in *other*.
+
+        .. todo:: remove when Quantity is xr.DataArray, or the above issue is
+           closed.
+        """
+        if not isinstance(self.index, pd.MultiIndex):
+            return self
+        common = [n for n in other.index.names if n in self.index.names]
+        unique = [n for n in self.index.names if n not in common]
+        return self.reorder_levels(common + unique)
diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index 4f95b7f5a..88a394248 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -10,7 +10,7 @@
 import pint
 import xarray as xr
 
-from .quantity import AttrSeries, Quantity, as_quantity
+from .quantity import Quantity
 from .utils import (
     RENAME_DIMS,
     dims_for_qty,
@@ -192,9 +192,10 @@ def data_for_quantity(ix_type, name, column, scenario, config):
     # log.debug(' '.join(map(str, info)))
 
     # Convert to a Quantity, assign attrbutes and name
-    qty = as_quantity(data[column]) \
-        .assign_attrs(attrs) \
-        .rename(name + ('-margin' if column == 'mrg' else ''))
+    qty = Quantity(
+        data[column],
+        name=name + ('-margin' if column == 'mrg' else ''),
+        attrs=attrs)
 
     try:
         # Remove length-1 dimensions for scalars
@@ -259,7 +260,7 @@ def concat(*objs, **kwargs):
     Reporter.
     """
     objs = filter_concat_args(objs)
-    if Quantity is AttrSeries:
+    if Quantity.CLASS == 'AttrSeries':
         kwargs.pop('dim')
         return pd.concat(objs, **kwargs)
     elif Quantity is xr.DataArray:  # pragma: no cover
@@ -281,24 +282,12 @@ def product(*quantities):
     # Initialize result values with first entry
     result, u_result = next(items)
 
-    def _align_levels(ref, obj):
-        """Work around https://github.com/pandas-dev/pandas/issues/25760
-
-        Return a copy of *obj* with common levels in the same order as *ref*.
-
-        TODO remove when Quantity is xr.DataArray, or above issues is closed.
-        """
-        if not isinstance(obj.index, pd.MultiIndex):
-            return obj
-        common = [n for n in ref.index.names if n in obj.index.names]
-        unique = [n for n in obj.index.names if n not in common]
-        return obj.reorder_levels(common + unique)
-
     # Iterate over remaining entries
     for q, u in items:
-        if Quantity is AttrSeries:
-            result = (result * _align_levels(result, q)).dropna()
-        else:  # pragma: no cover
+        if Quantity.CLASS == 'AttrSeries':
+            # Work around pandas-dev/pandas#25760; see attrseries.py
+            result = (result * q.align_levels(result)).dropna()
+        else:
             result = result * q
         u_result *= u
 
@@ -321,7 +310,7 @@ def ratio(numerator, denominator):
     result = numerator / denominator
     result.attrs['_unit'] = u_num / u_denom
 
-    if Quantity is AttrSeries:
+    if Quantity.CLASS == 'AttrSeries':
         result.dropna(inplace=True)
 
     return result
@@ -343,7 +332,7 @@ def select(qty, indexers, inverse=False):
         new_indexers = {}
         for dim, labels in indexers.items():
             new_indexers[dim] = list(filter(lambda l: l not in labels,
-                                            qty.coords[dim]))
+                                            qty.coords[dim].data))
         indexers = new_indexers
 
     return qty.sel(indexers)
@@ -433,7 +422,7 @@ def load_file(path, dims={}, units=None):
                        .rename(columns=dims)
             index_columns = list(dims.values())
 
-        return as_quantity(data.set_index(index_columns)['value'], units=units)
+        return Quantity(data.set_index(index_columns)['value'], units=units)
     elif path.suffix in ('.xls', '.xlsx'):
         # TODO define expected Excel data input format
         raise NotImplementedError  # pragma: no cover
diff --git a/ixmp/reporting/quantity.py b/ixmp/reporting/quantity.py
index 647348769..b18085fac 100644
--- a/ixmp/reporting/quantity.py
+++ b/ixmp/reporting/quantity.py
@@ -1,186 +1,40 @@
-from collections.abc import Collection
-
-import numpy
 import pandas as pd
-import pandas.core.indexes.base as ibase
 import pint
-import xarray as xr
 
 
-class AttrSeries(pd.Series):
-    """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.
+class _QuantityFactory:
+    #: Name of the internal class used to represent reporting quantities.
+    #: Calling ``Quantity(...)`` always converts to this type.
+    CLASS = 'AttrSeries'
+    # CLASS = 'SparseDataArray'
 
-    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
-    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
-    matrix support, ixmp quantities may be too large for available memory.
+    def __call__(self, data, *args, **kwargs):
+        name = kwargs.pop('name', None)
+        units = kwargs.pop('units', None)
+        attrs = kwargs.pop('attrs', dict())
 
-    The AttrSeries class provides similar methods and behaviour to
-    :class:`xarray.DataArray`, so that :mod:`ixmp.reporting.computations`
-    methods can use xarray-like syntax.
+        if self.CLASS == 'AttrSeries':
+            from .attrseries import AttrSeries as cls
+        elif self.CLASS == 'SparseDataArray':
+            from .sparsedataarray import SparseDataArray as cls
 
-    Parameters
-    ----------
-    units : str or pint.Unit, optional
-        Set the units attribute. The value is converted to :class:`pint.Unit`
-        and added to `attrs`.
-    attrs : :class:`~collections.abc.Mapping`, optional
-        Set the :attr:`~pandas.Series.attrs` of the AttrSeries. This attribute
-        was added in `pandas 1.0
-        <https://pandas.pydata.org/docs/whatsnew/v1.0.0.html>`_, but is not
-        currently supported by the Series constructor.
-    """
+        if isinstance(data, pd.Series):
+            result = cls.from_series(data)
+        elif self.CLASS == 'AttrSeries':
+            result = cls(data, *args, **kwargs)
+        else:
+            assert len(args) == len(kwargs) == 0, (args, kwargs)
+            result = data._sda.convert()
 
-    # See https://pandas.pydata.org/docs/development/extending.html
-    @property
-    def _constructor(self):
-        return AttrSeries
+        if name:
+            result.name = name
 
-    def __init__(self, data=None, *args, name=None, units=None, attrs=None,
-                 **kwargs):
-        attrs = attrs or dict()
         if units:
-            # Insert the units into the attrs
             attrs['_unit'] = pint.Unit(units)
 
-        if isinstance(data, (AttrSeries, xr.DataArray)):
-            # Use attrs from an existing object
-            new_attrs = data.attrs.copy()
-
-            # Overwrite with explicit attrs argument
-            new_attrs.update(attrs)
-            attrs = new_attrs
-
-            # Pre-convert to pd.Series from xr.DataArray to preserve names and
-            # labels. For AttrSeries, this is a no-op (see below).
-            name = ibase.maybe_extract_name(name, data, type(self))
-            data = data.to_series()
-
-        # Don't pass attrs to pd.Series constructor; it currently does not
-        # accept them
-        super().__init__(data, *args, name=name, **kwargs)
-
-        # Update the attrs after initialization
-        self.attrs.update(attrs)
-
-    @classmethod
-    def from_series(cls, series, sparse=None):
-        return cls(series)
-
-    def assign_attrs(self, d):
-        self.attrs.update(d)
-        return self
-
-    def assign_coords(self, **kwargs):
-        return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
-
-    @property
-    def coords(self):
-        """Read-only."""
-        return dict(zip(self.index.names, self.index.levels))
-
-    @property
-    def dims(self):
-        return tuple(self.index.names)
-
-    def drop(self, label):
-        return self.droplevel(label)
-
-    def rename(self, new_name_or_name_dict):
-        if isinstance(new_name_or_name_dict, dict):
-            return self.rename_axis(index=new_name_or_name_dict)
-        else:
-            return super().rename(new_name_or_name_dict)
-
-    def sel(self, indexers=None, drop=False, **indexers_kwargs):
-        indexers = indexers or {}
-        indexers.update(indexers_kwargs)
-        if len(indexers) == 1:
-            level, key = list(indexers.items())[0]
-            if not isinstance(key, Collection) and not drop:
-                # When using .loc[] to select 1 label on 1 level, pandas drops
-                # the level. Use .xs() to avoid this behaviour unless drop=True
-                return AttrSeries(self.xs(key, level=level, drop_level=False))
-
-        idx = tuple(indexers.get(n, slice(None)) for n in self.index.names)
-        return AttrSeries(self.loc[idx])
-
-    def sum(self, *args, **kwargs):
-        try:
-            dim = kwargs.pop('dim')
-            if isinstance(self.index, pd.MultiIndex):
-                if len(dim) == len(self.index.names):
-                    # assume dimensions = full multi index, do simple sum
-                    obj = self
-                    kwargs = {}
-                else:
-                    # pivot and sum across columns
-                    obj = self.unstack(dim)
-                    kwargs['axis'] = 1
-            else:
-                if dim != [self.index.name]:
-                    raise ValueError(dim, self.index.name, self)
-                obj = super()
-                kwargs['level'] = dim
-        except KeyError:
-            obj = super()
-        return AttrSeries(obj.sum(*args, **kwargs))
-
-    def squeeze(self, *args, **kwargs):
-        kwargs.pop('drop')
-        return super().squeeze(*args, **kwargs) if len(self) > 1 else self
-
-    def as_xarray(self):
-        return xr.DataArray.from_series(self)
-
-    def transpose(self, *dims):
-        return self.reorder_levels(dims)
-
-    def to_dataframe(self):
-        return self.to_frame()
-
-    def to_series(self):
-        return self
-
-
-#: The current internal class used to represent reporting quantities.
-#: :meth:`as_quantity` always converts to this type.
-Quantity = AttrSeries
-# See also:
-# - test_report_size() for a test that shows how non-sparse xr.DataArray
-#   triggers MemoryError.
-# Quantity = xr.DataArray
-
-
-def as_sparse_xarray(obj, units=None):  # pragma: no cover
-    """Convert *obj* to :class:`xarray.DataArray` with sparse.COO storage."""
-    import sparse
-    from xarray.core.dtypes import maybe_promote
-
-    if isinstance(obj, xr.DataArray) and isinstance(obj.data, numpy.ndarray):
-        result = xr.DataArray(
-            data=sparse.COO.from_numpy(
-                obj.data,
-                fill_value=maybe_promote(obj.data.dtype)[1]),
-            coords=obj.coords,
-            dims=obj.dims,
-            name=obj.name,
-            attrs=obj.attrs,
-        )
-    elif isinstance(obj, pd.Series):
-        result = xr.DataArray.from_series(obj, sparse=True)
-    else:
-        result = obj
-
-    if units:
-        result.attrs['_unit'] = pint.Unit(units)
+        result.attrs.update(attrs)
 
-    return result
+        return result
 
 
-#: Convert args to :class:`.Quantity` class.
-#:
-#: Returns
-#: -------
-#: .Quantity
-#:     `obj` converted to the current Quantity type.
-as_quantity = AttrSeries if Quantity is AttrSeries else as_sparse_xarray
+Quantity = _QuantityFactory()
diff --git a/ixmp/testing.py b/ixmp/testing.py
index 4ccc8485d..c34736c03 100644
--- a/ixmp/testing.py
+++ b/ixmp/testing.py
@@ -59,6 +59,7 @@
 
 from . import cli, config as ixmp_config
 from .core import Platform, TimeSeries, Scenario, IAMC_IDX
+from .reporting import Quantity
 
 
 log = logging.getLogger(__name__)
@@ -480,51 +481,43 @@ def test_foo(caplog):
             pytest.fail('\n'.join(lines))
 
 
-def assert_qty_equal(a, b, check_attrs=True, **kwargs):
+def assert_qty_equal(a, b, check_type=True, check_attrs=True, **kwargs):
     """Assert that Quantity objects *a* and *b* are equal.
 
     When Quantity is AttrSeries, *a* and *b* are first passed through
     :meth:`as_quantity`.
     """
-    from xarray import DataArray
-    from xarray.testing import assert_equal as assert_xr_equal
-
-    from .reporting.quantity import AttrSeries, Quantity, as_quantity
-
-    if Quantity is AttrSeries:
-        # Convert pd.Series automatically
-        a = as_quantity(a) if isinstance(a, (pd.Series, DataArray)) else a
-        b = as_quantity(b) if isinstance(b, (pd.Series, DataArray)) else b
+    if not check_type:
+        a = Quantity(a)
+        b = Quantity(b)
 
+    if Quantity.CLASS == 'AttrSeries':
         assert_series_equal(a, b, check_dtype=False, **kwargs)
-    elif Quantity is DataArray:  # pragma: no cover
-        assert_xr_equal(a, b, **kwargs)
+    else:
+        import xarray.testing
+        xarray.testing.assert_equal(a, b, **kwargs)
 
     # check attributes are equal
     if check_attrs:
         assert a.attrs == b.attrs
 
 
-def assert_qty_allclose(a, b, check_attrs=True, **kwargs):
+def assert_qty_allclose(a, b, check_type=True, check_attrs=True, **kwargs):
     """Assert that Quantity objects *a* and *b* have numerically close values.
 
     When Quantity is AttrSeries, *a* and *b* are first passed through
     :meth:`as_quantity`.
     """
-    from xarray import DataArray
-    from xarray.testing import assert_allclose as assert_xr_allclose
-
-    from .reporting.quantity import AttrSeries, Quantity, as_quantity
-
-    if Quantity is AttrSeries:
-        # Convert pd.Series automatically
-        a = as_quantity(a) if isinstance(a, (pd.Series, DataArray)) else a
-        b = as_quantity(b) if isinstance(b, (pd.Series, DataArray)) else b
+    if not check_type:
+        a = Quantity(a)
+        b = Quantity(b)
 
+    if Quantity.CLASS == 'AttrSeries':
         assert_series_equal(a, b, **kwargs)
-    elif Quantity is DataArray:  # pragma: no cover
+    else:
+        import xarray.testing
         kwargs.pop('check_dtype', None)
-        assert_xr_allclose(a, b, **kwargs)
+        xarray.testing.assert_allclose(a._sda.dense, b._sda.dense, **kwargs)
 
     # check attributes are equal
     if check_attrs:
diff --git a/ixmp/tests/reporting/__init__.py b/ixmp/tests/reporting/__init__.py
index 7c92b1170..83530a0f7 100644
--- a/ixmp/tests/reporting/__init__.py
+++ b/ixmp/tests/reporting/__init__.py
@@ -23,6 +23,7 @@ def add_test_data(scen):
     x = xr.DataArray(np.random.rand(len(t), len(y)),
                      coords=[t, y], dims=['t', 'y'],
                      attrs={'_unit': ureg.Unit('kg')})
+    x = Quantity(x)
 
     # As a pd.DataFrame with units
     x_df = x.to_series().rename('value').reset_index()
diff --git a/ixmp/tests/reporting/test_computations.py b/ixmp/tests/reporting/test_computations.py
index 46608c0f3..63291ce2c 100644
--- a/ixmp/tests/reporting/test_computations.py
+++ b/ixmp/tests/reporting/test_computations.py
@@ -5,7 +5,7 @@
 import pytest
 
 import ixmp
-from ixmp.reporting import Reporter, as_quantity, computations
+from ixmp.reporting import Reporter, Quantity, computations
 from ixmp.testing import assert_logs
 
 from . import add_test_data
@@ -53,19 +53,19 @@ def test_select(data):
     # Unpack
     *_, t_foo, t_bar, x = data
 
-    x = as_quantity(x)
-    assert len(x) == 6 * 6
+    x = Quantity(x)
+    assert x.size == 6 * 6
 
     # Selection with inverse=False
     indexers = {'t': t_foo[0:1] + t_bar[0:1]}
     result_0 = computations.select(x, indexers=indexers)
-    assert len(result_0) == 2 * 6
+    assert result_0.size == 2 * 6
 
     # Single indexer along one dimension results in 1D data
     indexers['y'] = '2010'
     result_1 = computations.select(x, indexers=indexers)
-    assert len(result_1) == 2 * 1
+    assert result_1.size == 2 * 1
 
     # Selection with inverse=True
     result_2 = computations.select(x, indexers=indexers, inverse=True)
-    assert len(result_2) == 4 * 5
+    assert result_2.size == 4 * 5
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index 6e9b3550b..1a54cdb9a 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -20,7 +20,7 @@
     configure,
     computations,
 )
-from ixmp.reporting.quantity import AttrSeries, Quantity, as_quantity
+from ixmp.reporting import Quantity
 from ixmp.testing import (
     make_dantzig,
     assert_logs,
@@ -173,7 +173,7 @@ def test_reporter_add_product(test_mp, ureg):
     assert key == 'x squared:t-y'
 
     # Product has the expected value
-    exp = as_quantity(x * x, name='x')
+    exp = Quantity(x * x, name='x')
     exp.attrs['_unit'] = ureg('kilogram ** 2').units
     assert_qty_equal(exp, rep.get(key))
 
@@ -203,7 +203,7 @@ def test_reporter_from_dantzig(test_mp, ureg):
 
     # Summation across all dimensions results a 1-element Quantity
     d = rep.get('d:')
-    assert d.shape == ((1,) if Quantity is AttrSeries else tuple())
+    assert d.shape == ((1,) if Quantity.CLASS == 'AttrSeries' else tuple())
     assert d.size == 1
     assert np.isclose(d.values, 11.7)
 
@@ -231,7 +231,7 @@ def test_reporter_from_dantzig(test_mp, ureg):
     # Disaggregation with explicit data
     # (cases of canned food 'p'acked in oil or water)
     shares = xr.DataArray([0.8, 0.2], coords=[['oil', 'water']], dims=['p'])
-    new_key = rep.disaggregate('b:j', 'p', args=[as_quantity(shares)])
+    new_key = rep.disaggregate('b:j', 'p', args=[Quantity(shares)])
 
     # ...produces the expected key with new dimension added
     assert new_key == 'b:j-p'
@@ -377,7 +377,7 @@ def test_reporter_file(tmp_path):
 def test_file_formats(test_data_path, tmp_path):
     r = Reporter()
 
-    expected = as_quantity(
+    expected = Quantity(
         pd.read_csv(test_data_path / 'report-input0.csv',
                     index_col=['i', 'j'])['value'],
         units='km')
@@ -443,10 +443,10 @@ def test_units(ureg):
     # Create some dummy data
     dims = dict(coords=['a b c'.split()], dims=['x'])
     r.add('energy:x',
-          as_quantity(xr.DataArray([1., 3, 8], **dims), units='MJ'))
+          Quantity(xr.DataArray([1., 3, 8], **dims), units='MJ'))
     r.add('time',
-          as_quantity(xr.DataArray([5., 6, 8], **dims), units='hour'))
-    r.add('efficiency', as_quantity(xr.DataArray([0.9, 0.8, 0.95], **dims)))
+          Quantity(xr.DataArray([5., 6, 8], **dims), units='hour'))
+    r.add('efficiency', Quantity(xr.DataArray([0.9, 0.8, 0.95], **dims)))
 
     # Aggregation preserves units
     r.add('energy', (computations.sum, 'energy:x', None, ['x']))
@@ -701,7 +701,7 @@ def test_aggregate(test_mp):
     t_groups = {'foo': t_foo, 'bar': t_bar, 'baz': ['foo1', 'bar5', 'bar6']}
 
     # Use the computation directly
-    agg1 = computations.aggregate(as_quantity(x), {'t': t_groups}, True)
+    agg1 = computations.aggregate(Quantity(x), {'t': t_groups}, True)
 
     # Expected set of keys along the aggregated dimension
     assert set(agg1.coords['t'].values) == set(t) | set(t_groups.keys())

From 4b7248949eee20eef1945a6c5391c1d41877c482 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:36:01 +0200
Subject: [PATCH 03/22] Add reporting.sparsedataarray

---
 ixmp/reporting/sparsedataarray.py | 129 ++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 ixmp/reporting/sparsedataarray.py

diff --git a/ixmp/reporting/sparsedataarray.py b/ixmp/reporting/sparsedataarray.py
new file mode 100644
index 000000000..d090ac010
--- /dev/null
+++ b/ixmp/reporting/sparsedataarray.py
@@ -0,0 +1,129 @@
+from warnings import filterwarnings
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+from xarray.core.utils import either_dict_or_kwargs
+
+# sparse 0.9.1, numba 0.49.0
+filterwarnings(
+    action='ignore',
+    message="An import was requested from a module that has moved location.",
+    module='sparse._coo.numba_extension',
+    )
+
+import sparse  # noqa: E402
+
+
+@xr.register_dataarray_accessor('_sda')
+class SparseAccessor:
+    """:mod:`xarray` accessor to help :class:`SparseDataArray`."""
+    def __init__(self, obj):
+        if not isinstance(obj, xr.DataArray):
+            raise TypeError('._sda accessor only valid for xr.DataArray')
+        self.da = obj
+
+    def convert(self):
+        """Return a :class:`SparseDataArray` instance."""
+        if not self.da._sda.COO_data:
+            # Dense (numpy.ndarray) data; convert to sparse
+            data = sparse.COO.from_numpy(self.da.data, fill_value=None)
+        elif not np.isnan(self.da.data.fill_value):
+            # sparse.COO with non-NaN fill value; copy and change
+            data = self.da.data.copy(deep=False)
+            data.fill_value = data.dtype.type(np.nan)
+        else:
+            # No change
+            data = self.da.data
+
+        if isinstance(self.da, SparseDataArray):
+            # Replace the variable, returning a copy
+            variable = self.da.variable._replace(data=data)
+            return self.da._replace(variable=variable)
+        else:
+            # Construct
+            return SparseDataArray(
+                data=data,
+                coords=self.da.coords,
+                dims=self.da.dims,
+                name=self.da.name,
+                attrs=self.da.attrs,
+                )
+
+    @property
+    def COO_data(self):
+        """:obj:`True` if the DataArray has :class:`sparse.COO` data."""
+        return isinstance(self.da.data, sparse.COO)
+
+    @property
+    def dense(self):
+        """Return a copy with dense (:class:`.ndarray`) data."""
+        if self.COO_data:
+            # Use existing method xr.Variable._to_dense()
+            return self.da._replace(variable=self.da.variable._to_dense())
+        else:
+            return self.da
+
+    @property
+    def dense_super(self):
+        """Return a proxy to a :class:`.ndarray`-backed :class:`.DataArray`."""
+        return super(SparseDataArray, self.dense)
+
+
+class SparseDataArray(xr.DataArray):
+    """:class:`xr.DataArray` with sparse data.
+
+    SparseDataArray uses :class:`sparse.COO` for storage with :data:`numpy.nan`
+    as its :attr:`sparse.COO.fill_value`. Some methods of :class:`.DataArray`
+    are overridden to ensure data is in sparse, or dense, format as necessary,
+    to provide expected functionality not currently supported by :mod:`sparse`,
+    and to avoid exhausting memory for some operations that require dense data.
+
+    See Also
+    --------
+    SparseAccessor
+    """
+    __slots__ = tuple()
+
+    @classmethod
+    def from_series(cls, obj, sparse=True):
+        # Call the parent method always with sparse=True, then re-wrap
+        return xr.DataArray.from_series(obj, sparse=True)._sda.convert()
+
+    def equals(self, other):
+        """Necessary for :meth:`xarray.testing.assert_equal` to work."""
+        return self.variable.equals(other.variable, equiv=np.equal)
+
+    @property
+    def loc(self):
+        # FIXME doesn't allow assignment
+        return self._sda.dense_super.loc
+
+    def sel(self, indexers=None, method=None, tolerance=None, drop=False,
+            **indexers_kwargs) -> 'SparseDataArray':
+        """Handle >1-D indexers with sparse data."""
+        indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
+        if isinstance(indexers, dict) and len(indexers) > 1:
+            result = self
+            for k, v in indexers.items():
+                result = result.sel({k: v}, method=method, tolerance=tolerance,
+                                    drop=drop)
+            return result
+        else:
+            return super().sel(indexers=indexers, method=method,
+                               tolerance=tolerance, drop=drop)
+
+    def to_dataframe(self):
+        # FIXME this does not exactly match the behaviour of xr.DataArray; it
+        #       omits coordinate variables
+        return self.to_series().to_frame()
+
+    def to_series(self) -> pd.Series:
+        # Use SparseArray.coords and .data (each already 1-D) to construct a
+        # pd.Series without first converting to a potentially very large
+        # ndarray
+
+        # Construct a pd.MultiIndex without using .from_product
+        index = pd.MultiIndex.from_arrays(self.data.coords, names=self.dims) \
+                  .set_levels([self.coords[d].values for d in self.dims])
+        return pd.Series(self.data.data, index=index, name=self.name)

From e7fe571e35bf9f5a1722a51116fd462f83ecf649 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:38:09 +0200
Subject: [PATCH 04/22] Use SparseDataArray in tests

---
 ixmp/reporting/computations.py        | 23 +++++++------
 ixmp/tests/reporting/test_quantity.py | 48 +++++++++++++++------------
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index 88a394248..de6bd158f 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -226,10 +226,6 @@ def aggregate(quantity, groups, keep):
         Same dimensionality as `quantity`.
 
     """
-    # NB .transpose() below is necessary only for Quantity == AttrSeries. It
-    #   can be removed when Quantity = xr.DataArray.
-    dim_order = quantity.dims
-
     attrs = quantity.attrs.copy()
 
     for dim, dim_groups in groups.items():
@@ -238,10 +234,16 @@ def aggregate(quantity, groups, keep):
 
         # Aggregate each group
         for group, members in dim_groups.items():
-            values.append(quantity.sel({dim: members})
-                                  .sum(dim=dim)
-                                  .assign_coords(**{dim: group})
-                                  .transpose(*dim_order))
+            agg = quantity.sel({dim: members}) \
+                          .sum(dim=dim) \
+                          .assign_coords(**{dim: group})
+            if Quantity.CLASS == 'AttrSeries':
+                # .transpose() is necessary for AttrSeries
+                agg = agg.transpose(*quantity.dims)
+            else:
+                # Restore fill_value=NaN for compatibility
+                agg = agg._sda.convert()
+            values.append(agg)
 
         # Reassemble to a single dataarray
         quantity = concat(*values, dim=dim)
@@ -263,8 +265,9 @@ def concat(*objs, **kwargs):
     if Quantity.CLASS == 'AttrSeries':
         kwargs.pop('dim')
         return pd.concat(objs, **kwargs)
-    elif Quantity is xr.DataArray:  # pragma: no cover
-        return xr.concat(objs, **kwargs)
+    else:
+        # Correct fill-values
+        return xr.concat(objs, **kwargs)._sda.convert()
 
 
 def disaggregate_shares(quantity, shares):
diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index 867290825..bfb13e50b 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -29,17 +29,17 @@ def test_assert(self, a):
         # Convert to pd.Series
         b = a.to_series()
 
-        assert_qty_equal(a, b)
-        assert_qty_equal(b, a)
-        assert_qty_allclose(a, b)
-        assert_qty_allclose(b, a)
+        assert_qty_equal(a, b, check_type=False)
+        assert_qty_equal(b, a, check_type=False)
+        assert_qty_allclose(a, b, check_type=False)
+        assert_qty_allclose(b, a, check_type=False)
 
         c = Quantity(a)
 
-        assert_qty_equal(a, c)
-        assert_qty_equal(c, a)
-        assert_qty_allclose(a, c)
-        assert_qty_allclose(c, a)
+        assert_qty_equal(a, c, check_type=True)
+        assert_qty_equal(c, a, check_type=True)
+        assert_qty_allclose(a, c, check_type=True)
+        assert_qty_allclose(c, a, check_type=True)
 
     def test_assert_with_attrs(self, a):
         """Test assertions about Quantity with attrs.
@@ -88,8 +88,7 @@ def test_others(self, foo):
         assert foo.drop('a').dims == ('b',)
 
 
-@pytest.mark.skip(reason="Pending #317")
-def test_as_sparse_xarray():
+def test_sda_accessor():
     """Test conversion to sparse.COO-backed xr.DataArray."""
     x_series = pd.Series(
         data=[1., 2, 3, 4],
@@ -98,25 +97,32 @@ def test_as_sparse_xarray():
     )
     y_series = pd.Series(data=[5., 6], index=pd.Index(['e', 'f'], name='baz'))
 
-    x = xr.DataArray.from_series(x_series, sparse=True)
-    y = xr.DataArray.from_series(y_series, sparse=True)
+    x = SparseDataArray.from_series(x_series)
+    y = SparseDataArray.from_series(y_series)
 
-    x_dense = xr.DataArray.from_series(x_series)
-    y_dense = xr.DataArray.from_series(y_series)
+    x_dense = x._sda.dense_super
+    y_dense = y._sda.dense_super
+    assert not x_dense._sda.COO_data or x_dense._sda.nan_fill
+    assert not y_dense._sda.COO_data or y_dense._sda.nan_fill
 
     with pytest.raises(ValueError, match='make sure that the broadcast shape'):
         x_dense * y
 
-    z1 = as_sparse_xarray(x_dense) * y
-    z2 = x * as_sparse_xarray(y_dense)
-    assert z1.dims == ('foo', 'bar', 'baz')
+    z1 = x_dense._sda.convert() * y
+
+    z2 = x * y_dense._sda.convert()
+    assert z1.dims == ('foo', 'bar', 'baz') == z2.dims
     assert_xr_equal(z1, z2)
 
-    z3 = as_sparse_xarray(x) * as_sparse_xarray(y)
+    z3 = x._sda.convert() * y._sda.convert()
     assert_xr_equal(z1, z3)
 
-    z4 = as_sparse_xarray(x) * y
+    z4 = x._sda.convert() * y
     assert_xr_equal(z1, z4)
 
-    z5 = as_sparse_xarray(x_series) * y
-    assert_xr_equal(z1, z5)
+    # Doesn't work: can't align automatically
+    with pytest.raises(ValueError, match='Please make sure that the broadcast '
+                       'shape of just the sparse arrays is the same as the '
+                       'broadcast shape of all the operands.'):
+        z5 = SparseDataArray(x_series) * y
+        assert_xr_equal(z1, z5)

From 0e3a2fee40653a80cbe156844a459034878529c7 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:45:48 +0200
Subject: [PATCH 05/22] Parametrize tests for both kinds of reporting.Quantity

---
 ixmp/testing.py                           | 11 +++++++++++
 ixmp/tests/reporting/__init__.py          |  3 +++
 ixmp/tests/reporting/test_computations.py |  3 +++
 ixmp/tests/reporting/test_quantity.py     | 19 +++++++++----------
 ixmp/tests/reporting/test_reporting.py    |  2 ++
 5 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/ixmp/testing.py b/ixmp/testing.py
index c34736c03..f15b04f27 100644
--- a/ixmp/testing.py
+++ b/ixmp/testing.py
@@ -98,6 +98,17 @@ def invoke(self, *args, **kwargs):
     yield Runner()
 
 
+@pytest.fixture(params=['AttrSeries', 'SparseDataArray'])
+def parametrize_quantity_class(request):
+    """Fixture to run tests twice, for both reporting Quantity classes."""
+    pre = Quantity.CLASS
+
+    Quantity.CLASS = request.param
+    yield
+
+    Quantity.CLASS = pre
+
+
 @pytest.fixture
 def protect_pint_app_registry():
     """Protect pint's application registry.
diff --git a/ixmp/tests/reporting/__init__.py b/ixmp/tests/reporting/__init__.py
index 83530a0f7..cae155e76 100644
--- a/ixmp/tests/reporting/__init__.py
+++ b/ixmp/tests/reporting/__init__.py
@@ -2,6 +2,9 @@
 import pint
 import xarray as xr
 
+from ixmp.reporting import Quantity
+
+
 REGISTRY = pint.get_application_registry()
 
 
diff --git a/ixmp/tests/reporting/test_computations.py b/ixmp/tests/reporting/test_computations.py
index 63291ce2c..8042272f3 100644
--- a/ixmp/tests/reporting/test_computations.py
+++ b/ixmp/tests/reporting/test_computations.py
@@ -11,6 +11,9 @@
 from . import add_test_data
 
 
+pytestmark = pytest.mark.usefixtures('parametrize_quantity_class')
+
+
 @pytest.fixture(scope='function')
 def data(test_mp, request):
     scen = ixmp.Scenario(test_mp, request.node.name, request.node.name, 'new')
diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index bfb13e50b..cf8d93174 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -4,21 +4,20 @@
 import xarray as xr
 from xarray.testing import assert_equal as assert_xr_equal
 
-from ixmp.reporting.quantity import AttrSeries, Quantity, as_sparse_xarray
+from ixmp import Reporter, Scenario
+from ixmp.reporting import Quantity, computations
+from ixmp.reporting.attrseries import AttrSeries
+from ixmp.reporting.sparsedataarray import SparseDataArray
 from ixmp.testing import assert_qty_allclose, assert_qty_equal
 
 
+@pytest.mark.usefixtures('parametrize_quantity_class')
 class TestQuantity:
-    """Tests of Quantity.
-
-    NB. these tests should pass whether Quantity is set to AttrSeries or
-    xr.DataArray in ixmp.reporting.utils. As written, they only test the
-    current form of Quantity. @gidden tested both by hand-swapping the Quantity
-    class and running tests as of commit df1ec6f of PR #147.
-    """
-    @pytest.fixture()
+    """Tests of Quantity."""
+    @pytest.fixture
     def a(self):
-        yield xr.DataArray([0.8, 0.2], coords=[['oil', 'water']], dims=['p'])
+        da = xr.DataArray([0.8, 0.2], coords=[['oil', 'water']], dims=['p'])
+        yield Quantity(da)
 
     def test_assert(self, a):
         """Test assertions about Quantity.
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index 1a54cdb9a..ca877a4e7 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -31,6 +31,8 @@
 from . import add_test_data
 
 
+pytestmark = pytest.mark.usefixtures('parametrize_quantity_class')
+
 test_args = ('Douglas Adams', 'Hitchhiker')
 
 TS_DF = {'year': [2010, 2020], 'value': [23.7, 23.8]}

From c8252f86a9724c8d0e746ccd4f7b99fb19bcff20 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sat, 25 Apr 2020 21:46:58 +0200
Subject: [PATCH 06/22] Move test_report_size to TestQuantity.test_size

---
 ixmp/tests/reporting/test_quantity.py  | 66 ++++++++++++++++++++++++++
 ixmp/tests/reporting/test_reporting.py | 60 -----------------------
 2 files changed, 66 insertions(+), 60 deletions(-)

diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index cf8d93174..df155be30 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -1,4 +1,5 @@
 """Tests for ixmp.reporting.quantity."""
+import numpy as np
 import pandas as pd
 import pytest
 import xarray as xr
@@ -19,6 +20,51 @@ def a(self):
         da = xr.DataArray([0.8, 0.2], coords=[['oil', 'water']], dims=['p'])
         yield Quantity(da)
 
+    @pytest.fixture(scope='class')
+    def scen_with_big_data(self, test_mp, num_params=10):
+        from itertools import zip_longest
+
+        # test_mp.add_unit('kg')
+        scen = Scenario(test_mp, 'TestQuantity', 'big data', version='new')
+
+        # Dimensions and their lengths (Fibonacci numbers)
+        N_dims = 6
+        dims = 'abcdefgh'[:N_dims + 1]
+        sizes = [1, 5, 21, 21, 89, 377, 1597, 6765][:N_dims + 1]
+
+        # commented: "377 / 73984365 elements = 0.00051% full"
+        # from functools import reduce
+        # from operator import mul
+        # size = reduce(mul, sizes)
+        # print('{} / {} elements = {:.5f}% full'
+        #       .format(max(sizes), size, 100 * max(sizes) / size))
+
+        # Names like g_0000 ... g_1596 along each dimension
+        coords = []
+        for d, N in zip(dims, sizes):
+            coords.append([f'{d}_{i:04d}' for i in range(N)])
+            # Add to Scenario
+            scen.init_set(d)
+            scen.add_set(d, coords[-1])
+
+        def _make_values():
+            """Make a DataFrame containing each label in *coords* ≥ 1 time."""
+            values = list(zip_longest(*coords, np.random.rand(max(sizes))))
+            result = pd.DataFrame(values, columns=list(dims) + ['value']) \
+                       .ffill()
+            result['unit'] = 'kg'
+            return result
+
+        # Fill the Scenario with quantities named q_00 ... q_09
+        names = []
+        for i in range(num_params):
+            name = f'q_{i:02d}'
+            scen.init_par(name, list(dims))
+            scen.add_par(name, _make_values())
+            names.append(name)
+
+        yield scen
+
     def test_assert(self, a):
         """Test assertions about Quantity.
 
@@ -64,6 +110,26 @@ def test_assert_with_attrs(self, a):
         a.attrs = {'bar': 'foo'}
         assert_qty_equal(a, b, check_attrs=False)
 
+    def test_size(self, scen_with_big_data):
+        """Stress-test reporting of large, sparse quantities."""
+        scen = scen_with_big_data
+
+        # Create the reporter
+        rep = Reporter.from_scenario(scen)
+
+        # Add a task to compute the product, i.e. requires all the q_*
+        keys = [rep.full_key(name) for name in scen.par_list()]
+        rep.add('bigmem', tuple([computations.product] + keys))
+
+        # One quantity fits in memory
+        rep.get(keys[0])
+
+        # All quantities can be multiplied without raising MemoryError
+        result = rep.get('bigmem')
+
+        # Result can be converted to pd.Series
+        result.to_series()
+
 
 class TestAttrSeries:
     """Tests of AttrSeries in particular."""
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index ca877a4e7..d375d08d8 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -632,66 +632,6 @@ def test_cli(ixmp_cli, test_mp, test_data_path):
 """)
 
 
-def test_report_size(test_mp):
-    """Stress-test reporting of large, sparse quantities."""
-    from itertools import zip_longest
-
-    import numpy as np
-
-    # test_mp.add_unit('kg')
-    scen = ixmp.Scenario(test_mp, 'size test', 'base', version='new')
-
-    # Dimensions and their lengths (Fibonacci numbers)
-    N_dims = 6
-    dims = 'abcdefgh'[:N_dims + 1]
-    sizes = [1, 5, 21, 21, 89, 377, 1597, 6765][:N_dims + 1]
-
-    # commented: "377 / 73984365 elements = 0.00051% full"
-    # from functools import reduce
-    # from operator import mul
-    # size = reduce(mul, sizes)
-    # print('{} / {} elements = {:.5f}% full'
-    #       .format(max(sizes), size, 100 * max(sizes) / size))
-
-    # Names like f_0000 ... f_1596 along each dimension
-    coords = []
-    for d, N in zip(dims, sizes):
-        coords.append([f'{d}_{i:04d}' for i in range(N)])
-        # Add to Scenario
-        scen.init_set(d)
-        scen.add_set(d, coords[-1])
-
-    def _make_values():
-        """Make a DataFrame containing each label in *coords* at least once."""
-        values = list(zip_longest(*coords, np.random.rand(max(sizes))))
-        result = pd.DataFrame(values, columns=list(dims) + ['value']) \
-                   .ffill()
-        result['unit'] = 'kg'
-        return result
-
-    # Fill the Scenario with quantities named q_01 ... q_09
-    N = 10
-    names = []
-    for i in range(10):
-        name = f'q_{i:02d}'
-        scen.init_par(name, list(dims))
-        scen.add_par(name, _make_values())
-        names.append(name)
-
-    # Create the reporter
-    rep = Reporter.from_scenario(scen)
-
-    # Add an operation that takes the product, i.e. requires all the q_*
-    keys = [rep.full_key(name) for name in names]
-    rep.add('bigmem', tuple([computations.product] + keys))
-
-    # One quantity fits in memory
-    rep.get(keys[0])
-
-    # All quantities together trigger MemoryError
-    rep.get('bigmem')
-
-
 def test_aggregate(test_mp):
     scen = ixmp.Scenario(test_mp, 'Group reporting', 'group reporting', 'new')
     t, t_foo, t_bar, x = add_test_data(scen)

From 21acfed05a229062ca7bc4dfb355243f2f05888b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sun, 26 Apr 2020 12:21:58 +0200
Subject: [PATCH 07/22] Appease Stickler

---
 ixmp/reporting/sparsedataarray.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ixmp/reporting/sparsedataarray.py b/ixmp/reporting/sparsedataarray.py
index d090ac010..ed4309c5d 100644
--- a/ixmp/reporting/sparsedataarray.py
+++ b/ixmp/reporting/sparsedataarray.py
@@ -10,7 +10,7 @@
     action='ignore',
     message="An import was requested from a module that has moved location.",
     module='sparse._coo.numba_extension',
-    )
+)
 
 import sparse  # noqa: E402
 
@@ -48,7 +48,7 @@ def convert(self):
                 dims=self.da.dims,
                 name=self.da.name,
                 attrs=self.da.attrs,
-                )
+            )
 
     @property
     def COO_data(self):

From c3723f479ec6f81de88ebc8f5fee28a040fbe123 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sun, 26 Apr 2020 21:11:43 +0200
Subject: [PATCH 08/22] Improve coverage to 100% in reporting.attrseries and
 .sparsedataarray

---
 ixmp/reporting/attrseries.py           | 67 ++++++++++++++------------
 ixmp/reporting/computations.py         | 11 ++++-
 ixmp/reporting/sparsedataarray.py      | 21 ++------
 ixmp/tests/reporting/test_quantity.py  | 37 +++++++++++---
 ixmp/tests/reporting/test_reporting.py |  2 +-
 5 files changed, 82 insertions(+), 56 deletions(-)

diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
index 4cbed5f0d..3540ea261 100644
--- a/ixmp/reporting/attrseries.py
+++ b/ixmp/reporting/attrseries.py
@@ -1,8 +1,5 @@
-from collections.abc import Collection
-
 import pandas as pd
 import pandas.core.indexes.base as ibase
-import pint
 import xarray as xr
 
 
@@ -34,14 +31,10 @@ class AttrSeries(pd.Series):
     def _constructor(self):
         return AttrSeries
 
-    def __init__(self, data=None, *args, name=None, units=None, attrs=None,
-                 **kwargs):
+    def __init__(self, data=None, *args, name=None, attrs=None, **kwargs):
         attrs = attrs or dict()
-        if units:
-            # Insert the units into the attrs
-            attrs['_unit'] = pint.Unit(units)
 
-        if isinstance(data, (AttrSeries, xr.DataArray)):
+        if hasattr(data, 'attrs'):
             # Use attrs from an existing object
             new_attrs = data.attrs.copy()
 
@@ -49,9 +42,12 @@ def __init__(self, data=None, *args, name=None, units=None, attrs=None,
             new_attrs.update(attrs)
             attrs = new_attrs
 
+        if isinstance(data, (AttrSeries, xr.DataArray)):
+            # Extract name from existing object or use the argument
+            name = ibase.maybe_extract_name(name, data, type(self))
+
             # Pre-convert to pd.Series from xr.DataArray to preserve names and
             # labels. For AttrSeries, this is a no-op (see below).
-            name = ibase.maybe_extract_name(name, data, type(self))
             data = data.to_series()
 
         # Don't pass attrs to pd.Series constructor; it currently does not
@@ -65,10 +61,6 @@ def __init__(self, data=None, *args, name=None, units=None, attrs=None,
     def from_series(cls, series, sparse=None):
         return cls(series)
 
-    def assign_attrs(self, d):
-        self.attrs.update(d)
-        return self
-
     def assign_coords(self, **kwargs):
         return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
 
@@ -87,6 +79,13 @@ def dims(self):
     def drop(self, label):
         return self.droplevel(label)
 
+    def item(self, *args):
+        if len(args) and args != (None,):
+            raise NotImplementedError
+        elif self.size != 1:
+            raise ValueError
+        return self.iloc[0]
+
     def rename(self, new_name_or_name_dict):
         if isinstance(new_name_or_name_dict, dict):
             return self.rename_axis(index=new_name_or_name_dict)
@@ -98,10 +97,17 @@ def sel(self, indexers=None, drop=False, **indexers_kwargs):
         indexers.update(indexers_kwargs)
         if len(indexers) == 1:
             level, key = list(indexers.items())[0]
-            if not isinstance(key, Collection) and not drop:
-                # When using .loc[] to select 1 label on 1 level, pandas drops
-                # the level. Use .xs() to avoid this behaviour unless drop=True
-                return AttrSeries(self.xs(key, level=level, drop_level=False))
+            if isinstance(key, str) and not drop:
+                if isinstance(self.index, pd.MultiIndex):
+                    # When using .loc[] to select 1 label on 1 level, pandas
+                    # drops the level. Use .xs() to avoid this behaviour unless
+                    # drop=True
+                    return AttrSeries(self.xs(key, level=level,
+                                              drop_level=False))
+                else:
+                    # No MultiIndex; use .loc with a slice to avoid returning
+                    # scalar
+                    return self.loc[slice(key, key)]
 
         idx = tuple(indexers.get(l, slice(None)) for l in self.index.names)
         return AttrSeries(self.loc[idx])
@@ -116,19 +122,20 @@ def sum(self, *args, **kwargs):
             dim = list(args)
             args = tuple()
 
-        if isinstance(self.index, pd.MultiIndex):
-            if len(dim) == len(self.index.names):
-                # assume dimensions = full multi index, do simple sum
-                kwargs = {}
-            else:
-                # pivot and sum across columns
-                obj = self.unstack(dim)
-                kwargs['axis'] = 1
-                attrs = self.attrs
+        if len(dim) == len(self.index.names):
+            bad_dims = set(dim) - set(self.index.names)
+            if bad_dims:
+                raise ValueError(f'{bad_dims} not found in array dimensions '
+                                 f'{self.index.names}')
+            # Simple sum
+            kwargs = {}
         else:
-            if dim != [self.index.name]:
-                raise ValueError(dim, self.index.name, self)
-            kwargs['level'] = dim
+            # pivot and sum across columns
+            obj = self.unstack(dim)
+            kwargs['axis'] = 1
+            # Result will be DataFrame; re-attach attrs when converted to
+            # AttrSeries
+            attrs = self.attrs
 
         return AttrSeries(obj.sum(*args, **kwargs), attrs=attrs)
 
diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index de6bd158f..77684c591 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -5,10 +5,10 @@
 from collections.abc import Mapping
 import logging
 from pathlib import Path
+from warnings import filterwarnings
 
 import pandas as pd
 import pint
-import xarray as xr
 
 from .quantity import Quantity
 from .utils import (
@@ -35,6 +35,15 @@
 ]
 
 
+# sparse 0.9.1, numba 0.49.0, triggered by xarray import
+for msg in ["No direct replacement for 'numba.targets' available",
+            "An import was requested from a module that has moved location."]:
+    filterwarnings(action='ignore', message=msg,
+                   module='sparse._coo.numba_extension')
+
+import xarray as xr  # noqa: E402
+
+
 log = logging.getLogger(__name__)
 
 # Carry unit attributes automatically
diff --git a/ixmp/reporting/sparsedataarray.py b/ixmp/reporting/sparsedataarray.py
index ed4309c5d..e141c7816 100644
--- a/ixmp/reporting/sparsedataarray.py
+++ b/ixmp/reporting/sparsedataarray.py
@@ -1,26 +1,14 @@
-from warnings import filterwarnings
-
 import numpy as np
 import pandas as pd
+import sparse  # NB warnings from sparse are filtered in computations.py
 import xarray as xr
 from xarray.core.utils import either_dict_or_kwargs
 
-# sparse 0.9.1, numba 0.49.0
-filterwarnings(
-    action='ignore',
-    message="An import was requested from a module that has moved location.",
-    module='sparse._coo.numba_extension',
-)
-
-import sparse  # noqa: E402
-
 
 @xr.register_dataarray_accessor('_sda')
 class SparseAccessor:
     """:mod:`xarray` accessor to help :class:`SparseDataArray`."""
     def __init__(self, obj):
-        if not isinstance(obj, xr.DataArray):
-            raise TypeError('._sda accessor only valid for xr.DataArray')
         self.da = obj
 
     def convert(self):
@@ -58,11 +46,8 @@ def COO_data(self):
     @property
     def dense(self):
         """Return a copy with dense (:class:`.ndarray`) data."""
-        if self.COO_data:
-            # Use existing method xr.Variable._to_dense()
-            return self.da._replace(variable=self.da.variable._to_dense())
-        else:
-            return self.da
+        # Use existing method xr.Variable._to_dense()
+        return self.da._replace(variable=self.da.variable._to_dense())
 
     @property
     def dense_super(self):
diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index df155be30..5e6cab3bd 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -139,18 +139,43 @@ def foo(self):
                                          names=['a', 'b'])
         yield AttrSeries([0, 1, 2, 3], index=idx)
 
-    def test_sum(self, foo):
+    @pytest.fixture
+    def bar(self):
+        yield AttrSeries([0, 1], index=pd.Index(['a1', 'a2'], name='a'))
+
+    def test_rename(self, foo):
+        assert foo.rename({'a': 'c', 'b': 'd'}).dims == ('c', 'd')
+
+    def test_sel(self, bar):
+        # Selecting 1 element from 1-D parameter still returns AttrSeries
+        result = bar.sel(a='a2')
+        assert isinstance(result, AttrSeries)
+        assert result.size == 1
+        assert result.dims == ('a',)
+        assert result.iloc[0] == 1
+
+    def test_sum(self, foo, bar):
         # AttrSeries can be summed across all dimensions
         result = foo.sum(dim=['a', 'b'])
         assert isinstance(result, AttrSeries)  # returns an AttrSeries
-        assert len(result) == 1                # with one element
-        assert result[0] == 6                  # that has the correct value
+        assert result.size == 1                # with one element
+        assert result.item() == 6              # that has the correct value
+
+        # Sum with wrong dim raises ValueError
+        with pytest.raises(ValueError):
+            bar.sum('b')
 
-    def test_others(self, foo):
+    def test_others(self, foo, bar):
         # Exercise other compatibility functions
         assert isinstance(foo.as_xarray(), xr.DataArray)
         assert type(foo.to_frame()) is pd.DataFrame
         assert foo.drop('a').dims == ('b',)
+        assert bar.dims == ('a',)
+
+        with pytest.raises(NotImplementedError):
+            bar.item('a2')
+        with pytest.raises(ValueError):
+            bar.item()
 
 
 def test_sda_accessor():
@@ -189,5 +214,5 @@ def test_sda_accessor():
     with pytest.raises(ValueError, match='Please make sure that the broadcast '
                        'shape of just the sparse arrays is the same as the '
                        'broadcast shape of all the operands.'):
-        z5 = SparseDataArray(x_series) * y
-        assert_xr_equal(z1, z5)
+        SparseDataArray(x_series) * y  # = z5
+        # assert_xr_equal(z1, z5)
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index d375d08d8..4a8af78e2 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -86,7 +86,7 @@ def test_reporter_add():
     with pytest.raises(KeyExistsError, match=r"key 'a' already exists"):
         r.add('a', 5, strict=True)
 
-    def gen(other):
+    def gen(other):  # pragma: no cover
         """A generator for apply()."""
         return (lambda a, b: a * b, 'a', other)
 

From 21e872f8948feea6dfebc7db0cf2ff5f28f7bfe3 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Sun, 10 May 2020 14:09:23 +0200
Subject: [PATCH 09/22] Catch more exceptions in reporting.utils.parse_units

---
 ixmp/reporting/utils.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/ixmp/reporting/utils.py b/ixmp/reporting/utils.py
index 295db9c19..f443dcd5f 100644
--- a/ixmp/reporting/utils.py
+++ b/ixmp/reporting/utils.py
@@ -134,15 +134,16 @@ def define_unit_parts(expr):
         # Quantity has no unit
         unit = registry.parse_units('')
     except pint.UndefinedUnitError:
-        # Unit(s) do not exist; define them in the UnitRegistry
-        define_unit_parts(unit)
-
-        # Try to parse again
         try:
+            # Unit(s) do not exist; define them in the UnitRegistry
+            define_unit_parts(unit)
+
+            # Try to parse again
             unit = registry.parse_units(unit)
-        except pint.UndefinedUnitError:
-            # Handle the silent failure of define(), above
-            raise invalid(unit)  # from None
+        except (pint.UndefinedUnitError, pint.RedefinitionError):
+            # Handle the silent failure of define(), above; or
+            # define_unit_parts didn't work
+            raise invalid(unit)
     except AttributeError:
         # Unit contains a character like '-' that throws off pint
         # NB this 'except' clause must be *after* UndefinedUnitError, since

From 38a03e79acda76ef50e517a3f2fdcbc6bb3f7c5a Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Tue, 2 Jun 2020 23:19:29 +0200
Subject: [PATCH 10/22] Bump sparse requirement, adjust test_sda_accessor()

---
 ixmp/tests/reporting/test_quantity.py | 13 +++++--------
 setup.cfg                             |  2 +-
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index 5e6cab3bd..cecf9e7fa 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -195,8 +195,9 @@ def test_sda_accessor():
     assert not x_dense._sda.COO_data or x_dense._sda.nan_fill
     assert not y_dense._sda.COO_data or y_dense._sda.nan_fill
 
-    with pytest.raises(ValueError, match='make sure that the broadcast shape'):
-        x_dense * y
+    # As of sparse 0.10, sparse `y` is automatically broadcast to `x_dense`
+    # Previously, this raised ValueError.
+    x_dense * y
 
     z1 = x_dense._sda.convert() * y
 
@@ -210,9 +211,5 @@ def test_sda_accessor():
     z4 = x._sda.convert() * y
     assert_xr_equal(z1, z4)
 
-    # Doesn't work: can't align automatically
-    with pytest.raises(ValueError, match='Please make sure that the broadcast '
-                       'shape of just the sparse arrays is the same as the '
-                       'broadcast shape of all the operands.'):
-        SparseDataArray(x_series) * y  # = z5
-        # assert_xr_equal(z1, z5)
+    z5 = SparseDataArray.from_series(x_series) * y
+    assert_xr_equal(z1, z5)
diff --git a/setup.cfg b/setup.cfg
index b6c96343a..ac7c19953 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -20,6 +20,7 @@ install_requires =
     pandas >= 1.0
     pint
     PyYAML
+    sparse >= 0.10
     xarray
     xlrd
     xlsxwriter
@@ -36,7 +37,6 @@ tests =
     pretenders >= 1.4.4
     pytest >= 5
     pytest-cov
-    sparse
 docs =
     numpydoc
     sphinx >= 3.0

From 38c4271fe2cc7f3567eba5aedd2aa8e7d2d830b7 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Thu, 11 Jun 2020 13:38:47 +0200
Subject: [PATCH 11/22] Lint with flake8

---
 ixmp/reporting/attrseries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
index 3540ea261..a6bdfd178 100644
--- a/ixmp/reporting/attrseries.py
+++ b/ixmp/reporting/attrseries.py
@@ -109,7 +109,7 @@ def sel(self, indexers=None, drop=False, **indexers_kwargs):
                     # scalar
                     return self.loc[slice(key, key)]
 
-        idx = tuple(indexers.get(l, slice(None)) for l in self.index.names)
+        idx = tuple(indexers.get(n, slice(None)) for n in self.index.names)
         return AttrSeries(self.loc[idx])
 
     def sum(self, *args, **kwargs):

From a1b8156d7b2e180b91fc2a7bd20ebf5c77f78089 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 16:52:25 +0200
Subject: [PATCH 12/22] Add reporting.testing.random_qty

---
 ixmp/reporting/testing.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 ixmp/reporting/testing.py

diff --git a/ixmp/reporting/testing.py b/ixmp/reporting/testing.py
new file mode 100644
index 000000000..3b02db437
--- /dev/null
+++ b/ixmp/reporting/testing.py
@@ -0,0 +1,36 @@
+from typing import Dict
+
+import numpy as np
+import xarray as xr
+
+from .quantity import Quantity
+
+
+def random_qty(shape: Dict[str, int], **kwargs):
+    """Return a Quantity with *shape* and random contents.
+
+    Parameters
+    ----------
+    shape : dict
+        Mapping from dimension names to dimension lengths.
+    kwargs
+        Other keyword arguments to :class:`Quantity`.
+
+    Returns
+    -------
+    Quantity
+        Keys in `shape`—e.g. "foo"—result in a dimension named "foo" with
+        coords "foo0", "foo1", etc., with total length matching the value.
+        Data is random.
+    """
+    return Quantity(
+        xr.DataArray(
+            np.random.rand(*shape.values()),
+            coords={
+                dim: [f"{dim}{i}" for i in range(length)]
+                for dim, length in shape.items()
+            },
+            dims=shape.keys(),
+        ),
+        **kwargs,
+    )

From f25835bd3d426f1334d97962688df371d0152b49 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 16:53:30 +0200
Subject: [PATCH 13/22] Add reporting.quantity.assert_quantity()

---
 ixmp/reporting/quantity.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/ixmp/reporting/quantity.py b/ixmp/reporting/quantity.py
index b18085fac..77f3ba6ea 100644
--- a/ixmp/reporting/quantity.py
+++ b/ixmp/reporting/quantity.py
@@ -38,3 +38,19 @@ def __call__(self, data, *args, **kwargs):
 
 
 Quantity = _QuantityFactory()
+
+
+def assert_quantity(*args):
+    """Assert that each of `args` is a Quantity object.
+
+    Raises
+    ------
+    TypeError
+        with an indicative message.
+    """
+    for i, arg in enumerate(args):
+        if arg.__class__.__name__ != Quantity.CLASS:
+            raise TypeError(
+                f"arg #{i} ({repr(arg)}) is not Quantity; likely an incorrect "
+                "key"
+            )

From 269e45052c9cb7cb6d12d45f92645c5eaeb0c9ce Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 16:53:54 +0200
Subject: [PATCH 14/22] Add reporting.computations.add (from message_ix)

---
 ixmp/reporting/computations.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index 77684c591..ba5ac64f0 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -10,7 +10,7 @@
 import pandas as pd
 import pint
 
-from .quantity import Quantity
+from .quantity import Quantity, assert_quantity
 from .utils import (
     RENAME_DIMS,
     dims_for_qty,
@@ -50,6 +50,28 @@
 xr.set_options(keep_attrs=True)
 
 
+def add(*quantities, fill_value=0.0):
+    """Sum across multiple *quantities*."""
+    # TODO check units
+    assert_quantity(*quantities)
+
+    if Quantity.CLASS == "SparseDataArray":
+        quantities = map(Quantity, xr.broadcast(*quantities))
+
+    # Initialize result values with first entry
+    items = iter(quantities)
+    result = next(items)
+
+    # Iterate over remaining entries
+    for q in items:
+        if Quantity.CLASS == 'AttrSeries':
+            result = result.add(q, fill_value=fill_value).dropna()
+        else:
+            result = result + q
+
+    return result
+
+
 def apply_units(qty, units, quiet=False):
     """Simply apply *units* to *qty*.
 

From da0b78b426f04143383609e61913bf580e8fea7e Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 16:57:15 +0200
Subject: [PATCH 15/22] Expand tests of computations.product

---
 ixmp/tests/reporting/test_computations.py | 36 ++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/ixmp/tests/reporting/test_computations.py b/ixmp/tests/reporting/test_computations.py
index 8042272f3..3af40dfdb 100644
--- a/ixmp/tests/reporting/test_computations.py
+++ b/ixmp/tests/reporting/test_computations.py
@@ -1,12 +1,15 @@
 import logging
 
+import numpy as np
 from pandas.testing import assert_series_equal
 import pint
 import pytest
+import xarray as xr
 
 import ixmp
 from ixmp.reporting import Reporter, Quantity, computations
-from ixmp.testing import assert_logs
+from ixmp.reporting.testing import random_qty
+from ixmp.testing import assert_logs, assert_qty_equal
 
 from . import add_test_data
 
@@ -52,6 +55,37 @@ def test_apply_units(data, caplog):
     assert_series_equal(result.to_series(), x.to_series())
 
 
+@pytest.mark.xfail(
+    reason="Outer join of non-intersecting dimensions (AttrSeries only)"
+)
+def test_product0():
+    A = Quantity(
+        xr.DataArray([1, 2], coords=[["a0", "a1"]], dims=["a"])
+    )
+    B = Quantity(
+        xr.DataArray([3, 4], coords=[["b0", "b1"]], dims=["b"])
+    )
+    exp = Quantity(
+        xr.DataArray(
+            [[3, 4], [6, 8]],
+            coords=[["a0", "a1"], ["b0", "b1"]],
+            dims=["a", "b"],
+        ),
+        units="1",
+    )
+
+    assert_qty_equal(exp, computations.product(A, B))
+    computations.product(exp, B)
+
+
+def test_product1():
+    """Product of quantities with overlapping dimensions."""
+    A = random_qty(dict(a=2, b=2, c=2, d=2))
+    B = random_qty(dict(b=2, c=2, d=2, e=2, f=2))
+
+    assert computations.product(A, B).size == 2 ** 6
+
+
 def test_select(data):
     # Unpack
     *_, t_foo, t_bar, x = data

From 8f7105ae563e2ed20c81aae155b18295df0f309b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 17:00:09 +0200
Subject: [PATCH 16/22] Expand arguments accepted by Quantity() constructor

---
 ixmp/reporting/quantity.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ixmp/reporting/quantity.py b/ixmp/reporting/quantity.py
index 77f3ba6ea..1ee9794ca 100644
--- a/ixmp/reporting/quantity.py
+++ b/ixmp/reporting/quantity.py
@@ -22,9 +22,12 @@ def __call__(self, data, *args, **kwargs):
             result = cls.from_series(data)
         elif self.CLASS == 'AttrSeries':
             result = cls(data, *args, **kwargs)
-        else:
-            assert len(args) == len(kwargs) == 0, (args, kwargs)
+        elif len(args) == len(kwargs) == 0:
+            # Single argument, possibly an xr.DataArray; convert to
+            # SparseDataArray
             result = data._sda.convert()
+        else:
+            result = cls(data, *args, **kwargs)
 
         if name:
             result.name = name

From 3e21ac1d5d6ba5b7074a023060a6302be22b491b Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 17:02:46 +0200
Subject: [PATCH 17/22] Add tests of reporting.computations.add

---
 ixmp/tests/reporting/test_computations.py | 44 ++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/ixmp/tests/reporting/test_computations.py b/ixmp/tests/reporting/test_computations.py
index 3af40dfdb..02bdfb652 100644
--- a/ixmp/tests/reporting/test_computations.py
+++ b/ixmp/tests/reporting/test_computations.py
@@ -20,8 +20,50 @@
 @pytest.fixture(scope='function')
 def data(test_mp, request):
     scen = ixmp.Scenario(test_mp, request.node.name, request.node.name, 'new')
+    data_objs = list(add_test_data(scen))
     rep = Reporter.from_scenario(scen)
-    yield [scen, rep] + list(add_test_data(scen))
+    yield [scen, rep] + data_objs
+
+
+@pytest.mark.parametrize("operands, size", [
+    (("a", "a"), 18),
+    (("a", "x"), 36),
+    (("x", "b"), 36),
+    (("a", "b"), 36),
+    (("a", "x", "b"), 36),
+])
+def test_add(data, operands, size):
+    scen, rep, t, t_foo, t_bar, x = data
+
+    y = scen.set("y").tolist()
+    x = rep.get("x:t-y")
+    a = Quantity(
+        xr.DataArray(
+            np.random.rand(len(t_foo), len(y)),
+            coords=[t_foo, y],
+            dims=['t', 'y']
+        ),
+        units=x.attrs['_unit'],
+    )
+    b = Quantity(
+        xr.DataArray(
+            np.random.rand(len(t_bar), len(y)),
+            coords=[t_bar, y],
+            dims=['t', 'y']
+        ),
+        units=x.attrs['_unit'],
+    )
+
+    rep.add("a:t-y", a)
+    rep.add("b:t-y", b)
+
+    key = rep.add(
+        "result",
+        tuple([computations.add] + [f"{name}:t-y" for name in operands])
+    )
+
+    result = rep.get(key)
+    assert size == result.size, result.to_series()
 
 
 def test_apply_units(data, caplog):

From 176c36b3b7a22ad5f94b735daf91999e78a28a7a Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 19:10:23 +0200
Subject: [PATCH 18/22] Remove commented diagnostic code in
 computations.data_for_quantity

---
 ixmp/reporting/computations.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index ba5ac64f0..a129f6c09 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -211,17 +211,6 @@ def data_for_quantity(ix_type, name, column, scenario, config):
         data = data.rename(columns=RENAME_DIMS) \
                    .set_index(dims)
 
-    # Check sparseness
-    # try:
-    #     shape = list(map(len, data.index.levels))
-    # except AttributeError:
-    #     shape = [data.index.size]
-    # size = reduce(mul, shape)
-    # filled = 100 * len(data) / size if size else 'NA'
-    # need_to_chunk = size > 1e7 and filled < 1
-    # info = (name, shape, filled, size, need_to_chunk)
-    # log.debug(' '.join(map(str, info)))
-
     # Convert to a Quantity, assign attrbutes and name
     qty = Quantity(
         data[column],

From 6b51340c20921d2ddcc93f42e547291ef1716085 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 19:48:24 +0200
Subject: [PATCH 19/22] Improve AttrSeries.squeeze()

---
 ixmp/reporting/attrseries.py          | 31 ++++++++++++++++++++++++---
 ixmp/reporting/computations.py        |  3 ++-
 ixmp/tests/reporting/test_quantity.py |  4 ++++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
index a6bdfd178..498b63c47 100644
--- a/ixmp/reporting/attrseries.py
+++ b/ixmp/reporting/attrseries.py
@@ -139,9 +139,34 @@ def sum(self, *args, **kwargs):
 
         return AttrSeries(obj.sum(*args, **kwargs), attrs=attrs)
 
-    def squeeze(self, *args, **kwargs):
-        kwargs.pop('drop')
-        return super().squeeze(*args, **kwargs) if len(self) > 1 else self
+    def squeeze(self, dim=None, *args, **kwargs):
+        assert kwargs.pop("drop", True)
+
+        try:
+            idx = self.index.remove_unused_levels()
+        except AttributeError:
+            return self
+
+        to_drop = []
+        for i, name in enumerate(idx.names):
+            if dim and name != dim:
+                continue
+            elif len(idx.levels[i]) > 1:
+                if dim is None:
+                    continue
+                else:
+                    raise ValueError(
+                        "cannot select a dimension to squeeze out which has "
+                        "length greater than one"
+                    )
+
+            to_drop.append(name)
+
+        if dim and not to_drop:
+            # Specified dimension does not exist
+            raise KeyError(dim)
+
+        return self.droplevel(to_drop)
 
     def as_xarray(self):
         return xr.DataArray.from_series(self)
diff --git a/ixmp/reporting/computations.py b/ixmp/reporting/computations.py
index a129f6c09..72d432ec8 100644
--- a/ixmp/reporting/computations.py
+++ b/ixmp/reporting/computations.py
@@ -220,7 +220,8 @@ def data_for_quantity(ix_type, name, column, scenario, config):
     try:
         # Remove length-1 dimensions for scalars
         qty = qty.squeeze('index', drop=True)
-    except KeyError:
+    except (KeyError, ValueError):
+        # KeyError if "index" does not exist; ValueError if its length is > 1
         pass
 
     return qty
diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index cecf9e7fa..e8f0cb1e2 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -154,6 +154,10 @@ def test_sel(self, bar):
         assert result.dims == ('a',)
         assert result.iloc[0] == 1
 
+    def test_squeeze(self, foo):
+        assert foo.sel(a="a1").squeeze().dims == ("b",)
+        assert foo.sel(a="a2", b="b1").squeeze().values == 2
+
     def test_sum(self, foo, bar):
         # AttrSeries can be summed across all dimensions
         result = foo.sum(dim=['a', 'b'])

From a317fe43dc7171e56aff3e18b37795ecee0e0717 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 19:48:46 +0200
Subject: [PATCH 20/22] Move reporting assertions to reporting.testing

---
 ixmp/reporting/testing.py              | 49 ++++++++++++++++++++++++++
 ixmp/testing.py                        | 48 ++-----------------------
 ixmp/tests/reporting/test_reporting.py |  8 ++---
 3 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/ixmp/reporting/testing.py b/ixmp/reporting/testing.py
index 3b02db437..ea32e896d 100644
--- a/ixmp/reporting/testing.py
+++ b/ixmp/reporting/testing.py
@@ -1,11 +1,60 @@
 from typing import Dict
 
 import numpy as np
+from pandas.testing import assert_series_equal
 import xarray as xr
 
 from .quantity import Quantity
 
 
+def assert_qty_equal(a, b, check_type=True, check_attrs=True, **kwargs):
+    """Assert that Quantity objects *a* and *b* are equal.
+
+    If *check_type* is False, *a* and *b* are first converted using
+    :data:`.Quantity`.
+    """
+    if not check_type:
+        a = Quantity(a)
+        b = Quantity(b)
+
+    if Quantity.CLASS == 'AttrSeries':
+        try:
+            a = a.sort_index()
+            b = b.sort_index()
+        except TypeError:
+            pass
+        assert_series_equal(a, b, check_dtype=False, **kwargs)
+    else:
+        import xarray.testing
+        xarray.testing.assert_equal(a, b, **kwargs)
+
+    # check attributes are equal
+    if check_attrs:
+        assert a.attrs == b.attrs
+
+
+def assert_qty_allclose(a, b, check_type=True, check_attrs=True, **kwargs):
+    """Assert that Quantity objects *a* and *b* have numerically close values.
+
+    If *check_type* is False, *a* and *b* are first converted using
+    :data:`.Quantity`.
+    """
+    if not check_type:
+        a = Quantity(a)
+        b = Quantity(b)
+
+    if Quantity.CLASS == 'AttrSeries':
+        assert_series_equal(a.sort_index(), b.sort_index(), **kwargs)
+    else:
+        import xarray.testing
+        kwargs.pop('check_dtype', None)
+        xarray.testing.assert_allclose(a._sda.dense, b._sda.dense, **kwargs)
+
+    # check attributes are equal
+    if check_attrs:
+        assert a.attrs == b.attrs
+
+
 def random_qty(shape: Dict[str, int], **kwargs):
     """Return a Quantity with *shape* and random contents.
 
diff --git a/ixmp/testing.py b/ixmp/testing.py
index f15b04f27..509581b83 100644
--- a/ixmp/testing.py
+++ b/ixmp/testing.py
@@ -54,13 +54,14 @@
 from click.testing import CliRunner
 import numpy as np
 import pandas as pd
-from pandas.testing import assert_series_equal
 import pytest
 
 from . import cli, config as ixmp_config
 from .core import Platform, TimeSeries, Scenario, IAMC_IDX
 from .reporting import Quantity
-
+from .reporting.testing import (  # noqa: F401
+    assert_qty_equal, assert_qty_allclose
+)
 
 log = logging.getLogger(__name__)
 
@@ -492,49 +493,6 @@ def test_foo(caplog):
             pytest.fail('\n'.join(lines))
 
 
-def assert_qty_equal(a, b, check_type=True, check_attrs=True, **kwargs):
-    """Assert that Quantity objects *a* and *b* are equal.
-
-    When Quantity is AttrSeries, *a* and *b* are first passed through
-    :meth:`as_quantity`.
-    """
-    if not check_type:
-        a = Quantity(a)
-        b = Quantity(b)
-
-    if Quantity.CLASS == 'AttrSeries':
-        assert_series_equal(a, b, check_dtype=False, **kwargs)
-    else:
-        import xarray.testing
-        xarray.testing.assert_equal(a, b, **kwargs)
-
-    # check attributes are equal
-    if check_attrs:
-        assert a.attrs == b.attrs
-
-
-def assert_qty_allclose(a, b, check_type=True, check_attrs=True, **kwargs):
-    """Assert that Quantity objects *a* and *b* have numerically close values.
-
-    When Quantity is AttrSeries, *a* and *b* are first passed through
-    :meth:`as_quantity`.
-    """
-    if not check_type:
-        a = Quantity(a)
-        b = Quantity(b)
-
-    if Quantity.CLASS == 'AttrSeries':
-        assert_series_equal(a, b, **kwargs)
-    else:
-        import xarray.testing
-        kwargs.pop('check_dtype', None)
-        xarray.testing.assert_allclose(a._sda.dense, b._sda.dense, **kwargs)
-
-    # check attributes are equal
-    if check_attrs:
-        assert a.attrs == b.attrs
-
-
 # Data structure for memory information used by :meth:`memory_usage`.
 _MemInfo = namedtuple('MemInfo', [
     'profiled',
diff --git a/ixmp/tests/reporting/test_reporting.py b/ixmp/tests/reporting/test_reporting.py
index 4a8af78e2..58f2c0944 100644
--- a/ixmp/tests/reporting/test_reporting.py
+++ b/ixmp/tests/reporting/test_reporting.py
@@ -223,10 +223,10 @@ def test_reporter_from_dantzig(test_mp, ureg):
     # ...produces the expected new value
     obs = rep.get(new_key)
     d_ij = rep.get('d:i-j')
-    exp = (d_ij * weights).sum(dim=['j']) / weights.sum(dim=['j'])
-    # FIXME attrs has to be explicitly copied here because math is done which
-    #       returns a pd.Series
-    exp.attrs = d_ij.attrs
+    exp = Quantity(
+        (d_ij * weights).sum(dim=['j']) / weights.sum(dim=['j']),
+        attrs=d_ij.attrs,
+    )
 
     assert_qty_equal(exp, obs)
 

From d33e2e98c90ceecc29ed9b729b5e55dcf684f05a Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 22:16:39 +0200
Subject: [PATCH 21/22] Update reporting.quantity documentation

---
 doc/source/conf.py                    |  1 +
 doc/source/reporting.rst              | 51 +++++++++++++++++----
 ixmp/reporting/attrseries.py          | 24 +++++-----
 ixmp/reporting/quantity.py            | 26 ++++++++++-
 ixmp/reporting/sparsedataarray.py     | 64 ++++++++++++++++++---------
 ixmp/tests/reporting/test_quantity.py |  1 -
 6 files changed, 124 insertions(+), 43 deletions(-)

diff --git a/doc/source/conf.py b/doc/source/conf.py
index 7cc6ec267..9b501bff9 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -102,6 +102,7 @@
     'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
     'pint': ('https://pint.readthedocs.io/en/stable/', None),
     'python': ('https://docs.python.org/3/', None),
+    'sparse': ('https://sparse.pydata.org/en/stable/', None),
     'xarray': ('https://xarray.pydata.org/en/stable/', None),
 }
 
diff --git a/doc/source/reporting.rst b/doc/source/reporting.rst
index aadc92247..e83848fdc 100644
--- a/doc/source/reporting.rst
+++ b/doc/source/reporting.rst
@@ -1,7 +1,7 @@
 .. currentmodule:: ixmp.reporting
 
 Reporting
-=========
+*********
 
 Top-level methods and classes:
 
@@ -11,7 +11,6 @@ Top-level methods and classes:
    Reporter
    Key
    Quantity
-   as_quantity
 
 Others:
 
@@ -184,12 +183,30 @@ Others:
           >>> foo('a b c')
           foo:a-b-c
 
-.. automodule:: ixmp.reporting
-   :members: Quantity, as_quantity
+.. autodata:: ixmp.reporting.Quantity(data, *args, **kwargs)
+   :annotation:
+
+The :data:`.Quantity` constructor converts its arguments to an internal, :class:`xarray.DataArray`-like data format:
+
+.. code-block:: python
+
+   # Existing data
+   data = pd.Series(...)
+
+   # Convert to a Quantity for use in reporting calculations
+   qty = Quantity(data, name="Quantity name", units="kg")
+   rep.add("new_qty", qty)
+
+Common :mod:`ixmp.reporting` usage, e.g. in :mod:`message_ix`, creates large, sparse data frames (billions of possible elements, but <1% populated); :class:`~xarray.DataArray`'s default, 'dense' storage format would be too large for available memory.
+
+- Currently, Quantity is :class:`.AttrSeries`, a wrapped :class:`pandas.Series` that behaves like a :class:`~xarray.DataArray`.
+- In the future, :mod:`ixmp.reporting` will use :class:`.SparseDataArray`, and eventually :class:`~xarray.DataArray` backed by sparse data, directly.
+
+The goal is that reporting code, including built-in and user computations, can treat quantity arguments as if they were :class:`~xarray.DataArray`.
 
 
 Computations
-------------
+============
 
 .. automodule:: ixmp.reporting.computations
    :members:
@@ -201,6 +218,7 @@ Computations
    Calculations:
 
    .. autosummary::
+      add
       aggregate
       apply_units
       disaggregate_shares
@@ -221,10 +239,27 @@ Computations
       concat
 
 
-Utilities
----------
+Internal format for reporting quantities
+========================================
 
-.. autoclass:: ixmp.reporting.quantity.AttrSeries
+.. currentmodule:: ixmp.reporting.quantity
+
+.. automodule:: ixmp.reporting.quantity
+   :members: assert_quantity
+
+.. currentmodule:: ixmp.reporting.attrseries
+
+.. automodule:: ixmp.reporting.attrseries
+   :members:
+
+.. currentmodule:: ixmp.reporting.sparsedataarray
+
+.. automodule:: ixmp.reporting.sparsedataarray
+   :members: SparseDataArray, SparseAccessor
+
+
+Utilities
+=========
 
 .. automodule:: ixmp.reporting.utils
    :members:
diff --git a/ixmp/reporting/attrseries.py b/ixmp/reporting/attrseries.py
index 498b63c47..9dd18cb46 100644
--- a/ixmp/reporting/attrseries.py
+++ b/ixmp/reporting/attrseries.py
@@ -6,10 +6,6 @@
 class AttrSeries(pd.Series):
     """:class:`pandas.Series` subclass imitating :class:`xarray.DataArray`.
 
-    Future versions of :mod:`ixmp.reporting` will use :class:`xarray.DataArray`
-    as :class:`Quantity`; however, because :mod:`xarray` currently lacks sparse
-    matrix support, ixmp quantities may be too large for available memory.
-
     The AttrSeries class provides similar methods and behaviour to
     :class:`xarray.DataArray`, so that :mod:`ixmp.reporting.computations`
     methods can use xarray-like syntax.
@@ -59,14 +55,16 @@ def __init__(self, data=None, *args, name=None, attrs=None, **kwargs):
 
     @classmethod
     def from_series(cls, series, sparse=None):
+        """Like :meth:`xarray.DataArray.from_series`."""
         return cls(series)
 
     def assign_coords(self, **kwargs):
+        """Like :meth:`xarray.DataArray.assign_coords`."""
         return pd.concat([self], keys=kwargs.values(), names=kwargs.keys())
 
     @property
     def coords(self):
-        """Read-only."""
+        """Like :attr:`xarray.DataArray.coords`. Read-only."""
         result = dict()
         for name, levels in zip(self.index.names, self.index.levels):
             result[name] = xr.Dataset(None, coords={name: levels})[name]
@@ -74,12 +72,15 @@ def coords(self):
 
     @property
     def dims(self):
+        """Like :attr:`xarray.DataArray.dims`."""
         return tuple(self.index.names)
 
     def drop(self, label):
+        """Like :meth:`xarray.DataArray.drop`."""
         return self.droplevel(label)
 
     def item(self, *args):
+        """Like :meth:`xarray.DataArray.item`."""
         if len(args) and args != (None,):
             raise NotImplementedError
         elif self.size != 1:
@@ -87,12 +88,14 @@ def item(self, *args):
         return self.iloc[0]
 
     def rename(self, new_name_or_name_dict):
+        """Like :meth:`xarray.DataArray.rename`."""
         if isinstance(new_name_or_name_dict, dict):
             return self.rename_axis(index=new_name_or_name_dict)
         else:
             return super().rename(new_name_or_name_dict)
 
     def sel(self, indexers=None, drop=False, **indexers_kwargs):
+        """Like :meth:`xarray.DataArray.sel`."""
         indexers = indexers or {}
         indexers.update(indexers_kwargs)
         if len(indexers) == 1:
@@ -113,6 +116,7 @@ def sel(self, indexers=None, drop=False, **indexers_kwargs):
         return AttrSeries(self.loc[idx])
 
     def sum(self, *args, **kwargs):
+        """Like :meth:`xarray.DataArray.sum`."""
         obj = super(AttrSeries, self)
         attrs = None
 
@@ -140,6 +144,7 @@ def sum(self, *args, **kwargs):
         return AttrSeries(obj.sum(*args, **kwargs), attrs=attrs)
 
     def squeeze(self, dim=None, *args, **kwargs):
+        """Like :meth:`xarray.DataArray.squeeze`."""
         assert kwargs.pop("drop", True)
 
         try:
@@ -168,25 +173,22 @@ def squeeze(self, dim=None, *args, **kwargs):
 
         return self.droplevel(to_drop)
 
-    def as_xarray(self):
-        return xr.DataArray.from_series(self)
-
     def transpose(self, *dims):
+        """Like :meth:`xarray.DataArray.transpose`."""
         return self.reorder_levels(dims)
 
     def to_dataframe(self):
+        """Like :meth:`xarray.DataArray.to_dataframe`."""
         return self.to_frame()
 
     def to_series(self):
+        """Like :meth:`xarray.DataArray.to_series`."""
         return self
 
     def align_levels(self, other):
         """Work around https://github.com/pandas-dev/pandas/issues/25760.
 
         Return a copy of *obj* with common levels in the same order as *ref*.
-
-        .. todo:: remove when Quantity is xr.DataArray, or above issues is
-           closed.
         """
         if not isinstance(self.index, pd.MultiIndex):
             return self
diff --git a/ixmp/reporting/quantity.py b/ixmp/reporting/quantity.py
index 1ee9794ca..2ded612fa 100644
--- a/ixmp/reporting/quantity.py
+++ b/ixmp/reporting/quantity.py
@@ -3,8 +3,30 @@
 
 
 class _QuantityFactory:
-    #: The current internal class used to represent reporting quantities.
-    #: :meth:`as_quantity` always converts to this type.
+    """Convert arguments to the internal Quantity data format.
+
+    Parameters
+    ----------
+    data
+        Quantity data.
+    args
+        Positional arguments, passed to :class:`.AttrSeries` or
+        :class:`.SparseDataArray`.
+    kwargs
+        Keyword arguments, passed to :class:`.AttrSeries` or
+        :class:`.SparseDataArray`.
+
+    Other parameters
+    ----------------
+    name : str, optional
+        Quantity name.
+    units : str, optional
+        Quantity units.
+    attrs : dict, optional
+        Dictionary of attributes; similar to :attr:`~xarray.DataArray.attrs`.
+    """
+    # The current internal class used to represent reporting quantities.
+    # Calling :data:`Quantity` always converts to this type.
     CLASS = 'AttrSeries'
     # CLASS = 'SparseDataArray'
 
diff --git a/ixmp/reporting/sparsedataarray.py b/ixmp/reporting/sparsedataarray.py
index e141c7816..b65405fe0 100644
--- a/ixmp/reporting/sparsedataarray.py
+++ b/ixmp/reporting/sparsedataarray.py
@@ -7,7 +7,11 @@
 
 @xr.register_dataarray_accessor('_sda')
 class SparseAccessor:
-    """:mod:`xarray` accessor to help :class:`SparseDataArray`."""
+    """:mod:`xarray` accessor to help :class:`SparseDataArray`.
+
+    See the xarray accessor documentation, e.g.
+    :func:`~xarray.register_dataarray_accessor`.
+    """
     def __init__(self, obj):
         self.da = obj
 
@@ -56,37 +60,48 @@ def dense_super(self):
 
 
 class SparseDataArray(xr.DataArray):
-    """:class:`xr.DataArray` with sparse data.
+    """:class:`~xarray.DataArray` with sparse data.
 
     SparseDataArray uses :class:`sparse.COO` for storage with :data:`numpy.nan`
-    as its :attr:`sparse.COO.fill_value`. Some methods of :class:`.DataArray`
-    are overridden to ensure data is in sparse, or dense, format as necessary,
-    to provide expected functionality not currently supported by :mod:`sparse`,
-    and to avoid exhausting memory for some operations that require dense data.
-
-    See Also
-    --------
-    SparseAccessor
+    as its :attr:`sparse.COO.fill_value`. Some methods of
+    :class:`~xarray.DataArray` are overridden to ensure data is in sparse, or
+    dense, format as necessary, to provide expected functionality not currently
+    supported by :mod:`sparse`, and to avoid exhausting memory for some
+    operations that require dense data.
     """
     __slots__ = tuple()
 
     @classmethod
     def from_series(cls, obj, sparse=True):
+        """Convert a pandas.Series into a SparseDataArray."""
         # Call the parent method always with sparse=True, then re-wrap
         return xr.DataArray.from_series(obj, sparse=True)._sda.convert()
 
-    def equals(self, other):
-        """Necessary for :meth:`xarray.testing.assert_equal` to work."""
+    def equals(self, other) -> bool:
+        """True if two SparseDataArrays have the same dims, coords, and values.
+
+        Overrides :meth:`~xarray.DataArray.equals` for sparse data.
+        """
+        # Necessary for :meth:`xarray.testing.assert_equal` to work.
         return self.variable.equals(other.variable, equiv=np.equal)
 
     @property
     def loc(self):
-        # FIXME doesn't allow assignment
+        """Attribute for location based indexing like pandas.
+
+        .. note:: This version does not allow assignment, since the underlying
+           sparse array is read-only. To modify the contents, create a copy or
+           perform an operation that returns a new array.
+        """
         return self._sda.dense_super.loc
 
     def sel(self, indexers=None, method=None, tolerance=None, drop=False,
             **indexers_kwargs) -> 'SparseDataArray':
-        """Handle >1-D indexers with sparse data."""
+        """Return a new array by selecting labels along the specified dim(s).
+
+        Overrides :meth:`~xarray.DataArray.sel` to handle >1-D indexers with
+        sparse data.
+        """
         indexers = either_dict_or_kwargs(indexers, indexers_kwargs, 'sel')
         if isinstance(indexers, dict) and len(indexers) > 1:
             result = self
@@ -98,15 +113,22 @@ def sel(self, indexers=None, method=None, tolerance=None, drop=False,
             return super().sel(indexers=indexers, method=method,
                                tolerance=tolerance, drop=drop)
 
-    def to_dataframe(self):
-        # FIXME this does exactly match the behaviour of xr.DataArray; it omits
-        #       coordinate variable
-        return self.to_series().to_frame()
+    def to_dataframe(self, name=None):
+        """Convert this array and its coords into a :class:`~pandas.DataFrame`.
+
+        Overrides :meth:`~xarray.DataArray.to_dataframe`.
+        """
+        return self.to_series().to_frame(name)
 
     def to_series(self) -> pd.Series:
-        # Use SparseArray.coords and .data (each already 1-D) to construct a
-        # pd.Series without first converting to a potentially very large
-        # ndarray
+        """Convert this array into a :class:`~pandas.Series`.
+
+        Overrides :meth:`~xarray.DataArray.to_series` to create the series
+        without first converting to a potentially very large
+        :class:`numpy.ndarray`.
+        """
+        # Use SparseArray.coords and .data (each already 1-D) to construct the
+        # pd.Series
 
         # Construct a pd.MultiIndex without using .from_product
         index = pd.MultiIndex.from_arrays(self.data.coords, names=self.dims) \
diff --git a/ixmp/tests/reporting/test_quantity.py b/ixmp/tests/reporting/test_quantity.py
index e8f0cb1e2..935c35eae 100644
--- a/ixmp/tests/reporting/test_quantity.py
+++ b/ixmp/tests/reporting/test_quantity.py
@@ -171,7 +171,6 @@ def test_sum(self, foo, bar):
 
     def test_others(self, foo, bar):
         # Exercise other compatibility functions
-        assert isinstance(foo.as_xarray(), xr.DataArray)
         assert type(foo.to_frame()) is pd.DataFrame
         assert foo.drop('a').dims == ('b',)
         assert bar.dims == ('a',)

From 28d59ae3bc188e996ddeb051ac03fb8fec364783 Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto <mail@paul.kishimoto.name>
Date: Fri, 19 Jun 2020 22:16:50 +0200
Subject: [PATCH 22/22] Add #317 to release notes

---
 RELEASE_NOTES.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index c55deac18..ed7c75e4e 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -4,6 +4,7 @@ Next release
 All changes
 -----------
 
+- :pull:`317`: Make :class:`reporting.Quantity` classes interchangeable.
 - :pull:`330`: Use GitHub Actions for continuous testing and integration.