diff --git a/doc/code/qml_data.rst b/doc/code/qml_data.rst index b98ccf14808..24076ec99a9 100644 --- a/doc/code/qml_data.rst +++ b/doc/code/qml_data.rst @@ -3,6 +3,5 @@ qml.data .. currentmodule:: pennylane.data -.. automodapi:: pennylane.data - :no-heading: - :no-inherited-members: +.. automodule:: pennylane.data + diff --git a/doc/introduction/data.rst b/doc/introduction/data.rst index 0fc53abae8c..329f22dfa6d 100644 --- a/doc/introduction/data.rst +++ b/doc/introduction/data.rst @@ -31,20 +31,22 @@ The :func:`~pennylane.data.load` function returns a ``list`` with the desired da >>> H2datasets = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1) >>> print(H2datasets) -[] +[] >>> H2data = H2datasets[0] We can load datasets for multiple parameter values by providing a list of values instead of a single value. -To load all possible values, use the special value :const:`~pennylane.data.FULL` or the string 'full': +To load all possible values, use the special value :const:`~pennylane.data.FULL` or the string ``"full"``: >>> H2datasets = qml.data.load("qchem", molname="H2", basis="full", bondlength=[0.5, 1.1]) >>> print(H2datasets) -[, - , - , - ] - -When we only want to download portions of a large dataset, we can specify the desired properties (referred to as `attributes`). +[, +, +, +, +, +] + +When we only want to download portions of a large dataset, we can specify the desired properties (referred to as 'attributes'). For example, we can download or load only the molecule and energy of a dataset as follows: >>> part = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1, @@ -57,16 +59,20 @@ For example, we can download or load only the molecule and energy of a dataset a To determine what attributes are available for a type of dataset, we can use the function :func:`~pennylane.data.list_attributes`: >>> qml.data.list_attributes(data_name="qchem") -["molecule", -"hamiltonian", -"sparse_hamiltonian", -... 
-"tapered_hamiltonian", -"full"] +['molname', + 'basis', + 'bondlength', + ... + 'vqe_params', + 'vqe_energy'] .. note:: - "full" is the default value for ``attributes``, and it means that all available attributes for the Dataset will be downloaded. + The default values for attributes are as follows: + + - Molecules: ``basis`` is the smallest available basis, usually ``"STO-3G"``, and ``bondlength`` is the optimal bondlength for the molecule or an alternative if the optimal is not known. + + - Spin systems: ``periodicity`` is ``"open"``, ``lattice`` is ``"chain"``, and ``layout`` is ``1x4`` for ``chain`` systems and ``2x2`` for ``rectangular`` systems. Using Datasets in PennyLane --------------------------- @@ -151,19 +157,6 @@ array([-1.5, -0.5, 0.5, 1.5]) Quantum Datasets Functions and Classes -------------------------------------- -Classes -^^^^^^^ - -.. autosummary:: - :nosignatures: - - ~pennylane.data.Dataset - -:html:`` - -Functions -^^^^^^^^^ - :html:`
` .. autosummary:: @@ -173,5 +166,6 @@ Functions ~pennylane.data.list_attributes ~pennylane.data.load ~pennylane.data.load_interactive + ~pennylane.data.Dataset :html:`
` diff --git a/pennylane/data/__init__.py b/pennylane/data/__init__.py index 6593757f520..ec286524aa1 100644 --- a/pennylane/data/__init__.py +++ b/pennylane/data/__init__.py @@ -11,7 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""The data subpackage provides functionality to access, store and manipulate quantum datasets. +"""The data subpackage provides functionality to access, store and manipulate `quantum datasets <https://pennylane.ai/datasets>`_. + +.. note:: + + For more details on using datasets, please see the + :doc:`quantum datasets quickstart guide </introduction/data>`. + +Overview +-------- Datasets are generally stored and accessed using the :class:`~pennylane.data.Dataset` class. Pre-computed datasets are available for download and can be accessed using the :func:`~pennylane.data.load` or @@ -19,16 +27,41 @@ Additionally, users can easily create, write to disk, and read custom datasets using functions within the :class:`~pennylane.data.Dataset` class. -.. currentmodule:: pennylane.data .. autosummary:: - :toctree: api + :toctree: api -Description ------------ + attribute + field + Dataset + DatasetNotWriteableError + load + load_interactive + list_attributes + list_datasets + +In addition, various dataset types are provided: + +.. autosummary:: + :toctree: api + + AttributeInfo + DatasetAttribute + DatasetArray + DatasetScalar + DatasetString + DatasetList + DatasetDict + DatasetOperator + DatasetNone + DatasetMolecule + DatasetSparseArray + DatasetJSON + DatasetTuple Datasets -~~~~~~~~ -The :class:`Dataset` class provides a portable storage format for information describing a physical +-------- + +The :class:`~.Dataset` class provides a portable storage format for information describing a physical system and its evolution. 
For example, a dataset for an arbitrary quantum system could have a Hamiltonian, its ground state, and an efficient state-preparation circuit for that state. Datasets can contain a range of object types, including: @@ -41,10 +74,13 @@ - ``dict`` of any supported type, as long as the keys are strings +For more details on using datasets, please see the +:doc:`quantum datasets quickstart guide </introduction/data>`. + Creating a Dataset -~~~~~~~~~~~~~~~~~~ +------------------ -To create a new dataset in-memory, initialize a new ``Dataset`` with the desired attributes: +To create a new dataset in-memory, initialize a new :class:`~.Dataset` with the desired attributes: >>> hamiltonian = qml.Hamiltonian([1., 1.], [qml.PauliZ(wires=0), qml.PauliZ(wires=1)]) >>> eigvals, eigvecs = np.linalg.eigh(qml.matrix(hamiltonian)) @@ -53,7 +89,8 @@ ... eigen = {"eigvals": eigvals, "eigvecs": eigvecs} ... ) >>> dataset.hamiltonian - +(1.0) [Z0] ++ (1.0) [Z1] >>> dataset.eigen {'eigvals': array([-2., 0., 0., 2.]), 'eigvecs': array([[0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j], @@ -63,69 +100,71 @@ Attributes can also be assigned to the instance after creation: - >>> dataset.ground_state = np.transpose(eigvecs)[np.argmin(eigvals)] - >>> dataset.ground_state - array([0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j]) +>>> dataset.ground_state = np.transpose(eigvecs)[np.argmin(eigvals)] +>>> dataset.ground_state +array([0.+0.j, 0.+0.j, 0.+0.j, 1.+0.j]) Reading and Writing Datasets -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +---------------------------- Datasets can be saved to disk for later use. Datasets use the HDF5 format for serialization, which uses the '.h5' file extension. To save a dataset, use the :meth:`Dataset.write()` method: - >>> my_dataset = Dataset(...) - >>> my_dataset.write("~/datasets/my_dataset.h5") +>>> my_dataset = Dataset(...) 
+>>> my_dataset.write("~/datasets/my_dataset.h5") To open a dataset from a file, use :meth:`Dataset.open()` class method: - >>> my_dataset = Dataset.open("~/datasets/my_dataset.h5", mode="r") +>>> my_dataset = Dataset.open("~/datasets/my_dataset.h5", mode="r") -The `mode` argument follow the standard library convention - 'r' for reading, 'w-' and `w` for create and overwrite, -and 'a' for editing. ``open()`` can be used to create a new dataset directly on disk: +The ``mode`` argument follows the standard library convention --- ``r`` for +reading, ``w-`` and ``w`` for create and overwrite, and ``a`` for editing. +``open()`` can be used to create a new dataset directly on disk: - >>> new_dataset = Dataset.open("~/datasets/new_datasets.h5", mode="w") +>>> new_dataset = Dataset.open("~/datasets/new_datasets.h5", mode="w") By default, any changes made to an opened dataset will be committed directly to the file, which will fail -if the file is opened read-only. The `"copy"` mode can be used to load the dataset into memory and detach +if the file is opened read-only. The ``"copy"`` mode can be used to load the dataset into memory and detach it from the file: - >>> my_dataset = Dataset.open("~/dataset/my_dataset/h5", mode="copy") - >>> my_dataset.new_attribute = "abc" +>>> my_dataset = Dataset.open("~/datasets/my_dataset.h5", mode="copy") +>>> my_dataset.new_attribute = "abc" Attribute Metadata -~~~~~~~~~~~~~~~~~~ +------------------ -Dataset attributes can also contain additional metadata, such as docstrings. The :func:`qml.data.attribute` +Dataset attributes can also contain additional metadata, such as docstrings. The :func:`~.data.attribute` function can be used to attach metadata on assignment or initialization. 
- >>> hamiltonian = qml.Hamiltonian([1., 1.], [qml.PauliZ(wires=0), qml.PauliZ(wires=1)]) - >>> eigvals, eigvecs = np.linalg.eigh(qml.matrix(hamiltonian)) - >>> dataset = qml.data.Dataset(hamiltonian = qml.data.attribute( - hamiltonian, - doc="The hamiltonian of the system")) - >>> dataset.eigen = qml.data.attribute( - {"eigvals": eigvals, "eigvecs": eigvecs}, - doc="Eigenvalues and eigenvectors of the hamiltonain") +>>> hamiltonian = qml.Hamiltonian([1., 1.], [qml.PauliZ(wires=0), qml.PauliZ(wires=1)]) +>>> eigvals, eigvecs = np.linalg.eigh(qml.matrix(hamiltonian)) +>>> dataset = qml.data.Dataset(hamiltonian = qml.data.attribute( +... hamiltonian, +... doc="The hamiltonian of the system")) +>>> dataset.eigen = qml.data.attribute( +... {"eigvals": eigvals, "eigvecs": eigvecs}, +... doc="Eigenvalues and eigenvectors of the hamiltonian") This metadata can then be accessed using the :meth:`Dataset.attr_info` mapping: - >>> dataset.attr_info["eigen"]["doc"] - 'The hamiltonian of the system' +>>> dataset.attr_info["eigen"]["doc"] +'Eigenvalues and eigenvectors of the hamiltonian' Declarative API -~~~~~~~~~~~~~~~ +--------------- When creating datasets to model a physical system, it is common to collect the same data for a system under different conditions or assumptions. For example, a collection of datasets describing a quantum oscillator, which contains the first 1000 energy levels for different masses and force constants. -The datasets declarative API allows us to create subclasses of ``Dataset`` that define the required attributes, -or 'fields', and their associated type and documentation: +The datasets declarative API allows us to create subclasses +of :class:`Dataset` that define the required attributes, or 'fields', and +their associated type and documentation: .. 
code-block:: python @@ -144,9 +183,14 @@ class QuantumOscillator(qml.data.Dataset, data_name="quantum_oscillator", identi When a ``QuantumOscillator`` dataset is created, its attributes will have the documentation from the field definition: - >>> dataset = QuantumOscillator(mass=1, force_constant=0.5, hamiltonian=..., energy_levels=...) - >>> dataset.attr_info["mass"]["doc"] - 'The mass of the particle' +>>> dataset = QuantumOscillator( +... mass=1, +... force_constant=0.5, +... hamiltonian=qml.PauliX(0), +... energy_levels=np.array([0.1, 0.2]) +... ) +>>> dataset.attr_info["mass"]["doc"] +'The mass of the particle' """ diff --git a/pennylane/data/base/attribute.py b/pennylane/data/base/attribute.py index 900ef3ddd7a..5a2a0774d1b 100644 --- a/pennylane/data/base/attribute.py +++ b/pennylane/data/base/attribute.py @@ -416,8 +416,35 @@ def __init_subclass__( # pylint: disable=arguments-differ def attribute( val: T, doc: Optional[str] = None, **kwargs: Any ) -> DatasetAttribute[HDF5Any, T, Any]: - """Returns ``DatasetAttribute`` class matching ``val``, with other arguments passed - to the ``AttributeInfo`` class.""" + """Creates a dataset attribute that contains both a value and associated metadata. + + Args: + val (any): the dataset attribute value + doc (str): the docstring that describes the attribute + **kwargs: Additional keyword arguments may be passed, which represent metadata + that describes the attribute. + + Returns: + DatasetAttribute: an attribute object + + .. seealso:: :class:`~.Dataset` + + **Example** + + >>> hamiltonian = qml.Hamiltonian([1., 1.], [qml.PauliZ(wires=0), qml.PauliZ(wires=1)]) + >>> eigvals, eigvecs = np.linalg.eigh(qml.matrix(hamiltonian)) + >>> dataset = qml.data.Dataset(hamiltonian = qml.data.attribute( + ... hamiltonian, + ... doc="The hamiltonian of the system")) + >>> dataset.eigen = qml.data.attribute( + ... {"eigvals": eigvals, "eigvecs": eigvecs}, + ... 
doc="Eigenvalues and eigenvectors of the hamiltonian") + + This metadata can then be accessed using the :meth:`~.Dataset.attr_info` mapping: + + >>> dataset.attr_info["eigen"]["doc"] + 'Eigenvalues and eigenvectors of the hamiltonian' + """ return match_obj_type(val)(val, AttributeInfo(doc=doc, py_type=type(val), **kwargs)) diff --git a/pennylane/data/base/dataset.py b/pennylane/data/base/dataset.py index dbc84ee3c04..1142ddca497 100644 --- a/pennylane/data/base/dataset.py +++ b/pennylane/data/base/dataset.py @@ -77,6 +77,43 @@ def field( # pylint: disable=too-many-arguments, unused-argument py_type: Type annotation or string describing this object's type. If not provided, the annotation on the class will be used kwargs: Extra arguments to ``AttributeInfo`` + + Returns: + Field: the field definition + + .. seealso:: :class:`~.Dataset`, :func:`~.data.attribute` + + **Example** + + The datasets declarative API allows us to create subclasses + of :class:`Dataset` that define the required attributes, or 'fields', and + their associated type and documentation: + + .. code-block:: python + + class QuantumOscillator(qml.data.Dataset, data_name="quantum_oscillator", identifiers=["mass", "force_constant"]): + \"""Dataset describing a quantum oscillator.\""" + + mass: float = qml.data.field(doc = "The mass of the particle") + force_constant: float = qml.data.field(doc = "The force constant of the oscillator") + hamiltonian: qml.Hamiltonian = qml.data.field(doc = "The hamiltonian of the particle") + energy_levels: np.ndarray = qml.data.field(doc = "The first 1000 energy levels of the system") + + The ``data_name`` keyword argument specifies a category or descriptive name for the dataset type, and the ``identifiers`` + keyword argument specifies fields that function as parameters, i.e., they determine the behaviour + of the system. + + When a ``QuantumOscillator`` dataset is created, its attributes will have the documentation from the field + definition: + + >>> dataset = QuantumOscillator( + ... 
mass=1, + ... force_constant=0.5, + ... hamiltonian=qml.PauliX(0), + ... energy_levels=np.array([0.1, 0.2]) + ... ) + >>> dataset.attr_info["mass"]["doc"] + 'The mass of the particle' """ return Field( diff --git a/pennylane/data/data_manager/__init__.py b/pennylane/data/data_manager/__init__.py index d7e11c8656b..061e786ef6b 100644 --- a/pennylane/data/data_manager/__init__.py +++ b/pennylane/data/data_manager/__init__.py @@ -106,9 +106,9 @@ def load( # pylint: disable=too-many-arguments cache_dir: Optional[Path] = Path(".cache"), **params: Union[ParamArg, str, List[str]], ): - r"""Downloads the data if it is not already present in the directory and return it to user as a - :class:`~pennylane.data.Dataset` object. For the full list of available datasets, please see the - `datasets website `_. + r"""Downloads the data if it is not already present in the directory and returns it as a list of + :class:`~pennylane.data.Dataset` objects. For the full list of available datasets, please see + the `datasets website `_. Args: data_name (str) : A string representing the type of data required such as `qchem`, `qpsin`, etc. @@ -122,6 +122,69 @@ def load( # pylint: disable=too-many-arguments Returns: list[:class:`~pennylane.data.Dataset`] + + .. seealso:: :func:`~.load_interactive`, :func:`~.list_attributes`, :func:`~.list_datasets`. + + **Example** + + The :func:`~pennylane.data.load` function returns a ``list`` with the desired data. + + >>> H2datasets = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1) + >>> print(H2datasets) + [] + + .. note:: + + If not otherwise specified, ``qml.data.load`` will download the + default parameter value specified by the dataset. + + The default values for attributes are as follows: + + - Molecules: ``basis`` is the smallest available basis, usually ``"STO-3G"``, and ``bondlength`` is the optimal bondlength for the molecule or an alternative if the optimal is not known. 
+ + - Spin systems: ``periodicity`` is ``"open"``, ``lattice`` is ``"chain"``, and ``layout`` is ``1x4`` for ``chain`` systems and ``2x2`` for ``rectangular`` systems. + + We can load datasets for multiple parameter values by providing a list of values instead of a single value. + To load all possible values, use the special value :const:`~pennylane.data.FULL` or the string 'full': + + >>> H2datasets = qml.data.load("qchem", molname="H2", basis="full", bondlength=[0.5, 1.1]) + >>> print(H2datasets) + [, + , + , + , + , + ] + + When we only want to download portions of a large dataset, we can specify + the desired properties (referred to as 'attributes'). For example, we + can download or load only the molecule and energy of a dataset as + follows: + + >>> part = qml.data.load( + ... "qchem", + ... molname="H2", + ... basis="STO-3G", + ... bondlength=1.1, + ... attributes=["molecule", "fci_energy"])[0] + >>> part.molecule + + + To determine what attributes are available, please see :func:`~.list_attributes`. + + The loaded data items are fully compatible with PennyLane. We can + therefore use them directly in a PennyLane circuit as follows: + + >>> H2data = qml.data.load("qchem", molname="H2", basis="STO-3G", bondlength=1.1)[0] + >>> dev = qml.device("default.qubit",wires=4) + >>> @qml.qnode(dev) + ... def circuit(): + ... qml.BasisState(H2data.hf_state, wires = [0, 1, 2, 3]) + ... for op in H2data.vqe_gates: + ... qml.apply(op) + ... return qml.expval(H2data.hamiltonian) + >>> print(circuit()) + -1.0791430411076344 """ params = format_params(**params) @@ -157,28 +220,26 @@ def list_datasets() -> dict: Return: dict: Nested dictionary representing the directory structure of the hosted datasets. + .. seealso:: :func:`~.load_interactive`, :func:`~.list_attributes`, :func:`~.load`. + **Example:** Note that the results of calling this function may differ from this example as more datasets are added. For updates on available data see the `datasets website `_. - .. 
code-block :: pycon - - >>> qml.data.list_datasets() - {'qchem': {'H2': {'6-31G': ['0.5', '0.54', '0.58', ... '2.02', '2.06', '2.1'], - 'STO-3G': ['0.5', '0.54', '0.58', ... '2.02', '2.06', '2.1']}, - 'HeH+': {'6-31G': ['0.5', '0.54', '0.58', ... '2.02', '2.06', '2.1'], - 'STO-3G': ['0.5', '0.54', '0.58', ... '2.02', '2.06', '2.1']}, - 'LiH': {'STO-3G': ['0.5', '0.54', '0.58', ... '2.02', '2.06', '2.1']}, - 'OH-': {'STO-3G': ['0.5', '0.54', '0.58', ... '0.94', '0.98', '1.02']}}, - 'qspin': {'Heisenberg': {'closed': {'chain': ['1x16', '1x4', '1x8'], - 'rectangular': ['2x2', '2x4', '2x8', '4x4']}, - 'open': {'chain': ['1x16', '1x4', '1x8'], - 'rectangular': ['2x2', '2x4', '2x8', '4x4']}}, - 'Ising': {'closed': {'chain': ['1x16', '1x4', '1x8'], - 'rectangular': ['2x2', '2x4', '2x8', '4x4']}, - 'open': {'chain': ['1x16', '1x4', '1x8'], - 'rectangular': ['2x2', '2x4', '2x8', '4x4']}}}} + >>> available_data = qml.data.list_datasets() + >>> available_data.keys() + dict_keys(["qspin", "qchem"]) + >>> available_data["qchem"].keys() + dict_keys(["H2", "LiH", ...]) + >>> available_data['qchem']['H2'].keys() + dict_keys(["CC-PVDZ", "6-31G", "STO-3G"]) + >>> print(available_data['qchem']['H2']['STO-3G']) + ["0.5", "0.54", "0.62", "0.66", ...] + + Note that this example limits the results of the function calls for + clarity and that as more data becomes available, the results of these + function calls will change. """ def remove_paths(foldermap): @@ -202,6 +263,18 @@ def list_attributes(data_name): Returns: list (str): A list of accepted attributes for a given data name + + .. seealso:: :func:`~.load_interactive`, :func:`~.list_datasets`, :func:`~.load`. + + **Example** + + >>> qml.data.list_attributes(data_name="qchem") + ['molname', + 'basis', + 'bondlength', + ... + 'vqe_params', + 'vqe_energy'] """ data_struct = _get_data_struct() if data_name not in data_struct: @@ -255,6 +328,8 @@ def load_interactive(): **Example** + .. 
seealso:: :func:`~.load`, :func:`~.list_attributes`, :func:`~.list_datasets`. + .. code-block :: pycon >>> qml.data.load_interactive()