-
Notifications
You must be signed in to change notification settings - Fork 653
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FEAT-#7308: Interoperability between query compilers #7376
Changes from 33 commits
ba48d29
40e1cd4
a574172
6f9795b
07d8d3a
ac75854
52b3136
4aed823
7149ba3
b0e0a82
507d58a
88a6207
b01f7b9
1a61931
a105677
8e29fbe
1620f82
4392165
7a1a30f
4e24de2
2308295
d240ac8
a4c1697
89dabf9
b212c3b
90ba1a4
e9060a7
60f9512
80ce01c
6a068dc
cac89d2
8e8ec46
c1b0942
f488872
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# Licensed to Modin Development Team under one or more contributor license agreements. | ||
# See the NOTICE file distributed with this work for additional information regarding | ||
# copyright ownership. The Modin Development Team licenses this file to you under the | ||
# Apache License, Version 2.0 (the "License"); you may not use this file except in | ||
# compliance with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software distributed under | ||
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | ||
# ANY KIND, either express or implied. See the License for the specific language | ||
# governing permissions and limitations under the License. | ||
|
||
""" | ||
Module contains ``QueryCompilerCaster`` class. | ||
|
||
``QueryCompilerCaster`` is used for automatically casting query compiler | ||
arguments to the type of the current query compiler for query compiler class functions. | ||
This ensures compatibility between different query compiler classes. | ||
""" | ||
|
||
import functools | ||
import inspect | ||
from types import FunctionType, MethodType | ||
from typing import Any, Dict, Tuple, TypeVar | ||
|
||
from pandas.core.indexes.frozen import FrozenList | ||
|
||
from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler | ||
Check notice Code scanning / CodeQL Cyclic import Note
Import of module
modin.core.storage_formats.base.query_compiler Error loading related location Loading |
||
|
||
Fn = TypeVar("Fn", bound=Any) | ||
|
||
|
||
class QueryCompilerCaster:
    """Mixin that makes member functions cast query compiler arguments to the current query compiler type."""

    @classmethod
    def __init_subclass__(cls, **kwargs: Dict) -> None:
        """
        Wrap the member functions of a newly created subclass with argument casting.

        Python calls this hook each time a class inherits from
        ``QueryCompilerCaster``. The new subclass is passed to
        ``apply_argument_cast`` right after the parent hook has run.

        Parameters
        ----------
        **kwargs : dict
            Keyword arguments of the class statement, forwarded unchanged
            to the parent ``__init_subclass__``.
        """
        super().__init_subclass__(**kwargs)
        apply_argument_cast(cls)
|
||
|
||
def cast_nested_args_to_current_qc_type(arguments, current_qc):
    """
    Cast all arguments in nested fashion to current query compiler.

    Lists and dicts are updated in place and returned as the same object.
    Tuples and ``FrozenList`` objects cannot be updated in place, so they are
    rebuilt, but only when at least one of their elements was replaced.
    Containers are processed at any nesting depth, including tuples stored
    inside lists or dicts.

    Parameters
    ----------
    arguments : tuple, list, dict or FrozenList
        Possibly nested container holding the arguments.
    current_qc : BaseQueryCompiler
        Query compiler whose type all other query compilers are casted to.

    Returns
    -------
    tuple, list, dict or FrozenList
        Container of the same type as `arguments` with all query compilers
        casted to the type of `current_qc`.
    """
    current_qc_type = type(current_qc)
    immutable_types = (FrozenList, tuple)
    container_types = (list, dict) + immutable_types

    def cast_value(value):
        # Containers are handled recursively; the result is always handed back
        # to the caller so that rebuilt immutable containers are not lost.
        if isinstance(value, container_types):
            return cast_nested_args_to_current_qc_type(value, current_qc)
        if isinstance(value, BaseQueryCompiler) and not isinstance(
            value, current_qc_type
        ):
            data_cls = current_qc._modin_frame
            return current_qc_type.from_pandas(value.to_pandas(), data_cls)
        return value

    # ``FrozenList`` is a ``list`` subclass, so the immutable check has to come
    # before the plain ``list`` branch.
    if isinstance(arguments, immutable_types):
        casted = [cast_value(item) for item in arguments]
        if all(new is old for new, old in zip(casted, arguments)):
            # Nothing was replaced: keep the original object, which also avoids
            # re-constructing tuple subclasses with custom constructors.
            return arguments
        args_type = type(arguments)
        if hasattr(args_type, "_make"):
            # namedtuple classes take their fields as separate positional
            # arguments; ``_make`` builds one from an iterable instead.
            return args_type._make(casted)
        return args_type(casted)
    if isinstance(arguments, list):
        for i, item in enumerate(arguments):
            arguments[i] = cast_value(item)
    elif isinstance(arguments, dict):
        # Only existing keys are reassigned, so the dict size never changes
        # while it is being iterated.
        for key, item in arguments.items():
            arguments[key] = cast_value(item)
    return arguments
|
||
|
||
def apply_argument_cast(obj: "Fn") -> "Fn":
    """
    Cast all arguments that are query compilers to the current query compiler.

    A class is processed attribute by attribute and returned as the same
    object; ``classmethod`` and ``staticmethod`` objects are unwrapped,
    decorated and wrapped again; any other callable is decorated directly.

    Parameters
    ----------
    obj : type, classmethod, staticmethod or function
        Class or callable to add argument casting to.

    Returns
    -------
    type, classmethod, staticmethod or function
        Returns decorated function which does argument casting, or the class
        whose member functions were decorated.
    """
    if isinstance(obj, type):
        all_attrs = dict(inspect.getmembers(obj))
        # ``__abstractmethods__`` is only listed for abstract base classes, so
        # a default is needed to support classes that are not ABCs.
        all_attrs.pop("__abstractmethods__", None)

        # This is required because inspect converts class methods to member functions
        all_attrs.update(vars(obj))

        for attr_name, attr_value in all_attrs.items():
            if isinstance(
                attr_value, (FunctionType, MethodType, classmethod, staticmethod)
            ):
                wrapped = apply_argument_cast(attr_value)
                setattr(obj, attr_name, wrapped)
        return obj  # type: ignore [return-value]
    elif isinstance(obj, classmethod):
        return classmethod(apply_argument_cast(obj.__func__))  # type: ignore [return-value, arg-type]
    elif isinstance(obj, staticmethod):
        return staticmethod(apply_argument_cast(obj.__func__))

    # ``functools.wraps`` copies the name, docstring and ``__wrapped__``
    # reference of the original callable onto the wrapper.
    @functools.wraps(obj)
    def cast_args(*args: Any, **kwargs: Any) -> Any:
        """
        Add casting for query compiler arguments.

        Casting only happens when the first positional argument is a query
        compiler; otherwise the call is forwarded unchanged.

        Parameters
        ----------
        *args : tuple
            The function arguments.
        **kwargs : dict
            The function keyword arguments.

        Returns
        -------
        Any
        """
        # Static methods and keyword-only calls may arrive without positional
        # arguments, so check ``args`` before looking at its first element.
        if args and isinstance(args[0], BaseQueryCompiler):
            current_qc = args[0]
            kwargs = cast_nested_args_to_current_qc_type(kwargs, current_qc)
            args = cast_nested_args_to_current_qc_type(args, current_qc)
        return obj(*args, **kwargs)

    return cast_args
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2993,9 +2993,8 @@ def _create_or_update_from_compiler( | |
DataFrame or None | ||
None if update was done, ``DataFrame`` otherwise. | ||
""" | ||
assert ( | ||
isinstance(new_query_compiler, type(self._query_compiler)) | ||
or type(new_query_compiler) in self._query_compiler.__class__.__bases__ | ||
Comment on lines
-2996
to
-2998
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. new_query_compiler can also be an instance of a different query compiler (NativeQueryCompiler or PandasQueryCompiler). For example, during operations like insert, the constructor gets called directly in _create_or_update_from_compiler. Thus it is normal for the constructor to return a different query compiler when the user changes the query compiler mode between creating the DataFrame and the insert operation. |
||
assert isinstance( | ||
new_query_compiler, self._query_compiler.__class__.__bases__ | ||
), "Invalid Query Compiler object: {}".format(type(new_query_compiler)) | ||
if not inplace: | ||
return self.__constructor__(query_compiler=new_query_compiler) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Licensed to Modin Development Team under one or more contributor license agreements. | ||
# See the NOTICE file distributed with this work for additional information regarding | ||
# copyright ownership. The Modin Development Team licenses this file to you under the | ||
# Apache License, Version 2.0 (the "License"); you may not use this file except in | ||
# compliance with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software distributed under | ||
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | ||
# ANY KIND, either express or implied. See the License for the specific language | ||
# governing permissions and limitations under the License. |
Check notice
Code scanning / CodeQL
Cyclic import Note