Add ability to have default config options for spark #830

Merged · merged 1 commit · Aug 24, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,8 @@

### Updates
- Added support for http requests session adapter configuration
- Added support for default spark session config settings. Any settings set as defaults
  are preserved unless explicitly overridden by the user

## 0.20.5

4 changes: 4 additions & 0 deletions README.md
@@ -157,6 +157,10 @@ After installing, you need to register the custom authenticator with Sparkmagic
}
```

## Spark config settings

There are two config options for Spark settings: `session_configs_defaults` and `session_configs`. `session_configs_defaults` sets default settings that stay in effect unless a user explicitly overrides them. `session_configs` provides defaults that are replaced in their entirety whenever a user changes them using the configure magic.
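As a minimal sketch, the two options might appear together in `~/.sparkmagic/config.json` like this (values are drawn from `sparkmagic/example_config.json` in this PR; the exact settings are only illustrative):

```json
{
  "session_configs": {
    "driverMemory": "1000M",
    "executorCores": 2
  },
  "session_configs_defaults": {
    "conf": {
      "spark.sql.catalog.spark_catalog.type": "hive"
    }
  }
}
```

With this configuration, the `hive` catalog entry from `session_configs_defaults` is merged into whatever the user supplies and remains in effect unless the user explicitly sets that same key, whereas the `session_configs` block is replaced wholesale when a user reconfigures the session.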

## HTTP Session Adapters

If you need to customize HTTP request behavior for specific domains by modifying headers, implementing custom logic (e.g., using mTLS, retrying requests), or handling them differently, you can use a custom adapter to gain fine-grained control over request processing.
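As a hedged illustration (not part of this PR's diff), a custom adapter is typically a subclass of `requests.adapters.HTTPAdapter`; the class name and header below are hypothetical, and the step for registering the adapter with Sparkmagic is described in the remainder of this README section:

```python
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


class ExampleRetryAdapter(HTTPAdapter):
    """Hypothetical adapter: adds retries and a custom header to outgoing requests."""

    def __init__(self, **kwargs):
        # Retry failed requests up to 3 times with exponential backoff.
        kwargs.setdefault("max_retries", Retry(total=3, backoff_factor=0.5))
        super().__init__(**kwargs)

    def send(self, request, **kwargs):
        # Illustrative header injection; replace with mTLS, auth, etc. as needed.
        request.headers["X-Example-Header"] = "example"
        return super().send(request, **kwargs)
```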
5 changes: 4 additions & 1 deletion sparkmagic/example_config.json
@@ -68,7 +68,10 @@
"driverMemory": "1000M",
"executorCores": 2
},

"session_configs_defaults": {
"conf": {
"spark.sql.catalog.spark_catalog.type": "hive"
},
"use_auto_viz": true,
"coerce_dataframe": true,
"max_results_sql": 2500,
70 changes: 70 additions & 0 deletions sparkmagic/sparkmagic/tests/test_configuration.py
@@ -117,3 +117,73 @@ def test_share_config_between_pyspark_and_pyspark3():
conf.base64_kernel_python3_credentials()
== conf.base64_kernel_python_credentials()
)


def test_get_session_properties():
assert conf.get_session_properties("python") == {"kind": "pyspark"}


def test_get_session_properties_no_defaults():
conf.override(
conf.session_configs.__name__, {"foo": "bar", "config": {"foo": "bar"}}
)
assert conf.get_session_properties("python") == {
"kind": "pyspark",
"foo": "bar",
"config": {"foo": "bar"},
}


def test_get_session_properties_with_defaults():
conf.override(
conf.session_configs_defaults.__name__,
{"foo": "default", "config": {"foo": "default", "default": "default"}},
)
assert conf.get_session_properties("python") == {
"kind": "pyspark",
"foo": "default",
"config": {"foo": "default", "default": "default"},
}


def test_get_session_properties_with_defaults_and_overides():
conf.override(
conf.session_configs.__name__,
{
"foobar": "foobar",
"foo": "bar",
"config": {"foo": "bar"},
"l1": {
"l1k1": {
"l2k1": "bar",
"l2k3": "bar",
},
"l1k2": "bar",
"l1k4": "bar",
},
},
)
conf.override(
conf.session_configs_defaults.__name__,
{
"foo": "default",
"l1": {
"l1k1": {"l2k1": "default", "l2k2": "default"},
"l1k2": "default",
"l1k3": "default",
},
"config": {"foo": "bar", "default": "default"},
},
)
assert conf.get_session_properties("python") == {
"kind": "pyspark",
"foobar": "foobar",
"foo": "bar",
"config": {"foo": "bar", "default": "default"},
"l1": {
"l1k1": {"l2k1": "bar", "l2k2": "default", "l2k3": "bar"},
"l1k2": "bar",
"l1k3": "default",
"l1k4": "bar",
},
}
22 changes: 21 additions & 1 deletion sparkmagic/sparkmagic/utils/configuration.py
@@ -2,6 +2,7 @@
import copy
import sys
import base64
from collections.abc import MutableMapping
from hdijupyterutils.constants import (
EVENTS_HANDLER_CLASS_NAME,
LOGGING_CONFIG_CLASS_NAME,
@@ -81,10 +82,24 @@ def authenticators():


# Configs
def _recursive_merge(target, updates):
"""
Recursively merge two dictionaries of dictionaries.
This function can mutate both parameters.
Callers should deepcopy dictionaries before passing them in.
"""
for k, v in target.items():
if k in updates:
if all(isinstance(e, MutableMapping) for e in (v, updates[k])):
updates[k] = _recursive_merge(v, updates[k])
out = target.copy()
out.update(updates)
return out


def get_session_properties(language):
properties = copy.deepcopy(session_configs())
properties = copy.deepcopy(session_configs_defaults())
properties = _recursive_merge(properties, copy.deepcopy(session_configs()))
properties[LIVY_KIND_PARAM] = get_livy_kind(language)
return properties

Expand All @@ -94,6 +109,11 @@ def session_configs():
return {}


@_with_override
def session_configs_defaults():
return {}


@_with_override
def kernel_python_credentials():
return {
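For readers skimming the diff, here is a standalone sketch of the merge semantics this PR introduces. The helper is reproduced from the diff above; the sample values are illustrative and not taken from the PR:

```python
import copy
from collections.abc import MutableMapping


def _recursive_merge(target, updates):
    # Keys present in both dicts are merged recursively when both values are
    # mappings; otherwise the value from `updates` (the user's session_configs) wins.
    for k, v in target.items():
        if k in updates:
            if all(isinstance(e, MutableMapping) for e in (v, updates[k])):
                updates[k] = _recursive_merge(v, updates[k])
    out = target.copy()
    out.update(updates)
    return out


defaults = {"conf": {"spark.sql.catalog.spark_catalog.type": "hive"}}
user = {"conf": {"spark.executor.memory": "2g"}, "driverMemory": "2000M"}

print(_recursive_merge(copy.deepcopy(defaults), copy.deepcopy(user)))
# {'conf': {'spark.sql.catalog.spark_catalog.type': 'hive',
#           'spark.executor.memory': '2g'},
#  'driverMemory': '2000M'}
```

The default catalog setting survives the merge because the user never sets that key, while user-supplied values take precedence wherever the two configs collide, matching the expectations in the tests above.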