-
Notifications
You must be signed in to change notification settings - Fork 515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support SparkIntegration activation after SparkContext created #3411
Changes from 11 commits
74edf67
4c673d6
52559be
fd48489
2f0d7be
68aaed6
6af8cd4
f762f7b
33e22b5
da491d4
1828149
d536268
b4066ed
08d46ee
b2b0a92
2421099
7b99ba8
8fddd3c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import sentry_sdk | ||
from sentry_sdk.integrations import Integration | ||
from sentry_sdk.utils import capture_internal_exceptions, ensure_integration_enabled | ||
from sentry_sdk.scope import Scope | ||
|
||
from sentry_sdk._types import TYPE_CHECKING | ||
|
||
|
@@ -9,6 +10,7 @@ | |
from typing import Optional | ||
|
||
from sentry_sdk._types import Event, Hint | ||
from pyspark import SparkContext | ||
|
||
|
||
class SparkIntegration(Integration): | ||
|
@@ -17,7 +19,7 @@ class SparkIntegration(Integration): | |
@staticmethod | ||
def setup_once(): | ||
# type: () -> None | ||
patch_spark_context_init() | ||
_setup_sentry_tracing() | ||
|
||
|
||
def _set_app_properties(): | ||
|
@@ -37,7 +39,7 @@ def _set_app_properties(): | |
|
||
|
||
def _start_sentry_listener(sc): | ||
# type: (Any) -> None | ||
# type: (SparkContext) -> None | ||
""" | ||
Start java gateway server to add custom `SparkListener` | ||
""" | ||
|
@@ -49,7 +51,51 @@ def _start_sentry_listener(sc): | |
sc._jsc.sc().addSparkListener(listener) | ||
|
||
|
||
def patch_spark_context_init(): | ||
def _add_event_processor(sc): | ||
# type: (SparkContext) -> None | ||
scope = sentry_sdk.get_isolation_scope() | ||
|
||
@scope.add_event_processor | ||
def process_event(event, hint): | ||
# type: (Event, Hint) -> Optional[Event] | ||
with capture_internal_exceptions(): | ||
if sentry_sdk.get_client().get_integration(SparkIntegration) is None: | ||
return event | ||
|
||
if sc._active_spark_context is None: | ||
return event | ||
|
||
event.setdefault("user", {}).setdefault("id", sc.sparkUser()) | ||
|
||
event.setdefault("tags", {}).setdefault( | ||
"executor.id", sc._conf.get("spark.executor.id") | ||
) | ||
event["tags"].setdefault( | ||
"spark-submit.deployMode", | ||
sc._conf.get("spark.submit.deployMode"), | ||
) | ||
event["tags"].setdefault("driver.host", sc._conf.get("spark.driver.host")) | ||
event["tags"].setdefault("driver.port", sc._conf.get("spark.driver.port")) | ||
event["tags"].setdefault("spark_version", sc.version) | ||
event["tags"].setdefault("app_name", sc.appName) | ||
event["tags"].setdefault("application_id", sc.applicationId) | ||
event["tags"].setdefault("master", sc.master) | ||
event["tags"].setdefault("spark_home", sc.sparkHome) | ||
|
||
event.setdefault("extra", {}).setdefault("web_url", sc.uiWebUrl) | ||
|
||
return event | ||
|
||
|
||
def _activate_integration(sc): | ||
# type: (SparkContext) -> None | ||
|
||
_start_sentry_listener(sc) | ||
_set_app_properties() | ||
_add_event_processor(sc) | ||
|
||
|
||
def _patch_spark_context_init(): | ||
# type: () -> None | ||
from pyspark import SparkContext | ||
|
||
|
@@ -59,51 +105,22 @@ def patch_spark_context_init(): | |
def _sentry_patched_spark_context_init(self, *args, **kwargs): | ||
# type: (SparkContext, *Any, **Any) -> Optional[Any] | ||
rv = spark_context_init(self, *args, **kwargs) | ||
_start_sentry_listener(self) | ||
_set_app_properties() | ||
|
||
scope = sentry_sdk.get_isolation_scope() | ||
|
||
@scope.add_event_processor | ||
def process_event(event, hint): | ||
# type: (Event, Hint) -> Optional[Event] | ||
with capture_internal_exceptions(): | ||
if sentry_sdk.get_client().get_integration(SparkIntegration) is None: | ||
return event | ||
|
||
if self._active_spark_context is None: | ||
return event | ||
|
||
event.setdefault("user", {}).setdefault("id", self.sparkUser()) | ||
|
||
event.setdefault("tags", {}).setdefault( | ||
"executor.id", self._conf.get("spark.executor.id") | ||
) | ||
event["tags"].setdefault( | ||
"spark-submit.deployMode", | ||
self._conf.get("spark.submit.deployMode"), | ||
) | ||
event["tags"].setdefault( | ||
"driver.host", self._conf.get("spark.driver.host") | ||
) | ||
event["tags"].setdefault( | ||
"driver.port", self._conf.get("spark.driver.port") | ||
) | ||
event["tags"].setdefault("spark_version", self.version) | ||
event["tags"].setdefault("app_name", self.appName) | ||
event["tags"].setdefault("application_id", self.applicationId) | ||
event["tags"].setdefault("master", self.master) | ||
event["tags"].setdefault("spark_home", self.sparkHome) | ||
|
||
event.setdefault("extra", {}).setdefault("web_url", self.uiWebUrl) | ||
|
||
return event | ||
|
||
_activate_integration(self) | ||
return rv | ||
|
||
SparkContext._do_init = _sentry_patched_spark_context_init | ||
|
||
|
||
def _setup_sentry_tracing(): | ||
# type: () -> None | ||
from pyspark import SparkContext | ||
|
||
if SparkContext._active_spark_context is not None: | ||
_activate_integration(SparkContext._active_spark_context) | ||
return | ||
_patch_spark_context_init() | ||
Comment on lines
+117
to
+120
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When the Spark context already exists, `_activate_integration` is called on it directly and `_patch_spark_context_init()` is skipped. |
||
|
||
|
||
class SparkListener: | ||
def onApplicationEnd(self, applicationEnd): # noqa: N802,N803 | ||
# type: (Any) -> None | ||
|
@@ -208,10 +225,20 @@ class Java: | |
|
||
|
||
class SentryListener(SparkListener): | ||
def _add_breadcrumb( | ||
self, | ||
level, # type: str | ||
message, # type: str | ||
data=None, # type: Optional[dict[str, Any]] | ||
): | ||
# type: (...) -> None | ||
Scope.set_isolation_scope(Scope.get_global_scope()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should not be setting the isolation scope to the global scope. So that I can suggest a better alternative, what are you trying to accomplish here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @szokeasaurusrex
print insert class SentryListener(SparkListener):
def _add_breadcrumb(
self,
level, # type: str
message, # type: str
data=None, # type: Optional[dict[str, Any]]
):
# type: (...) -> None
# Scope.set_isolation_scope(Scope.get_global_scope())
print(f"* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: {os.getpid()}, current thread: {threading.get_ident()}")
print(f"** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: {sentry_sdk.Scope.get_isolation_scope()._breadcrumbs}")
print(f"*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): {id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs)}")
sentry_sdk.add_breadcrumb(level=level, message=message, data=data) def capture_sql_exception(f: Callable[..., Any]) -> Callable[..., Any]:
def deco(*a: Any, **kw: Any) -> Any:
try:
return f(*a, **kw)
except Py4JJavaError as e:
converted = convert_exception(e.java_exception)
import sentry_sdk
import os
import threading
print(f"- pyspark/errors/exceptions/captuted.py current pid: {os.getpid()}, current thread: {threading.get_ident()}")
print(f"-- pyspark/errors/exceptions/captuted.py sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: {sentry_sdk.Scope.get_isolation_scope()._breadcrumbs}")
print(f"--- pyspark/errors/exceptions/captuted.py id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): {id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs)}")
if not isinstance(converted, UnknownException):
# Hide where the exception came from that shows a non-Pythonic
# JVM exception message.
raise converted from None
else:
raise
return deco test
from pyspark.sql import SparkSession
import sentry_sdk
from sentry_sdk.integrations.spark import SparkIntegration
import os
import threading
if __name__ == "__main__":
spark = SparkSession.builder \
.appName("Simple Example") \
.master("local[*]") \
.getOrCreate()
sentry_sdk.init(
integrations=[SparkIntegration()],
dsn="",
)
print(f"====== main() pid: {os.getpid()}, current thread: {threading.get_ident()}")
data = [1, 2, 3, 4, 5]
rdd = spark.sparkContext.parallelize(data)
result_rdd = rdd.map(lambda x: x * x)
result = result_rdd.collect()
print(result)
print(f"====== main() pid: {os.getpid()}, current thread: {threading.get_ident()}")
spark.read.csv("/path/deos/not/exist/error/raise") output Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/27 23:54:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
====== main() pid: 19639, current thread: 4306142592
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19639, current thread: 6232780800
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4378114848
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19639, current thread: 6232780800
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 414304, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4378114848
[1, 4, 9, 16, 25]
====== main() pid: 19639, current thread: 4306142592
- pyspark/errors/exceptions/captuted.py current pid: 19639, current thread: 6232780800
-- pyspark/errors/exceptions/captuted.py sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 414304, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_after_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 435786, tzinfo=datetime.timezone.utc), 'type': 'default'}])
--- pyspark/errors/exceptions/captuted.py id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4378114848
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19639, current thread: 6232780800
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 414304, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_after_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 435786, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4378114848
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19639, current thread: 6232780800
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 414304, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_after_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 26, 435786, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Completed', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_after_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 54, 29, 173172, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4378114848
- pyspark/errors/exceptions/captuted.py current pid: 19639, current thread: 4306142592
-- pyspark/errors/exceptions/captuted.py sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([])
--- pyspark/errors/exceptions/captuted.py id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4372017600
Traceback (most recent call last):
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_after_create_spark_session.py", line 28, in <module>
spark.read.csv("/path/deos/not/exist/error/raise")
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/pyspark/sql/readwriter.py", line 727, in csv
return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/py4j/java_gateway.py", line 1322, in __call__
return_value = get_return_value(
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/pyspark/errors/exceptions/captured.py", line 181, in deco
raise converted from None
pyspark.errors.exceptions.captured.AnalysisException: [PATH_NOT_FOUND] Path does not exist: file:/path/deos/not/exist/error/raise.
Process finished with exit code 1
from pyspark.sql import SparkSession
import sentry_sdk
from sentry_sdk.integrations.spark import SparkIntegration
import os
import threading
if __name__ == "__main__":
sentry_sdk.init(
integrations=[SparkIntegration()],
dsn="",
)
print(f"====== main() pid: {os.getpid()}, current thread: {threading.get_ident()}")
spark = SparkSession.builder \
.appName("Simple Example") \
.master("local[*]") \
.getOrCreate()
data = [1, 2, 3, 4, 5]
rdd = spark.sparkContext.parallelize(data)
result_rdd = rdd.map(lambda x: x * x)
result = result_rdd.collect()
print(result)
print(f"====== main() pid: {os.getpid()}, current thread: {threading.get_ident()}")
spark.read.csv("/path/deos/not/exist/error/raise") output ====== main() pid: 19741, current thread: 4370892160
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/27 23:55:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19741, current thread: 6166802432
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19741, current thread: 6166802432
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 744012, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
[1, 4, 9, 16, 25]
====== main() pid: 19741, current thread: 4370892160
- pyspark/errors/exceptions/captuted.py current pid: 19741, current thread: 6166802432
-- pyspark/errors/exceptions/captuted.py sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 744012, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 765673, tzinfo=datetime.timezone.utc), 'type': 'default'}])
--- pyspark/errors/exceptions/captuted.py id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19741, current thread: 6166802432
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 744012, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 765673, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
* sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() current pid: 19741, current thread: 6166802432
** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 744012, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 765673, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Completed', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 38, 718804, tzinfo=datetime.timezone.utc), 'type': 'default'}])
*** sentry_sdk/integrations/spark/spark_drvier.py SentryListner._add_breadcrumb() id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
- pyspark/errors/exceptions/captuted.py current pid: 19741, current thread: 4370892160
-- pyspark/errors/exceptions/captuted.py sentry_sdk.Scope.get_isolation_scope()._breadcrumbs: deque([{'level': 'info', 'message': 'Job 0 Started', 'data': None, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 744012, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Submitted', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 35, 765673, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Stage 0 Completed', 'data': {'attemptId': 0, 'name': 'collect at /Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py:24'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 38, 718804, tzinfo=datetime.timezone.utc), 'type': 'default'}, {'level': 'info', 'message': 'Job 0 Ended', 'data': {'result': 'JobSucceeded'}, 'timestamp': datetime.datetime(2024, 8, 27, 14, 55, 38, 721299, tzinfo=datetime.timezone.utc), 'type': 'default'}])
--- pyspark/errors/exceptions/captuted.py id(sentry_sdk.Scope.get_isolation_scope()._breadcrumbs): 4394201536
Traceback (most recent call last):
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/local_test_before_create_spark_session.py", line 28, in <module>
spark.read.csv("/path/deos/not/exist/error/raise")
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/pyspark/sql/readwriter.py", line 727, in csv
return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/py4j/java_gateway.py", line 1322, in __call__
return_value = get_return_value(
File "/Users/kakao/Desktop/shaun/opensource/sentry-python-test/venv/lib/python3.9/site-packages/pyspark/errors/exceptions/captured.py", line 181, in deco
raise converted from None
pyspark.errors.exceptions.captured.AnalysisException: [PATH_NOT_FOUND] Path does not exist: file:/path/deos/not/exist/error/raise.
Process finished with exit code 1 If you have any questions, please feel free to let me know! Thank you. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see. We need to find a different solution here though, because we cannot set the isolation scope to the global scope. Doing so will likely mess up isolation elsewhere, and cause unrelated data to be sent along with other events. Maybe we need to fork the isolation or current scope somewhere in the Spark integration? I can also try to take a look at this later if you are struggling to figure out how to avoid setting the isolation scope to the global scope. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, I see. I will look into this further and work on fixing the issue. I will update you after conducting some more tests. |
||
sentry_sdk.add_breadcrumb(level=level, message=message, data=data) | ||
antonpirker marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def onJobStart(self, jobStart): # noqa: N802,N803 | ||
# type: (Any) -> None | ||
message = "Job {} Started".format(jobStart.jobId()) | ||
sentry_sdk.add_breadcrumb(level="info", message=message) | ||
self._add_breadcrumb(level="info", message=message) | ||
_set_app_properties() | ||
|
||
def onJobEnd(self, jobEnd): # noqa: N802,N803 | ||
|
@@ -227,14 +254,14 @@ def onJobEnd(self, jobEnd): # noqa: N802,N803 | |
level = "warning" | ||
message = "Job {} Failed".format(jobEnd.jobId()) | ||
|
||
sentry_sdk.add_breadcrumb(level=level, message=message, data=data) | ||
self._add_breadcrumb(level=level, message=message, data=data) | ||
|
||
def onStageSubmitted(self, stageSubmitted): # noqa: N802,N803 | ||
# type: (Any) -> None | ||
stage_info = stageSubmitted.stageInfo() | ||
message = "Stage {} Submitted".format(stage_info.stageId()) | ||
data = {"attemptId": stage_info.attemptId(), "name": stage_info.name()} | ||
sentry_sdk.add_breadcrumb(level="info", message=message, data=data) | ||
self._add_breadcrumb(level="info", message=message, data=data) | ||
_set_app_properties() | ||
|
||
def onStageCompleted(self, stageCompleted): # noqa: N802,N803 | ||
|
@@ -255,4 +282,4 @@ def onStageCompleted(self, stageCompleted): # noqa: N802,N803 | |
message = "Stage {} Completed".format(stage_info.stageId()) | ||
level = "info" | ||
|
||
sentry_sdk.add_breadcrumb(level=level, message=message, data=data) | ||
self._add_breadcrumb(level=level, message=message, data=data) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have separated this part into a distinct function.