Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FP hists: filter out bad/weird procstatus #285

Merged
merged 3 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config.defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ kowalski:
365,
],
},
{ "$eq": ["$$item.procstatus", "0"] },
],
},
},
Expand Down
125 changes: 69 additions & 56 deletions kowalski/alert_brokers/alert_broker.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,74 +834,87 @@ def make_photometry(
# fp_hists is already in flux space, so we process it separately
df_fp_hists = pd.DataFrame(alert.get("fp_hists", []))

# filter out bad data:
# filter out bad data, where procstatus is not 0 or "0"
mask_good_fp_hists = df_fp_hists["procstatus"].apply(
lambda x: x in [0, "0"]
)
df_fp_hists = df_fp_hists.loc[mask_good_fp_hists]

# filter out bad data using diffmaglim
mask_good_diffmaglim = df_fp_hists["diffmaglim"] > 0
df_fp_hists = df_fp_hists.loc[mask_good_diffmaglim]

# add filter column
if self.instrument == "ZTF":
df_fp_hists["filter"] = df_fp_hists["fid"].apply(
lambda x: ztf_filters[x]
)
else:
raise NotImplementedError(
f"Processing of forced photometry for {self.instrument} not implemented"
)
if len(df_fp_hists) > 0:
# filter out bad data using diffmaglim
mask_good_diffmaglim = df_fp_hists["diffmaglim"] > 0
df_fp_hists = df_fp_hists.loc[mask_good_diffmaglim]

# add the zeropoint now (used in missing upper limit calculation)
df_fp_hists["zp"] = df_fp_hists["magzpsci"]
# add filter column
if self.instrument == "ZTF":
df_fp_hists["filter"] = df_fp_hists["fid"].apply(
lambda x: ztf_filters[x]
)
else:
raise NotImplementedError(
f"Processing of forced photometry for {self.instrument} not implemented"
)

# add mjd and convert columns to float
df_fp_hists["mjd"] = df_fp_hists["jd"] - 2400000.5
df_fp_hists["mjd"] = df_fp_hists["mjd"].apply(lambda x: np.float64(x))
df_fp_hists["flux"] = (
df_fp_hists["forcediffimflux"].apply(lambda x: np.float64(x))
if "forcediffimflux" in df_fp_hists
else np.nan
)
df_fp_hists["fluxerr"] = (
df_fp_hists["forcediffimfluxunc"].apply(lambda x: np.float64(x))
if "forcediffimfluxunc" in df_fp_hists
else np.nan
)
# add the zeropoint now (used in missing upper limit calculation)
df_fp_hists["zp"] = df_fp_hists["magzpsci"]

# where the flux is np.nan, we consider it a non-detection
# for these, we compute the upper limits from the diffmaglim
missing_flux = df_fp_hists["flux"].isna()
df_fp_hists.loc[missing_flux, "fluxerr"] = (
10
** (
-0.4
* (
df_fp_hists.loc[mask_good_diffmaglim, "diffmaglim"]
- df_fp_hists.loc[mask_good_diffmaglim, "zp"]
)
# add mjd and convert columns to float
df_fp_hists["mjd"] = df_fp_hists["jd"] - 2400000.5
df_fp_hists["mjd"] = df_fp_hists["mjd"].apply(lambda x: np.float64(x))
df_fp_hists["flux"] = (
df_fp_hists["forcediffimflux"].apply(lambda x: np.float64(x))
if "forcediffimflux" in df_fp_hists
else np.nan
)
df_fp_hists["fluxerr"] = (
df_fp_hists["forcediffimfluxunc"].apply(lambda x: np.float64(x))
if "forcediffimfluxunc" in df_fp_hists
else np.nan
)
/ 5.0
) # as diffmaglim is the 5-sigma depth

# calculate the snr (as absolute value of the flux divided by the fluxerr)
df_fp_hists["snr"] = np.abs(df_fp_hists["flux"] / df_fp_hists["fluxerr"])
# where the flux is np.nan, we consider it a non-detection
# for these, we compute the upper limits from the diffmaglim
missing_flux = df_fp_hists["flux"].isna()
df_fp_hists.loc[missing_flux, "fluxerr"] = (
10
** (
-0.4
* (
df_fp_hists.loc[mask_good_diffmaglim, "diffmaglim"]
- df_fp_hists.loc[mask_good_diffmaglim, "zp"]
)
)
/ 5.0
) # as diffmaglim is the 5-sigma depth

# we only consider datapoints as detections if they have an snr > 3
# (for reference, alert detections are considered if they have an snr > 5)
# otherwise, we set flux to np.nan
df_fp_hists["flux"] = df_fp_hists["flux"].where(
df_fp_hists["snr"] > 3, other=np.nan
)
# calculate the snr (as absolute value of the flux divided by the fluxerr)
df_fp_hists["snr"] = np.abs(
df_fp_hists["flux"] / df_fp_hists["fluxerr"]
)

# deduplicate by mjd, filter, and flux
df_fp_hists = (
df_fp_hists.drop_duplicates(subset=["mjd", "filter", "flux"])
.reset_index(drop=True)
.sort_values(by=["mjd"])
)
# we only consider datapoints as detections if they have an snr > 3
# (for reference, alert detections are considered if they have an snr > 5)
# otherwise, we set flux to np.nan
df_fp_hists["flux"] = df_fp_hists["flux"].where(
df_fp_hists["snr"] > 3, other=np.nan
)

# deduplicate by mjd, filter, and flux
df_fp_hists = (
df_fp_hists.drop_duplicates(subset=["mjd", "filter", "flux"])
.reset_index(drop=True)
.sort_values(by=["mjd"])
)

# add an origin field with the value "alert_fp" so SkyPortal knows it's forced photometry
df_fp_hists["origin"] = "alert_fp"
# add an origin field with the value "alert_fp" so SkyPortal knows it's forced photometry
df_fp_hists["origin"] = "alert_fp"

# step 6: merge the fp_hists section with the light curve
df_light_curve = pd.concat([df_light_curve, df_fp_hists], sort=False)
# step 6: merge the fp_hists section with the light curve
df_light_curve = pd.concat([df_light_curve, df_fp_hists], sort=False)

# step 6: set the magnitude system
df_light_curve["zpsys"] = "ab"
Expand Down
12 changes: 6 additions & 6 deletions kowalski/tests/test_alert_broker_ztf.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,10 @@ def test_make_photometry_with_fp_hists(self):
post_alert(self.worker, record, fp_cutoff=1)

df_photometry = self.worker.make_photometry(record, include_fp_hists=True)
assert len(df_photometry) == 42
assert len(df_photometry) == 28
# prv_candidates have origin = None, and fp_hists have origin = 'alert_fp'
assert "origin" in df_photometry.columns
assert len(df_photometry[df_photometry["origin"] == "alert_fp"]) == 21
assert len(df_photometry[df_photometry["origin"] == "alert_fp"]) == 7
assert len(df_photometry[df_photometry["origin"].isnull()]) == 21
# verify that they all have a fluxerr value
assert all(df_photometry["fluxerr"].notnull())
Expand Down Expand Up @@ -1047,10 +1047,10 @@ def test_alert_filter__user_defined__with_fp_hists(self):

assert response.status_code == 200
photometry = response.json()["data"]
assert len(photometry) == 42
assert len(photometry) == 28

assert "origin" in photometry[0]
assert len([p for p in photometry if p["origin"] == "alert_fp"]) == 21
assert len([p for p in photometry if p["origin"] == "alert_fp"]) == 7
assert len([p for p in photometry if p["origin"] in [None, "None"]]) == 21

assert (
Expand Down Expand Up @@ -1099,10 +1099,10 @@ def test_alert_filter__user_defined__with_fp_hists(self):

assert response.status_code == 200
photometry = response.json()["data"]
assert len(photometry) == 42
assert len(photometry) == 28

assert "origin" in photometry[0]
assert len([p for p in photometry if p["origin"] == "alert_fp"]) == 21
assert len([p for p in photometry if p["origin"] == "alert_fp"]) == 7
assert len([p for p in photometry if p["origin"] in [None, "None"]]) == 21

assert (
Expand Down
Loading