Skip to content

Commit

Permalink
[daggy-u] standardize indentation style across lessons
Browse files Browse the repository at this point in the history
  • Loading branch information
cmpadden committed Feb 7, 2024
1 parent 0a93b7e commit 23e8203
Show file tree
Hide file tree
Showing 33 changed files with 460 additions and 460 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,46 +33,46 @@ In Lesson 9, you created the `adhoc_request` asset. During materialization, the

@asset
def adhoc_request(config: AdhocRequestConfig, taxi_zones, taxi_trips, database: DuckDBResource):
"""
The response to an request made in the `requests` directory.
See `requests/README.md` for more information.
"""

# strip the file extension from the filename, and use it as the output filename
file_path = constants.REQUEST_DESTINATION_TEMPLATE_FILE_PATH.format(config.filename.split('.')[0])

# count the number of trips that picked up in a given borough, aggregated by time of day and hour of day
query = f"""
select
date_part('hour', pickup_datetime) as hour_of_day,
date_part('dayofweek', pickup_datetime) as day_of_week_num,
case date_part('dayofweek', pickup_datetime)
when 0 then 'Sunday'
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
end as day_of_week,
count(*) as num_trips
from trips
left join zones on trips.pickup_zone_id = zones.zone_id
where pickup_datetime >= '{config.start_date}'
and pickup_datetime < '{config.end_date}'
and pickup_zone_id in (
select zone_id
from zones
where borough = '{config.borough}'
)
group by 1, 2
order by 1, 2 asc
"""

with database.get_connection() as conn:
results = conn.execute(query).fetch_df()

fig = px.bar(
"""
The response to an request made in the `requests` directory.
See `requests/README.md` for more information.
"""

# strip the file extension from the filename, and use it as the output filename
file_path = constants.REQUEST_DESTINATION_TEMPLATE_FILE_PATH.format(config.filename.split('.')[0])

# count the number of trips that picked up in a given borough, aggregated by time of day and hour of day
query = f"""
select
date_part('hour', pickup_datetime) as hour_of_day,
date_part('dayofweek', pickup_datetime) as day_of_week_num,
case date_part('dayofweek', pickup_datetime)
when 0 then 'Sunday'
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
end as day_of_week,
count(*) as num_trips
from trips
left join zones on trips.pickup_zone_id = zones.zone_id
where pickup_datetime >= '{config.start_date}'
and pickup_datetime < '{config.end_date}'
and pickup_zone_id in (
select zone_id
from zones
where borough = '{config.borough}'
)
group by 1, 2
order by 1, 2 asc
"""

with database.get_connection() as conn:
results = conn.execute(query).fetch_df()

fig = px.bar(
results,
x="hour_of_day",
y="num_trips",
Expand All @@ -83,10 +83,10 @@ In Lesson 9, you created the `adhoc_request` asset. During materialization, the
"hour_of_day": "Hour of Day",
"day_of_week": "Day of Week",
"num_trips": "Number of Trips"
}
)
}
)

pio.write_image(fig, file_path)
pio.write_image(fig, file_path)
```

3. Add the `base64` and `MaterializeResult` imports to the top of the file:
Expand All @@ -100,7 +100,7 @@ In Lesson 9, you created the `adhoc_request` asset. During materialization, the

```python
with open(file_path, 'rb') as file:
image_data = file.read()
image_data = file.read()
```

5. Next, we’ll base64-encode the chart image so that it can be embedded in Markdown. After the `image_data` line, add the following code:
Expand Down Expand Up @@ -142,53 +142,53 @@ import base64
from . import constants

class AdhocRequestConfig(Config):
filename: str
borough: str
start_date: str
end_date: str
filename: str
borough: str
start_date: str
end_date: str

@asset
def adhoc_request(config: AdhocRequestConfig, taxi_zones, taxi_trips, database: DuckDBResource):
"""
The response to an request made in the `requests` directory.
See `requests/README.md` for more information.
"""

# strip the file extension from the filename, and use it as the output filename
file_path = constants.REQUEST_DESTINATION_TEMPLATE_FILE_PATH.format(config.filename.split('.')[0])

# count the number of trips that picked up in a given borough, aggregated by time of day and hour of day
query = f"""
select
date_part('hour', pickup_datetime) as hour_of_day,
date_part('dayofweek', pickup_datetime) as day_of_week_num,
case date_part('dayofweek', pickup_datetime)
when 0 then 'Sunday'
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
end as day_of_week,
count(*) as num_trips
from trips
left join zones on trips.pickup_zone_id = zones.zone_id
where pickup_datetime >= '{config.start_date}'
and pickup_datetime < '{config.end_date}'
and pickup_zone_id in (
select zone_id
from zones
where borough = '{config.borough}'
)
group by 1, 2
order by 1, 2 asc
"""

with database.get_connection() as conn:
results = conn.execute(query).fetch_df()

fig = px.bar(
"""
The response to an request made in the `requests` directory.
See `requests/README.md` for more information.
"""

# strip the file extension from the filename, and use it as the output filename
file_path = constants.REQUEST_DESTINATION_TEMPLATE_FILE_PATH.format(config.filename.split('.')[0])

# count the number of trips that picked up in a given borough, aggregated by time of day and hour of day
query = f"""
select
date_part('hour', pickup_datetime) as hour_of_day,
date_part('dayofweek', pickup_datetime) as day_of_week_num,
case date_part('dayofweek', pickup_datetime)
when 0 then 'Sunday'
when 1 then 'Monday'
when 2 then 'Tuesday'
when 3 then 'Wednesday'
when 4 then 'Thursday'
when 5 then 'Friday'
when 6 then 'Saturday'
end as day_of_week,
count(*) as num_trips
from trips
left join zones on trips.pickup_zone_id = zones.zone_id
where pickup_datetime >= '{config.start_date}'
and pickup_datetime < '{config.end_date}'
and pickup_zone_id in (
select zone_id
from zones
where borough = '{config.borough}'
)
group by 1, 2
order by 1, 2 asc
"""

with database.get_connection() as conn:
results = conn.execute(query).fetch_df()

fig = px.bar(
results,
x="hour_of_day",
y="num_trips",
Expand All @@ -199,22 +199,22 @@ def adhoc_request(config: AdhocRequestConfig, taxi_zones, taxi_trips, database:
"hour_of_day": "Hour of Day",
"day_of_week": "Day of Week",
"num_trips": "Number of Trips"
}
)
}
)

pio.write_image(fig, file_path)
pio.write_image(fig, file_path)

with open(file_path, 'rb') as file:
image_data = file.read()
with open(file_path, 'rb') as file:
image_data = file.read()

base64_data = base64.b64encode(image_data).decode('utf-8')
md_content = f"![Image](data:image/jpeg;base64,{base64_data})"
base64_data = base64.b64encode(image_data).decode('utf-8')
md_content = f"![Image](data:image/jpeg;base64,{base64_data})"

return MaterializeResult(
metadata={
"preview": MetadataValue.md(md_content)
}
)
return MaterializeResult(
metadata={
"preview": MetadataValue.md(md_content)
}
)
```

---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ For the assets in the `raw_files` and `ingested` groups, your assets should look

```python {% obfuscated="true" %}
@asset(
group_name="GROUP_NAME"
group_name="GROUP_NAME"
)
def name_of_asset():
```
Expand All @@ -36,8 +36,8 @@ For the `adhoc_request` asset, your code should look like this:

```python {% obfuscated="true" %}
request_assets = load_assets_from_modules(
modules=[requests],
group_name="requests",
modules=[requests],
group_name="requests",
)
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@ from dagster import MaterializeResult
group_name="raw_files",
)
def taxi_zones_file():
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
num_rows = MetadataValue.int(len(pd.read_csv(constants.TAXI_ZONES_FILE_PATH)))

return MaterializeResult(
metadata={
'Number of records': MetadataValue.int(num_rows)
}
)
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
num_rows = MetadataValue.int(len(pd.read_csv(constants.TAXI_ZONES_FILE_PATH)))

return MaterializeResult(
metadata={
'Number of records': MetadataValue.int(num_rows)
}
)
```
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ from dagster import asset

@asset
def taxi_zones_file():
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
```

---
Expand All @@ -47,18 +47,18 @@ For example:
from dagster import asset

@asset(
description="The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal."
description="The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal."
)
def taxi_zones_file():
"""
This will not show in the Dagster UI
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
"""
This will not show in the Dagster UI
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
```

---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ from dagster import load_assets_from_modules
from .assets import metrics

metric_assets = load_assets_from_modules(
modules=[metrics],
group_name="metrics",
modules=[metrics],
group_name="metrics",
)
```

Expand All @@ -54,18 +54,18 @@ You can also specify groups on individual assets by using the `group_name` param
from dagster import asset

@asset(
group_name="raw_files",
group_name="raw_files",
)
def taxi_zones_file():
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
"""
The raw CSV file for the taxi zones dataset. Sourced from the NYC Open Data portal.
"""
raw_taxi_zones = requests.get(
"https://data.cityofnewyork.us/api/views/755u-8jsi/rows.csv?accessType=DOWNLOAD"
)

with open(constants.TAXI_ZONES_FILE_PATH, "wb") as output_file:
output_file.write(raw_taxi_zones.content)
```

In this example, the `taxi_zones_file` asset is grouped into the `raw_files` asset group.
Expand Down
Loading

0 comments on commit 23e8203

Please sign in to comment.