diff --git a/examples/project_atproto_dashboard/.env.example b/examples/project_atproto_dashboard/.env.example new file mode 100644 index 0000000000000..4ea1e239f6bb6 --- /dev/null +++ b/examples/project_atproto_dashboard/.env.example @@ -0,0 +1,17 @@ +AWS_ENDPOINT_URL= +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_BUCKET_NAME= +AWS_ACCOUNT_ID= + +MOTHERDUCK_TOKEN= + +BSKY_LOGIN= +BSKY_APP_PASSWORD= + +DBT_TARGET= + +AZURE_POWERBI_CLIENT_ID= +AZURE_POWERBI_CLIENT_SECRET= +AZURE_POWERBI_TENANT_ID= +AZURE_POWERBI_WORKSPACE_ID= diff --git a/examples/project_atproto_dashboard/.gitignore b/examples/project_atproto_dashboard/.gitignore new file mode 100644 index 0000000000000..ace8bc76e6a41 --- /dev/null +++ b/examples/project_atproto_dashboard/.gitignore @@ -0,0 +1,5 @@ +tmp*/ +storage/ +schedules/ +history/ +atproto-session.txt diff --git a/examples/project_atproto_dashboard/README.md b/examples/project_atproto_dashboard/README.md new file mode 100644 index 0000000000000..4c15d93cc89cf --- /dev/null +++ b/examples/project_atproto_dashboard/README.md @@ -0,0 +1,52 @@ +# project_atproto_dashboard + +An end-to-end demonstration of ingestion data from the ATProto API, modeling it with dbt, and presenting it with Power BI. + +![Architecture Diagram](./architecture-diagram.png) + +![Project asset lineage](./lineage.svg) + +## Features used + +1. Ingestion of data-related Bluesky posts + - Dynamic partitions + - Declarative automation + - Concurrency limits +2. Modelling data using _dbt_ +3. Representing data in a dashboard + +## Getting started + +### Environment Setup + +Ensure the following environments have been populated in your `.env` file. Start by copying the +template. + +``` +cp .env.example .env +``` + +And then populate the fields. + +### Development + +Install the project dependencies: + + pip install -e ".[dev]" + +Start Dagster: + + DAGSTER_HOME=$(pwd) dagster dev + +### Unit testing + +Tests are in the `project_atproto_dashboard_tests` directory and you can run tests using `pytest`: + + pytest project_atproto_dashboard_tests + +## Resources + +- https://docs.bsky.app/docs/tutorials/viewing-feeds +- https://docs.bsky.app/docs/advanced-guides/rate-limits +- https://atproto.blue/en/latest/atproto_client/auth.html#session-string +- https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying diff --git a/examples/project_atproto_dashboard/architecture-diagram.png b/examples/project_atproto_dashboard/architecture-diagram.png new file mode 100644 index 0000000000000..af16cc6c57430 Binary files /dev/null and b/examples/project_atproto_dashboard/architecture-diagram.png differ diff --git a/examples/project_atproto_dashboard/dagster.yaml b/examples/project_atproto_dashboard/dagster.yaml new file mode 100644 index 0000000000000..c9705420e83ca --- /dev/null +++ b/examples/project_atproto_dashboard/dagster.yaml @@ -0,0 +1,6 @@ +run_coordinator: + module: dagster.core.run_coordinator + class: QueuedRunCoordinator + +concurrency: + default_op_concurrency_limit: 1 diff --git a/examples/project_atproto_dashboard/dbt_project/.gitignore b/examples/project_atproto_dashboard/dbt_project/.gitignore new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/dbt_project/.sqlfluff b/examples/project_atproto_dashboard/dbt_project/.sqlfluff new file mode 100644 index 0000000000000..6fffb098b0115 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/.sqlfluff @@ -0,0 +1,2 @@ +[sqlfluff:rules:capitalisation.keywords] +capitalisation_policy = upper diff --git a/examples/project_atproto_dashboard/dbt_project/dbt_project.yml b/examples/project_atproto_dashboard/dbt_project/dbt_project.yml new file mode 100644 index 0000000000000..5dc13e8c3997a --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/dbt_project.yml @@ -0,0 +1,13 @@ +name: "dbt_project" +version: "1.0.0" +config-version: 2 + +profile: "bluesky" + +target-path: "target" +clean-targets: + - "target" + - "dbt_packages" + +models: + +materialized: table diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql new file mode 100644 index 0000000000000..794065c8723e7 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/activity_over_time.sql @@ -0,0 +1,14 @@ +WITH final AS ( + SELECT + date_trunc('day', created_at) AS post_date, + count(DISTINCT post_text) AS unique_posts, + count(DISTINCT author_handle) AS active_authors, + sum(likes) AS total_likes, + sum(replies) AS total_comments, + sum(quotes) AS total_quotes + FROM {{ ref("latest_feed") }} + GROUP BY date_trunc('day', created_at) + ORDER BY date_trunc('day', created_at) DESC +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql new file mode 100644 index 0000000000000..5f4e21734bff1 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/all_profiles.sql @@ -0,0 +1,105 @@ +WITH max_profile_data AS ( + SELECT + json_extract_string(json, '$.subject.did') AS profile_did, + max( + strptime( + regexp_extract( + filename, + 'dagster-demo/atproto_starter_pack_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + ) AS max_extracted_timestamp + FROM {{ ref("stg_profiles") }} + GROUP BY + json_extract_string(json, '$.subject.did') +), + +profiles AS ( + SELECT + json_extract_string(json, '$.subject.handle') AS handle_subject, + json_extract_string(json, '$.subject.did') AS profile_did, + json_extract_string(json, '$.subject.avatar') AS profile_avatar, + json_extract_string(json, '$.subject.display_name') + AS profile_display_name, + json_extract_string(json, '$.subject.created_at') + AS profile_created_date, + json_extract_string(json, '$.subject.description') + AS profile_description + FROM {{ ref("stg_profiles") }} stg_prof + JOIN max_profile_data + ON + json_extract_string(stg_prof.json, '$.subject.did') + = max_profile_data.profile_did + AND strptime( + regexp_extract( + stg_prof.filename, + 'dagster-demo/atproto_starter_pack_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + = max_profile_data.max_extracted_timestamp +), + +user_aggregates AS ( + SELECT + replace(author_handle, '"', '') AS author_handle, + count(*) AS num_posts, + avg(cast(lf.likes AS int)) AS average_likes, + sum(cast(lf.likes AS int)) AS total_likes, + sum(cast(lf.replies AS int)) AS total_replies, + sum(cast(lf.likes AS int)) / count(*) AS total_likes_by_num_of_posts, + round( + count(*) + / count(DISTINCT date_trunc('day', cast(created_at AS timestamp))), + 2 + ) AS avg_posts_per_day, + ntile(100) + OVER ( + ORDER BY sum(cast(lf.likes AS int)) + ) + AS likes_percentile, + ntile(100) + OVER ( + ORDER BY sum(cast(lf.replies AS int)) + ) + AS replies_percentile, + ntile(100) OVER ( + ORDER BY count(*) + ) AS posts_percentile, + (ntile(100) OVER ( + ORDER BY sum(cast(lf.likes AS int))) + ntile(100) OVER ( + ORDER BY sum(cast(lf.replies AS int))) + ntile(100) OVER ( + ORDER BY count(*) + )) + / 3.0 AS avg_score + FROM {{ ref("latest_feed") }} lf + GROUP BY replace(author_handle, '"', '') +), + +final AS ( + SELECT DISTINCT + profiles.handle_subject AS profile_handle, + profiles.profile_did, + profiles.profile_display_name, + profiles.profile_avatar, + profiles.profile_created_date, + profiles.profile_description, + user_aggregates.num_posts, + user_aggregates.average_likes, + user_aggregates.total_likes, + user_aggregates.total_replies, + user_aggregates.total_likes_by_num_of_posts, + user_aggregates.avg_posts_per_day, + user_aggregates.likes_percentile, + user_aggregates.replies_percentile, + user_aggregates.posts_percentile, + user_aggregates.avg_score + FROM profiles + LEFT JOIN user_aggregates + ON user_aggregates.author_handle = profiles.handle_subject +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql new file mode 100644 index 0000000000000..91f1ae0ea62e0 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/calendar.sql @@ -0,0 +1,45 @@ +WITH date_spine AS ( + SELECT CAST(range AS DATE) AS date_key + FROM RANGE( + (SELECT MIN(created_at) FROM {{ ref("latest_feed") }}), + CURRENT_DATE(), + INTERVAL 1 DAY + ) +) + +SELECT + date_key AS date_key, + DAYOFYEAR(date_key) AS day_of_year, + WEEKOFYEAR(date_key) AS week_of_year, + DAYOFWEEK(date_key) AS day_of_week, + ISODOW(date_key) AS iso_day_of_week, + DAYNAME(date_key) AS day_name, + DATE_TRUNC('week', date_key) AS first_day_of_week, + DATE_TRUNC('week', date_key) + 6 AS last_day_of_week, + YEAR(date_key) || RIGHT('0' || MONTH(date_key), 2) AS month_key, + MONTH(date_key) AS month_of_year, + DAYOFMONTH(date_key) AS day_of_month, + LEFT(MONTHNAME(date_key), 3) AS month_name_short, + MONTHNAME(date_key) AS month_name, + DATE_TRUNC('month', date_key) AS first_day_of_month, + LAST_DAY(date_key) AS last_day_of_month, + CAST(YEAR(date_key) || QUARTER(date_key) AS INT) AS quarter_key, + QUARTER(date_key) AS quarter_of_year, + CAST(date_key - DATE_TRUNC('Quarter', date_key) + 1 AS INT) + AS day_of_quarter, + ('Q' || QUARTER(date_key)) AS quarter_desc_short, + ('Quarter ' || QUARTER(date_key)) AS quarter_desc, + DATE_TRUNC('quarter', date_key) AS first_day_of_quarter, + LAST_DAY(DATE_TRUNC('quarter', date_key) + INTERVAL 2 MONTH) + AS last_day_of_quarter, + CAST(YEAR(date_key) AS INT) AS year_key, + DATE_TRUNC('Year', date_key) AS first_day_of_year, + DATE_TRUNC('Year', date_key) - 1 + INTERVAL 1 YEAR AS last_day_of_year, + ROW_NUMBER() + OVER ( + PARTITION BY YEAR(date_key), MONTH(date_key), DAYOFWEEK(date_key) + ORDER BY date_key + ) + AS ordinal_weekday_of_month +FROM date_spine +WHERE CAST(YEAR(date_key) AS INT) >= 2020 diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql new file mode 100644 index 0000000000000..09f0e23ab1855 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/latest_feed.sql @@ -0,0 +1,57 @@ +WITH max_update AS ( + SELECT + max( + strptime( + regexp_extract( + filename, + 'dagster-demo/atproto_actor_feed_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) + ) AS max_extracted_timestamp, + regexp_extract(filename, 'did:(.*?)\.json') AS profile_id + FROM {{ ref("stg_feed_snapshots") }} + GROUP BY + regexp_extract(filename, 'did:(.*?)\.json') +), + +final AS ( + SELECT + json_extract_string(sfs.json, '$.post.author.handle') AS author_handle, + json_extract_string(sfs.json, '$.post.author.did') AS author_id, + cast(sfs.json.post.like_count AS int) AS likes, + cast(sfs.json.post.quote_count AS int) AS quotes, + cast(sfs.json.post.reply_count AS int) AS replies, + json_extract_string(sfs.json, '$.post.record.text') AS post_text, + sfs.json.post.record.embed, + json_extract_string( + sfs.json, '$.post.record.embed.external.description' + ) AS external_embed_description, + json_extract_string(sfs.json, '$.post.record.embed.external.uri') + AS external_embed_link, + sfs.json.post.record.embed.external.thumb AS external_embed_thumbnail, + cast(sfs.json.post.record.created_at AS timestamp) AS created_at, + CASE + WHEN json_extract_string(sfs.json.post.record.embed, '$.images[0].image.ref.link') IS NULL THEN NULL + ELSE concat('https://cdn.bsky.app/img/feed_thumbnail/plain/', json_extract_string(sfs.json, '$.post.author.did') ,'/' ,json_extract_string(sfs.json.post.record.embed, '$.images[0].image.ref.link'), '@jpeg') + END AS image_url, + max_update.max_extracted_timestamp, + max_update.profile_id + FROM {{ ref("stg_feed_snapshots") }} sfs + JOIN max_update + ON + max_update.profile_id + = regexp_extract(sfs.filename, 'did:(.*?)\.json') + AND max_update.max_extracted_timestamp + = strptime( + regexp_extract( + sfs.filename, + 'dagster-demo/atproto_actor_feed_snapshot/(\d{4}-\d{2}-\d{2}/\d{2}/\d{2})', + 1 + ), + '%Y-%m-%d/%H/%M' + ) +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml b/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml new file mode 100644 index 0000000000000..404d89541a5db --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/schema.yml @@ -0,0 +1,18 @@ +version: 2 + +models: + - name: all_profiles + description: "table showing data for all the profiles posts are collected from and some high level statistics" + columns: + - name: profile_handle + data_tests: + - unique + - not_null + - name: latest_feed + description: "the latest feed of posts" + - name: activity_over_time + description: "daily activity of posts overtime" + - name: top_daily_posts + description: "top posts ranked for a given day" + - name: top_external_links + description: "top external content grouped by type shared in the community" diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql new file mode 100644 index 0000000000000..a53c8435f60c6 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_daily_posts.sql @@ -0,0 +1,46 @@ +WITH distinct_posts AS ( + SELECT DISTINCT ON (author_handle, post_text, date_trunc('day', created_at)) + author_handle, + post_text, + likes, + quotes, + replies, + image_url, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + created_at + FROM {{ ref("latest_feed") }} +), + +scored_posts AS ( + SELECT + *, + (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) AS engagement_score, + date_trunc('day', created_at) AS post_date, + row_number() OVER ( + PARTITION BY date_trunc('day', created_at) + ORDER BY (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) DESC + ) AS daily_rank + FROM distinct_posts +), + +final AS ( + SELECT + post_date, + author_handle, + post_text, + likes, + quotes, + replies, + image_url, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + round(engagement_score, 2) AS engagement_score, + daily_rank + FROM scored_posts + WHERE daily_rank <= 10 +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql new file mode 100644 index 0000000000000..5e207b0d664be --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/analysis/top_external_links.sql @@ -0,0 +1,73 @@ +WITH distinct_posts AS ( + SELECT DISTINCT ON (author_handle, post_text, date_trunc('day', created_at)) + author_handle, + post_text, + likes, + quotes, + replies, + created_at, + image_url, + embed, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + CASE + WHEN external_embed_link LIKE '%youtu%' THEN 'YouTube' + WHEN external_embed_link LIKE '%docs%' THEN 'Docs' + WHEN external_embed_link LIKE '%github%' THEN 'GitHub' + WHEN external_embed_link LIKE '%substack%' THEN 'SubStack' + WHEN external_embed_link LIKE '%twitch%' THEN 'Twitch' + WHEN external_embed_link LIKE '%msnbc%' THEN 'News' + WHEN external_embed_link LIKE '%theguardian%' THEN 'News' + WHEN external_embed_link LIKE '%foreignpolicy%' THEN 'News' + WHEN external_embed_link LIKE '%nytimes%' THEN 'News' + WHEN external_embed_link LIKE '%wsj%' THEN 'News' + WHEN external_embed_link LIKE '%bloomberg%' THEN 'News' + WHEN external_embed_link LIKE '%theverge%' THEN 'News' + WHEN external_embed_link LIKE '%cnbc%' THEN 'News' + WHEN external_embed_link LIKE '%.ft.%' THEN 'News' + WHEN external_embed_link LIKE '%washingtonpost%' THEN 'News' + WHEN external_embed_link LIKE '%newrepublic%' THEN 'News' + WHEN external_embed_link LIKE '%huffpost%' THEN 'News' + WHEN external_embed_link LIKE '%wired%' THEN 'News' + WHEN external_embed_link LIKE '%medium%' THEN 'Medium' + WHEN external_embed_link LIKE '%reddit%' THEN 'Reddit' + WHEN external_embed_link LIKE '%/blog/%' THEN 'Blog' + ELSE 'Other' + END AS external_link_type + FROM {{ ref("latest_feed") }} + WHERE external_embed_link IS NOT null +), + +scored_posts AS ( + SELECT + *, + (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) AS engagement_score, + date_trunc('day', created_at) AS post_date, + row_number() OVER ( + PARTITION BY date_trunc('day', created_at), external_link_type + ORDER BY (likes * 0.2) + (quotes * 0.4) + (replies * 0.4) DESC + ) AS daily_rank + FROM distinct_posts +), + +final AS ( + SELECT + post_date, + author_handle, + post_text, + likes, + quotes, + replies, + round(engagement_score, 2) AS engagement_score, + daily_rank, + embed, + external_embed_link, + external_embed_thumbnail, + external_embed_description, + external_link_type + FROM scored_posts + WHERE daily_rank <= 10 +) + +SELECT * FROM final diff --git a/examples/project_atproto_dashboard/dbt_project/models/sources.yml b/examples/project_atproto_dashboard/dbt_project/models/sources.yml new file mode 100644 index 0000000000000..8b9e72a31b2bc --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/sources.yml @@ -0,0 +1,14 @@ +version: 2 + +sources: + - name: r2_bucket + tables: + - name: actor_feed_snapshot + description: "external r2 bucket with json files of actor feeds" + meta: + external_location: "read_ndjson_objects('r2://dagster-demo/atproto_actor_feed_snapshot/**/*.json', filename=true)" + - name: starter_pack_snapshot + description: "external r2 bucket with json files for feed snapshots" + meta: + external_location: "read_ndjson_objects('r2://dagster-demo/atproto_starter_pack_snapshot/**/*.json', filename=true)" + diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml b/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml new file mode 100644 index 0000000000000..61f4b3d774b18 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: stg_profiles + description: "raw data from r2 bucket" + - name: stg_feed_snapshots + description: "raw posts data from r2 bucket" \ No newline at end of file diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql new file mode 100644 index 0000000000000..92674ee054769 --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_feed_snapshots.sql @@ -0,0 +1,5 @@ +WITH raw AS ( + SELECT * FROM {{ source('r2_bucket', 'actor_feed_snapshot') }} +) + +SELECT * FROM raw diff --git a/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql new file mode 100644 index 0000000000000..7e4eeba113e2c --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/models/staging/stg_profiles.sql @@ -0,0 +1,5 @@ +WITH raw AS ( + SELECT * FROM {{ source('r2_bucket', 'starter_pack_snapshot') }} +) + +SELECT * FROM raw diff --git a/examples/project_atproto_dashboard/dbt_project/profiles.yml b/examples/project_atproto_dashboard/dbt_project/profiles.yml new file mode 100644 index 0000000000000..462a3a6a9e31b --- /dev/null +++ b/examples/project_atproto_dashboard/dbt_project/profiles.yml @@ -0,0 +1,27 @@ +bluesky: + target: prod + outputs: + dev: + type: duckdb + schema: bluesky_dev + path: "local.duckdb" + threads: 16 + extensions: + - httpfs + settings: + s3_region: "auto" + s3_access_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + s3_secret_access_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + s3_endpoint: "{{ env_var('AWS_ENDPOINT_URL') | replace('https://', '') }}" + prod: + type: duckdb + schema: bluesky + path: "md:prod_bluesky?MOTHERDUCK_TOKEN={{ env_var('MOTHERDUCK_TOKEN') }}" + threads: 16 + extensions: + - httpfs + settings: + s3_region: "auto" + s3_access_key_id: "{{ env_var('AWS_ACCESS_KEY_ID') }}" + s3_secret_access_key: "{{ env_var('AWS_SECRET_ACCESS_KEY') }}" + s3_endpoint: "{{ env_var('AWS_ENDPOINT_URL') | replace('https://', '') }}" diff --git a/examples/project_atproto_dashboard/lineage.svg b/examples/project_atproto_dashboard/lineage.svg new file mode 100644 index 0000000000000..578247eaff975 --- /dev/null +++ b/examples/project_atproto_dashboard/lineage.svg @@ -0,0 +1,3 @@ +
default
project_atproto_dashboard.definitions
staging
project_atproto_dashboard.definitions
analysis
project_atproto_dashboard.definitions
ingestion
project_atproto_dashboard.definitions
reporting
project_atproto_dashboard.definitions
Daily Rank
No description
Loading...
activity_over_time
daily activity of posts overtime
Never materialized
dbt
DuckDB
actor_feed_snapshot
Snapshot of full user feed written to S3 storage.
—
—
—
Loading...
Python
all_profiles
table showing data for all the profiles posts are collected from and some high level statistics
Never materialized
Checks
2
dbt
DuckDB
calendar
dbt model calendar
Never materialized
dbt
DuckDB
latest_feed
the latest feed of posts
Never materialized
dbt
DuckDB
powerbi_bluesky_report
No description
–
Power BI
Report
powerbi_bluesky_model
No description
–
Power BI
Semantic Model
starter_pack_snapshot
Snapshot of members in a Bluesky starter pack partitioned by starter pack ID and written to S3 storage.
—
—
—
Loading...
Python
stg_feed_snapshots
raw posts data from r2 bucket
Never materialized
dbt
DuckDB
stg_profiles
raw data from r2 bucket
Never materialized
dbt
DuckDB
top_daily_posts
top posts ranked for a given day
Never materialized
dbt
DuckDB
top_external_links
top external content grouped by type shared in the community
Never materialized
dbt
DuckDB
\ No newline at end of file diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/__init__.py @@ -0,0 +1 @@ + diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py new file mode 100644 index 0000000000000..dba89ead146d3 --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/dashboard/definitions.py @@ -0,0 +1,49 @@ +import dagster as dg +from dagster_powerbi import ( + DagsterPowerBITranslator, + PowerBIServicePrincipal, + PowerBIWorkspace, + load_powerbi_asset_specs, +) +from dagster_powerbi.translator import PowerBIContentData + +power_bi_workspace = PowerBIWorkspace( + credentials=PowerBIServicePrincipal( + client_id=dg.EnvVar("AZURE_POWERBI_CLIENT_ID"), + client_secret=dg.EnvVar("AZURE_POWERBI_CLIENT_SECRET"), + tenant_id=dg.EnvVar("AZURE_POWERBI_TENANT_ID"), + ), + workspace_id=dg.EnvVar("AZURE_POWERBI_WORKSPACE_ID"), +) + + +class CustomDagsterPowerBITranslator(DagsterPowerBITranslator): + def get_report_spec(self, data: PowerBIContentData) -> dg.AssetSpec: + return ( + super() + .get_report_spec(data) + .replace_attributes( + group_name="reporting", + ) + ) + + def get_semantic_model_spec(self, data: PowerBIContentData) -> dg.AssetSpec: + upsteam_table_deps = [ + dg.AssetKey(table.get("name")) for table in data.properties.get("tables", []) + ] + return ( + super() + .get_semantic_model_spec(data) + .replace_attributes( + group_name="reporting", + deps=upsteam_table_deps, + ) + ) + + +power_bi_specs = load_powerbi_asset_specs( + power_bi_workspace, + dagster_powerbi_translator=CustomDagsterPowerBITranslator, +) + +defs = dg.Definitions(assets=[*power_bi_specs], resources={"power_bi": power_bi_workspace}) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py new file mode 100644 index 0000000000000..f2a2b5ebd5f2e --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/definitions.py @@ -0,0 +1,9 @@ +import dagster as dg + +import project_atproto_dashboard.dashboard.definitions as dashboard_definitions +import project_atproto_dashboard.ingestion.definitions as ingestion_definitions +import project_atproto_dashboard.modeling.definitions as modeling_definitions + +defs = dg.Definitions.merge( + ingestion_definitions.defs, modeling_definitions.defs, dashboard_definitions.defs +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py new file mode 100644 index 0000000000000..41b2f2e969c6b --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/definitions.py @@ -0,0 +1,138 @@ +import os +from datetime import datetime + +import dagster as dg +from dagster_aws.s3 import S3Resource + +from project_atproto_dashboard.ingestion.resources import ATProtoResource +from project_atproto_dashboard.ingestion.utils.atproto import ( + get_all_feed_items, + get_all_starter_pack_members, +) + +AWS_BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME", "dagster-demo") + + +atproto_did_dynamic_partition = dg.DynamicPartitionsDefinition(name="atproto_did_dynamic_partition") + + +@dg.asset( + partitions_def=dg.StaticPartitionsDefinition( + partition_keys=[ + "at://did:plc:lc5jzrr425fyah724df3z5ik/app.bsky.graph.starterpack/3l7cddlz5ja24", # https://bsky.app/starter-pack/christiannolan.bsky.social/3l7cddlz5ja24 + ] + ), + automation_condition=dg.AutomationCondition.on_cron("0 0 * * *"), # Midnight + kinds={"python"}, + group_name="ingestion", +) +def starter_pack_snapshot( + context: dg.AssetExecutionContext, + atproto_resource: ATProtoResource, + s3_resource: S3Resource, +) -> dg.MaterializeResult: + """Snapshot of members in a Bluesky starter pack partitioned by starter pack ID and written to S3 storage. + + Args: + context (AssetExecutionContext) Dagster context + atproto_resource (ATProtoResource) Resource for interfacing with atmosphere protocol + s3_resource (S3Resource) Resource for uploading files to S3 storage + + """ + atproto_client = atproto_resource.get_client() + + starter_pack_uri = context.partition_key + + list_items = get_all_starter_pack_members(atproto_client, starter_pack_uri) + + _bytes = os.linesep.join([member.model_dump_json() for member in list_items]).encode("utf-8") + + datetime_now = datetime.now() + object_key = "/".join( + ( + "atproto_starter_pack_snapshot", + datetime_now.strftime("%Y-%m-%d"), + datetime_now.strftime("%H"), + datetime_now.strftime("%M"), + f"{starter_pack_uri}.json", + ) + ) + + s3_resource.get_client().put_object(Body=_bytes, Bucket=AWS_BUCKET_NAME, Key=object_key) + + context.instance.add_dynamic_partitions( + partitions_def_name="atproto_did_dynamic_partition", + partition_keys=[list_item_view.subject.did for list_item_view in list_items], + ) + + return dg.MaterializeResult( + metadata={ + "len_members": len(list_items), + "s3_object_key": object_key, + } + ) + + +@dg.asset( + partitions_def=atproto_did_dynamic_partition, + deps=[dg.AssetDep(starter_pack_snapshot, partition_mapping=dg.AllPartitionMapping())], + automation_condition=dg.AutomationCondition.eager(), + kinds={"python"}, + group_name="ingestion", + op_tags={"dagster/concurrency_key": "ingestion"}, +) +def actor_feed_snapshot( + context: dg.AssetExecutionContext, + atproto_resource: ATProtoResource, + s3_resource: S3Resource, +) -> dg.MaterializeResult: + """Snapshot of full user feed written to S3 storage.""" + client = atproto_resource.get_client() + actor_did = context.partition_key + + # NOTE: we may need to yield chunks to be more memory efficient + items = get_all_feed_items(client, actor_did) + + datetime_now = datetime.now() + + object_key = "/".join( + ( + "atproto_actor_feed_snapshot", + datetime_now.strftime("%Y-%m-%d"), + datetime_now.strftime("%H"), + datetime_now.strftime("%M"), + f"{actor_did}.json", + ) + ) + + _bytes = os.linesep.join([item.model_dump_json() for item in items]).encode("utf-8") + + s3_resource.get_client().put_object(Body=_bytes, Bucket=AWS_BUCKET_NAME, Key=object_key) + + return dg.MaterializeResult( + metadata={ + "len_feed_items": len(items), + "s3_object_key": object_key, + } + ) + + +atproto_resource = ATProtoResource( + login=dg.EnvVar("BSKY_LOGIN"), password=dg.EnvVar("BSKY_APP_PASSWORD") +) + +s3_resource = S3Resource( + endpoint_url=dg.EnvVar("AWS_ENDPOINT_URL"), + aws_access_key_id=dg.EnvVar("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=dg.EnvVar("AWS_SECRET_ACCESS_KEY"), + region_name="auto", +) + + +defs = dg.Definitions( + assets=[starter_pack_snapshot, actor_feed_snapshot], + resources={ + "atproto_resource": atproto_resource, + "s3_resource": s3_resource, + }, +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py new file mode 100644 index 0000000000000..38163e85896df --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/resources.py @@ -0,0 +1,29 @@ +import os + +import dagster as dg +from atproto import Client + + +class ATProtoResource(dg.ConfigurableResource): + login: str + password: str + session_cache_path: str = "atproto-session.txt" + + def _login(self, client): + """Create a re-usable session to be used across resource instances; we are rate limited to 30/5 minutes or 300/day session.""" + if os.path.exists(self.session_cache_path): + with open(self.session_cache_path, "r") as f: + session_string = f.read() + client.login(session_string=session_string) + else: + client.login(login=self.login, password=self.password) + session_string = client.export_session_string() + with open(self.session_cache_path, "w") as f: + f.write(session_string) + + def get_client( + self, + ) -> Client: + client = Client() + self._login(client) + return client diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py new file mode 100644 index 0000000000000..fe8fadb7e857a --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/ingestion/utils/atproto.py @@ -0,0 +1,59 @@ +from typing import TYPE_CHECKING, List, Optional + +from atproto import Client + +if TYPE_CHECKING: + from atproto_client import models + + +def get_all_feed_items(client: Client, actor: str) -> List["models.AppBskyFeedDefs.FeedViewPost"]: + """Retrieves all author feed items for a given `actor`. + + Args: + client (Client): AT Protocol client + actor (str): author identifier (did) + + Returns: + List['models.AppBskyFeedDefs.FeedViewPost'] list of feed + + """ + import math + + import tenacity + + @tenacity.retry( + stop=tenacity.stop_after_attempt(5), + wait=tenacity.wait_fixed(math.ceil(60 * 2.5)), + ) + def _get_feed_with_retries(client: Client, actor: str, cursor: Optional[str]): + return client.get_author_feed(actor=actor, cursor=cursor, limit=100) + + feed = [] + cursor = None + while True: + data = _get_feed_with_retries(client, actor, cursor) + feed.extend(data.feed) + cursor = data.cursor + if not cursor: + break + + return feed + + +def get_all_list_members(client: Client, list_uri: str): + cursor = None + members = [] + while True: + response = client.app.bsky.graph.get_list( + {"list": list_uri, "cursor": cursor, "limit": 100} + ) + members.extend(response.items) + if not response.cursor: + break + cursor = response.cursor + return members + + +def get_all_starter_pack_members(client: Client, starter_pack_uri: str): + response = client.app.bsky.graph.get_starter_pack({"starter_pack": starter_pack_uri}) + return get_all_list_members(client, response.starter_pack.list.uri) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py new file mode 100644 index 0000000000000..6e7acf2d55f78 --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard/modeling/definitions.py @@ -0,0 +1,45 @@ +import os +from pathlib import Path +from typing import Any, Mapping, Optional + +import dagster as dg +from dagster_dbt import DagsterDbtTranslator, DbtCliResource, DbtProject, dbt_assets + +dbt_project = DbtProject( + project_dir=Path(__file__).joinpath("..", "..", "..", "dbt_project").resolve(), + target=os.getenv("DBT_TARGET"), +) +dbt_project.prepare_if_dev() +dbt_resource = DbtCliResource(project_dir=dbt_project) + + +class CustomizedDagsterDbtTranslator(DagsterDbtTranslator): + def get_group_name(self, dbt_resource_props: Mapping[str, Any]) -> Optional[str]: + asset_path = dbt_resource_props["fqn"][1:-1] + if asset_path: + return "_".join(asset_path) + return "default" + + def get_asset_key(self, dbt_resource_props): + resource_type = dbt_resource_props["resource_type"] + name = dbt_resource_props["name"] + if resource_type == "source": + return dg.AssetKey(name) + else: + return super().get_asset_key(dbt_resource_props) + + +@dbt_assets( + manifest=dbt_project.manifest_path, + dagster_dbt_translator=CustomizedDagsterDbtTranslator(), +) +def dbt_bluesky(context: dg.AssetExecutionContext, dbt: DbtCliResource): + yield from (dbt.cli(["build"], context=context).stream().fetch_row_counts()) + + +defs = dg.Definitions( + assets=[dbt_bluesky], + resources={ + "dbt": dbt_resource, + }, +) diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/__init__.py @@ -0,0 +1 @@ + diff --git a/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/examples/project_atproto_dashboard/project_atproto_dashboard_tests/test_assets.py @@ -0,0 +1 @@ + diff --git a/examples/project_atproto_dashboard/pyproject.toml b/examples/project_atproto_dashboard/pyproject.toml new file mode 100644 index 0000000000000..068f8e743f8a5 --- /dev/null +++ b/examples/project_atproto_dashboard/pyproject.toml @@ -0,0 +1,34 @@ +[project] +name = "project_atproto_dashboard" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.9,<3.13" +dependencies = [ + "atproto", + "dagster", + "dagster-aws", + "dagster-dbt", + "dagster-duckdb", + "dagster-powerbi", + "dbt-duckdb", + "tenacity", +] + +[project.optional-dependencies] +dev = [ + "dagster-webserver", + "pytest", + "ruff", +] + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.dagster] +module_name = "project_atproto_dashboard.definitions" +project_name = "project_atproto_dashboard" + +[tool.setuptools.packages.find] +exclude=["project_atproto_dashboard_tests"] diff --git a/pyright/alt-1/requirements-pinned.txt b/pyright/alt-1/requirements-pinned.txt index 6865024a2edaf..9c55cd90ee281 100644 --- a/pyright/alt-1/requirements-pinned.txt +++ b/pyright/alt-1/requirements-pinned.txt @@ -4,7 +4,7 @@ aiofile==3.9.0 aiohappyeyeballs==2.4.4 aiohttp==3.11.10 aioitertools==0.12.0 -aiosignal==1.3.1 +aiosignal==1.3.2 alembic==1.14.0 annotated-types==0.7.0 antlr4-python3-runtime==4.13.2 @@ -18,7 +18,7 @@ asn1crypto==1.5.1 astroid==3.3.6 asttokens==3.0.0 async-lru==2.0.4 -attrs==24.2.0 +attrs==24.3.0 babel==2.16.0 backoff==2.2.1 backports-tarfile==1.2.0 @@ -27,11 +27,11 @@ bleach==6.2.0 boto3==1.35.36 boto3-stubs-lite==1.35.70 botocore==1.35.36 -botocore-stubs==1.35.78 +botocore-stubs==1.35.82 buildkite-test-collector==0.1.9 cachetools==5.5.0 caio==0.9.17 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 chardet==5.2.0 charset-normalizer==3.4.0 @@ -41,7 +41,7 @@ coloredlogs==14.0 comm==0.2.2 contourpy==1.3.1 coverage==7.6.9 -croniter==3.0.4 +croniter==5.0.1 cryptography==44.0.0 cycler==0.12.1 daff==1.3.46 @@ -73,7 +73,7 @@ dbt-duckdb==1.9.1 dbt-extractor==0.5.1 dbt-semantic-interfaces==0.5.1 dbt-snowflake==1.9.0 -debugpy==1.8.9 +debugpy==1.8.11 decopatch==1.4.10 decorator==5.1.1 deepdiff==7.0.1 @@ -95,8 +95,8 @@ frozenlist==1.5.0 fsspec==2024.3.0 gcsfs==0.8.0 google-api-core==2.24.0 -google-api-python-client==2.154.0 -google-auth==2.36.0 +google-api-python-client==2.155.0 +google-auth==2.37.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 google-cloud-bigquery==3.27.0 @@ -141,7 +141,7 @@ jsonschema==4.23.0 jsonschema-specifications==2024.10.1 jupyter-client==8.6.3 jupyter-core==5.7.2 -jupyter-events==0.10.0 +jupyter-events==0.11.0 jupyter-lsp==2.2.5 jupyter-server==2.14.2 jupyter-server-terminals==0.5.3 @@ -157,7 +157,7 @@ mako==1.3.8 markdown-it-py==3.0.0 markupsafe==3.0.2 mashumaro==3.14 -matplotlib==3.9.3 +matplotlib==3.10.0 matplotlib-inline==0.1.7 mccabe==0.7.0 mdurl==0.1.2 @@ -171,10 +171,10 @@ multimethod==1.12 mypy==1.13.0 mypy-boto3-ecs==1.35.77 mypy-boto3-emr==1.35.68 -mypy-boto3-emr-serverless==1.35.25 -mypy-boto3-glue==1.35.74 -mypy-boto3-logs==1.35.72 -mypy-boto3-s3==1.35.76.post1 +mypy-boto3-emr-serverless==1.35.79 +mypy-boto3-glue==1.35.80 +mypy-boto3-logs==1.35.81 +mypy-boto3-s3==1.35.81 mypy-extensions==1.0.0 mypy-protobuf==3.6.0 nbclient==0.10.1 @@ -231,14 +231,14 @@ pyproject-api==1.8.0 pyright==1.1.379 pyspark==3.5.3 pytest==8.3.4 -pytest-asyncio==0.24.0 +pytest-asyncio==0.25.0 pytest-cases==3.8.6 pytest-cov==5.0.0 pytest-mock==3.14.0 pytest-xdist==3.6.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-json-logger==2.0.7 +python-json-logger==3.2.1 python-slugify==8.0.4 pytimeparse==1.1.8 pytz==2024.2 @@ -275,7 +275,7 @@ sqlglot==26.0.0 sqlglotrs==0.3.0 sqlparse==0.5.3 stack-data==0.6.3 -starlette==0.41.3 +starlette==0.42.0 structlog==24.4.0 syrupy==4.8.0 tabulate==0.9.0 @@ -292,7 +292,7 @@ tqdm==4.67.1 traitlets==5.14.3 typeguard==4.4.1 typer==0.15.1 -types-awscrt==0.23.4 +types-awscrt==0.23.5 types-backports==0.1.3 types-certifi==2021.10.8.3 types-cffi==1.16.0.20240331 @@ -318,11 +318,11 @@ typing-extensions==4.12.2 typing-inspect==0.9.0 tzdata==2024.2 ujson==5.10.0 -universal-pathlib==0.2.5 +universal-pathlib==0.2.6 uri-template==1.3.0 uritemplate==4.1.1 urllib3==2.2.3 -uvicorn==0.32.1 +uvicorn==0.34.0 uvloop==0.21.0 virtualenv==20.28.0 watchdog==5.0.3 diff --git a/pyright/master/requirements-pinned.txt b/pyright/master/requirements-pinned.txt index 18552e740a2a3..f3383f88c3e96 100644 --- a/pyright/master/requirements-pinned.txt +++ b/pyright/master/requirements-pinned.txt @@ -5,7 +5,7 @@ aiohappyeyeballs==2.4.4 aiohttp==3.10.11 aiohttp-retry==2.8.3 aioresponses==0.7.7 -aiosignal==1.3.1 +aiosignal==1.3.2 alabaster==1.0.0 alembic==1.14.0 altair==4.2.2 @@ -36,7 +36,8 @@ asn1crypto==1.5.1 -e examples/assets_pandas_pyspark asttokens==3.0.0 async-lru==2.0.4 -attrs==24.2.0 +atproto==0.0.56 +attrs==24.3.0 autodocsumm==0.2.14 autoflake==2.3.1 -e python_modules/automation @@ -57,10 +58,10 @@ billiard==4.2.1 bleach==6.2.0 blinker==1.9.0 bokeh==3.6.2 -boto3==1.35.78 +boto3==1.35.82 boto3-stubs-lite==1.35.70 -botocore==1.35.78 -botocore-stubs==1.35.78 +botocore==1.35.82 +botocore-stubs==1.35.82 buildkite-test-collector==0.1.9 cachecontrol==0.14.1 cached-property==2.0.1 @@ -70,9 +71,9 @@ caio==0.9.17 callee==0.3.1 cattrs==23.1.2 celery==5.4.0 -certifi==2024.8.30 +certifi==2024.12.14 cffi==1.17.1 -cfn-lint==1.22.0 +cfn-lint==1.22.2 chardet==5.2.0 charset-normalizer==3.4.0 click==8.1.7 @@ -93,8 +94,8 @@ connexion==2.14.2 contourpy==1.3.1 coverage==7.6.9 cron-descriptor==1.4.5 -croniter==3.0.4 -cryptography==44.0.0 +croniter==5.0.1 +cryptography==43.0.3 cssselect==1.2.0 cssutils==2.11.1 cycler==0.12.1 @@ -121,6 +122,7 @@ dagster-contrib-modal==0.0.2 -e python_modules/libraries/dagster-deltalake -e python_modules/libraries/dagster-deltalake-pandas -e python_modules/libraries/dagster-deltalake-polars +-e python_modules/libraries/dagster-dg -e examples/experimental/dagster-dlift -e python_modules/libraries/dagster-docker -e python_modules/libraries/dagster-duckdb @@ -189,7 +191,7 @@ dbt-duckdb==1.9.1 -e examples/starlift-demo dbt-extractor==0.5.1 dbt-semantic-interfaces==0.5.1 -debugpy==1.8.9 +debugpy==1.8.11 decopatch==1.4.10 decorator==5.1.1 deepdiff==7.0.1 @@ -221,7 +223,7 @@ execnet==2.1.1 executing==2.1.0 expandvars==0.12.0 faiss-cpu==1.8.0 -fastapi==0.115.6 +fastapi==0.1.17 fastavro==1.9.7 fastjsonschema==2.21.1 -e examples/feature_graph_backed_assets @@ -250,8 +252,8 @@ gitdb==4.0.11 gitpython==3.1.43 giturlparse==0.12.0 google-api-core==2.24.0 -google-api-python-client==2.154.0 -google-auth==2.36.0 +google-api-python-client==2.155.0 +google-auth==2.37.0 google-auth-httplib2==0.2.0 google-auth-oauthlib==1.2.1 google-cloud-bigquery==3.27.0 @@ -281,7 +283,7 @@ html5lib==1.1 httpcore==1.0.7 httplib2==0.22.0 httptools==0.6.4 -httpx==0.28.1 +httpx==0.27.2 httpx-sse==0.4.0 humanfriendly==10.0 humanize==4.11.0 @@ -321,7 +323,7 @@ jsonschema-path==0.3.3 jsonschema-specifications==2023.12.1 jupyter-client==7.4.9 jupyter-core==5.7.2 -jupyter-events==0.10.0 +jupyter-events==0.11.0 jupyter-lsp==2.2.5 jupyter-server==2.14.2 jupyter-server-terminals==0.5.3 @@ -333,17 +335,18 @@ keyring==25.5.0 -e python_modules/libraries/dagster-airlift/kitchen-sink kiwisolver==1.4.7 kombu==5.4.2 -kopf==1.37.3 +kopf==1.37.4 kubernetes==31.0.0 kubernetes-asyncio==31.1.1 langchain==0.3.7 langchain-community==0.3.5 -langchain-core==0.3.24 +langchain-core==0.3.25 langchain-openai==0.2.5 -langchain-text-splitters==0.3.2 +langchain-text-splitters==0.3.3 langsmith==0.1.147 lazy-object-proxy==1.10.0 leather==0.4.0 +libipld==3.0.0 limits==3.14.1 linkify-it-py==2.0.3 lkml==1.3.6 @@ -361,7 +364,7 @@ marshmallow==3.23.1 marshmallow-oneofschema==3.1.1 marshmallow-sqlalchemy==0.26.1 mashumaro==3.15 -matplotlib==3.9.3 +matplotlib==3.10.0 matplotlib-inline==0.1.3 mbstrdecoder==1.1.3 mdit-py-plugins==0.4.2 @@ -370,7 +373,7 @@ minimal-snowplow-tracker==0.0.2 mistune==3.0.2 mixpanel==4.10.1 mlflow==1.27.0 -modal==0.67.46 +modal==0.68.26 more-itertools==10.5.0 morefs==0.2.2 moto==4.2.14 @@ -382,10 +385,10 @@ multidict==6.1.0 multimethod==1.12 mypy-boto3-ecs==1.35.77 mypy-boto3-emr==1.35.68 -mypy-boto3-emr-serverless==1.35.25 -mypy-boto3-glue==1.35.74 -mypy-boto3-logs==1.35.72 -mypy-boto3-s3==1.35.76.post1 +mypy-boto3-emr-serverless==1.35.79 +mypy-boto3-glue==1.35.80 +mypy-boto3-logs==1.35.81 +mypy-boto3-s3==1.35.81 mypy-extensions==1.0.0 mypy-protobuf==3.6.0 mysql-connector-python==9.1.0 @@ -395,7 +398,7 @@ nbconvert==7.16.4 nbformat==5.10.4 nest-asyncio==1.6.0 networkx==3.4.2 -nh3==0.2.19 +nh3==0.2.20 nodeenv==1.9.1 notebook==7.3.1 notebook-shim==0.2.4 @@ -406,7 +409,7 @@ objgraph==3.6.2 onnx==1.17.0 onnxconverter-common==1.13.0 onnxruntime==1.20.1 -openai==1.57.2 +openai==1.57.4 openapi-schema-validator==0.6.2 openapi-spec-validator==0.7.1 opentelemetry-api==1.29.0 @@ -451,6 +454,7 @@ portalocker==2.10.1 premailer==3.10.0 prison==0.2.1 progressbar2==4.5.0 +-e examples/project_atproto_dashboard -e examples/project_dagster_modal_pipes prometheus-client==0.21.1 prometheus-flask-exporter==0.23.1 @@ -471,7 +475,7 @@ pyasn1-modules==0.4.1 pycparser==2.22 pydantic==2.10.3 pydantic-core==2.27.1 -pydantic-settings==2.6.1 +pydantic-settings==2.7.0 pydata-google-auth==1.9.0 pyflakes==3.2.0 pygments==2.18.0 @@ -487,7 +491,7 @@ pysocks==1.7.1 pyspark==3.5.3 pytablereader==0.31.4 pytest==8.3.4 -pytest-asyncio==0.24.0 +pytest-asyncio==0.25.0 pytest-cases==3.8.6 pytest-cov==5.0.0 pytest-mock==3.14.0 @@ -497,9 +501,9 @@ python-dateutil==2.9.0.post0 python-dotenv==1.0.1 python-frontmatter==1.1.0 python-jose==3.3.0 -python-json-logger==2.0.7 +python-json-logger==3.2.1 python-liquid==1.12.1 -python-multipart==0.0.19 +python-multipart==0.0.20 python-nvd3==0.16.0 python-slugify==8.0.4 python-utils==3.9.1 @@ -529,7 +533,7 @@ rpds-py==0.22.3 rsa==4.9 ruamel-yaml==0.17.40 ruamel-yaml-clib==0.2.12 -ruff==0.8.2 +ruff==0.8.3 s3transfer==0.10.4 scikit-learn==1.6.0 scipy==1.14.1 @@ -550,8 +554,8 @@ six==1.17.0 skein==0.8.2 skl2onnx==1.17.0 slack-sdk==3.33.5 -sling==1.3.2 -sling-mac-arm64==1.3.2 +sling==1.3.3 +sling-mac-arm64==1.3.3 smmap==5.0.1 sniffio==1.3.1 snowballstemmer==2.2.0 @@ -580,10 +584,10 @@ sqlparse==0.5.3 sshpubkeys==3.3.1 sshtunnel==0.4.0 stack-data==0.6.3 -starlette==0.41.3 +starlette==0.42.0 structlog==24.4.0 sympy==1.13.1 -synchronicity==0.9.5 +synchronicity==0.9.6 syrupy==4.8.0 tableauserverclient==0.34 tabledata==1.3.3 @@ -611,12 +615,12 @@ trio==0.27.0 trio-websocket==0.11.1 -e examples/airlift-migration-tutorial -e examples/tutorial_notebook_assets -twilio==9.3.8 +twilio==9.4.1 twine==6.0.1 typeguard==4.4.1 typepy==1.3.2 typer==0.15.1 -types-awscrt==0.23.4 +types-awscrt==0.23.5 types-backports==0.1.3 types-certifi==2021.10.8.3 types-cffi==1.16.0.20240331 @@ -645,23 +649,23 @@ tzdata==2024.2 tzlocal==5.2 uc-micro-py==1.0.3 unicodecsv==0.14.1 -universal-pathlib==0.2.5 +universal-pathlib==0.2.6 uri-template==1.3.0 uritemplate==4.1.1 urllib3==2.2.3 -e examples/use_case_repository -uvicorn==0.32.1 +uvicorn==0.34.0 uvloop==0.21.0 vine==5.1.0 virtualenv==20.28.0 -wandb==0.19.0 +wandb==0.19.1 watchdog==5.0.3 watchfiles==1.0.3 wcwidth==0.2.13 webcolors==24.11.1 webencodings==0.5.1 websocket-client==1.8.0 -websockets==14.1 +websockets==13.1 werkzeug==2.2.3 wheel==0.45.1 widgetsnbextension==4.0.13 diff --git a/pyright/master/requirements.txt b/pyright/master/requirements.txt index 066776fc53a73..1d9b6d64ff895 100644 --- a/pyright/master/requirements.txt +++ b/pyright/master/requirements.txt @@ -142,4 +142,5 @@ types-sqlalchemy==1.4.53.34 -e python_modules/libraries/dagster-airlift/perf-harness -e examples/airlift-migration-tutorial -e examples/use_case_repository[dev] +-e examples/project_atproto_dashboard -e examples/project_dagster_modal_pipes