diff --git a/end_to_end/music_recommendation/01_data_exploration.ipynb b/end_to_end/music_recommendation/01_data_exploration.ipynb index 4257f40f0e..20cc4be251 100644 --- a/end_to_end/music_recommendation/01_data_exploration.ipynb +++ b/end_to_end/music_recommendation/01_data_exploration.ipynb @@ -119,7 +119,7 @@ "metadata": {}, "outputs": [], "source": [ - "new_data_paths = get_data([f\"{s3_bucket_music_data}/tracks.csv\", f\"{s3_bucket_music_data}/ratings.csv\"], bucket, sample_data=0.70)\n", + "new_data_paths = get_data(s3_client, [f\"{s3_bucket_music_data}/tracks.csv\", f\"{s3_bucket_music_data}/ratings.csv\"], bucket, prefix, sample_data=0.70)\n", "print(new_data_paths)" ] }, diff --git a/end_to_end/music_recommendation/03_train_deploy_debugger_explain_monitor_registry.ipynb b/end_to_end/music_recommendation/03_train_deploy_debugger_explain_monitor_registry.ipynb index d7390e33f3..7e65dc0a4f 100644 --- a/end_to_end/music_recommendation/03_train_deploy_debugger_explain_monitor_registry.ipynb +++ b/end_to_end/music_recommendation/03_train_deploy_debugger_explain_monitor_registry.ipynb @@ -525,40 +525,6 @@ "metadata": {}, "outputs": [], "source": [ - "# # random user ID. You can try any other ID\n", - "# sample_user_id = 11005" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# featurestore_runtime = boto_session.client(service_name='sagemaker-featurestore-runtime', region_name=region)\n", - "\n", - "# feature_store_session = sagemaker.Session(\n", - "# boto_session=boto_session,\n", - "# sagemaker_client=sagemaker_client,\n", - "# sagemaker_featurestore_runtime_client=featurestore_runtime\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# # pull the sample user's 5 star preferences record from the feature store\n", - "# fg_response = featurestore_runtime.get_record(\n", - "# FeatureGroupName='user-5star-track-features-music-rec', \n", - "# RecordIdentifierValueAsString=str(sample_user_id)\n", - "# )\n", - "\n", - "# record = fg_response['Record']\n", - "# df_user = pd.DataFrame(record).set_index('FeatureName')\n", - "# df_user.to_csv(\"./data/sample_user.csv\")\n", "df_user = pd.read_csv(\"./data/sample_user.csv\")\n", "df_user = df_user.set_index('FeatureName')" ] @@ -576,20 +542,6 @@ "metadata": {}, "outputs": [], "source": [ - "# # pull a sample of the tracks data (multiple records) from the feature store using athena query\n", - "# fg_name_tracks_obj = FeatureGroup(name='track-features-music-rec', sagemaker_session=feature_store_session)\n", - "# tracks_query = fg_name_tracks_obj.athena_query()\n", - "# tracks_table = tracks_query.table_name\n", - "\n", - "# # use escaped quotes aound table name since it contains '-' symbols\n", - "# query_string = (\"SELECT * FROM \\\"{}\\\" LIMIT 1000\".format(tracks_table))\n", - "# print(\"Running \" + query_string)\n", - "\n", - "# # run Athena query. The output is loaded to a Pandas dataframe.\n", - "# tracks_query.run(query_string=query_string, output_location=f\"s3://{bucket}/{prefix}/query_results/\")\n", - "# tracks_query.wait()\n", - "# df_tracks = tracks_query.as_dataframe()\n", - "# df_tracks.to_csv(\"./data/sample_tracks.csv\")\n", "df_tracks = pd.read_csv(\"./data/sample_tracks.csv\")" ] }, @@ -676,7 +628,8 @@ "metadata": {}, "outputs": [], "source": [ - "df_train = pd.read_csv(train_data_uri)\n", + "s3_client.download_file(bucket, f\"{prefix}/data/train/train_data.csv\", f\"train_data.csv\")\n", + "df_train = pd.read_csv(\"train_data.csv\")\n", "\n", "label = 'rating'" ] diff --git a/end_to_end/music_recommendation/end_to_end_pipeline.ipynb b/end_to_end/music_recommendation/end_to_end_pipeline.ipynb index 9961e1a5e8..f8e130a4b1 100644 --- a/end_to_end/music_recommendation/end_to_end_pipeline.ipynb +++ b/end_to_end/music_recommendation/end_to_end_pipeline.ipynb @@ -114,7 +114,10 @@ " sagemaker_client=sagemaker_boto_client)\n", "sagemaker_role = sagemaker.get_execution_role()\n", "\n", - "account_id = boto3.client('sts').get_caller_identity()[\"Account\"]" + "account_id = boto3.client('sts').get_caller_identity()[\"Account\"]\n", + "\n", + "bucket = sess.default_bucket()\n", + "prefix='music-recommendation'" ] }, {