Patch 3 #17

Open · wants to merge 2 commits into master
129 changes: 129 additions & 0 deletions updated_code.ipynb
@@ -0,0 +1,129 @@
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

run = exp.start_logging()


from azureml.core.compute import ComputeTarget, AmlCompute

cluster_name = "my-compute-cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True)
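# Note (not part of the original diff): a common defensive variant is to reuse the
# cluster when a target with this name already exists, so re-running the cell does not
# fail. A minimal sketch, assuming the same workspace and cluster name as above:
from azureml.core.compute_target import ComputeTargetException

try:
    # Attach to an existing cluster with this name if one is already provisioned
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing compute target; reusing it.")
except ComputeTargetException:
    # Otherwise provision a new one with the configuration above
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)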


from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling({
    '--C': uniform(0.01, 1.0),
    '--max_iter': choice(50, 100, 150)
})

# Specify a Policy
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2, delay_evaluation=5)

if "training" not in os.listdir():
os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')
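# Note (not part of the original diff): conda_dependencies.yml is assumed to sit next to
# this notebook. If it were missing, an equivalent environment could be built in code with
# the SDK's CondaDependencies helper; the package list below is an illustrative assumption
# for a scikit-learn training script, not the project's actual specification.
from azureml.core.conda_dependencies import CondaDependencies

fallback_env = Environment(name='sklearn-env')
fallback_deps = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'numpy'],  # assumed training dependencies
    pip_packages=['azureml-defaults', 'joblib'])         # run logging and model serialization
fallback_env.python.conda_dependencies = fallback_deps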


# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=sklearn_env)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=10,
                                     max_concurrent_runs=4)


# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()


hyperdrive_run.wait_for_completion(show_output=True)


import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
best_run = hyperdrive_run.get_best_run_by_primary_metric()
# model_path assumes train.py saves the fitted model to outputs/model.joblib
best_model = best_run.register_model(model_name='best_logistic_regression_model',
                                     model_path='outputs/model.joblib')
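# Note (not part of the original diff): it can help to log which hyperparameters and
# metric value produced the best run. A small sketch using standard Run APIs; the
# 'runDefinition'/'arguments' key path is assumed from the SDK v1 script-run details format.
print("Best run id:", best_run.id)
print("Best run metrics:", best_run.get_metrics())
print("Best run arguments:", best_run.get_details()['runDefinition']['arguments'])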



from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at:
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
data_path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(path=data_path)
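# Note (not part of the original diff): a quick sanity check that the dataset loaded as
# expected; take(5) pulls only the first five rows before converting to pandas.
print(ds.take(5).to_pandas_dataframe())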



from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
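# Note (not part of the original diff): x and y are not used below, since AutoML is given
# the raw dataset together with label_column_name='y'. If you instead wanted AutoML to
# train on the cleaned data, one option, assuming clean_data returns a pandas DataFrame
# and Series, is to recombine them into a single frame (the variable name is hypothetical):
import pandas as pd

cleaned_df = pd.concat([x, y.rename('y')], axis=1)
# cleaned_df could then be passed as training_data, keeping label_column_name='y'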


from azureml.train.automl import AutoMLConfig





# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5)


# Submit your automl run

### YOUR CODE HERE ###
automl_run = exp.submit(automl_config, show_output=True)
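# Note (not part of the original diff): a typical follow-up is to retrieve and save the
# best AutoML model once the parent run finishes. A sketch using the SDK v1 AutoMLRun API;
# the output file name is an assumption.
automl_run.wait_for_completion(show_output=True)

# get_output() returns the best child run and the corresponding fitted pipeline
best_automl_run, fitted_model = automl_run.get_output()
print("Best AutoML run:", best_automl_run.id)

os.makedirs('outputs', exist_ok=True)
joblib.dump(fitted_model, 'outputs/best_automl_model.joblib')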