Patch 3 #17

Open · wants to merge 2 commits into master
129 changes: 129 additions & 0 deletions updated_code.ipynb
@@ -0,0 +1,129 @@
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group, sep='\n')

run = exp.start_logging()


from azureml.core.compute import ComputeTarget, AmlCompute

cluster_name = "my-compute-cluster"

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2_V2", max_nodes=4)
compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
compute_target.wait_for_completion(show_output=True)
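# Note (not part of the original diff): a common defensive variant is to reuse the
# cluster when a target with this name already exists, so re-running the cell does not
# fail. A minimal sketch, assuming the same workspace and cluster name as above:
from azureml.core.compute_target import ComputeTargetException

try:
    # Attach to an existing cluster with this name if one is already provisioned
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing compute target; reusing it.")
except ComputeTargetException:
    # Otherwise provision a new one with the configuration above
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)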


from azureml.widgets import RunDetails
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Specify parameter sampler
ps = RandomParameterSampling({
    '--C': uniform(0.01, 1.0),
    '--max_iter': choice(50, 100, 150)
})

# Specify a Policy
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2, delay_evaluation=5)

if "training" not in os.listdir():
os.mkdir("./training")

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')
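# Note (not part of the original diff): conda_dependencies.yml is assumed to sit next to
# this notebook. If it were missing, an equivalent environment could be built in code with
# the SDK's CondaDependencies helper; the package list below is an illustrative assumption
# for a scikit-learn training script, not the project's actual specification.
from azureml.core.conda_dependencies import CondaDependencies

fallback_env = Environment(name='sklearn-env')
fallback_deps = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'numpy'],  # assumed training dependencies
    pip_packages=['azureml-defaults', 'joblib'])         # run logging and model serialization
fallback_env.python.conda_dependencies = fallback_deps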


# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=sklearn_env)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=10,
                                     max_concurrent_runs=4)


# Submit your hyperdrive run to the experiment and show run details with the widget.

### YOUR CODE HERE ###
hyperdrive_run = exp.submit(hyperdrive_config)
RunDetails(hyperdrive_run).show()


hyperdrive_run.wait_for_completion(show_output=True)


import joblib
# Get your best run and save the model from that run.

### YOUR CODE HERE ###
best_run = hyperdrive_run.get_best_run_by_primary_metric()
# model_path assumes train.py saves the fitted model to outputs/model.joblib
best_model = best_run.register_model(model_name='best_logistic_regression_model',
                                     model_path='outputs/model.joblib')
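# Note (not part of the original diff): it can help to log which hyperparameters and
# metric value produced the best run. A small sketch using standard Run APIs; the
# 'runDefinition'/'arguments' key path is assumed from the SDK v1 script-run details format.
print("Best run id:", best_run.id)
print("Best run metrics:", best_run.get_metrics())
print("Best run arguments:", best_run.get_details()['runDefinition']['arguments'])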



from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at:
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

### YOUR CODE HERE ###
data_path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(path=data_path)
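# Note (not part of the original diff): a quick sanity check that the dataset loaded as
# expected; take(5) pulls only the first five rows before converting to pandas.
print(ds.take(5).to_pandas_dataframe())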



from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)
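# Note (not part of the original diff): x and y are not used below, since AutoML is given
# the raw dataset together with label_column_name='y'. If you instead wanted AutoML to
# train on the cleaned data, one option, assuming clean_data returns a pandas DataFrame
# and Series, is to recombine them into a single frame (the variable name is hypothetical):
import pandas as pd

cleaned_df = pd.concat([x, y.rename('y')], axis=1)
# cleaned_df could then be passed as training_data, keeping label_column_name='y'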


from azureml.train.automl import AutoMLConfig





# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    n_cross_validations=5)


# Submit your automl run

### YOUR CODE HERE ###
automl_run = exp.submit(automl_config, show_output=True)
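# Note (not part of the original diff): a typical follow-up is to retrieve and save the
# best AutoML model once the parent run finishes. A sketch using the SDK v1 AutoMLRun API;
# the output file name is an assumption.
automl_run.wait_for_completion(show_output=True)

# get_output() returns the best child run and the corresponding fitted pipeline
best_automl_run, fitted_model = automl_run.get_output()
print("Best AutoML run:", best_automl_run.id)

os.makedirs('outputs', exist_ok=True)
joblib.dump(fitted_model, 'outputs/best_automl_model.joblib')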