diff --git a/check-copy-progess.sh b/check-copy-progess.sh index 16d23bc..b73fc67 100755 --- a/check-copy-progess.sh +++ b/check-copy-progess.sh @@ -2,4 +2,4 @@ # Usage # check-copy-progress -pssh -i -x "-i $1" -l root -h $2 '~/ephemeral-hdfs/bin/hadoop fs -dus /' +parallel-ssh -i -x "-i $1" -l root -h $2 '~/ephemeral-hdfs/bin/hadoop fs -dus /' diff --git a/launch_ampcamp3_cluster.py b/launch_ampcamp3_cluster.py index d52db9f..d8c1cb1 100755 --- a/launch_ampcamp3_cluster.py +++ b/launch_ampcamp3_cluster.py @@ -34,6 +34,8 @@ def parse_args(): parser.add_option("-t", "--instance-type", default="m1.xlarge", help="Type of instance to launch (default: m1.xlarge). " + "WARNING: must be 64-bit; small instances won't work") + parser.add_option("-r", "--region", default="us-east-1", + help="EC2 region zone to launch instances in") parser.add_option("-w", "--wait", type="int", default=120, help="Seconds to wait for nodes to start (default: 120)") @@ -54,7 +56,6 @@ def parse_args(): def main(): (opts, spark_script_path) = parse_args() - availability_zones = ["us-east-1b", "us-east-1d", "us-east-1a"] subprocesses = [] cluster_names = [] @@ -79,9 +80,8 @@ def main(): args.append('-w') args.append(str(opts.wait)) -# NOTE(shivaram): Don't pass availability zone as EC2 will pick one on its own - args.append('-z') - args.append(availability_zones[cluster % len(availability_zones)]) + args.append('-r') + args.append(opts.region) if opts.copy: args.append('--copy') diff --git a/spark_ec2.py b/spark_ec2.py index 643410f..6439b73 100755 --- a/spark_ec2.py +++ b/spark_ec2.py @@ -414,7 +414,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, zoo_nodes, opts, deploy_ssh_k # NOTE: We should clone the repository before running deploy_files to # prevent ec2-variables.sh from being overwritten ssh(master, opts, - "rm -rf spark-ec2 && git clone -b ampcamp3 https://github.com/mesos/spark-ec2.git") + "rm -rf spark-ec2 && git clone -b sparksummit1 https://github.com/mesos/spark-ec2.git") print "Deploying files to master..." deploy_files(conn, "deploy.generic", opts, master_nodes, slave_nodes,