From c7372850ad5d9499760fdd10645bfac40aaa6740 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Sat, 11 Jul 2015 11:03:36 -0700 Subject: [PATCH 01/36] add Spark to the cluter config --- .gitignore | 1 + manifests/master-single.pp | 1 + manifests/master.pp | 1 + modules/spark/manifests/init.pp | 33 +++++++++++++++++++++++ modules/spark/templates/spark-path.sh.erb | 2 ++ 5 files changed, 38 insertions(+) create mode 100644 modules/spark/manifests/init.pp create mode 100644 modules/spark/templates/spark-path.sh.erb diff --git a/.gitignore b/.gitignore index 0b67016..b0a1221 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ employees.tgz hadoop-*.tar.gz hadoop-*.tar.gz.mds hbase-*.tar.gz +spark-*.tgz diff --git a/manifests/master-single.pp b/manifests/master-single.pp index 38b17ef..872b801 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -6,5 +6,6 @@ } #include hbase +include spark include avahi include cascading diff --git a/manifests/master.pp b/manifests/master.pp index d45d0b5..2385fd8 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -1,5 +1,6 @@ include base include hadoop include hbase +include spark include avahi include cascading diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp new file mode 100644 index 0000000..6bf4022 --- /dev/null +++ b/modules/spark/manifests/init.pp @@ -0,0 +1,33 @@ +class spark { + $spark_version = "1.4.0" + $hadoop_version = "2.6" + $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_version}" + $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_version}.tgz" + + package { "scala" : + ensure => present, + require => Exec['apt-get update'] + } + + exec { "download_spark": + command => "/tmp/grrr spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -O /vagrant/$spark_tarball --read-timeout=5 --tries=0", + timeout => 1800, + path => $path, + creates => "/vagrant/$spark_tarball", + require => [ Package["scala"], Exec["download_grrr"]] + } + + exec { "unpack_spark" : + command => "tar xf /vagrant/${spark_tarball} -C /opt", + path => $path, + creates => "${spark_home}", + require => Exec["download_spark"] + } + + file { "/etc/profile.d/spark-path.sh": + content => template("spark/spark-path.sh.erb"), + owner => vagrant, + group => root, + } + +} diff --git a/modules/spark/templates/spark-path.sh.erb b/modules/spark/templates/spark-path.sh.erb new file mode 100644 index 0000000..d5e2fb2 --- /dev/null +++ b/modules/spark/templates/spark-path.sh.erb @@ -0,0 +1,2 @@ +export SPARK_HOME=<%=spark_home%> +export PATH=$SPARK_HOME/bin:$PATH From 2b11ac26f0cdefb012f663df035c7b92ba1898bf Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 21 Jul 2015 12:37:52 -0700 Subject: [PATCH 02/36] configuration to allow runing spark in standalone mode --- manifests/datanode.pp | 1 + modules/spark/manifests/init.pp | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/manifests/datanode.pp b/manifests/datanode.pp index 6fac66f..1351fe1 100644 --- a/manifests/datanode.pp +++ b/manifests/datanode.pp @@ -1,4 +1,5 @@ include base include hadoop include hbase +include spark include avahi diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp index 6bf4022..f2be6bc 100644 --- a/modules/spark/manifests/init.pp +++ b/modules/spark/manifests/init.pp @@ -30,4 +30,18 @@ group => root, } + # for spark standalone mode + file { "${spark_home}/logs": + ensure => "directory", + owner => "mapred", + group => "mapred", + mode => 755, + require => Exec["unpack_spark"] + } + exec { "spark_slaves" : + command => "ln -s /opt/hadoop-*/etc/hadoop/slaves ${spark_home}/conf/slaves", + path => $path, + creates => "${spark_home}/conf/slaves", + require => Exec["unpack_spark"] + } } From 688da4c90626a8dd79f7091276fd828598562c1f Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Wed, 22 Jul 2015 11:03:35 -0700 Subject: [PATCH 03/36] clean up README URLs --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e8ed93d..1b12687 100644 --- a/README.md +++ b/README.md @@ -102,9 +102,9 @@ start up a new cluster. You can access all services of the cluster with your web-browser. -* namenode: http://master.local:50070/dfshealth.jsp -* application master: http://master.local:8088/cluster -* job history server: http://master.local:19888/jobhistory +* namenode: http://master.local:50070/ +* application master: http://master.local:8088/ +* job history server: http://master.local:19888/ ### Command line From 96050fbf74986bb2b4f627cd68bb71adf289948e Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Mon, 27 Jul 2015 21:45:59 -0700 Subject: [PATCH 04/36] basics of hive install --- manifests/master.pp | 1 + modules/hive/manifests/init.pp | 27 +++++++++++++++++++++++++ modules/hive/templates/hive-path.sh.erb | 2 ++ 3 files changed, 30 insertions(+) create mode 100644 modules/hive/manifests/init.pp create mode 100644 modules/hive/templates/hive-path.sh.erb diff --git a/manifests/master.pp b/manifests/master.pp index 2385fd8..fa35b79 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -1,6 +1,7 @@ include base include hadoop include hbase +include hive include spark include avahi include cascading diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp new file mode 100644 index 0000000..ae3d1cb --- /dev/null +++ b/modules/hive/manifests/init.pp @@ -0,0 +1,27 @@ +class hive { + $hive_version = "1.1.1" + $hive_tarball = "apache-hive-${hive_version}-bin.tar.gz" + $hive_home = "/opt/apache-hive-${hive_version}-bin" + + exec { "download_hive": + command => "/tmp/grrr hive/hive-${hive_version}/apache-hive-${hive_version}-bin.tar.gz -O /vagrant/$hive_tarball --read-timeout=5 --tries=0", + timeout => 1800, + path => $path, + creates => "/vagrant/$hive_tarball", + require => [ Exec["download_grrr"]] + } + + exec { "unpack_hive" : + command => "tar xf /vagrant/${hive_tarball} -C /opt", + path => $path, + creates => "${hive_home}", + require => Exec["download_hive"] + } + + file { "/etc/profile.d/hive-path.sh": + content => template("hive/hive-path.sh.erb"), + owner => vagrant, + group => root, + } + +} diff --git a/modules/hive/templates/hive-path.sh.erb b/modules/hive/templates/hive-path.sh.erb new file mode 100644 index 0000000..7ea1e3b --- /dev/null +++ b/modules/hive/templates/hive-path.sh.erb @@ -0,0 +1,2 @@ +export HIVE_HOME=<%=hive_home%> +export HADOOP_USER_CLASSPATH_FIRST=true From ad76e726442b9d0b95050f6bed72a5d6f35de2be Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 01:28:00 -0700 Subject: [PATCH 05/36] upgrade openjdk from 6 to 7 (for hive) --- modules/base/manifests/init.pp | 2 +- modules/cascading/files/ccsdk.sh | 2 +- modules/cascading/manifests/init.pp | 2 +- modules/hadoop/files/hadoop-env.sh | 2 +- modules/hadoop/manifests/init.pp | 2 +- modules/hbase/files/hbase-env.sh | 2 +- modules/hbase/manifests/init.pp | 2 +- modules/hive/manifests/init.pp | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/base/manifests/init.pp b/modules/base/manifests/init.pp index 8bd6e6a..a32fb10 100644 --- a/modules/base/manifests/init.pp +++ b/modules/base/manifests/init.pp @@ -6,7 +6,7 @@ command => '/usr/bin/apt-get update', } - package { "openjdk-6-jdk" : + package { "openjdk-7-jdk" : ensure => present, require => Exec['apt-get update'] } diff --git a/modules/cascading/files/ccsdk.sh b/modules/cascading/files/ccsdk.sh index bb3f651..0cdbb81 100644 --- a/modules/cascading/files/ccsdk.sh +++ b/modules/cascading/files/ccsdk.sh @@ -1,4 +1,4 @@ -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 export CASCADING_SDK_HOME=/opt/CascadingSDK . $CASCADING_SDK_HOME/etc/setenv.sh diff --git a/modules/cascading/manifests/init.pp b/modules/cascading/manifests/init.pp index f9a7520..9d9ed9d 100644 --- a/modules/cascading/manifests/init.pp +++ b/modules/cascading/manifests/init.pp @@ -10,7 +10,7 @@ # S3 can be slow at times hence a longer timeout timeout => 1800, unless => "ls /opt | grep CascadingSDK", - require => Package["openjdk-6-jdk"] + require => Package["openjdk-7-jdk"] } exec { "unpack_sdk" : diff --git a/modules/hadoop/files/hadoop-env.sh b/modules/hadoop/files/hadoop-env.sh index 45469bb..4f63e8e 100644 --- a/modules/hadoop/files/hadoop-env.sh +++ b/modules/hadoop/files/hadoop-env.sh @@ -24,7 +24,7 @@ # remote nodes. # The java implementation to use. -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 # The jsvc implementation to use. Jsvc is required to run secure datanodes. #export JSVC_HOME=${JSVC_HOME} diff --git a/modules/hadoop/manifests/init.pp b/modules/hadoop/manifests/init.pp index afbb24a..199d83d 100644 --- a/modules/hadoop/manifests/init.pp +++ b/modules/hadoop/manifests/init.pp @@ -41,7 +41,7 @@ timeout => 1800, path => $path, creates => "/vagrant/$hadoop_tarball", - require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] + require => [ Package["openjdk-7-jdk"], Exec["download_grrr"]] } exec { "download_checksum": diff --git a/modules/hbase/files/hbase-env.sh b/modules/hbase/files/hbase-env.sh index 3d95499..e6a5c7d 100644 --- a/modules/hbase/files/hbase-env.sh +++ b/modules/hbase/files/hbase-env.sh @@ -27,7 +27,7 @@ # The java implementation to use. Java 1.6 required. # export JAVA_HOME=/usr/java/jdk1.6.0/ -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 # Extra Java CLASSPATH elements. Optional. # export HBASE_CLASSPATH= diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 447d2cc..707228f 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -13,7 +13,7 @@ timeout => 1800, path => $path, creates => "/vagrant/$hbase_tarball", - require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] + require => [ Package["openjdk-7-jdk"], Exec["download_grrr"]] } exec { "unpack_hbase" : diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp index ae3d1cb..7444716 100644 --- a/modules/hive/manifests/init.pp +++ b/modules/hive/manifests/init.pp @@ -1,5 +1,5 @@ class hive { - $hive_version = "1.1.1" + $hive_version = "1.2.1" $hive_tarball = "apache-hive-${hive_version}-bin.tar.gz" $hive_home = "/opt/apache-hive-${hive_version}-bin" @@ -15,7 +15,7 @@ command => "tar xf /vagrant/${hive_tarball} -C /opt", path => $path, creates => "${hive_home}", - require => Exec["download_hive"] + require => Exec["download_hive", "unpack_hadoop"] } file { "/etc/profile.d/hive-path.sh": From fe6f1bc0b41e219b53700749a813a3e3d399ac71 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 08:23:14 -0700 Subject: [PATCH 06/36] setup script for Hive --- .gitignore | 2 ++ modules/hive/files/prepare-hive.sh | 7 +++++++ modules/hive/manifests/init.pp | 9 +++++++++ 3 files changed, 18 insertions(+) create mode 100644 modules/hive/files/prepare-hive.sh diff --git a/.gitignore b/.gitignore index b0a1221..07e10ee 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ hadoop-*.tar.gz hadoop-*.tar.gz.mds hbase-*.tar.gz spark-*.tgz +apache-hive-*.tar.gz + diff --git a/modules/hive/files/prepare-hive.sh b/modules/hive/files/prepare-hive.sh new file mode 100644 index 0000000..18a35df --- /dev/null +++ b/modules/hive/files/prepare-hive.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +. /etc/profile + +export HDFS_USER=hdfs + +su - $HDFS_USER -c "$HADOOP_PREFIX/bin/hadoop fs -mkdir -p /tmp /user/hive/warehouse; $HADOOP_PREFIX/bin/hadoop fs -chmod g+w /tmp /user/hive/warehouse" diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp index 7444716..f50a173 100644 --- a/modules/hive/manifests/init.pp +++ b/modules/hive/manifests/init.pp @@ -23,5 +23,14 @@ owner => vagrant, group => root, } + + file { + "${hive_home}/bin/prepare-hive.sh": + source => "puppet:///modules/hive/prepare-hive.sh", + mode => 755, + owner => vagrant, + group => root, + require => Exec["unpack_hive"] + } } From f03dd7730c1c274d4e11a638c03c6976b3d06b85 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 11:46:23 -0700 Subject: [PATCH 07/36] decrease HDFS default replication so files not available on all nodes --- modules/hadoop/files/hdfs-site.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/hadoop/files/hdfs-site.xml b/modules/hadoop/files/hdfs-site.xml index 0b778a6..d91d530 100644 --- a/modules/hadoop/files/hdfs-site.xml +++ b/modules/hadoop/files/hdfs-site.xml @@ -3,7 +3,7 @@ dfs.replication - 3 + 2 The actual number of replications can be specified when the file is created. From 314dce4e18ef3fab02f96694cf291457409b50f2 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 18 Aug 2015 16:45:05 -0700 Subject: [PATCH 08/36] update tools to current versions: Hadoop 2.7.1 et al --- modules/hadoop/files/hdfs-site-single.xml | 6 +++--- modules/hadoop/files/hdfs-site.xml | 6 +++--- modules/hadoop/manifests/init.pp | 2 +- modules/hbase/manifests/init.pp | 7 +++---- modules/spark/manifests/init.pp | 13 +++++++------ 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/modules/hadoop/files/hdfs-site-single.xml b/modules/hadoop/files/hdfs-site-single.xml index bee05f5..fa6d672 100644 --- a/modules/hadoop/files/hdfs-site-single.xml +++ b/modules/hadoop/files/hdfs-site-single.xml @@ -7,7 +7,7 @@ The actual number of replications can be specified when the file is created. - dfs.permissions + dfs.permissions.enabled false If "true", enable permission checking in HDFS. @@ -18,11 +18,11 @@ - dfs.data.dir + dfs.datanode.data.dir /srv/hadoop/datanode - dfs.name.dir + dfs.namenode.name.dir /srv/hadoop/namenode diff --git a/modules/hadoop/files/hdfs-site.xml b/modules/hadoop/files/hdfs-site.xml index d91d530..5b41456 100644 --- a/modules/hadoop/files/hdfs-site.xml +++ b/modules/hadoop/files/hdfs-site.xml @@ -7,7 +7,7 @@ The actual number of replications can be specified when the file is created. - dfs.permissions + dfs.permissions.enabled false If "true", enable permission checking in HDFS. @@ -18,11 +18,11 @@ - dfs.data.dir + dfs.datanode.data.dir /srv/hadoop/datanode - dfs.name.dir + dfs.namenode.name.dir /srv/hadoop/namenode diff --git a/modules/hadoop/manifests/init.pp b/modules/hadoop/manifests/init.pp index 199d83d..3aa0dd4 100644 --- a/modules/hadoop/manifests/init.pp +++ b/modules/hadoop/manifests/init.pp @@ -1,6 +1,6 @@ class hadoop($slaves_file = undef, $hdfs_site_file = undef) { - $hadoop_version = "2.6.0" + $hadoop_version = "2.7.1" $hadoop_home = "/opt/hadoop-${hadoop_version}" $hadoop_tarball = "hadoop-${hadoop_version}.tar.gz" $hadoop_tarball_checksums = "${hadoop_tarball}.mds" diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 707228f..9d49590 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -1,8 +1,7 @@ class hbase { - $hbase_version = "0.98.13" - $hbase_platform = "hadoop2" - $hbase_home = "/opt/hbase-${hbase_version}-${hbase_platform}" - $hbase_tarball = "hbase-${hbase_version}-${hbase_platform}-bin.tar.gz" + $hbase_version = "1.1.1" + $hbase_home = "/opt/hbase-${hbase_version}" + $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" file { "/srv/zookeeper": ensure => "directory" diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp index f2be6bc..22f276c 100644 --- a/modules/spark/manifests/init.pp +++ b/modules/spark/manifests/init.pp @@ -1,8 +1,9 @@ class spark { - $spark_version = "1.4.0" - $hadoop_version = "2.6" - $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_version}" - $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_version}.tgz" + $spark_version = "1.4.1" + $hadoop_version = "2.7.1" # installed Hadoop version + $hadoop_spark = "2.6" # Hadoop version for spark compatibility + $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_spark}" + $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_spark}.tgz" package { "scala" : ensure => present, @@ -10,7 +11,7 @@ } exec { "download_spark": - command => "/tmp/grrr spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -O /vagrant/$spark_tarball --read-timeout=5 --tries=0", + command => "/tmp/grrr spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_spark}.tgz -O /vagrant/$spark_tarball --read-timeout=5 --tries=0", timeout => 1800, path => $path, creates => "/vagrant/$spark_tarball", @@ -39,7 +40,7 @@ require => Exec["unpack_spark"] } exec { "spark_slaves" : - command => "ln -s /opt/hadoop-*/etc/hadoop/slaves ${spark_home}/conf/slaves", + command => "ln -s /opt/hadoop-${hadoop_version}/etc/hadoop/slaves ${spark_home}/conf/slaves", path => $path, creates => "${spark_home}/conf/slaves", require => Exec["unpack_spark"] From bf6cc14090cb4e7d73be3e13c6b7994ef03bbe53 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 11:46:23 -0700 Subject: [PATCH 09/36] decrease HDFS default replication so files not available on all nodes --- modules/hadoop/files/hdfs-site.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/hadoop/files/hdfs-site.xml b/modules/hadoop/files/hdfs-site.xml index 0b778a6..d91d530 100644 --- a/modules/hadoop/files/hdfs-site.xml +++ b/modules/hadoop/files/hdfs-site.xml @@ -3,7 +3,7 @@ dfs.replication - 3 + 2 The actual number of replications can be specified when the file is created. From 8e8fab39e94077c5770d6946938061c09fca35e8 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 18 Aug 2015 16:45:05 -0700 Subject: [PATCH 10/36] update tools to current versions: Hadoop 2.7.1 et al --- modules/hadoop/files/hdfs-site-single.xml | 6 +++--- modules/hadoop/files/hdfs-site.xml | 6 +++--- modules/hadoop/manifests/init.pp | 2 +- modules/hbase/manifests/init.pp | 7 +++---- modules/spark/manifests/init.pp | 13 +++++++------ 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/modules/hadoop/files/hdfs-site-single.xml b/modules/hadoop/files/hdfs-site-single.xml index bee05f5..fa6d672 100644 --- a/modules/hadoop/files/hdfs-site-single.xml +++ b/modules/hadoop/files/hdfs-site-single.xml @@ -7,7 +7,7 @@ The actual number of replications can be specified when the file is created. - dfs.permissions + dfs.permissions.enabled false If "true", enable permission checking in HDFS. @@ -18,11 +18,11 @@ - dfs.data.dir + dfs.datanode.data.dir /srv/hadoop/datanode - dfs.name.dir + dfs.namenode.name.dir /srv/hadoop/namenode diff --git a/modules/hadoop/files/hdfs-site.xml b/modules/hadoop/files/hdfs-site.xml index d91d530..5b41456 100644 --- a/modules/hadoop/files/hdfs-site.xml +++ b/modules/hadoop/files/hdfs-site.xml @@ -7,7 +7,7 @@ The actual number of replications can be specified when the file is created. - dfs.permissions + dfs.permissions.enabled false If "true", enable permission checking in HDFS. @@ -18,11 +18,11 @@ - dfs.data.dir + dfs.datanode.data.dir /srv/hadoop/datanode - dfs.name.dir + dfs.namenode.name.dir /srv/hadoop/namenode diff --git a/modules/hadoop/manifests/init.pp b/modules/hadoop/manifests/init.pp index afbb24a..f6263bf 100644 --- a/modules/hadoop/manifests/init.pp +++ b/modules/hadoop/manifests/init.pp @@ -1,6 +1,6 @@ class hadoop($slaves_file = undef, $hdfs_site_file = undef) { - $hadoop_version = "2.6.0" + $hadoop_version = "2.7.1" $hadoop_home = "/opt/hadoop-${hadoop_version}" $hadoop_tarball = "hadoop-${hadoop_version}.tar.gz" $hadoop_tarball_checksums = "${hadoop_tarball}.mds" diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 447d2cc..7de1358 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -1,8 +1,7 @@ class hbase { - $hbase_version = "0.98.13" - $hbase_platform = "hadoop2" - $hbase_home = "/opt/hbase-${hbase_version}-${hbase_platform}" - $hbase_tarball = "hbase-${hbase_version}-${hbase_platform}-bin.tar.gz" + $hbase_version = "1.1.1" + $hbase_home = "/opt/hbase-${hbase_version}" + $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" file { "/srv/zookeeper": ensure => "directory" diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp index f2be6bc..22f276c 100644 --- a/modules/spark/manifests/init.pp +++ b/modules/spark/manifests/init.pp @@ -1,8 +1,9 @@ class spark { - $spark_version = "1.4.0" - $hadoop_version = "2.6" - $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_version}" - $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_version}.tgz" + $spark_version = "1.4.1" + $hadoop_version = "2.7.1" # installed Hadoop version + $hadoop_spark = "2.6" # Hadoop version for spark compatibility + $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_spark}" + $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_spark}.tgz" package { "scala" : ensure => present, @@ -10,7 +11,7 @@ } exec { "download_spark": - command => "/tmp/grrr spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -O /vagrant/$spark_tarball --read-timeout=5 --tries=0", + command => "/tmp/grrr spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_spark}.tgz -O /vagrant/$spark_tarball --read-timeout=5 --tries=0", timeout => 1800, path => $path, creates => "/vagrant/$spark_tarball", @@ -39,7 +40,7 @@ require => Exec["unpack_spark"] } exec { "spark_slaves" : - command => "ln -s /opt/hadoop-*/etc/hadoop/slaves ${spark_home}/conf/slaves", + command => "ln -s /opt/hadoop-${hadoop_version}/etc/hadoop/slaves ${spark_home}/conf/slaves", path => $path, creates => "${spark_home}/conf/slaves", require => Exec["unpack_spark"] From 9f649bd2baf5e21d698dd9b90ca6716ce7060ddb Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 18 Aug 2015 16:49:49 -0700 Subject: [PATCH 11/36] update README for version changes --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b12687..bfe924c 100644 --- a/README.md +++ b/README.md @@ -36,9 +36,9 @@ This will set up 4 machines - `master`, `hadoop1`, `hadoop2` and `hadoop3`. Each RAM. If this is too much for your machine, adjust the `Vagrantfile`. The machines will be provisioned using [Puppet](http://puppetlabs.com/). All of them will have hadoop -(apache-hadoop-2.6.0) installed, ssh will be configured and local name resolution also works. +(apache-hadoop-2.7.1) installed, ssh will be configured and local name resolution also works. -Hadoop is installed in `/opt/hadoop-2.6.0` and all tools are in the `PATH`. +Hadoop is installed in `/opt/hadoop-2.7.1` and all tools are in the `PATH`. The `master` machine acts as the namenode and the yarn resource manager, the 3 others are data nodes and run node managers. @@ -58,7 +58,7 @@ is required. ### Starting the cluster -This cluster uses the `ssh-into-all-the-boxes-and-start-things-up`-approach, which is fine for testing. +This cluster uses the `ssh`-into-all-the-boxes-and-start-things-up-approach, which is fine for testing. Once all machines are up and provisioned, the cluster can be started. Log into the master, format hdfs and start the cluster. @@ -131,7 +131,7 @@ the `PATH`. The SDK itself can be found in `/opt/CascadingSDK`. ### Driven -The SDK allows you to install the [Driven plugin for Cascading]((http://cascading.io/driven) , by simply running +The SDK allows you to install the [Driven plugin for Cascading](http://cascading.io/driven) , by simply running `install-driven-plugin`. This will install the plugin for the vagrant user in `/home/vagrant/.cascading/.driven-plugin`. Installing the plugin will cause every Cascading based application to send telemetry to `https://driven.cascading.io`. From 383f7002c7505cbddbd1459d1c64d573e025f000 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 20 Aug 2015 22:56:09 -0700 Subject: [PATCH 12/36] fix hbase URL in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bfe924c..b37b467 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ The setup is fully distributed. `hadoop1`, `hadoop2` and `hadoop3` are running a [zookeeper](http://zookeeper.apache.org) instance and a region-server each. The HBase master is running on the `master` VM. -The webinterface of the HBase master is http://master.local:60010. +The webinterface of the HBase master is http://master.local:16010. ## Hacking & Troubleshooting & Tips & Tricks From c01a2eb1a8c482969069158c9d65a8249b1e2476 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 20 Aug 2015 23:12:09 -0700 Subject: [PATCH 13/36] change hbase dfs replication to 2 (like the HDFS default, not all data will be on all nodes that way) --- modules/hbase/files/hbase-site.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/hbase/files/hbase-site.xml b/modules/hbase/files/hbase-site.xml index f50a33c..d34809c 100644 --- a/modules/hbase/files/hbase-site.xml +++ b/modules/hbase/files/hbase-site.xml @@ -23,4 +23,8 @@ true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) + + dfs.replication + 2 + From ec6fdac5bd63035f1c25a0a87fbdd05631e839e8 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 20 Aug 2015 23:12:09 -0700 Subject: [PATCH 14/36] change hbase dfs replication to 2 (like the HDFS default, not all data will be on all nodes that way) --- modules/hbase/files/hbase-site.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/hbase/files/hbase-site.xml b/modules/hbase/files/hbase-site.xml index f50a33c..d34809c 100644 --- a/modules/hbase/files/hbase-site.xml +++ b/modules/hbase/files/hbase-site.xml @@ -23,4 +23,8 @@ true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) + + dfs.replication + 2 + From 65f6c4e1b5c3efbd2e29fd446b1fad42079e2fb0 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 20 Aug 2015 22:56:09 -0700 Subject: [PATCH 15/36] fix hbase URL in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bfe924c..b37b467 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,7 @@ The setup is fully distributed. `hadoop1`, `hadoop2` and `hadoop3` are running a [zookeeper](http://zookeeper.apache.org) instance and a region-server each. The HBase master is running on the `master` VM. -The webinterface of the HBase master is http://master.local:60010. +The webinterface of the HBase master is http://master.local:16010. ## Hacking & Troubleshooting & Tips & Tricks From dfdc3eb520e1ded55107cb8693bd6e3d69116eb1 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 14:03:27 -0700 Subject: [PATCH 16/36] configure HBase so it works on single-node; enable by default --- Vagrantfile | 4 ++- manifests/master-single.pp | 6 ++++- modules/hbase/files/hbase-site-single.xml | 30 +++++++++++++++++++++++ modules/hbase/files/regionservers-single | 1 + modules/hbase/manifests/init.pp | 19 +++++++++++--- single-node/Vagrantfile | 4 ++- 6 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 modules/hbase/files/hbase-site-single.xml create mode 100644 modules/hbase/files/regionservers-single diff --git a/Vagrantfile b/Vagrantfile index 99cfe69..51df500 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -8,7 +8,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] + vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "1024"] end config.vm.provider "vmware_fusion" do |v, override| @@ -51,6 +51,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" + config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" + config.vm.provision :puppet do |puppet| puppet.manifest_file = "master.pp" puppet.module_path = "modules" diff --git a/manifests/master-single.pp b/manifests/master-single.pp index 872b801..ee5d254 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -4,8 +4,12 @@ slaves_file => "puppet:///modules/hadoop/slaves-single", hdfs_site_file => "puppet:///modules/hadoop/hdfs-site-single.xml" } +class{ 'hbase': + regionservers_file => "puppet:///modules/hbase/regionservers-single", + hbase_site_file => "puppet:///modules/hbase/hbase-site-single.xml" +} -#include hbase +include hbase include spark include avahi include cascading diff --git a/modules/hbase/files/hbase-site-single.xml b/modules/hbase/files/hbase-site-single.xml new file mode 100644 index 0000000..01364ee --- /dev/null +++ b/modules/hbase/files/hbase-site-single.xml @@ -0,0 +1,30 @@ + + + + + hbase.zookeeper.quorum + master.local + + + hbase.zookeeper.property.dataDir + /srv/zookeeper + Property from ZooKeeper's config zoo.cfg. The directory where the snapshot is stored. + + + hbase.rootdir + hdfs://master.local:9000/hbase + The directory shared by RegionServers. + + + hbase.cluster.distributed + false + The mode the cluster will be in. Possible values are + false: standalone and pseudo-distributed setups with managed Zookeeper + true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) + + + + dfs.replication + 1 + + diff --git a/modules/hbase/files/regionservers-single b/modules/hbase/files/regionservers-single new file mode 100644 index 0000000..8948a26 --- /dev/null +++ b/modules/hbase/files/regionservers-single @@ -0,0 +1 @@ +master.local diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 9d49590..38cb43b 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -1,8 +1,21 @@ -class hbase { +class hbase($regionservers_file = undef, $hbase_site_file = undef) { $hbase_version = "1.1.1" $hbase_home = "/opt/hbase-${hbase_version}" $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" + if $regionservers_file == undef { + $_regionservers_file = "puppet:///modules/hbase/regionservers_file" + } + else { + $_regionservers_file = $regionservers_file + } + if $hbase_site_file == undef { + $_hbase_site_file = "puppet:///modules/hbase/hbase-site.xml" + } + else { + $_hbase_site_file = $hbase_site_file + } + file { "/srv/zookeeper": ensure => "directory" } @@ -24,7 +37,7 @@ file { "${hbase_home}/conf/regionservers": - source => "puppet:///modules/hbase/regionservers", + source => $_regionservers_file, mode => 644, owner => root, group => root, @@ -33,7 +46,7 @@ file { "${hbase_home}/conf/hbase-site.xml": - source => "puppet:///modules/hbase/hbase-site.xml", + source => $_hbase_site_file, mode => 644, owner => root, group => root, diff --git a/single-node/Vagrantfile b/single-node/Vagrantfile index b2ff2a4..b2b5835 100644 --- a/single-node/Vagrantfile +++ b/single-node/Vagrantfile @@ -8,12 +8,14 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "1536"] + vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "2048"] end config.vm.define :master do |master| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" + + config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" config.vm.provision :puppet do |puppet| puppet.manifest_file = "master-single.pp" From 601cbebe04364e976569ae81496f02b339e513a2 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 14:03:27 -0700 Subject: [PATCH 17/36] configure HBase so it works on single-node; enable by default --- Vagrantfile | 4 ++- manifests/master-single.pp | 6 ++++- modules/hbase/files/hbase-site-single.xml | 30 +++++++++++++++++++++++ modules/hbase/files/regionservers-single | 1 + modules/hbase/manifests/init.pp | 19 +++++++++++--- single-node/Vagrantfile | 4 ++- 6 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 modules/hbase/files/hbase-site-single.xml create mode 100644 modules/hbase/files/regionservers-single diff --git a/Vagrantfile b/Vagrantfile index 99cfe69..51df500 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -8,7 +8,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] + vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "1024"] end config.vm.provider "vmware_fusion" do |v, override| @@ -51,6 +51,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" + config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" + config.vm.provision :puppet do |puppet| puppet.manifest_file = "master.pp" puppet.module_path = "modules" diff --git a/manifests/master-single.pp b/manifests/master-single.pp index 872b801..ee5d254 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -4,8 +4,12 @@ slaves_file => "puppet:///modules/hadoop/slaves-single", hdfs_site_file => "puppet:///modules/hadoop/hdfs-site-single.xml" } +class{ 'hbase': + regionservers_file => "puppet:///modules/hbase/regionservers-single", + hbase_site_file => "puppet:///modules/hbase/hbase-site-single.xml" +} -#include hbase +include hbase include spark include avahi include cascading diff --git a/modules/hbase/files/hbase-site-single.xml b/modules/hbase/files/hbase-site-single.xml new file mode 100644 index 0000000..01364ee --- /dev/null +++ b/modules/hbase/files/hbase-site-single.xml @@ -0,0 +1,30 @@ + + + + + hbase.zookeeper.quorum + master.local + + + hbase.zookeeper.property.dataDir + /srv/zookeeper + Property from ZooKeeper's config zoo.cfg. The directory where the snapshot is stored. + + + hbase.rootdir + hdfs://master.local:9000/hbase + The directory shared by RegionServers. + + + hbase.cluster.distributed + false + The mode the cluster will be in. Possible values are + false: standalone and pseudo-distributed setups with managed Zookeeper + true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) + + + + dfs.replication + 1 + + diff --git a/modules/hbase/files/regionservers-single b/modules/hbase/files/regionservers-single new file mode 100644 index 0000000..8948a26 --- /dev/null +++ b/modules/hbase/files/regionservers-single @@ -0,0 +1 @@ +master.local diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 7de1358..5b026b4 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -1,8 +1,21 @@ -class hbase { +class hbase($regionservers_file = undef, $hbase_site_file = undef) { $hbase_version = "1.1.1" $hbase_home = "/opt/hbase-${hbase_version}" $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" + if $regionservers_file == undef { + $_regionservers_file = "puppet:///modules/hbase/regionservers_file" + } + else { + $_regionservers_file = $regionservers_file + } + if $hbase_site_file == undef { + $_hbase_site_file = "puppet:///modules/hbase/hbase-site.xml" + } + else { + $_hbase_site_file = $hbase_site_file + } + file { "/srv/zookeeper": ensure => "directory" } @@ -24,7 +37,7 @@ file { "${hbase_home}/conf/regionservers": - source => "puppet:///modules/hbase/regionservers", + source => $_regionservers_file, mode => 644, owner => root, group => root, @@ -33,7 +46,7 @@ file { "${hbase_home}/conf/hbase-site.xml": - source => "puppet:///modules/hbase/hbase-site.xml", + source => $_hbase_site_file, mode => 644, owner => root, group => root, diff --git a/single-node/Vagrantfile b/single-node/Vagrantfile index b2ff2a4..b2b5835 100644 --- a/single-node/Vagrantfile +++ b/single-node/Vagrantfile @@ -8,12 +8,14 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "1536"] + vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "2048"] end config.vm.define :master do |master| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" + + config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" config.vm.provision :puppet do |puppet| puppet.manifest_file = "master-single.pp" From e9e5907ca8770e86a449b4b301dd03ddfb246c0d Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 15:26:32 -0700 Subject: [PATCH 18/36] Add Phoenix manifst (but disable by default) --- .gitignore | 3 ++- manifests/master-single.pp | 2 ++ manifests/master.pp | 3 ++- modules/phoenix/manifests/init.pp | 32 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 modules/phoenix/manifests/init.pp diff --git a/.gitignore b/.gitignore index 07e10ee..f7350ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ *.sw[a-z] .vagrant employees.tgz @@ -6,4 +7,4 @@ hadoop-*.tar.gz.mds hbase-*.tar.gz spark-*.tgz apache-hive-*.tar.gz - +phoenix-*.tar.gz diff --git a/manifests/master-single.pp b/manifests/master-single.pp index ee5d254..cd2a2b4 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -10,6 +10,8 @@ } include hbase +#include hive +#include phoenix include spark include avahi include cascading diff --git a/manifests/master.pp b/manifests/master.pp index fa35b79..cdf9c52 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -1,7 +1,8 @@ include base include hadoop include hbase -include hive +#include hive +#include phoenix include spark include avahi include cascading diff --git a/modules/phoenix/manifests/init.pp b/modules/phoenix/manifests/init.pp new file mode 100644 index 0000000..d8a0d2f --- /dev/null +++ b/modules/phoenix/manifests/init.pp @@ -0,0 +1,32 @@ +class phoenix { + $phoenix_version = "4.5.1" + $hbase_compat = "1.1" # phoenix compatibility version + $hbase_version = "1.1.1" # actual installed version + $phoenix_tarball = "phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz" + $phoenix_home = "/opt/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin" + + $server_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-server.jar" + $client_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-client.jar" + +#http://apache.mirror.iweb.ca/phoenix/phoenix-4.5.1-HBase-1.1/bin/phoenix-4.5.1-HBase-1.1-bin.tar.gz + + exec { "download_phoenix": + command => "/tmp/grrr hive/phoenix/phoenix-${phoenix_version}-HBase-${hbase_compat}/bin/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz -O /vagrant/$phoenix_tarball --read-timeout=5 --tries=0", + timeout => 1800, + path => $path, + creates => "/vagrant/${phoenix_tarball}", + require => [ Exec["download_grrr"]] + } + + exec { "unpack_phoenix" : + command => "tar xf /vagrant/${phoenix_tarball} -C /opt", + path => $path, + creates => "${phoenix_home}", + require => Exec["download_phoenix", "unpack_hadoop"] + } + + file { "/opt/hbase-${hbase_version}/lib/${server_jar}": + ensure => 'link', + target => "${phoenix_home}/${server_jar}", + } +} From abb943da2cf094e0da708cac8c1310abae2e9cc2 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 15:28:03 -0700 Subject: [PATCH 19/36] undo local changes from my Vagrantfiles --- Vagrantfile | 4 +--- single-node/Vagrantfile | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 51df500..99cfe69 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -8,7 +8,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "1024"] + vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] end config.vm.provider "vmware_fusion" do |v, override| @@ -51,8 +51,6 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" - config.vm.provision :puppet do |puppet| puppet.manifest_file = "master.pp" puppet.module_path = "modules" diff --git a/single-node/Vagrantfile b/single-node/Vagrantfile index b2b5835..0253418 100644 --- a/single-node/Vagrantfile +++ b/single-node/Vagrantfile @@ -8,15 +8,13 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "2048"] + vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "1536"] end config.vm.define :master do |master| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" - config.vm.provision :puppet do |puppet| puppet.manifest_file = "master-single.pp" puppet.module_path = "../modules" From 81a2e788b438ed5fd144fd925c5ebf52b5e2c730 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 15:28:03 -0700 Subject: [PATCH 20/36] undo local changes from my Vagrantfiles --- Vagrantfile | 4 +--- single-node/Vagrantfile | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 51df500..99cfe69 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -8,7 +8,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "1024"] + vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] end config.vm.provider "vmware_fusion" do |v, override| @@ -51,8 +51,6 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" - config.vm.provision :puppet do |puppet| puppet.manifest_file = "master.pp" puppet.module_path = "modules" diff --git a/single-node/Vagrantfile b/single-node/Vagrantfile index b2b5835..0253418 100644 --- a/single-node/Vagrantfile +++ b/single-node/Vagrantfile @@ -8,15 +8,13 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| config.vm.box_url = "http://files.vagrantup.com/precise64.box" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "2048"] + vb.customize ["modifyvm", :id, "--cpus", "2", "--memory", "1536"] end config.vm.define :master do |master| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.synced_folder "/home/ggbaker/crs/732", "/home/vagrant/732" - config.vm.provision :puppet do |puppet| puppet.manifest_file = "master-single.pp" puppet.module_path = "../modules" From 74fea080d344414551e39743dbeef6535e54f5d2 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 18:01:18 -0700 Subject: [PATCH 21/36] fix filename for hbase regionfile --- modules/hbase/manifests/init.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 38cb43b..45cf1d9 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -4,7 +4,7 @@ $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" if $regionservers_file == undef { - $_regionservers_file = "puppet:///modules/hbase/regionservers_file" + $_regionservers_file = "puppet:///modules/hbase/regionservers" } else { $_regionservers_file = $regionservers_file From c3cd70793b76f54cc873418aa8b6754abcc645ba Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 18:01:18 -0700 Subject: [PATCH 22/36] fix filename for hbase regionfile --- modules/hbase/manifests/init.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 5b026b4..a2c655c 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -4,7 +4,7 @@ $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" if $regionservers_file == undef { - $_regionservers_file = "puppet:///modules/hbase/regionservers_file" + $_regionservers_file = "puppet:///modules/hbase/regionservers" } else { $_regionservers_file = $regionservers_file From dce709c25ba54ca06afaee1222e095e0d06f1b76 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Mon, 27 Jul 2015 21:45:59 -0700 Subject: [PATCH 23/36] basics of hive install --- manifests/master.pp | 1 + modules/hive/manifests/init.pp | 27 +++++++++++++++++++++++++ modules/hive/templates/hive-path.sh.erb | 2 ++ 3 files changed, 30 insertions(+) create mode 100644 modules/hive/manifests/init.pp create mode 100644 modules/hive/templates/hive-path.sh.erb diff --git a/manifests/master.pp b/manifests/master.pp index 2385fd8..fa35b79 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -1,6 +1,7 @@ include base include hadoop include hbase +include hive include spark include avahi include cascading diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp new file mode 100644 index 0000000..ae3d1cb --- /dev/null +++ b/modules/hive/manifests/init.pp @@ -0,0 +1,27 @@ +class hive { + $hive_version = "1.1.1" + $hive_tarball = "apache-hive-${hive_version}-bin.tar.gz" + $hive_home = "/opt/apache-hive-${hive_version}-bin" + + exec { "download_hive": + command => "/tmp/grrr hive/hive-${hive_version}/apache-hive-${hive_version}-bin.tar.gz -O /vagrant/$hive_tarball --read-timeout=5 --tries=0", + timeout => 1800, + path => $path, + creates => "/vagrant/$hive_tarball", + require => [ Exec["download_grrr"]] + } + + exec { "unpack_hive" : + command => "tar xf /vagrant/${hive_tarball} -C /opt", + path => $path, + creates => "${hive_home}", + require => Exec["download_hive"] + } + + file { "/etc/profile.d/hive-path.sh": + content => template("hive/hive-path.sh.erb"), + owner => vagrant, + group => root, + } + +} diff --git a/modules/hive/templates/hive-path.sh.erb b/modules/hive/templates/hive-path.sh.erb new file mode 100644 index 0000000..7ea1e3b --- /dev/null +++ b/modules/hive/templates/hive-path.sh.erb @@ -0,0 +1,2 @@ +export HIVE_HOME=<%=hive_home%> +export HADOOP_USER_CLASSPATH_FIRST=true From 715c4e346906df562d4720f1ead41418edffbd2a Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 01:28:00 -0700 Subject: [PATCH 24/36] upgrade openjdk from 6 to 7 (for hive) --- modules/base/manifests/init.pp | 2 +- modules/cascading/files/ccsdk.sh | 2 +- modules/cascading/manifests/init.pp | 2 +- modules/hadoop/files/hadoop-env.sh | 2 +- modules/hadoop/manifests/init.pp | 2 +- modules/hbase/files/hbase-env.sh | 2 +- modules/hbase/manifests/init.pp | 2 +- modules/hive/manifests/init.pp | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/base/manifests/init.pp b/modules/base/manifests/init.pp index 8bd6e6a..a32fb10 100644 --- a/modules/base/manifests/init.pp +++ b/modules/base/manifests/init.pp @@ -6,7 +6,7 @@ command => '/usr/bin/apt-get update', } - package { "openjdk-6-jdk" : + package { "openjdk-7-jdk" : ensure => present, require => Exec['apt-get update'] } diff --git a/modules/cascading/files/ccsdk.sh b/modules/cascading/files/ccsdk.sh index bb3f651..0cdbb81 100644 --- a/modules/cascading/files/ccsdk.sh +++ b/modules/cascading/files/ccsdk.sh @@ -1,4 +1,4 @@ -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 export CASCADING_SDK_HOME=/opt/CascadingSDK . $CASCADING_SDK_HOME/etc/setenv.sh diff --git a/modules/cascading/manifests/init.pp b/modules/cascading/manifests/init.pp index f9a7520..9d9ed9d 100644 --- a/modules/cascading/manifests/init.pp +++ b/modules/cascading/manifests/init.pp @@ -10,7 +10,7 @@ # S3 can be slow at times hence a longer timeout timeout => 1800, unless => "ls /opt | grep CascadingSDK", - require => Package["openjdk-6-jdk"] + require => Package["openjdk-7-jdk"] } exec { "unpack_sdk" : diff --git a/modules/hadoop/files/hadoop-env.sh b/modules/hadoop/files/hadoop-env.sh index 45469bb..4f63e8e 100644 --- a/modules/hadoop/files/hadoop-env.sh +++ b/modules/hadoop/files/hadoop-env.sh @@ -24,7 +24,7 @@ # remote nodes. # The java implementation to use. -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 # The jsvc implementation to use. Jsvc is required to run secure datanodes. #export JSVC_HOME=${JSVC_HOME} diff --git a/modules/hadoop/manifests/init.pp b/modules/hadoop/manifests/init.pp index f6263bf..3aa0dd4 100644 --- a/modules/hadoop/manifests/init.pp +++ b/modules/hadoop/manifests/init.pp @@ -41,7 +41,7 @@ timeout => 1800, path => $path, creates => "/vagrant/$hadoop_tarball", - require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] + require => [ Package["openjdk-7-jdk"], Exec["download_grrr"]] } exec { "download_checksum": diff --git a/modules/hbase/files/hbase-env.sh b/modules/hbase/files/hbase-env.sh index 3d95499..e6a5c7d 100644 --- a/modules/hbase/files/hbase-env.sh +++ b/modules/hbase/files/hbase-env.sh @@ -27,7 +27,7 @@ # The java implementation to use. Java 1.6 required. # export JAVA_HOME=/usr/java/jdk1.6.0/ -export JAVA_HOME=/usr/lib/jvm/java-6-openjdk-amd64 +export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 # Extra Java CLASSPATH elements. Optional. # export HBASE_CLASSPATH= diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index a2c655c..45cf1d9 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -25,7 +25,7 @@ timeout => 1800, path => $path, creates => "/vagrant/$hbase_tarball", - require => [ Package["openjdk-6-jdk"], Exec["download_grrr"]] + require => [ Package["openjdk-7-jdk"], Exec["download_grrr"]] } exec { "unpack_hbase" : diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp index ae3d1cb..7444716 100644 --- a/modules/hive/manifests/init.pp +++ b/modules/hive/manifests/init.pp @@ -1,5 +1,5 @@ class hive { - $hive_version = "1.1.1" + $hive_version = "1.2.1" $hive_tarball = "apache-hive-${hive_version}-bin.tar.gz" $hive_home = "/opt/apache-hive-${hive_version}-bin" @@ -15,7 +15,7 @@ command => "tar xf /vagrant/${hive_tarball} -C /opt", path => $path, creates => "${hive_home}", - require => Exec["download_hive"] + require => Exec["download_hive", "unpack_hadoop"] } file { "/etc/profile.d/hive-path.sh": From 2b09a1e5a8d6cb0b08ff0ff1285aecfda7f16e2e Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 28 Jul 2015 08:23:14 -0700 Subject: [PATCH 25/36] setup script for Hive --- .gitignore | 2 ++ modules/hive/files/prepare-hive.sh | 7 +++++++ modules/hive/manifests/init.pp | 9 +++++++++ 3 files changed, 18 insertions(+) create mode 100644 modules/hive/files/prepare-hive.sh diff --git a/.gitignore b/.gitignore index b0a1221..07e10ee 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ hadoop-*.tar.gz hadoop-*.tar.gz.mds hbase-*.tar.gz spark-*.tgz +apache-hive-*.tar.gz + diff --git a/modules/hive/files/prepare-hive.sh b/modules/hive/files/prepare-hive.sh new file mode 100644 index 0000000..18a35df --- /dev/null +++ b/modules/hive/files/prepare-hive.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +. /etc/profile + +export HDFS_USER=hdfs + +su - $HDFS_USER -c "$HADOOP_PREFIX/bin/hadoop fs -mkdir -p /tmp /user/hive/warehouse; $HADOOP_PREFIX/bin/hadoop fs -chmod g+w /tmp /user/hive/warehouse" diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp index 7444716..f50a173 100644 --- a/modules/hive/manifests/init.pp +++ b/modules/hive/manifests/init.pp @@ -23,5 +23,14 @@ owner => vagrant, group => root, } + + file { + "${hive_home}/bin/prepare-hive.sh": + source => "puppet:///modules/hive/prepare-hive.sh", + mode => 755, + owner => vagrant, + group => root, + require => Exec["unpack_hive"] + } } From ecb4e351f2bfab2f7b5ab45b0ea1366f8f002270 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Fri, 21 Aug 2015 15:26:32 -0700 Subject: [PATCH 26/36] Add Phoenix manifst (but disable by default) --- .gitignore | 3 ++- manifests/master-single.pp | 2 ++ manifests/master.pp | 3 ++- modules/phoenix/manifests/init.pp | 32 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 modules/phoenix/manifests/init.pp diff --git a/.gitignore b/.gitignore index 07e10ee..f7350ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ *.sw[a-z] .vagrant employees.tgz @@ -6,4 +7,4 @@ hadoop-*.tar.gz.mds hbase-*.tar.gz spark-*.tgz apache-hive-*.tar.gz - +phoenix-*.tar.gz diff --git a/manifests/master-single.pp b/manifests/master-single.pp index ee5d254..cd2a2b4 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -10,6 +10,8 @@ } include hbase +#include hive +#include phoenix include spark include avahi include cascading diff --git a/manifests/master.pp b/manifests/master.pp index fa35b79..cdf9c52 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -1,7 +1,8 @@ include base include hadoop include hbase -include hive +#include hive +#include phoenix include spark include avahi include cascading diff --git a/modules/phoenix/manifests/init.pp b/modules/phoenix/manifests/init.pp new file mode 100644 index 0000000..d8a0d2f --- /dev/null +++ b/modules/phoenix/manifests/init.pp @@ -0,0 +1,32 @@ +class phoenix { + $phoenix_version = "4.5.1" + $hbase_compat = "1.1" # phoenix compatibility version + $hbase_version = "1.1.1" # actual installed version + $phoenix_tarball = "phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz" + $phoenix_home = "/opt/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin" + + $server_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-server.jar" + $client_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-client.jar" + +#http://apache.mirror.iweb.ca/phoenix/phoenix-4.5.1-HBase-1.1/bin/phoenix-4.5.1-HBase-1.1-bin.tar.gz + + exec { "download_phoenix": + command => "/tmp/grrr hive/phoenix/phoenix-${phoenix_version}-HBase-${hbase_compat}/bin/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz -O /vagrant/$phoenix_tarball --read-timeout=5 --tries=0", + timeout => 1800, + path => $path, + creates => "/vagrant/${phoenix_tarball}", + require => [ Exec["download_grrr"]] + } + + exec { "unpack_phoenix" : + command => "tar xf /vagrant/${phoenix_tarball} -C /opt", + path => $path, + creates => "${phoenix_home}", + require => Exec["download_phoenix", "unpack_hadoop"] + } + + file { "/opt/hbase-${hbase_version}/lib/${server_jar}": + ensure => 'link', + target => "${phoenix_home}/${server_jar}", + } +} From 5bdcdedf2d1dc97006de5a95b506b47a1ba50b9d Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Sun, 20 Sep 2015 18:30:00 -0700 Subject: [PATCH 27/36] add module to install Phoenix --- manifests/datanode.pp | 1 + manifests/master-single.pp | 2 +- manifests/master.pp | 2 +- modules/phoenix/manifests/init.pp | 4 +--- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/manifests/datanode.pp b/manifests/datanode.pp index 1351fe1..e1232da 100644 --- a/manifests/datanode.pp +++ b/manifests/datanode.pp @@ -1,5 +1,6 @@ include base include hadoop include hbase +include phoenix include spark include avahi diff --git a/manifests/master-single.pp b/manifests/master-single.pp index cd2a2b4..25773e5 100644 --- a/manifests/master-single.pp +++ b/manifests/master-single.pp @@ -11,7 +11,7 @@ include hbase #include hive -#include phoenix +include phoenix include spark include avahi include cascading diff --git a/manifests/master.pp b/manifests/master.pp index cdf9c52..02e2bd4 100644 --- a/manifests/master.pp +++ b/manifests/master.pp @@ -2,7 +2,7 @@ include hadoop include hbase #include hive -#include phoenix +include phoenix include spark include avahi include cascading diff --git a/modules/phoenix/manifests/init.pp b/modules/phoenix/manifests/init.pp index d8a0d2f..520cddc 100644 --- a/modules/phoenix/manifests/init.pp +++ b/modules/phoenix/manifests/init.pp @@ -8,10 +8,8 @@ $server_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-server.jar" $client_jar = "phoenix-${phoenix_version}-HBase-${hbase_compat}-client.jar" -#http://apache.mirror.iweb.ca/phoenix/phoenix-4.5.1-HBase-1.1/bin/phoenix-4.5.1-HBase-1.1-bin.tar.gz - exec { "download_phoenix": - command => "/tmp/grrr hive/phoenix/phoenix-${phoenix_version}-HBase-${hbase_compat}/bin/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz -O /vagrant/$phoenix_tarball --read-timeout=5 --tries=0", + command => "/tmp/grrr phoenix/phoenix-${phoenix_version}-HBase-${hbase_compat}/bin/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz -O /vagrant/${phoenix_tarball} --read-timeout=5 --tries=0", timeout => 1800, path => $path, creates => "/vagrant/${phoenix_tarball}", From 0df66bd6b7327644c06ef16e77e29b5f9e8fc221 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Tue, 22 Sep 2015 21:48:03 -0700 Subject: [PATCH 28/36] bump Spark version --- modules/spark/manifests/init.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp index 22f276c..ce4e922 100644 --- a/modules/spark/manifests/init.pp +++ b/modules/spark/manifests/init.pp @@ -1,5 +1,5 @@ class spark { - $spark_version = "1.4.1" + $spark_version = "1.5.0" $hadoop_version = "2.7.1" # installed Hadoop version $hadoop_spark = "2.6" # Hadoop version for spark compatibility $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_spark}" From 8536e06a224b1ba31c4000d0c9aaeec3bd8f5971 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Wed, 23 Sep 2015 11:08:18 -0700 Subject: [PATCH 29/36] update mapred staging directories so permissions work out --- modules/hadoop/files/mapred-site.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/modules/hadoop/files/mapred-site.xml b/modules/hadoop/files/mapred-site.xml index ed0dd62..e3d71b8 100644 --- a/modules/hadoop/files/mapred-site.xml +++ b/modules/hadoop/files/mapred-site.xml @@ -26,4 +26,17 @@ mapreduce.framework.name yarn + + + mapreduce.jobtracker.staging.root.dir + /user + + + mapred.system.dir + /user/${user.name}/.staging + + + yarn.app.mapreduce.am.staging-dir + /user + From 02137b3b7305b658d5e4345730d95feddf2abf95 Mon Sep 17 00:00:00 2001 From: Aleksei Date: Sat, 28 Nov 2015 02:03:25 -0800 Subject: [PATCH 30/36] Fix a bug in vagrant file causing multiple initializations of puppet --- Vagrantfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 99cfe69..b4f8736 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -21,7 +21,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| hadoop1.vm.network "private_network", ip: "192.168.7.12" hadoop1.vm.hostname = "hadoop1.local" - config.vm.provision :puppet do |puppet| + hadoop1.vm.provision :puppet do |puppet| puppet.manifest_file = "datanode.pp" puppet.module_path = "modules" end @@ -31,7 +31,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| hadoop2.vm.network "private_network", ip: "192.168.7.13" hadoop2.vm.hostname = "hadoop2.local" - config.vm.provision :puppet do |puppet| + hadoop2.vm.provision :puppet do |puppet| puppet.manifest_file = "datanode.pp" puppet.module_path = "modules" end @@ -41,7 +41,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| hadoop3.vm.network "private_network", ip: "192.168.7.14" hadoop3.vm.hostname = "hadoop3.local" - config.vm.provision :puppet do |puppet| + hadoop3.vm.provision :puppet do |puppet| puppet.manifest_file = "datanode.pp" puppet.module_path = "modules" end @@ -51,7 +51,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.provision :puppet do |puppet| + master.vm.provision :puppet do |puppet| puppet.manifest_file = "master.pp" puppet.module_path = "modules" end From 4d5aa61c20a437d19f5a79e8da614229c6b211fc Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Sun, 31 Jul 2016 15:18:37 -0700 Subject: [PATCH 31/36] update versions to current --- modules/hadoop/manifests/init.pp | 2 +- modules/hbase/manifests/init.pp | 2 +- modules/hive/manifests/init.pp | 2 +- modules/phoenix/manifests/init.pp | 4 ++-- modules/spark/manifests/init.pp | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/hadoop/manifests/init.pp b/modules/hadoop/manifests/init.pp index 3aa0dd4..5a01f24 100644 --- a/modules/hadoop/manifests/init.pp +++ b/modules/hadoop/manifests/init.pp @@ -1,6 +1,6 @@ class hadoop($slaves_file = undef, $hdfs_site_file = undef) { - $hadoop_version = "2.7.1" + $hadoop_version = "2.7.2" $hadoop_home = "/opt/hadoop-${hadoop_version}" $hadoop_tarball = "hadoop-${hadoop_version}.tar.gz" $hadoop_tarball_checksums = "${hadoop_tarball}.mds" diff --git a/modules/hbase/manifests/init.pp b/modules/hbase/manifests/init.pp index 45cf1d9..c0ae454 100644 --- a/modules/hbase/manifests/init.pp +++ b/modules/hbase/manifests/init.pp @@ -1,5 +1,5 @@ class hbase($regionservers_file = undef, $hbase_site_file = undef) { - $hbase_version = "1.1.1" + $hbase_version = "1.2.2" $hbase_home = "/opt/hbase-${hbase_version}" $hbase_tarball = "hbase-${hbase_version}-bin.tar.gz" diff --git a/modules/hive/manifests/init.pp b/modules/hive/manifests/init.pp index f50a173..e0243ec 100644 --- a/modules/hive/manifests/init.pp +++ b/modules/hive/manifests/init.pp @@ -1,5 +1,5 @@ class hive { - $hive_version = "1.2.1" + $hive_version = "2.1.0" $hive_tarball = "apache-hive-${hive_version}-bin.tar.gz" $hive_home = "/opt/apache-hive-${hive_version}-bin" diff --git a/modules/phoenix/manifests/init.pp b/modules/phoenix/manifests/init.pp index 520cddc..5226fa8 100644 --- a/modules/phoenix/manifests/init.pp +++ b/modules/phoenix/manifests/init.pp @@ -1,7 +1,7 @@ class phoenix { - $phoenix_version = "4.5.1" + $phoenix_version = "4.7.0" $hbase_compat = "1.1" # phoenix compatibility version - $hbase_version = "1.1.1" # actual installed version + $hbase_version = "1.2.2" # actual installed version $phoenix_tarball = "phoenix-${phoenix_version}-HBase-${hbase_compat}-bin.tar.gz" $phoenix_home = "/opt/phoenix-${phoenix_version}-HBase-${hbase_compat}-bin" diff --git a/modules/spark/manifests/init.pp b/modules/spark/manifests/init.pp index ce4e922..c2cb713 100644 --- a/modules/spark/manifests/init.pp +++ b/modules/spark/manifests/init.pp @@ -1,7 +1,7 @@ class spark { - $spark_version = "1.5.0" - $hadoop_version = "2.7.1" # installed Hadoop version - $hadoop_spark = "2.6" # Hadoop version for spark compatibility + $spark_version = "2.0.0" + $hadoop_version = "2.7.2" # installed Hadoop version + $hadoop_spark = "2.7" # Hadoop version for spark compatibility $spark_home = "/opt/spark-${spark_version}-bin-hadoop${hadoop_spark}" $spark_tarball = "spark-${spark_version}-bin-hadoop${hadoop_spark}.tgz" From 16f67d47e985d85c050a14d99fce3af470b56d68 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 18 Aug 2016 11:05:48 -0700 Subject: [PATCH 32/36] un-fix mapred permissions, which break newer version --- modules/hadoop/files/mapred-site.xml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/modules/hadoop/files/mapred-site.xml b/modules/hadoop/files/mapred-site.xml index e3d71b8..ed0dd62 100644 --- a/modules/hadoop/files/mapred-site.xml +++ b/modules/hadoop/files/mapred-site.xml @@ -26,17 +26,4 @@ mapreduce.framework.name yarn - - - mapreduce.jobtracker.staging.root.dir - /user - - - mapred.system.dir - /user/${user.name}/.staging - - - yarn.app.mapreduce.am.staging-dir - /user - From 232991e74f0fc6d32a92bacbac2adb45524bd276 Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 18 Aug 2016 14:19:05 -0700 Subject: [PATCH 33/36] fix puppet call in single-code vagrantfile --- single-node/Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single-node/Vagrantfile b/single-node/Vagrantfile index 0253418..cf04207 100644 --- a/single-node/Vagrantfile +++ b/single-node/Vagrantfile @@ -15,7 +15,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| master.vm.network "private_network", ip: "192.168.7.10" master.vm.hostname = "master.local" - config.vm.provision :puppet do |puppet| + master.vm.provision :puppet do |puppet| puppet.manifest_file = "master-single.pp" puppet.module_path = "../modules" puppet.manifests_path = "../manifests" From cec58e0da0a7d906cec2699401916035f4ac2b0f Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 18 Aug 2016 21:52:39 -0700 Subject: [PATCH 34/36] upgrade to Ubuntu Vivid --- Vagrantfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index b4f8736..05b8a74 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -4,16 +4,15 @@ VAGRANTFILE_API_VERSION = "2" Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - config.vm.box = "cascading-hadoop-base" - config.vm.box_url = "http://files.vagrantup.com/precise64.box" + config.vm.box = "larryli/vivid64" config.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "512"] + vb.customize ["modifyvm", :id, "--cpus", "1", "--memory", "1024"] end config.vm.provider "vmware_fusion" do |v, override| override.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box" - v.vmx["memsize"] = "512" + v.vmx["memsize"] = "1024" v.vmx["numvcpus"] = "1" end From edde5fcb4adaa3e6a010ad035d560a91c10e998c Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 18 Aug 2016 23:00:06 -0700 Subject: [PATCH 35/36] update erb syntax to silence warnings --- modules/hadoop/templates/hadoop-path.sh.erb | 12 ++++++------ modules/hbase/templates/hbase-path.sh.erb | 2 +- modules/hive/templates/hive-path.sh.erb | 2 +- modules/spark/templates/spark-path.sh.erb | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/hadoop/templates/hadoop-path.sh.erb b/modules/hadoop/templates/hadoop-path.sh.erb index 8bff0ec..d11cc5f 100644 --- a/modules/hadoop/templates/hadoop-path.sh.erb +++ b/modules/hadoop/templates/hadoop-path.sh.erb @@ -1,10 +1,10 @@ export HADOOP_HOME_WARN_SUPPRESS="true" -export HADOOP_HOME=<%=hadoop_home%> +export HADOOP_HOME=<%= @hadoop_home %> export HADOOP_YARN_HOME=$HADOOP_HOME export HADOOP_PREFIX=$HADOOP_HOME -export HADOOP_CONF_DIR=<%=hadoop_conf_dir%> -export YARN_CONF_DIR=<%=hadoop_conf_dir%> +export HADOOP_CONF_DIR=<%= @hadoop_conf_dir %> +export YARN_CONF_DIR=<%= @hadoop_conf_dir %> export PATH=$HADOOP_HOME/bin:$PATH -export YARN_LOG_DIR=<%=yarn_log_dir%> -export HADOOP_LOG_DIR=<%=hadoop_log_dir%> -export HADOOP_MAPRED_LOG_DIR=<%=mapred_log_dir%> +export YARN_LOG_DIR=<%= @yarn_log_dir %> +export HADOOP_LOG_DIR=<%= @hadoop_log_dir %> +export HADOOP_MAPRED_LOG_DIR=<%= @mapred_log_dir %> diff --git a/modules/hbase/templates/hbase-path.sh.erb b/modules/hbase/templates/hbase-path.sh.erb index c6301ae..90893f4 100644 --- a/modules/hbase/templates/hbase-path.sh.erb +++ b/modules/hbase/templates/hbase-path.sh.erb @@ -1,3 +1,3 @@ -export HBASE_HOME=<%=hbase_home%> +export HBASE_HOME=<%= @hbase_home %> export HBASE_CONF_DIR=$HBASE_HOME/conf export PATH=$HBASE_HOME/bin:$PATH diff --git a/modules/hive/templates/hive-path.sh.erb b/modules/hive/templates/hive-path.sh.erb index 7ea1e3b..57d03bd 100644 --- a/modules/hive/templates/hive-path.sh.erb +++ b/modules/hive/templates/hive-path.sh.erb @@ -1,2 +1,2 @@ -export HIVE_HOME=<%=hive_home%> +export HIVE_HOME=<%= @hive_home %> export HADOOP_USER_CLASSPATH_FIRST=true diff --git a/modules/spark/templates/spark-path.sh.erb b/modules/spark/templates/spark-path.sh.erb index d5e2fb2..14015dc 100644 --- a/modules/spark/templates/spark-path.sh.erb +++ b/modules/spark/templates/spark-path.sh.erb @@ -1,2 +1,2 @@ -export SPARK_HOME=<%=spark_home%> +export SPARK_HOME=<%= @spark_home %> export PATH=$SPARK_HOME/bin:$PATH From 55df60f1da5d6a29ad2af909550b6dd9a48bd61a Mon Sep 17 00:00:00 2001 From: Greg Baker Date: Thu, 18 Aug 2016 23:25:51 -0700 Subject: [PATCH 36/36] fix puppet dependencies --- modules/phoenix/manifests/init.pp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/phoenix/manifests/init.pp b/modules/phoenix/manifests/init.pp index 5226fa8..681cb90 100644 --- a/modules/phoenix/manifests/init.pp +++ b/modules/phoenix/manifests/init.pp @@ -26,5 +26,6 @@ file { "/opt/hbase-${hbase_version}/lib/${server_jar}": ensure => 'link', target => "${phoenix_home}/${server_jar}", + require => [ Exec["download_grrr"], Exec["unpack_hbase"] ] } }