# a1.sh: run /cleaner.jar through `hadoop jar`, passing today's /flume/<date>
# and /cleaned/<date> directories as its two arguments
# (presumably input and output paths -- confirm against the cleaner job).
#
# The date MUST be assigned on its own line. Written as a one-line
# `CURRENT=... command ... $CURRENT` prefix, the shell expands $CURRENT in the
# arguments before the assignment takes effect, so the job would receive
# "/flume/" and "/cleaned/" instead of the dated directories.
CURRENT=$(/bin/date +%Y%m%d)
/softWare/hadoop-2.2.0/bin/hadoop jar /cleaner.jar "/flume/$CURRENT" "/cleaned/$CURRENT"
# chmod +x a1.sh # ./a1.sh
-- Hive external table over the tab-delimited files stored under /cleaned.
-- Partitioned by logdate; partitions are attached separately with
-- ALTER TABLE ... ADD PARTITION.
CREATE EXTERNAL TABLE wxkj (
    ip   STRING,
    time STRING,
    urls STRING
)
PARTITIONED BY (logdate STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
LOCATION '/cleaned';
a2.sh脚本的内容('-e' 表示不用启动hive直接在外部执行)
# a2.sh: attach today's /cleaned/<date> directory to the Hive table wxkj as
# partition logdate=<date>. `hive -e` runs the statement without opening the
# interactive Hive shell.
#
# The date is assigned on its own line: as a one-line `CURRENT=... hive ...`
# prefix, $CURRENT inside the statement is expanded before the assignment
# happens and would be empty.
CURRENT=$(/bin/date +%Y%m%d)
# The partition value is quoted because logdate is a string partition column.
/softWare/apache-hive-0.13.0-bin/bin/hive -e "alter table wxkj add partition (logdate='$CURRENT') location '/cleaned/$CURRENT'"
# chmod +x a2.sh # ./a2.sh hive> select * from wxkj limit 4; OK 20130530173820 data/cache/style_1_widthauto.css?y7a 20180731 20130530173820 source/plugin/wsh_wx/img/wsh_zk.css 20180731 20130530173820 data/cache/style_1_forum_index.css?y7a 20180731 20130530173820 source/plugin/wsh_wx/img/wx_jqr.gif 20180731
a3.sh脚本的内容('-e' 表示不用启动hive直接在外部执行)
# a3.sh: count the rows of wxkj whose logdate equals today's date.
# `hive -e` runs the query without opening the interactive Hive shell.
#
# The date is assigned on its own line: as a one-line `CURRENT=... hive ...`
# prefix, $CURRENT inside the query is expanded before the assignment happens,
# leaving `where logdate = ` with no value.
CURRENT=$(/bin/date +%Y%m%d)
# logdate is a string partition column, so compare it with a quoted literal.
/softWare/apache-hive-0.13.0-bin/bin/hive -e "select count(*) from wxkj where logdate = '$CURRENT'"
# chmod +x a3.sh # ./a3.sh 18/07/31 11:45:13 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces 18/07/31 11:45:13 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize ..................................................... Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1533054465340_0011, Tracking URL = http://hadoop03:8088/proxy/application_1533054465340_0011/ Kill Command = /softWare/hadoop-2.2.0/bin/hadoop job -kill job_1533054465340_0011 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2018-07-31 11:45:27,096 Stage-1 map = 0%, reduce = 0% 2018-07-31 11:45:35,860 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 3.43 sec 2018-07-31 11:45:42,143 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 4.48 sec MapReduce Total cumulative CPU time: 4 seconds 480 msec Ended Job = job_1533054465340_0011 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 4.48 sec HDFS Read: 12756871 HDFS Write: 7 SUCCESS Total MapReduce CPU Time Spent: 4 seconds 480 msec OK 169859 Time taken: 27.3 seconds, Fetched: 1 row(s)
a4.sh脚本的内容('-e' 表示不用启动hive直接在外部执行)
# a4.sh: count the distinct ip values in wxkj for today's logdate.
# `hive -e` runs the query without opening the interactive Hive shell.
#
# The date is assigned on its own line: as a one-line `CURRENT=... hive ...`
# prefix, $CURRENT inside the query is expanded before the assignment happens,
# leaving `where logdate = ` with no value.
CURRENT=$(/bin/date +%Y%m%d)
# logdate is a string partition column, so compare it with a quoted literal.
/softWare/apache-hive-0.13.0-bin/bin/hive -e "select count(distinct ip) from wxkj where logdate = '$CURRENT'"
# chmod +x a4.sh # ./a4.sh 18/07/31 11:57:23 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces .................................................... Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1533054465340_0013, Tracking URL = http://hadoop03:8088/proxy/application_1533054465340_0013/ Kill Command = /softWare/hadoop-2.2.0/bin/hadoop job -kill job_1533054465340_0013 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2018-07-31 11:57:36,443 Stage-1 map = 0%, reduce = 0% 2018-07-31 11:57:42,744 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 2.13 sec 2018-07-31 11:57:49,010 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 3.18 sec MapReduce Total cumulative CPU time: 3 seconds 180 msec Ended Job = job_1533054465340_0013 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 3.18 sec HDFS Read: 12756871 HDFS Write: 6 SUCCESS Total MapReduce CPU Time Spent: 3 seconds 180 msec OK 10413 Time taken: 23.223 seconds, Fetched: 1 row(s)
a5.sh脚本的内容('-e' 表示不用启动hive直接在外部执行)
# a5.sh: count today's wxkj rows whose urls column contains the substring
# 'member.php?mod=register' (instr(...) > 0).
# `hive -e` runs the query without opening the interactive Hive shell.
#
# The date is assigned on its own line: as a one-line `CURRENT=... hive ...`
# prefix, $CURRENT inside the query is expanded before the assignment happens,
# leaving `where logdate =  and ...` with no value.
CURRENT=$(/bin/date +%Y%m%d)
# logdate is a string partition column, so compare it with a quoted literal.
/softWare/apache-hive-0.13.0-bin/bin/hive -e "select count(*) from wxkj where logdate = '$CURRENT' and instr(urls, 'member.php?mod=register')>0;"
# chmod +x a5.sh # ./a5.sh 18/07/31 12:01:51 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1533054465340_0014, Tracking URL = http://hadoop03:8088/proxy/application_1533054465340_0014/ Kill Command = /softWare/hadoop-2.2.0/bin/hadoop job -kill job_1533054465340_0014 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2018-07-31 12:02:04,637 Stage-1 map = 0%, reduce = 0% 2018-07-31 12:02:09,971 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.5 sec 2018-07-31 12:02:16,294 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 2.42 sec MapReduce Total cumulative CPU time: 2 seconds 420 msec Ended Job = job_1533054465340_0014 MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 2.42 sec HDFS Read: 12756871 HDFS Write: 3 SUCCESS Total MapReduce CPU Time Spent: 2 seconds 420 msec OK 28 Time taken: 21.985 seconds, Fetched: 1 row(s)
a6.sh脚本的内容('-e' 表示不用启动hive直接在外部执行)
# a6.sh: create the Hive table vip_<date> (tab-delimited) from today's wxkj
# rows: one row per ip with its row count (vtimes), keeping only ips with
# vtimes >= 50, ordered by vtimes descending and limited to 20 rows.
# `hive -e` runs the statement without opening the interactive Hive shell.
#
# The date is assigned on its own line: as a one-line `CURRENT=... hive ...`
# prefix, $CURRENT inside the statement is expanded before the assignment
# happens, so the table would be named "vip_" and the logdate filter would
# have no value.
CURRENT=$(/bin/date +%Y%m%d)
# logdate is a string partition column, so compare it with a quoted literal.
/softWare/apache-hive-0.13.0-bin/bin/hive -e "create table vip_${CURRENT} row format delimited fields terminated by '\t' as select ip, count(*) as vtimes from wxkj where logdate = '${CURRENT}' group by ip having vtimes >= 50 order by vtimes desc limit 20"
# chmod +x a6.sh # ./a6.sh 18/07/31 12:45:52 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces Total jobs = 2 Launching Job 1 out of 2 Number of reduce tasks not specified. Estimated from input data size: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1533054465340_0015, Tracking URL = http://hadoop03:8088/proxy/application_1533054465340_0015/ Kill Command = /softWare/hadoop-2.2.0/bin/hadoop job -kill job_1533054465340_0015 Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1 2018-07-31 12:46:04,552 Stage-1 map = 0%, reduce = 0% 2018-07-31 12:46:10,908 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 1.52 sec 2018-07-31 12:46:17,186 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 2.99 sec MapReduce Total cumulative CPU time: 2 seconds 990 msec Ended Job = job_1533054465340_0015 Launching Job 2 out of 2 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number> Starting Job = job_1533054465340_0016, Tracking URL = http://hadoop03:8088/proxy/application_1533054465340_0016/ Kill Command = /softWare/hadoop-2.2.0/bin/hadoop job -kill job_1533054465340_0016 Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1 2018-07-31 12:46:24,744 Stage-2 map = 0%, reduce = 0% 2018-07-31 12:46:30,035 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 1.04 sec 2018-07-31 12:46:35,290 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 1.87 sec MapReduce Total cumulative CPU time: 1 seconds 870 msec Ended Job = job_1533054465340_0016 Moving data to: hdfs://ns1/user/hive/warehouse/vip_20180731 Table default.vip_20180731 stats: [numFiles=1, numRows=20, totalSize=373, rawDataSize=353] MapReduce Jobs Launched: Job 0: Map: 1 Reduce: 1 Cumulative CPU: 2.99 sec HDFS Read: 12756871 HDFS Write: 21822 SUCCESS Job 1: Map: 1 Reduce: 1 Cumulative CPU: 1.87 sec HDFS Read: 22174 HDFS Write: 450 SUCCESS Total MapReduce CPU Time Spent: 4 seconds 860 msec OK
-- MySQL table `vip`: one row per ip, keyed on ip.
-- NOTE(review): `times` is declared as varchar(255); if it is meant to hold a
-- numeric count, an integer type may be more appropriate -- confirm with the
-- consumers of this table before changing it.
CREATE TABLE `vip` (
    `ip`    varchar(100) NOT NULL,
    `times` varchar(255) NOT NULL,
    PRIMARY KEY (`ip`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
a7.sh脚本的内容(使用 sqoop export 将 Hive 结果表 vip_当天日期 的数据导出到 MySQL 的 vip 表)
# a7.sh: export the tab-delimited files under /user/hive/warehouse/vip_<date>
# into the MySQL table `vip` with `sqoop export`.
#
# The date is assigned on its own line: as a one-line `CURRENT=... sqoop ...`
# prefix, $CURRENT in --export-dir is expanded before the assignment happens,
# so the export directory would be "/user/hive/warehouse/vip_".
CURRENT=$(/bin/date +%Y%m%d)

# NOTE(review): the JDBC URL below has no host/port/database part -- it looks
# truncated; fill in the real target before running.
# NOTE(review): passing --password on the command line exposes the password
# in the process list and shell history; Sqoop offers -P (prompt) and
# --password-file as alternatives. Left unchanged so unattended runs behave
# as before.
/softWare/sqoop-1.4.4.bin__hadoop-2.0.4-alpha/bin/sqoop export \
    --connect jdbc:mysql:// \
    --username root \
    --password root \
    --export-dir "/user/hive/warehouse/vip_${CURRENT}" \
    --table vip \
    --fields-terminated-by '\t'
# chmod +x a7.sh # ./a7.sh Warning: /usr/lib/hbase does not exist! HBase imports will fail. Please set $HBASE_HOME to the root of your HBase installation. Warning: /usr/lib/hcatalog does not exist! HCatalog jobs will fail. Please set $HCAT_HOME to the root of your HCatalog installation. 18/07/31 13:04:44 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead. 18/07/31 13:04:44 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset. 18/07/31 13:04:44 INFO tool.CodeGenTool: Beginning code generation 18/07/31 13:04:45 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `vip` AS t LIMIT 1 18/07/31 13:04:45 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `vip` AS t LIMIT 1 18/07/31 13:04:45 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /softWare/hadoop-2.2.0 ........................... 18/07/31 13:04:50 INFO mapreduce.Job: Running job: job_1533054465340_0017 18/07/31 13:04:56 INFO mapreduce.Job: Job job_1533054465340_0017 running in uber mode : false 18/07/31 13:04:56 INFO mapreduce.Job: map 0% reduce 0% 18/07/31 13:05:11 INFO mapreduce.Job: map 100% reduce 0% 18/07/31 13:05:11 INFO mapreduce.Job: Job job_1533054465340_0017 completed successfully 18/07/31 13:05:11 INFO mapreduce.Job: Counters: 27 .............................
File System Counters FILE: Number of bytes read=0 FILE: Number of bytes written=364524 FILE: Number of read operations=0 FILE: Number of large read operations=0 FILE: Number of write operations=0 HDFS: Number of bytes read=1597 HDFS: Number of bytes written=0 HDFS: Number of read operations=19 HDFS: Number of large read operations=0 HDFS: Number of write operations=0 Job Counters Launched map tasks=4 Rack-local map tasks=4 Total time spent by all maps in occupied slots (ms)=52047 Total time spent by all reduces in occupied slots (ms)=0 Map-Reduce Framework Map input records=20 Map output records=20 Input split bytes=601 Spilled Records=0 Failed Shuffles=0 Merged Map outputs=0 GC time elapsed (ms)=729 CPU time spent (ms)=2300 Physical memory (bytes) snapshot=232755200 Virtual memory (bytes) snapshot=1454493696 Total committed heap usage (bytes)=63963136 File Input Format Counters Bytes Read=0 File Output Format Counters Bytes Written=0 18/07/31 13:05:11 INFO mapreduce.ExportJobBase: Transferred 1.5596 KB in 24.0166 seconds (66.4956 bytes/sec) 18/07/31 13:05:11 INFO mapreduce.ExportJobBase: Exported 20 records.