Running an HBase secondary-index job on a single machine:
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class IndexCreateExample extends TableMapper<ImmutableBytesWritable, Put> {

    public static Configuration conf = null;
    // column family of the source table
    private String familyName;
    // source-table qualifier -> name of the index table to write to
    private Map<byte[], ImmutableBytesWritable> indexes =
            new HashMap<byte[], ImmutableBytesWritable>();

    static {
        conf = new Configuration();
        String filePath = "hbase-site.xml";
        Path path = new Path(filePath);
        conf.addResource(path);
        conf = HBaseConfiguration.create(conf);
    }

    @Override
    protected void setup(
            Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Put>.Context context)
            throws IOException, InterruptedException {
        // source table name
        String tableName = conf.get("tableName");
        // column family (the key must match the one set in main())
        familyName = conf.get("columnFamily");
        // qualifiers to index
        String[] qualifiers = conf.getStrings("qualifiers");
        for (String qualifier : qualifiers) {
            // one index table per qualifier, named tableName + "-" + qualifier
            indexes.put(Bytes.toBytes(qualifier), new ImmutableBytesWritable(
                    Bytes.toBytes(tableName + "-" + qualifier)));
        }
    }

    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("Usage: IndexCreateExample <table_name> <cf> <cn> [<cn> ...]");
            System.exit(-1);
        }
        // table name
        String tableName = args[0];
        // column family
        String columnFamily = args[1];
        conf.set("tableName", tableName);
        conf.set("columnFamily", columnFamily);
        // one or more qualifiers
        String[] qualifiers = new String[args.length - 2];
        for (int i = 0; i < qualifiers.length; i++) {
            qualifiers[i] = args[i + 2];
        }
        conf.setStrings("qualifiers", qualifiers);

        @SuppressWarnings("deprecation")
        Job job;
        try {
            job = new Job(conf, tableName);
            job.setJarByClass(IndexCreateExample.class);
            job.setMapperClass(IndexCreateExample.class);
            job.setNumReduceTasks(0); // map-only job, no reduce phase
            job.setInputFormatClass(TableInputFormat.class);
            job.setOutputFormatClass(MultiTableOutputFormat.class);
            Scan scan = new Scan();
            TableMapReduceUtil.initTableMapperJob(tableName, scan,
                    IndexCreateExample.class, ImmutableBytesWritable.class,
                    Put.class, job);
            job.waitForCompletion(true);
        } catch (Exception e1) {
            e1.printStackTrace();
        }
    }

    @Override
    protected void map(ImmutableBytesWritable key, Result value,
            Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Put>.Context context)
            throws IOException, InterruptedException {
        Set<byte[]> keys = indexes.keySet();
        for (byte[] k : keys) {
            // name of the index table for this qualifier
            ImmutableBytesWritable indexTableName = indexes.get(k);
            // cell value from the source table; it becomes the index row key
            byte[] value2 = value.getValue(Bytes.toBytes(familyName), k);
            if (value2 != null) {
                Put put = new Put(value2); // index-table row key
                // f1:id stores the source-table row key
                put.add(Bytes.toBytes("f1"), Bytes.toBytes("id"), key.get());
                context.write(indexTableName, put);
            }
        }
    }
}
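Note that MultiTableOutputFormat routes each Put to the table named in the map output key, so every index table (tableName + "-" + qualifier, e.g. studentinfo-name) must already exist before the job starts; the shell steps at the end of this post create them by hand. If you prefer to create them from code, a minimal sketch using the HBase 1.x Admin API could be added to main() right after the qualifiers are parsed (this helper is not part of the original example, and it needs extra imports for TableName, HTableDescriptor, HColumnDescriptor, Admin, Connection and ConnectionFactory):

    // Hypothetical helper, not part of the original example: pre-create one index
    // table per qualifier (named tableName + "-" + qualifier, column family "f1")
    // so MultiTableOutputFormat has a target table to write to.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin()) {
        for (String qualifier : qualifiers) {
            TableName indexTable = TableName.valueOf(tableName + "-" + qualifier);
            if (!admin.tableExists(indexTable)) {
                HTableDescriptor desc = new HTableDescriptor(indexTable);
                desc.addFamily(new HColumnDescriptor("f1"));
                admin.createTable(desc);
            }
        }
    }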
Exception 1: insufficient memory (the container is killed for exceeding its memory limit):
For more detailed output, check application tracking page: http://ts.node2.com:8088/proxy/application_1472537544791_0007/ Then, click on links to logs of each attempt.
Diagnostics: Container [pid=3984,containerID=container_1472537544791_0007_02_000001] is running beyond physical memory limits. Current usage: 285.6 MB of 256 MB physical memory used; 1.5 GB of 537.6 MB virtual memory used. Killing container.
Dump of the process-tree for container_1472537544791_0007_02_000001 :
|- PID PPID PGRPID SESSID CMD_NAME USER_MODE_TIME(MILLIS) SYSTEM_TIME(MILLIS) VMEM_USAGE(BYTES) RSSMEM_USAGE(PAGES) FULL_CMD_LINE
|- 3984 3982 3984 3984 (bash) 0 0 108617728 338 /bin/bash -c /usr/java/jdk1.7.0_79/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1472537544791_0007/container_1472537544791_0007_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Djava.net.preferIPv4Stack=true -Xmx825955249 org.apache.hadoop.mapreduce.v2.app.MRAppMaster 1>/var/log/hadoop-yarn/container/application_1472537544791_0007/container_1472537544791_0007_02_000001/stdout 2>/var/log/hadoop-yarn/container/application_1472537544791_0007/container_1472537544791_0007_02_000001/stderr
|- 3999 3984 3984 3984 (java) 2183 69 1504305152 72786 /usr/java/jdk1.7.0_79/bin/java -Dlog4j.configuration=container-log4j.properties -Dyarn.app.container.log.dir=/var/log/hadoop-yarn/container/application_1472537544791_0007/container_1472537544791_0007_02_000001 -Dyarn.app.container.log.filesize=0 -Dhadoop.root.logger=INFO,CLA -Djava.net.preferIPv4Stack=true -Xmx825955249 org.apache.hadoop.mapreduce.v2.app.MRAppMaster
Container killed on request. Exit code is 143
Container exited with a non-zero exit code 143
Failing this attempt. Failing the application.
16/09/01 11:07:56 INFO mapreduce.Job: Counters: 0
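The killed container is the MRAppMaster (the application master): it was allocated only 256 MB of physical memory while its JVM was started with -Xmx825955249 (about 790 MB), and the 537.6 MB virtual-memory limit is just 256 MB times the default yarn.nodemanager.vmem-pmem-ratio of 2.1, so the kill was inevitable. One way past it, assuming the job-level memory settings can be raised, is to size the containers to match the heap before the Job is created. The property names below are standard Hadoop 2.x settings; the values are only illustrative for a small single-node machine:

    // Illustrative values for a small single-node setup; tune to the available RAM.
    // Place these in main() before "job = new Job(conf, tableName)".
    conf.set("yarn.app.mapreduce.am.resource.mb", "1024");      // AM container (was limited to 256 MB)
    conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx768m"); // keep the AM heap inside that container
    conf.set("mapreduce.map.memory.mb", "1024");                // map-task container size
    conf.set("mapreduce.map.java.opts", "-Xmx768m");            // map-task heap inside that container

Cluster-wide limits such as yarn.nodemanager.resource.memory-mb, yarn.scheduler.maximum-allocation-mb and the virtual-memory check (yarn.nodemanager.vmem-check-enabled, yarn.nodemanager.vmem-pmem-ratio) cannot be set per job; they live in yarn-site.xml and take effect only after restarting YARN.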
Exception 2:
16/08/31 20:40:03 ERROR mapreduce.TableInputFormat: java.io.IOException: java.lang.reflect.InvocationTargetException
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:240)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:218)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:119)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormat.initialize(TableInputFormat.java:183)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:230)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormat.getSplits(TableInputFormat.java:237)
    at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:597)
    at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:614)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1306)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1303)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1303)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1324)
    at com.tansun.di.core.di.core.hbase.second.IndexCreateExample.main(IndexCreateExample.java:101)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
    at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:238)
    ... 22 more
Caused by: java.lang.UnsupportedOperationException: Unable to find org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory
    at org.apache.hadoop.hbase.util.ReflectionUtils.instantiateWithCustomCtor(ReflectionUtils.java:36)
    at org.apache.hadoop.hbase.ipc.RpcControllerFactory.instantiate(RpcControllerFactory.java:58)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.createAsyncProcess(ConnectionManager.java:2220)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:676)
    at org.apache.hadoop.hbase.client.ConnectionManager$HConnectionImplementation.<init>(ConnectionManager.java:618)
    ... 27 more
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:191)
    at org.apache.hadoop.hbase.util.ReflectionUtils.instantiateWithCustomCtor(ReflectionUtils.java:32)
    ... 31 more
16/08/31 20:40:03 INFO mapreduce.JobSubmitter: Cleaning up the staging area /tmp/hadoop-yarn/staging/root/.staging/job_1472695767533_0003
16/08/31 20:40:03 WARN security.UserGroupInformation: PriviledgedActionException as:root (auth:SIMPLE) cause:java.io.IOException: Cannot create a record reader because of a previous error. Please look at the previous logs lines from the task's full log for more details.
java.io.IOException: Cannot create a record reader because of a previous error. Please look at the previous logs lines from the task's full log for more details.
    at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:241)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormat.getSplits(TableInputFormat.java:237)
    at org.apache.hadoop.mapreduce.JobSubmitter.writeNewSplits(JobSubmitter.java:597)
    at org.apache.hadoop.mapreduce.JobSubmitter.writeSplits(JobSubmitter.java:614)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:492)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1306)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1303)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1671)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1303)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1324)
    at com.tansun.di.core.di.core.hbase.second.IndexCreateExample.main(IndexCreateExample.java:101)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.IllegalStateException: The input format instance has not been properly initialized. Ensure you call initializeTable either in your constructor or initialize method
    at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getTable(TableInputFormatBase.java:389)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.getSplits(TableInputFormatBase.java:236)
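The root cause is the final ClassNotFoundException: the hbase-site.xml picked up by the job sets hbase.rpc.controllerfactory.class to org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory (a value commonly written by Apache Phoenix), but the Phoenix client jar that contains that class is not on the job's classpath, so connection creation fails. The later "input format instance has not been properly initialized" message is only a follow-on symptom. Two usual ways out: add the Phoenix client jar to the classpath, or, if this job does not need Phoenix, override the property back to the stock HBase factory. A minimal sketch of the override, placed in main() before the Job is created (assuming Phoenix is not needed here):

    // Assumes Phoenix's server-side RPC controller is not needed for this job:
    // override the value inherited from hbase-site.xml with the stock HBase factory
    // so connection creation no longer tries to load the missing Phoenix class.
    conf.set("hbase.rpc.controllerfactory.class",
            "org.apache.hadoop.hbase.ipc.RpcControllerFactory");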
Create the source table:
hbase(main):002:0> create 'studentinfo','f1'
0 row(s) in 0.6520 seconds
=> Hbase::Table - studentinfo
hbase(main):003:0> put 'studentinfo','1','f1:name','zhangsan'
0 row(s) in 0.1640 seconds
hbase(main):004:0> put 'studentinfo','2','f1:name','lisi'
0 row(s) in 0.0240 seconds
hbase(main):005:0> put 'studentinfo','3','f1:name','wangwu'
0 row(s) in 0.0290 seconds
hbase(main):006:0> scan 'studentinfo'
ROW    COLUMN+CELL
 1     column=f1:name, timestamp=1436262175823, value=zhangsan
 2     column=f1:name, timestamp=1436262183922, value=lisi
 3     column=f1:name, timestamp=1436262189250, value=wangwu
3 row(s) in 0.0530 seconds
Create the index table:
hbase(main):007:0> create 'studentinfo-name','f1'
0 row(s) in 0.7740 seconds
=> Hbase::Table - studentinfo-name
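Once the MapReduce job has populated studentinfo-name, each index row is keyed by the name value and stores the source row key in f1:id, so a lookup by name becomes two point Gets instead of a full scan of studentinfo. A minimal sketch of such a lookup with the HBase 1.x client API (the class name IndexLookupExample and the sample name 'lisi' are illustrative, not part of the original post):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative lookup: index-table row key = name value, f1:id = source row key.
public class IndexLookupExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf)) {
            Table index = conn.getTable(TableName.valueOf("studentinfo-name"));
            Table source = conn.getTable(TableName.valueOf("studentinfo"));
            // 1. Look up the source-table row key by name in the index table.
            Result idxRow = index.get(new Get(Bytes.toBytes("lisi")));
            byte[] sourceKey = idxRow.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
            if (sourceKey != null) {
                // 2. Fetch the full row from the source table by its row key.
                Result row = source.get(new Get(sourceKey));
                System.out.println("row key = " + Bytes.toString(sourceKey)
                        + ", name = " + Bytes.toString(
                                row.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"))));
            }
        }
    }
}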