From d4d262591c342c863c02a9330e8e48b4be8cdf4b Mon Sep 17 00:00:00 2001 From: Jarek Jarcec Cecho Date: Thu, 7 Feb 2013 18:48:34 -0800 Subject: [PATCH] SQOOP-870: Allow Sqoop to import row key column into HBase (David Robson via Jarek Jarcec Cecho) --- src/docs/user/import.txt | 6 ++++ .../apache/sqoop/hbase/HBasePutProcessor.java | 13 ++++++- .../sqoop/hbase/ToStringPutTransformer.java | 3 +- .../cloudera/sqoop/hbase/HBaseImportTest.java | 34 +++++++++++++++++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt index 9bc4fc9c..ee10b1cf 100644 --- a/src/docs/user/import.txt +++ b/src/docs/user/import.txt @@ -607,6 +607,12 @@ Argument Description exponent component (0.0000001); while \ a value of +false+ will use toString \ which may include an exponent (1E-7) ++sqoop.hbase.add.row.key+ When set to +false+ (default), Sqoop \ + will not add the column used as a row \ + key into the row data in HBase. When \ + set to +true+, the column used as a \ + row key will be added to the row data \ + in HBase. ------------------------------------------------------------------------------- diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java index ffa5f63f..6aca97f6 100644 --- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java +++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java @@ -66,6 +66,12 @@ public class HBasePutProcessor implements Closeable, Configurable, public static final String TRANSFORMER_CLASS_KEY = "sqoop.hbase.insert.put.transformer.class"; + /** Configuration key to specify whether to add the row key column into + * HBase. Set to false by default. + */ + public static final String ADD_ROW_KEY = "sqoop.hbase.add.row.key"; + public static final boolean ADD_ROW_KEY_DEFAULT = false; + private Configuration conf; // An object that can transform a map of fieldName->object @@ -98,9 +104,14 @@ public void setConf(Configuration config) { this.putTransformer.setRowKeyColumn(conf.get(ROW_KEY_COLUMN_KEY, null)); if (this.putTransformer instanceof ToStringPutTransformer) { - ((ToStringPutTransformer) this.putTransformer).bigDecimalFormatString = + ToStringPutTransformer stringPutTransformer = + (ToStringPutTransformer) this.putTransformer; + stringPutTransformer.bigDecimalFormatString = conf.getBoolean(ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT, ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT); + stringPutTransformer.addRowKey = + conf.getBoolean(HBasePutProcessor.ADD_ROW_KEY, + HBasePutProcessor.ADD_ROW_KEY_DEFAULT); } this.tableName = conf.get(TABLE_NAME_KEY, null); diff --git a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java index 131fd437..13c765c0 100644 --- a/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java +++ b/src/java/org/apache/sqoop/hbase/ToStringPutTransformer.java @@ -44,6 +44,7 @@ public class ToStringPutTransformer extends PutTransformer { // Used to cache serialization work done for fields names. private Map serializedFieldNames; protected boolean bigDecimalFormatString; + protected boolean addRowKey; public ToStringPutTransformer() { serializedFieldNames = new TreeMap(); @@ -87,7 +88,7 @@ public List getPutCommand(Map fields) for (Map.Entry fieldEntry : fields.entrySet()) { String colName = fieldEntry.getKey(); - if (!colName.equals(rowKeyCol)) { + if (!colName.equals(rowKeyCol) || addRowKey) { // This is a regular field, not the row key. // Add it if it's not null. Object val = fieldEntry.getValue(); diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java index e1f96968..d411f3dd 100644 --- a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java +++ b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java @@ -127,4 +127,38 @@ public void testNullRow() throws IOException { int rowCount = countHBaseTable("nullRowT", "nullRowF"); assertEquals(0, rowCount); } + + @Test + public void testAddRowKey() throws IOException { + String[] types = { "INT", "INT" }; + String[] vals = { "0", "1" }; + createTableWithColTypes(types, vals); + + String[] otherArg = getArgv(true, "addRowKeyT", "addRowKeyF", true, null); + String[] argv = new String[otherArg.length + 2]; + argv[0] = "-D"; + argv[1] = "sqoop.hbase.add.row.key=true"; + System.arraycopy(otherArg, 0, argv, 2, otherArg.length); + + runImport(argv); + + // Row key should have been added + verifyHBaseCell("addRowKeyT", "0", "addRowKeyF", getColName(0), "0"); + verifyHBaseCell("addRowKeyT", "0", "addRowKeyF", getColName(1), "1"); + } + + @Test + public void testAddRowKeyDefault() throws IOException { + String[] types = { "INT", "INT" }; + String[] vals = { "0", "1" }; + createTableWithColTypes(types, vals); + + String[] argv = getArgv(true, "addRowKeyDfT", "addRowKeyDfF", true, null); + + runImport(argv); + + // Row key should not be added by default + verifyHBaseCell("addRowKeyDfT", "0", "addRowKeyDfF", getColName(0), null); + verifyHBaseCell("addRowKeyDfT", "0", "addRowKeyDfF", getColName(1), "1"); + } }