From 20b16fd4bd0b045a11006d81ddd1bcc8bccd01db Mon Sep 17 00:00:00 2001
From: Jarek Jarcec Cecho
Date: Thu, 7 Feb 2013 18:37:50 -0800
Subject: [PATCH] SQOOP-862: Hbase import fails if there is a row where all
 columns are null

(David Robson via Jarek Jarcec Cecho)
---
 src/docs/user/hbase.txt                        |  3 ++-
 .../apache/sqoop/hbase/HBasePutProcessor.java  | 15 ++++++++++++++-
 .../cloudera/sqoop/hbase/HBaseImportTest.java  | 15 +++++++++++++++
 .../cloudera/sqoop/hbase/HBaseTestCase.java    | 19 +++++++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/src/docs/user/hbase.txt b/src/docs/user/hbase.txt
index 74049293..24c8df87 100644
--- a/src/docs/user/hbase.txt
+++ b/src/docs/user/hbase.txt
@@ -48,6 +48,7 @@ using the default parameters from your HBase configuration.
 
 Sqoop currently serializes all values to HBase by converting each field
 to its string representation (as if you were importing to HDFS in text
 mode), and then inserts the UTF-8 bytes of this string in the target
-cell.
+cell. Sqoop will skip all rows containing null values in all columns
+except the row key column.
 
diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
index cca641f0..ffa5f63f 100644
--- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
+++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
@@ -23,10 +23,13 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.ReflectionUtils;
 
 import org.apache.sqoop.mapreduce.ImportJobBase;
@@ -41,6 +44,9 @@
 public class HBasePutProcessor implements Closeable, Configurable,
     FieldMapProcessor {
 
+  public static final Log LOG = LogFactory.getLog(
+      HBasePutProcessor.class.getName());
+
   /** Configuration key specifying the table to insert into. */
   public static final String TABLE_NAME_KEY = "sqoop.hbase.insert.table";
 
@@ -124,7 +130,14 @@ public void accept(FieldMappable record)
     List<Put> putList = putTransformer.getPutCommand(fields);
     if (null != putList) {
       for (Put put : putList) {
-        this.table.put(put);
+        if (put!=null) {
+          if (put.isEmpty()) {
+            LOG.warn("Could not insert row with no columns "
+                + "for row-key column: " + Bytes.toString(put.getRow()));
+          } else {
+            this.table.put(put);
+          }
+        }
       }
     }
   }
diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
index bf24608f..e1f96968 100644
--- a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
+++ b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
@@ -112,4 +112,19 @@ public void testExitFailure() throws IOException {
     fail("should have gotten exception");
   }
+
+  @Test
+  public void testNullRow() throws IOException {
+    String [] argv = getArgv(true, "nullRowT", "nullRowF", true, null);
+    String [] types = { "INT", "INT" };
+    String [] vals = { "0", "null" };
+    createTableWithColTypes(types, vals);
+    runImport(argv);
+
+    // This cell should not be placed in the results..
+    verifyHBaseCell("nullRowT", "0", "nullRowF", getColName(1), null);
+
+    int rowCount = countHBaseTable("nullRowT", "nullRowF");
+    assertEquals(0, rowCount);
+  }
 }
diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
index 65ff87b5..37dc004d 100644
--- a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
+++ b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
@@ -36,6 +36,7 @@
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.StringUtils;
 
@@ -234,4 +235,22 @@ public static File createTempDir() {
     }
     throw new IllegalStateException("Failed to create directory");
   }
+
+  protected int countHBaseTable(String tableName, String colFamily)
+      throws IOException {
+    int count = 0;
+    HTable table = new HTable(new Configuration(
+        hbaseTestUtil.getConfiguration()), Bytes.toBytes(tableName));
+    try {
+      ResultScanner scanner = table.getScanner(Bytes.toBytes(colFamily));
+      for(Result result = scanner.next();
+          result != null;
+          result = scanner.next()) {
+        count++;
+      }
+    } finally {
+      table.close();
+    }
+    return count;
+  }
 }
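
For context, the standalone sketch below (not part of the patch) mirrors the guard that the change adds to HBasePutProcessor.accept(): a row whose non-key columns are all null yields a Put carrying only the row key and no column values, and such a Put is now logged and skipped instead of being handed to HTable.put(), which rejects empty puts. The class name EmptyPutGuardSketch and the "cf"/"col1" family and qualifier are illustrative assumptions, and Put.add(family, qualifier, value) is the pre-1.0 HBase client API of the same era as this patch.

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class EmptyPutGuardSketch {
  public static void main(String[] args) {
    // Row where every non-key column was NULL: no column values are added,
    // so the Put holds only the row key.
    Put emptyPut = new Put(Bytes.toBytes("0"));

    // Row with at least one non-null column value.
    Put populatedPut = new Put(Bytes.toBytes("1"));
    populatedPut.add(Bytes.toBytes("cf"), Bytes.toBytes("col1"),
        Bytes.toBytes("42"));

    for (Put put : new Put[] { emptyPut, populatedPut }) {
      if (put.isEmpty()) {
        // The patched accept() logs a warning and skips the row here.
        System.out.println("Skipping row-key "
            + Bytes.toString(put.getRow()) + ": no columns to insert");
      } else {
        // Only non-empty puts are forwarded to the HBase table.
        System.out.println("Would call table.put() for row-key "
            + Bytes.toString(put.getRow()));
      }
    }
  }
}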