From 9a3fd1484f7ff713a323f5687cb9681fc0c524da Mon Sep 17 00:00:00 2001 From: Abhijeet Gaikwad Date: Sun, 20 Jan 2013 20:56:44 +0530 Subject: [PATCH] SQOOP-824: Sqoop code generation in 'update' export mode incompatible with '--columns' option (Jarek Jarcec Cecho via Abhijeet Gaikwad) --- .../org/apache/sqoop/manager/ConnManager.java | 26 ++++++++++- .../com/cloudera/sqoop/TestExportUpdate.java | 46 ++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/sqoop/manager/ConnManager.java b/src/java/org/apache/sqoop/manager/ConnManager.java index 115186f5..358981e2 100644 --- a/src/java/org/apache/sqoop/manager/ConnManager.java +++ b/src/java/org/apache/sqoop/manager/ConnManager.java @@ -506,6 +506,17 @@ public void configureDbOutputColumns(SqoopOptions options) { // last, because the UPDATE-based OutputFormat will generate the SET // clause followed by the WHERE clause, and the SqoopRecord needs to // serialize to this layout. + + // Check if user specified --columns parameter + Set columns = null; + if (options.getColumns() != null && options.getColumns().length > 0) { + // If so, put all columns in uppercase form into our helper set + columns = new HashSet(); + for(String c : options.getColumns()) { + columns.add(c.toUpperCase()); + } + } + Set updateKeys = new LinkedHashSet(); Set updateKeysUppercase = new HashSet(); String updateKeyValue = options.getUpdateKeyCol(); @@ -513,8 +524,16 @@ public void configureDbOutputColumns(SqoopOptions options) { while (stok.hasMoreTokens()) { String nextUpdateColumn = stok.nextToken().trim(); if (nextUpdateColumn.length() > 0) { + String upperCase = nextUpdateColumn.toUpperCase(); + + // We must make sure that --columns is a superset of --update-key + if (columns != null && !columns.contains(upperCase)) { + throw new RuntimeException("You must specify all columns from " + + "--update-key parameter in --columns parameter."); + } + updateKeys.add(nextUpdateColumn); - 
updateKeysUppercase.add(nextUpdateColumn.toUpperCase()); + updateKeysUppercase.add(upperCase); } else { throw new RuntimeException("Invalid update key column value specified" + ": '" + updateKeyValue + "'"); @@ -524,6 +543,11 @@ public void configureDbOutputColumns(SqoopOptions options) { List dbOutCols = new ArrayList(); for (String col : allColNames) { if (!updateKeysUppercase.contains(col.toUpperCase())) { + // Skip columns that were not explicitly listed in --columns on the command line + if (columns != null && !columns.contains(col.toUpperCase())) { + continue; + } + dbOutCols.add(col); // add non-key columns to the output order list. } } diff --git a/src/test/com/cloudera/sqoop/TestExportUpdate.java b/src/test/com/cloudera/sqoop/TestExportUpdate.java index f5c30f34..95d7b6ae 100644 --- a/src/test/com/cloudera/sqoop/TestExportUpdate.java +++ b/src/test/com/cloudera/sqoop/TestExportUpdate.java @@ -98,7 +98,7 @@ private void populateDatabase(int numRows) throws SQLException { * 1 | 1 | 1foo1 * 1 | 2 | 1foo2 *

- * @param firstKeyRange the number of + * @param aMax the number of * @throws SQLException */ private void createMultiKeyTable(int aMax) throws SQLException { @@ -642,4 +642,48 @@ public void testSubsetUpdate2() throws Exception { verifyRow("A", "9", "9", "foo18", "18"); } + /** + * Test updating only a subset of the columns. + * + * @throws Exception + */ + public void testUpdateColumnSubset() throws Exception { + populateDatabase(4); + createUpdateFiles(1, 3, 0); + + runExport(getArgv(true, 2, 2, "-m", "1", + "--update-key", "A", "--columns", "A,B")); + + verifyRowCount(4); + + // First row should not have any changes (even though it was updated) + verifyRow("A", "0", "0", "foo0", "0"); + + // Second row has column B updated, but C should be left untouched + verifyRow("A", "1", "1", "foo2", "1"); + + // Third row has column B updated, but C should be left untouched + verifyRow("A", "2", "2", "foo4", "2"); + + // Last row should be completely untouched + verifyRow("A", "3", "3", "foo3", "3"); + } + + /** + * Parameter --columns must be a superset of --update-key in order for + * CompilationManager and other parts of the framework to work correctly. + * + * @throws Exception + */ + public void testUpdateColumnNotInColumns() throws Exception { + populateDatabase(1); + try { + runExport(getArgv(true, 2, 2, "-m", "1", + "--update-key", "A", "--columns", "B")); + fail("Expected IOException"); + } catch (IOException e) { + assertTrue(true); + } + } + }