diff --git a/src/java/org/apache/sqoop/lib/DelimiterSet.java b/src/java/org/apache/sqoop/lib/DelimiterSet.java index ef62ba08..d76890ed 100644 --- a/src/java/org/apache/sqoop/lib/DelimiterSet.java +++ b/src/java/org/apache/sqoop/lib/DelimiterSet.java @@ -46,6 +46,20 @@ public class DelimiterSet implements Cloneable { "sqoop.output.escaped.by"; public static final String OUTPUT_ENCLOSE_REQUIRED_KEY = "sqoop.output.enclose.required"; + + /** + * Create the input variations for export instead of overloading them. + */ + public static final String INPUT_FIELD_DELIM_KEY = + "sqoop.input.field.delim"; + public static final String INPUT_RECORD_DELIM_KEY = + "sqoop.input.record.delim"; + public static final String INPUT_ENCLOSED_BY_KEY = + "sqoop.input.enclosed.by"; + public static final String INPUT_ESCAPED_BY_KEY = + "sqoop.input.escaped.by"; + public static final String INPUT_ENCLOSE_REQUIRED_KEY = + "sqoop.input.enclose.required"; /** * Create a delimiter set with the default delimiters * (comma for fields, newline for records). diff --git a/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java b/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java index ef989365..4f36bf61 100644 --- a/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java +++ b/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java @@ -28,6 +28,8 @@ import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.ParseException; +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -59,13 +61,39 @@ public class DirectNetezzaManager extends NetezzaManager { private static final String QUERY_CHECK_DICTIONARY_FOR_TABLE = "SELECT 1 FROM _V_OBJECTS WHERE OWNER= ? " + " AND OBJNAME = ? and OBJTYPE = 'TABLE'"; - + public static final String NETEZZA_NULL_VALUE = + "netezza.exttable.null.value"; public DirectNetezzaManager(SqoopOptions opts) { super(opts); try { handleNetezzaExtraArgs(options); - } catch (ParseException ioe) { - throw new RuntimeException(ioe.getMessage(), ioe); + } catch (ParseException pe) { + throw new RuntimeException(pe.getMessage(), pe); + } + } + + private void checkNullValueStrings(String nullStrValue, + String nullNonStrValue) throws IOException { + + if (!StringUtils.equals(nullStrValue, nullNonStrValue)) { + throw new IOException( + "Detected different values of --input-string and --input-non-string " + + "parameters. Netezza direct manager does not support that. Please " + + "either use the same values or omit the --direct parameter."); + } + + + // Null String values cannot be more 4 chars in length in the case + // Netezza external tables. + + if (nullStrValue != null) { + nullStrValue = StringEscapeUtils.unescapeJava(nullStrValue); + if (nullStrValue.length() > 4) { + throw new IOException( + "Null string (and null non string) values for Netezza direct mode" + + " manager must be less than 4 characters in length"); + } + options.getConf().set(NETEZZA_NULL_VALUE, nullStrValue); } } @@ -131,12 +159,13 @@ public void exportTable(com.cloudera.sqoop.manager.ExportJobContext context) options = context.getOptions(); context.setConnManager(this); - checkTable(); // Throws excpetion as necessary NetezzaExternalTableExportJob exporter = null; char qc = (char) options.getInputEnclosedBy(); char ec = (char) options.getInputEscapedBy(); + checkNullValueStrings(options.getInNullStringValue(), + options.getInNullNonStringValue()); if (qc > 0 && !(qc == '"' || qc == '\'')) { throw new ExportException("Input enclosed-by character must be '\"' " @@ -173,6 +202,9 @@ public void importTable(com.cloudera.sqoop.manager.ImportJobContext context) new IOException("Null tableName for Netezza external table import."); } + checkNullValueStrings(options.getNullStringValue(), + options.getNullNonStringValue()); + char qc = options.getOutputEnclosedBy(); char ec = options.getOutputEscapedBy(); diff --git a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java index 410a5699..22b7af56 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java @@ -64,9 +64,11 @@ public abstract class NetezzaExternalTableExportMapper extends private String getSqlStatement() throws IOException { - char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ','); - char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0); - char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0); + char fd = (char) conf.getInt(DelimiterSet.INPUT_FIELD_DELIM_KEY, ','); + char qc = (char) conf.getInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY, 0); + char ec = (char) conf.getInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, 0); + + String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE); int errorThreshold = conf.getInt( DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1); @@ -89,7 +91,13 @@ private String getSqlStatement() throws IOException { } sqlStmt.append(" FORMAT 'Text' "); sqlStmt.append(" INCLUDEZEROSECONDS TRUE "); - sqlStmt.append(" NULLVALUE 'NULL' "); + sqlStmt.append(" NULLVALUE '"); + if (nullValue != null) { + sqlStmt.append(nullValue); + } else { + sqlStmt.append("null"); + } + sqlStmt.append("' "); if (qc > 0) { switch (qc) { case '\'': diff --git a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableImportMapper.java b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableImportMapper.java index 9e6cab67..bcdc9e1a 100644 --- a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableImportMapper.java +++ b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableImportMapper.java @@ -66,6 +66,9 @@ private String getSqlStatement(int myId) throws IOException { char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ','); char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0); char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0); + + String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE); + int errorThreshold = conf.getInt( DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1); String logDir = conf.get(DirectNetezzaManager.NETEZZA_LOG_DIR_OPT); @@ -86,7 +89,13 @@ private String getSqlStatement(int myId) throws IOException { } sqlStmt.append(" FORMAT 'Text' "); sqlStmt.append(" INCLUDEZEROSECONDS TRUE "); - sqlStmt.append(" NULLVALUE 'null' "); + sqlStmt.append(" NULLVALUE '"); + if (nullValue != null) { + sqlStmt.append(nullValue); + } else { + sqlStmt.append("null"); + } + sqlStmt.append("' "); if (qc > 0) { switch (qc) { case '\'': diff --git a/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableExportJob.java b/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableExportJob.java index 2a702d9f..7caf9be9 100644 --- a/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableExportJob.java +++ b/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableExportJob.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -28,6 +29,7 @@ import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.sqoop.lib.DelimiterSet; import org.apache.sqoop.manager.ConnManager; +import org.apache.sqoop.manager.DirectNetezzaManager; import org.apache.sqoop.mapreduce.DBWritable; import org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableRecordExportMapper; @@ -52,29 +54,41 @@ public NetezzaExternalTableExportJob(final ExportJobContext context) { } @Override + protected void propagateOptionsToJob(Job job) { + Configuration conf = job.getConfiguration(); + String nullValue = options.getInNullStringValue(); + if (nullValue != null) { + conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE, + StringEscapeUtils.unescapeJava(nullValue)); + } + conf.setInt(DelimiterSet.INPUT_FIELD_DELIM_KEY, + options.getInputFieldDelim()); + conf.setInt(DelimiterSet.INPUT_RECORD_DELIM_KEY, + options.getInputRecordDelim()); + conf.setInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY, + options.getInputEnclosedBy()); + // Netezza uses \ as the escape character. Force the use of it + int escapeChar = options.getInputEscapedBy(); + if (escapeChar > 0) { + if (escapeChar != '\\') { + LOG.info( + "Setting escaped char to \\ for Netezza external table export"); + } + conf.setInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, '\\'); + } + conf.setBoolean(DelimiterSet.INPUT_ENCLOSE_REQUIRED_KEY, + options.isOutputEncloseRequired()); + } /** * Configure the inputformat to use for the job. */ + @Override protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol) throws ClassNotFoundException, IOException { // Configure the delimiters, etc. Configuration conf = job.getConfiguration(); - conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, - options.getInputFieldDelim()); - conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, - options.getInputRecordDelim()); - conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, - options.getInputEnclosedBy()); - // Netezza uses \ as the escape character. Force the use of it - int escapeChar = options.getOutputEscapedBy(); - if (escapeChar > 0 && escapeChar != '\\') { - LOG.info("Setting escaped char to \\ for Netezza external table import"); - conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\'); - } - conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY, - options.isOutputEncloseRequired()); ConnManager mgr = context.getConnManager(); String username = options.getUsername(); diff --git a/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableImportJob.java b/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableImportJob.java index 7ee6f709..0f7c1b00 100644 --- a/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableImportJob.java +++ b/src/java/org/apache/sqoop/mapreduce/netezza/NetezzaExternalTableImportJob.java @@ -20,10 +20,12 @@ import java.io.IOException; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.sqoop.lib.DelimiterSet; +import org.apache.sqoop.manager.DirectNetezzaManager; import org.apache.sqoop.mapreduce.DBWritable; import org.apache.sqoop.mapreduce.ImportJobBase; import org.apache.sqoop.mapreduce.RawKeyTextOutputFormat; @@ -47,10 +49,37 @@ public NetezzaExternalTableImportJob(final SqoopOptions opts, context); } + @Override + protected void propagateOptionsToJob(Job job) { + Configuration conf = job.getConfiguration(); + String nullValue = options.getNullStringValue(); + if (nullValue != null) { + conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE, + StringEscapeUtils.unescapeJava(nullValue)); + } + conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, + options.getOutputFieldDelim()); + conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, + options.getOutputRecordDelim()); + conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, + options.getOutputEnclosedBy()); + // Netezza uses \ as the escape character. Force the use of it + int escapeChar = options.getOutputEscapedBy(); + if (escapeChar > 0) { + if (escapeChar != '\\') { + LOG.info( + "Setting escaped char to \\ for Netezza external table import"); + } + conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\'); + } + conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY, + options.isOutputEncloseRequired()); + + } /** * Configure the inputformat to use for the job. */ - + @Override protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol) throws ClassNotFoundException, IOException { @@ -89,22 +118,6 @@ protected void configureInputFormat(Job job, String tableName, DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName, whereClause, mgr.escapeColName(splitByCol), sqlColNames); - Configuration conf = job.getConfiguration(); - conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, - options.getOutputFieldDelim()); - conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY, - options.getOutputRecordDelim()); - conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, - options.getOutputEnclosedBy()); - // Netezza uses \ as the escape character. Force the use of it - int escapeChar = options.getOutputEscapedBy(); - if (escapeChar > 0 && escapeChar != '\\') { - LOG.info("Setting escaped char to \\ for Netezza external table import"); - conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\'); - } - conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY, - options.isOutputEncloseRequired()); - LOG.debug("Using InputFormat: " + inputFormatClass); job.setInputFormatClass(getInputFormatClass()); } diff --git a/src/test/com/cloudera/sqoop/manager/DirectNetezzaExportManualTest.java b/src/test/com/cloudera/sqoop/manager/DirectNetezzaExportManualTest.java index 938ffc53..aace924c 100644 --- a/src/test/com/cloudera/sqoop/manager/DirectNetezzaExportManualTest.java +++ b/src/test/com/cloudera/sqoop/manager/DirectNetezzaExportManualTest.java @@ -25,13 +25,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.sqoop.manager.DirectNetezzaManager; -import org.junit.After; -import org.junit.Before; import org.junit.Test; - import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.TestExport.ColumnGenerator; /** * Test the DirectNetezzaManager implementation's exportJob() functionality. @@ -162,7 +158,59 @@ public void testValidExtraArgs() throws Exception { runNetezzaTest(getTableName(), argv); } + @Test + public void testNullStringExport() throws Exception { + String [] extraArgs = { + "--input-null-string", "\\\\N", + "--input-null-non-string", "\\\\N", + "--input-escaped-by", "\\", + }; + ColumnGenerator[] extraCols = new ColumnGenerator[] { + new NullColumnGenerator(), + }; + + String[] argv = getArgv(true, 10, 10, extraArgs); + runNetezzaTest(getTableName(), argv, extraCols); + } + + + public void testDifferentNullStrings() throws IOException, SQLException { + ColumnGenerator[] extraCols = new ColumnGenerator[] { + new NullColumnGenerator(), + }; + + String [] extraArgs = { + "--input-null-string", "\\N", + "--input-null-non-string", "\\M", + }; + String[] argv = getArgv(true, 10, 10, extraArgs); + try { + runNetezzaTest(getTableName(), argv, extraCols); + fail("Expected failure for different null strings"); + } catch(IOException ioe) { + // success + } + } + + @Test(expected = java.io.IOException.class) + public void testLongNullStrings() throws IOException, SQLException { + ColumnGenerator[] extraCols = new ColumnGenerator[] { + new NullColumnGenerator(), + }; + + String [] extraArgs = { + "--input-null-string", "morethan4chars", + "--input-null-non-string", "morethan4chars", + }; + String[] argv = getArgv(true, 10, 10, extraArgs); + try { + runNetezzaTest(getTableName(), argv, extraCols); + fail("Expected failure for long null strings"); + } catch(IOException ioe) { + // success + } + } @Override public void testMultiMapTextExportWithStaging() throws IOException, diff --git a/src/test/com/cloudera/sqoop/manager/NetezzaExportManualTest.java b/src/test/com/cloudera/sqoop/manager/NetezzaExportManualTest.java index 50d27fec..43dd254a 100644 --- a/src/test/com/cloudera/sqoop/manager/NetezzaExportManualTest.java +++ b/src/test/com/cloudera/sqoop/manager/NetezzaExportManualTest.java @@ -243,4 +243,17 @@ protected void createExportFile(ColumnGenerator...extraCols) w.close(); os.close(); } + + protected class NullColumnGenerator implements ColumnGenerator { + public String getExportText(int rowNum) { + return "\\N"; + } + public String getVerifyText(int rowNum) { + return null; + } + + public String getType() { + return "INTEGER"; + } + } } diff --git a/src/test/com/cloudera/sqoop/manager/NetezzaImportManualTest.java b/src/test/com/cloudera/sqoop/manager/NetezzaImportManualTest.java index 3482dd8e..86f5bdd8 100644 --- a/src/test/com/cloudera/sqoop/manager/NetezzaImportManualTest.java +++ b/src/test/com/cloudera/sqoop/manager/NetezzaImportManualTest.java @@ -205,13 +205,14 @@ private String[] getExpectedResultsWithNulls() { private String[] getDirectModeExpectedResultsWithNulls() { String [] expectedResults = { - "1,Aaron,2009-05-14,1000000,T,engineering,nvl,1", - "2,Bob,2009-04-20,400,T,sales,nvl,2", - "3,Fred,2009-01-23,15,F,marketing,nvl,3", + "1,Aaron,2009-05-14,1000000,T,engineering,\\N,1", + "2,Bob,2009-04-20,400,T,sales,\\N,2", + "3,Fred,2009-01-23,15,F,marketing,\\N,3", }; return expectedResults; } + private String[] getArgv(boolean isDirect, String tableName, String... extraArgs) { ArrayList args = new ArrayList(); @@ -325,7 +326,7 @@ public void testIncrementalImport() throws IOException { } @Test - public void testNullEscapeCharacters() throws Exception { + public void testNullStringValue() throws Exception { String [] extraArgs = { @@ -355,4 +356,5 @@ public void testValidExtraArgs() throws Exception { runNetezzaTest(true, tableName, expectedResults, extraArgs); } + }