5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-04 06:10:18 +08:00

SQOOP-932: Netezza direct manager is ignoring --null-*-string arguments

(Venkat Ranganathan via Jarek Jarcec Cecho)
This commit is contained in:
Jarek Jarcec Cecho 2013-03-21 20:23:31 -07:00
parent e93ec9f39f
commit a0a03d7f31
9 changed files with 201 additions and 48 deletions

View File

@ -46,6 +46,20 @@ public class DelimiterSet implements Cloneable {
"sqoop.output.escaped.by";
public static final String OUTPUT_ENCLOSE_REQUIRED_KEY =
"sqoop.output.enclose.required";
/**
* Create the input variations for export instead of overloading them.
*/
public static final String INPUT_FIELD_DELIM_KEY =
"sqoop.input.field.delim";
public static final String INPUT_RECORD_DELIM_KEY =
"sqoop.input.record.delim";
public static final String INPUT_ENCLOSED_BY_KEY =
"sqoop.input.enclosed.by";
public static final String INPUT_ESCAPED_BY_KEY =
"sqoop.input.escaped.by";
public static final String INPUT_ENCLOSE_REQUIRED_KEY =
"sqoop.input.enclose.required";
/**
* Create a delimiter set with the default delimiters
* (comma for fields, newline for records).

View File

@ -28,6 +28,8 @@
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -59,13 +61,39 @@ public class DirectNetezzaManager extends NetezzaManager {
private static final String QUERY_CHECK_DICTIONARY_FOR_TABLE =
"SELECT 1 FROM _V_OBJECTS WHERE OWNER= ? "
+ " AND OBJNAME = ? and OBJTYPE = 'TABLE'";
public static final String NETEZZA_NULL_VALUE =
"netezza.exttable.null.value";
public DirectNetezzaManager(SqoopOptions opts) {
super(opts);
try {
handleNetezzaExtraArgs(options);
} catch (ParseException ioe) {
throw new RuntimeException(ioe.getMessage(), ioe);
} catch (ParseException pe) {
throw new RuntimeException(pe.getMessage(), pe);
}
}
private void checkNullValueStrings(String nullStrValue,
String nullNonStrValue) throws IOException {
if (!StringUtils.equals(nullStrValue, nullNonStrValue)) {
throw new IOException(
"Detected different values of --input-string and --input-non-string "
+ "parameters. Netezza direct manager does not support that. Please "
+ "either use the same values or omit the --direct parameter.");
}
// Null String values cannot be more 4 chars in length in the case
// Netezza external tables.
if (nullStrValue != null) {
nullStrValue = StringEscapeUtils.unescapeJava(nullStrValue);
if (nullStrValue.length() > 4) {
throw new IOException(
"Null string (and null non string) values for Netezza direct mode"
+ " manager must be less than 4 characters in length");
}
options.getConf().set(NETEZZA_NULL_VALUE, nullStrValue);
}
}
@ -131,12 +159,13 @@ public void exportTable(com.cloudera.sqoop.manager.ExportJobContext context)
options = context.getOptions();
context.setConnManager(this);
checkTable(); // Throws excpetion as necessary
NetezzaExternalTableExportJob exporter = null;
char qc = (char) options.getInputEnclosedBy();
char ec = (char) options.getInputEscapedBy();
checkNullValueStrings(options.getInNullStringValue(),
options.getInNullNonStringValue());
if (qc > 0 && !(qc == '"' || qc == '\'')) {
throw new ExportException("Input enclosed-by character must be '\"' "
@ -173,6 +202,9 @@ public void importTable(com.cloudera.sqoop.manager.ImportJobContext context)
new IOException("Null tableName for Netezza external table import.");
}
checkNullValueStrings(options.getNullStringValue(),
options.getNullNonStringValue());
char qc = options.getOutputEnclosedBy();
char ec = options.getOutputEscapedBy();

View File

@ -64,9 +64,11 @@ public abstract class NetezzaExternalTableExportMapper<K, V> extends
private String getSqlStatement() throws IOException {
char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ',');
char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0);
char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0);
char fd = (char) conf.getInt(DelimiterSet.INPUT_FIELD_DELIM_KEY, ',');
char qc = (char) conf.getInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY, 0);
char ec = (char) conf.getInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, 0);
String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE);
int errorThreshold = conf.getInt(
DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1);
@ -89,7 +91,13 @@ private String getSqlStatement() throws IOException {
}
sqlStmt.append(" FORMAT 'Text' ");
sqlStmt.append(" INCLUDEZEROSECONDS TRUE ");
sqlStmt.append(" NULLVALUE 'NULL' ");
sqlStmt.append(" NULLVALUE '");
if (nullValue != null) {
sqlStmt.append(nullValue);
} else {
sqlStmt.append("null");
}
sqlStmt.append("' ");
if (qc > 0) {
switch (qc) {
case '\'':

View File

@ -66,6 +66,9 @@ private String getSqlStatement(int myId) throws IOException {
char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ',');
char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0);
char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0);
String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE);
int errorThreshold = conf.getInt(
DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1);
String logDir = conf.get(DirectNetezzaManager.NETEZZA_LOG_DIR_OPT);
@ -86,7 +89,13 @@ private String getSqlStatement(int myId) throws IOException {
}
sqlStmt.append(" FORMAT 'Text' ");
sqlStmt.append(" INCLUDEZEROSECONDS TRUE ");
sqlStmt.append(" NULLVALUE 'null' ");
sqlStmt.append(" NULLVALUE '");
if (nullValue != null) {
sqlStmt.append(nullValue);
} else {
sqlStmt.append("null");
}
sqlStmt.append("' ");
if (qc > 0) {
switch (qc) {
case '\'':

View File

@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -28,6 +29,7 @@
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.sqoop.lib.DelimiterSet;
import org.apache.sqoop.manager.ConnManager;
import org.apache.sqoop.manager.DirectNetezzaManager;
import org.apache.sqoop.mapreduce.DBWritable;
import
org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableRecordExportMapper;
@ -52,29 +54,41 @@ public NetezzaExternalTableExportJob(final ExportJobContext context) {
}
@Override
protected void propagateOptionsToJob(Job job) {
Configuration conf = job.getConfiguration();
String nullValue = options.getInNullStringValue();
if (nullValue != null) {
conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE,
StringEscapeUtils.unescapeJava(nullValue));
}
conf.setInt(DelimiterSet.INPUT_FIELD_DELIM_KEY,
options.getInputFieldDelim());
conf.setInt(DelimiterSet.INPUT_RECORD_DELIM_KEY,
options.getInputRecordDelim());
conf.setInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY,
options.getInputEnclosedBy());
// Netezza uses \ as the escape character. Force the use of it
int escapeChar = options.getInputEscapedBy();
if (escapeChar > 0) {
if (escapeChar != '\\') {
LOG.info(
"Setting escaped char to \\ for Netezza external table export");
}
conf.setInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, '\\');
}
conf.setBoolean(DelimiterSet.INPUT_ENCLOSE_REQUIRED_KEY,
options.isOutputEncloseRequired());
}
/**
* Configure the inputformat to use for the job.
*/
@Override
protected void configureInputFormat(Job job, String tableName,
String tableClassName, String splitByCol) throws ClassNotFoundException,
IOException {
// Configure the delimiters, etc.
Configuration conf = job.getConfiguration();
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
options.getInputFieldDelim());
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
options.getInputRecordDelim());
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
options.getInputEnclosedBy());
// Netezza uses \ as the escape character. Force the use of it
int escapeChar = options.getOutputEscapedBy();
if (escapeChar > 0 && escapeChar != '\\') {
LOG.info("Setting escaped char to \\ for Netezza external table import");
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
}
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
options.isOutputEncloseRequired());
ConnManager mgr = context.getConnManager();
String username = options.getUsername();

View File

@ -20,10 +20,12 @@
import java.io.IOException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.sqoop.lib.DelimiterSet;
import org.apache.sqoop.manager.DirectNetezzaManager;
import org.apache.sqoop.mapreduce.DBWritable;
import org.apache.sqoop.mapreduce.ImportJobBase;
import org.apache.sqoop.mapreduce.RawKeyTextOutputFormat;
@ -47,10 +49,37 @@ public NetezzaExternalTableImportJob(final SqoopOptions opts,
context);
}
@Override
protected void propagateOptionsToJob(Job job) {
Configuration conf = job.getConfiguration();
String nullValue = options.getNullStringValue();
if (nullValue != null) {
conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE,
StringEscapeUtils.unescapeJava(nullValue));
}
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
options.getOutputFieldDelim());
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
options.getOutputRecordDelim());
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
options.getOutputEnclosedBy());
// Netezza uses \ as the escape character. Force the use of it
int escapeChar = options.getOutputEscapedBy();
if (escapeChar > 0) {
if (escapeChar != '\\') {
LOG.info(
"Setting escaped char to \\ for Netezza external table import");
}
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
}
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
options.isOutputEncloseRequired());
}
/**
* Configure the inputformat to use for the job.
*/
@Override
protected void configureInputFormat(Job job, String tableName,
String tableClassName, String splitByCol) throws ClassNotFoundException,
IOException {
@ -89,22 +118,6 @@ protected void configureInputFormat(Job job, String tableName,
DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName,
whereClause, mgr.escapeColName(splitByCol), sqlColNames);
Configuration conf = job.getConfiguration();
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
options.getOutputFieldDelim());
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
options.getOutputRecordDelim());
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
options.getOutputEnclosedBy());
// Netezza uses \ as the escape character. Force the use of it
int escapeChar = options.getOutputEscapedBy();
if (escapeChar > 0 && escapeChar != '\\') {
LOG.info("Setting escaped char to \\ for Netezza external table import");
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
}
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
options.isOutputEncloseRequired());
LOG.debug("Using InputFormat: " + inputFormatClass);
job.setInputFormatClass(getInputFormatClass());
}

View File

@ -25,13 +25,9 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sqoop.manager.DirectNetezzaManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.TestExport.ColumnGenerator;
/**
* Test the DirectNetezzaManager implementation's exportJob() functionality.
@ -162,7 +158,59 @@ public void testValidExtraArgs() throws Exception {
runNetezzaTest(getTableName(), argv);
}
@Test
public void testNullStringExport() throws Exception {
String [] extraArgs = {
"--input-null-string", "\\\\N",
"--input-null-non-string", "\\\\N",
"--input-escaped-by", "\\",
};
ColumnGenerator[] extraCols = new ColumnGenerator[] {
new NullColumnGenerator(),
};
String[] argv = getArgv(true, 10, 10, extraArgs);
runNetezzaTest(getTableName(), argv, extraCols);
}
public void testDifferentNullStrings() throws IOException, SQLException {
ColumnGenerator[] extraCols = new ColumnGenerator[] {
new NullColumnGenerator(),
};
String [] extraArgs = {
"--input-null-string", "\\N",
"--input-null-non-string", "\\M",
};
String[] argv = getArgv(true, 10, 10, extraArgs);
try {
runNetezzaTest(getTableName(), argv, extraCols);
fail("Expected failure for different null strings");
} catch(IOException ioe) {
// success
}
}
@Test(expected = java.io.IOException.class)
public void testLongNullStrings() throws IOException, SQLException {
ColumnGenerator[] extraCols = new ColumnGenerator[] {
new NullColumnGenerator(),
};
String [] extraArgs = {
"--input-null-string", "morethan4chars",
"--input-null-non-string", "morethan4chars",
};
String[] argv = getArgv(true, 10, 10, extraArgs);
try {
runNetezzaTest(getTableName(), argv, extraCols);
fail("Expected failure for long null strings");
} catch(IOException ioe) {
// success
}
}
@Override
public void testMultiMapTextExportWithStaging() throws IOException,

View File

@ -243,4 +243,17 @@ protected void createExportFile(ColumnGenerator...extraCols)
w.close();
os.close();
}
protected class NullColumnGenerator implements ColumnGenerator {
public String getExportText(int rowNum) {
return "\\N";
}
public String getVerifyText(int rowNum) {
return null;
}
public String getType() {
return "INTEGER";
}
}
}

View File

@ -205,13 +205,14 @@ private String[] getExpectedResultsWithNulls() {
private String[] getDirectModeExpectedResultsWithNulls() {
String [] expectedResults = {
"1,Aaron,2009-05-14,1000000,T,engineering,nvl,1",
"2,Bob,2009-04-20,400,T,sales,nvl,2",
"3,Fred,2009-01-23,15,F,marketing,nvl,3",
"1,Aaron,2009-05-14,1000000,T,engineering,\\N,1",
"2,Bob,2009-04-20,400,T,sales,\\N,2",
"3,Fred,2009-01-23,15,F,marketing,\\N,3",
};
return expectedResults;
}
private String[] getArgv(boolean isDirect, String tableName,
String... extraArgs) {
ArrayList<String> args = new ArrayList<String>();
@ -325,7 +326,7 @@ public void testIncrementalImport() throws IOException {
}
@Test
public void testNullEscapeCharacters() throws Exception {
public void testNullStringValue() throws Exception {
String [] extraArgs = {
@ -355,4 +356,5 @@ public void testValidExtraArgs() throws Exception {
runNetezzaTest(true, tableName, expectedResults,
extraArgs);
}
}