mirror of
https://github.com/apache/sqoop.git
synced 2025-05-04 06:10:18 +08:00
SQOOP-932: Netezza direct manager is ignoring --null-*-string arguments
(Venkat Ranganathan via Jarek Jarcec Cecho)
This commit is contained in:
parent
e93ec9f39f
commit
a0a03d7f31
@ -46,6 +46,20 @@ public class DelimiterSet implements Cloneable {
|
||||
"sqoop.output.escaped.by";
|
||||
public static final String OUTPUT_ENCLOSE_REQUIRED_KEY =
|
||||
"sqoop.output.enclose.required";
|
||||
|
||||
/**
|
||||
* Create the input variations for export instead of overloading them.
|
||||
*/
|
||||
public static final String INPUT_FIELD_DELIM_KEY =
|
||||
"sqoop.input.field.delim";
|
||||
public static final String INPUT_RECORD_DELIM_KEY =
|
||||
"sqoop.input.record.delim";
|
||||
public static final String INPUT_ENCLOSED_BY_KEY =
|
||||
"sqoop.input.enclosed.by";
|
||||
public static final String INPUT_ESCAPED_BY_KEY =
|
||||
"sqoop.input.escaped.by";
|
||||
public static final String INPUT_ENCLOSE_REQUIRED_KEY =
|
||||
"sqoop.input.enclose.required";
|
||||
/**
|
||||
* Create a delimiter set with the default delimiters
|
||||
* (comma for fields, newline for records).
|
||||
|
@ -28,6 +28,8 @@
|
||||
import org.apache.commons.cli.GnuParser;
|
||||
import org.apache.commons.cli.OptionBuilder;
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@ -59,13 +61,39 @@ public class DirectNetezzaManager extends NetezzaManager {
|
||||
private static final String QUERY_CHECK_DICTIONARY_FOR_TABLE =
|
||||
"SELECT 1 FROM _V_OBJECTS WHERE OWNER= ? "
|
||||
+ " AND OBJNAME = ? and OBJTYPE = 'TABLE'";
|
||||
|
||||
public static final String NETEZZA_NULL_VALUE =
|
||||
"netezza.exttable.null.value";
|
||||
public DirectNetezzaManager(SqoopOptions opts) {
|
||||
super(opts);
|
||||
try {
|
||||
handleNetezzaExtraArgs(options);
|
||||
} catch (ParseException ioe) {
|
||||
throw new RuntimeException(ioe.getMessage(), ioe);
|
||||
} catch (ParseException pe) {
|
||||
throw new RuntimeException(pe.getMessage(), pe);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkNullValueStrings(String nullStrValue,
|
||||
String nullNonStrValue) throws IOException {
|
||||
|
||||
if (!StringUtils.equals(nullStrValue, nullNonStrValue)) {
|
||||
throw new IOException(
|
||||
"Detected different values of --input-string and --input-non-string "
|
||||
+ "parameters. Netezza direct manager does not support that. Please "
|
||||
+ "either use the same values or omit the --direct parameter.");
|
||||
}
|
||||
|
||||
|
||||
// Null String values cannot be more 4 chars in length in the case
|
||||
// Netezza external tables.
|
||||
|
||||
if (nullStrValue != null) {
|
||||
nullStrValue = StringEscapeUtils.unescapeJava(nullStrValue);
|
||||
if (nullStrValue.length() > 4) {
|
||||
throw new IOException(
|
||||
"Null string (and null non string) values for Netezza direct mode"
|
||||
+ " manager must be less than 4 characters in length");
|
||||
}
|
||||
options.getConf().set(NETEZZA_NULL_VALUE, nullStrValue);
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,12 +159,13 @@ public void exportTable(com.cloudera.sqoop.manager.ExportJobContext context)
|
||||
options = context.getOptions();
|
||||
context.setConnManager(this);
|
||||
|
||||
|
||||
checkTable(); // Throws excpetion as necessary
|
||||
NetezzaExternalTableExportJob exporter = null;
|
||||
|
||||
char qc = (char) options.getInputEnclosedBy();
|
||||
char ec = (char) options.getInputEscapedBy();
|
||||
checkNullValueStrings(options.getInNullStringValue(),
|
||||
options.getInNullNonStringValue());
|
||||
|
||||
if (qc > 0 && !(qc == '"' || qc == '\'')) {
|
||||
throw new ExportException("Input enclosed-by character must be '\"' "
|
||||
@ -173,6 +202,9 @@ public void importTable(com.cloudera.sqoop.manager.ImportJobContext context)
|
||||
new IOException("Null tableName for Netezza external table import.");
|
||||
}
|
||||
|
||||
checkNullValueStrings(options.getNullStringValue(),
|
||||
options.getNullNonStringValue());
|
||||
|
||||
char qc = options.getOutputEnclosedBy();
|
||||
char ec = options.getOutputEscapedBy();
|
||||
|
||||
|
@ -64,9 +64,11 @@ public abstract class NetezzaExternalTableExportMapper<K, V> extends
|
||||
|
||||
private String getSqlStatement() throws IOException {
|
||||
|
||||
char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ',');
|
||||
char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0);
|
||||
char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0);
|
||||
char fd = (char) conf.getInt(DelimiterSet.INPUT_FIELD_DELIM_KEY, ',');
|
||||
char qc = (char) conf.getInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY, 0);
|
||||
char ec = (char) conf.getInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, 0);
|
||||
|
||||
String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE);
|
||||
|
||||
int errorThreshold = conf.getInt(
|
||||
DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1);
|
||||
@ -89,7 +91,13 @@ private String getSqlStatement() throws IOException {
|
||||
}
|
||||
sqlStmt.append(" FORMAT 'Text' ");
|
||||
sqlStmt.append(" INCLUDEZEROSECONDS TRUE ");
|
||||
sqlStmt.append(" NULLVALUE 'NULL' ");
|
||||
sqlStmt.append(" NULLVALUE '");
|
||||
if (nullValue != null) {
|
||||
sqlStmt.append(nullValue);
|
||||
} else {
|
||||
sqlStmt.append("null");
|
||||
}
|
||||
sqlStmt.append("' ");
|
||||
if (qc > 0) {
|
||||
switch (qc) {
|
||||
case '\'':
|
||||
|
@ -66,6 +66,9 @@ private String getSqlStatement(int myId) throws IOException {
|
||||
char fd = (char) conf.getInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY, ',');
|
||||
char qc = (char) conf.getInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY, 0);
|
||||
char ec = (char) conf.getInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, 0);
|
||||
|
||||
String nullValue = conf.get(DirectNetezzaManager.NETEZZA_NULL_VALUE);
|
||||
|
||||
int errorThreshold = conf.getInt(
|
||||
DirectNetezzaManager.NETEZZA_ERROR_THRESHOLD_OPT, 1);
|
||||
String logDir = conf.get(DirectNetezzaManager.NETEZZA_LOG_DIR_OPT);
|
||||
@ -86,7 +89,13 @@ private String getSqlStatement(int myId) throws IOException {
|
||||
}
|
||||
sqlStmt.append(" FORMAT 'Text' ");
|
||||
sqlStmt.append(" INCLUDEZEROSECONDS TRUE ");
|
||||
sqlStmt.append(" NULLVALUE 'null' ");
|
||||
sqlStmt.append(" NULLVALUE '");
|
||||
if (nullValue != null) {
|
||||
sqlStmt.append(nullValue);
|
||||
} else {
|
||||
sqlStmt.append("null");
|
||||
}
|
||||
sqlStmt.append("' ");
|
||||
if (qc > 0) {
|
||||
switch (qc) {
|
||||
case '\'':
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@ -28,6 +29,7 @@
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.sqoop.lib.DelimiterSet;
|
||||
import org.apache.sqoop.manager.ConnManager;
|
||||
import org.apache.sqoop.manager.DirectNetezzaManager;
|
||||
import org.apache.sqoop.mapreduce.DBWritable;
|
||||
import
|
||||
org.apache.sqoop.mapreduce.db.netezza.NetezzaExternalTableRecordExportMapper;
|
||||
@ -52,29 +54,41 @@ public NetezzaExternalTableExportJob(final ExportJobContext context) {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void propagateOptionsToJob(Job job) {
|
||||
Configuration conf = job.getConfiguration();
|
||||
String nullValue = options.getInNullStringValue();
|
||||
if (nullValue != null) {
|
||||
conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE,
|
||||
StringEscapeUtils.unescapeJava(nullValue));
|
||||
}
|
||||
conf.setInt(DelimiterSet.INPUT_FIELD_DELIM_KEY,
|
||||
options.getInputFieldDelim());
|
||||
conf.setInt(DelimiterSet.INPUT_RECORD_DELIM_KEY,
|
||||
options.getInputRecordDelim());
|
||||
conf.setInt(DelimiterSet.INPUT_ENCLOSED_BY_KEY,
|
||||
options.getInputEnclosedBy());
|
||||
// Netezza uses \ as the escape character. Force the use of it
|
||||
int escapeChar = options.getInputEscapedBy();
|
||||
if (escapeChar > 0) {
|
||||
if (escapeChar != '\\') {
|
||||
LOG.info(
|
||||
"Setting escaped char to \\ for Netezza external table export");
|
||||
}
|
||||
conf.setInt(DelimiterSet.INPUT_ESCAPED_BY_KEY, '\\');
|
||||
}
|
||||
conf.setBoolean(DelimiterSet.INPUT_ENCLOSE_REQUIRED_KEY,
|
||||
options.isOutputEncloseRequired());
|
||||
}
|
||||
/**
|
||||
* Configure the inputformat to use for the job.
|
||||
*/
|
||||
@Override
|
||||
protected void configureInputFormat(Job job, String tableName,
|
||||
String tableClassName, String splitByCol) throws ClassNotFoundException,
|
||||
IOException {
|
||||
|
||||
// Configure the delimiters, etc.
|
||||
Configuration conf = job.getConfiguration();
|
||||
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
|
||||
options.getInputFieldDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
|
||||
options.getInputRecordDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
|
||||
options.getInputEnclosedBy());
|
||||
// Netezza uses \ as the escape character. Force the use of it
|
||||
int escapeChar = options.getOutputEscapedBy();
|
||||
if (escapeChar > 0 && escapeChar != '\\') {
|
||||
LOG.info("Setting escaped char to \\ for Netezza external table import");
|
||||
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
|
||||
}
|
||||
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
|
||||
options.isOutputEncloseRequired());
|
||||
|
||||
ConnManager mgr = context.getConnManager();
|
||||
String username = options.getUsername();
|
||||
|
@ -20,10 +20,12 @@
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.sqoop.lib.DelimiterSet;
|
||||
import org.apache.sqoop.manager.DirectNetezzaManager;
|
||||
import org.apache.sqoop.mapreduce.DBWritable;
|
||||
import org.apache.sqoop.mapreduce.ImportJobBase;
|
||||
import org.apache.sqoop.mapreduce.RawKeyTextOutputFormat;
|
||||
@ -47,10 +49,37 @@ public NetezzaExternalTableImportJob(final SqoopOptions opts,
|
||||
context);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void propagateOptionsToJob(Job job) {
|
||||
Configuration conf = job.getConfiguration();
|
||||
String nullValue = options.getNullStringValue();
|
||||
if (nullValue != null) {
|
||||
conf.set(DirectNetezzaManager.NETEZZA_NULL_VALUE,
|
||||
StringEscapeUtils.unescapeJava(nullValue));
|
||||
}
|
||||
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
|
||||
options.getOutputFieldDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
|
||||
options.getOutputRecordDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
|
||||
options.getOutputEnclosedBy());
|
||||
// Netezza uses \ as the escape character. Force the use of it
|
||||
int escapeChar = options.getOutputEscapedBy();
|
||||
if (escapeChar > 0) {
|
||||
if (escapeChar != '\\') {
|
||||
LOG.info(
|
||||
"Setting escaped char to \\ for Netezza external table import");
|
||||
}
|
||||
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
|
||||
}
|
||||
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
|
||||
options.isOutputEncloseRequired());
|
||||
|
||||
}
|
||||
/**
|
||||
* Configure the inputformat to use for the job.
|
||||
*/
|
||||
|
||||
@Override
|
||||
protected void configureInputFormat(Job job, String tableName,
|
||||
String tableClassName, String splitByCol) throws ClassNotFoundException,
|
||||
IOException {
|
||||
@ -89,22 +118,6 @@ protected void configureInputFormat(Job job, String tableName,
|
||||
DataDrivenDBInputFormat.setInput(job, DBWritable.class, tableName,
|
||||
whereClause, mgr.escapeColName(splitByCol), sqlColNames);
|
||||
|
||||
Configuration conf = job.getConfiguration();
|
||||
conf.setInt(DelimiterSet.OUTPUT_FIELD_DELIM_KEY,
|
||||
options.getOutputFieldDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_RECORD_DELIM_KEY,
|
||||
options.getOutputRecordDelim());
|
||||
conf.setInt(DelimiterSet.OUTPUT_ENCLOSED_BY_KEY,
|
||||
options.getOutputEnclosedBy());
|
||||
// Netezza uses \ as the escape character. Force the use of it
|
||||
int escapeChar = options.getOutputEscapedBy();
|
||||
if (escapeChar > 0 && escapeChar != '\\') {
|
||||
LOG.info("Setting escaped char to \\ for Netezza external table import");
|
||||
conf.setInt(DelimiterSet.OUTPUT_ESCAPED_BY_KEY, '\\');
|
||||
}
|
||||
conf.setBoolean(DelimiterSet.OUTPUT_ENCLOSE_REQUIRED_KEY,
|
||||
options.isOutputEncloseRequired());
|
||||
|
||||
LOG.debug("Using InputFormat: " + inputFormatClass);
|
||||
job.setInputFormatClass(getInputFormatClass());
|
||||
}
|
||||
|
@ -25,13 +25,9 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.sqoop.manager.DirectNetezzaManager;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
import com.cloudera.sqoop.SqoopOptions;
|
||||
import com.cloudera.sqoop.TestExport.ColumnGenerator;
|
||||
|
||||
/**
|
||||
* Test the DirectNetezzaManager implementation's exportJob() functionality.
|
||||
@ -162,7 +158,59 @@ public void testValidExtraArgs() throws Exception {
|
||||
runNetezzaTest(getTableName(), argv);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullStringExport() throws Exception {
|
||||
|
||||
String [] extraArgs = {
|
||||
"--input-null-string", "\\\\N",
|
||||
"--input-null-non-string", "\\\\N",
|
||||
"--input-escaped-by", "\\",
|
||||
};
|
||||
ColumnGenerator[] extraCols = new ColumnGenerator[] {
|
||||
new NullColumnGenerator(),
|
||||
};
|
||||
|
||||
String[] argv = getArgv(true, 10, 10, extraArgs);
|
||||
runNetezzaTest(getTableName(), argv, extraCols);
|
||||
}
|
||||
|
||||
|
||||
public void testDifferentNullStrings() throws IOException, SQLException {
|
||||
ColumnGenerator[] extraCols = new ColumnGenerator[] {
|
||||
new NullColumnGenerator(),
|
||||
};
|
||||
|
||||
String [] extraArgs = {
|
||||
"--input-null-string", "\\N",
|
||||
"--input-null-non-string", "\\M",
|
||||
};
|
||||
String[] argv = getArgv(true, 10, 10, extraArgs);
|
||||
try {
|
||||
runNetezzaTest(getTableName(), argv, extraCols);
|
||||
fail("Expected failure for different null strings");
|
||||
} catch(IOException ioe) {
|
||||
// success
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expected = java.io.IOException.class)
|
||||
public void testLongNullStrings() throws IOException, SQLException {
|
||||
ColumnGenerator[] extraCols = new ColumnGenerator[] {
|
||||
new NullColumnGenerator(),
|
||||
};
|
||||
|
||||
String [] extraArgs = {
|
||||
"--input-null-string", "morethan4chars",
|
||||
"--input-null-non-string", "morethan4chars",
|
||||
};
|
||||
String[] argv = getArgv(true, 10, 10, extraArgs);
|
||||
try {
|
||||
runNetezzaTest(getTableName(), argv, extraCols);
|
||||
fail("Expected failure for long null strings");
|
||||
} catch(IOException ioe) {
|
||||
// success
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMultiMapTextExportWithStaging() throws IOException,
|
||||
|
@ -243,4 +243,17 @@ protected void createExportFile(ColumnGenerator...extraCols)
|
||||
w.close();
|
||||
os.close();
|
||||
}
|
||||
|
||||
protected class NullColumnGenerator implements ColumnGenerator {
|
||||
public String getExportText(int rowNum) {
|
||||
return "\\N";
|
||||
}
|
||||
public String getVerifyText(int rowNum) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return "INTEGER";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -205,13 +205,14 @@ private String[] getExpectedResultsWithNulls() {
|
||||
|
||||
private String[] getDirectModeExpectedResultsWithNulls() {
|
||||
String [] expectedResults = {
|
||||
"1,Aaron,2009-05-14,1000000,T,engineering,nvl,1",
|
||||
"2,Bob,2009-04-20,400,T,sales,nvl,2",
|
||||
"3,Fred,2009-01-23,15,F,marketing,nvl,3",
|
||||
"1,Aaron,2009-05-14,1000000,T,engineering,\\N,1",
|
||||
"2,Bob,2009-04-20,400,T,sales,\\N,2",
|
||||
"3,Fred,2009-01-23,15,F,marketing,\\N,3",
|
||||
};
|
||||
|
||||
return expectedResults;
|
||||
}
|
||||
|
||||
private String[] getArgv(boolean isDirect, String tableName,
|
||||
String... extraArgs) {
|
||||
ArrayList<String> args = new ArrayList<String>();
|
||||
@ -325,7 +326,7 @@ public void testIncrementalImport() throws IOException {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullEscapeCharacters() throws Exception {
|
||||
public void testNullStringValue() throws Exception {
|
||||
|
||||
|
||||
String [] extraArgs = {
|
||||
@ -355,4 +356,5 @@ public void testValidExtraArgs() throws Exception {
|
||||
runNetezzaTest(true, tableName, expectedResults,
|
||||
extraArgs);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user