5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-04 14:19:04 +08:00

SQOOP-12. Alternate NULL formats.

This fix allows the user to optionally specify different null
representations. It addresses both the import and export use
cases, in addition to both string and non-string column types.

From: Ahmed Radwan <ahmed@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:04:23 +00:00
parent 55cce082c2
commit 2eaa878ff0
7 changed files with 155 additions and 39 deletions

View File

@ -43,6 +43,12 @@ Export control options
--update-key (col-name):: --update-key (col-name)::
Anchor column to use for updates Anchor column to use for updates
--input-null-string (null-string)::
The string to be interpreted as null for string columns
--input-null-non-string (null-string)::
The string to be interpreted as null for non-string columns
include::input-args.txt[] include::input-args.txt[]
include::output-args.txt[] include::output-args.txt[]

View File

@ -75,6 +75,11 @@ Import control options
-z:: -z::
Uses gzip to compress data as it is written to HDFS Uses gzip to compress data as it is written to HDFS
--null-string (null-string)::
The string to be written for a null value for string columns
--null-non-string (null-string)::
The string to be written for a null value for non-string columns
include::output-args.txt[] include::output-args.txt[]

View File

@ -43,15 +43,20 @@ include::common-args.txt[]
.Export control arguments: .Export control arguments:
[grid="all"] [grid="all"]
`---------------------------`------------------------------------------ `----------------------------------------`------------------------------
Argument Description Argument Description
----------------------------------------------------------------------- ------------------------------------------------------------------------
+\--direct+ Use direct export fast path +\--direct+ Use direct export fast path
+\--export-dir <dir>+ HDFS source path for the export +\--export-dir <dir>+ HDFS source path for the export
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in parallel +-m,\--num-mappers <n>+ Use 'n' map tasks to export in\
parallel
+\--table <table-name>+ Table to populate +\--table <table-name>+ Table to populate
+\--update-key <col-name>+ Anchor column to use for updates +\--update-key <col-name>+ Anchor column to use for updates
----------------------------------------------------------------------- +\--input-null-string <null-string>+ The string to be interpreted as\
null for string columns
+\--input-null-non-string <null-string>+ The string to be interpreted as\
null for non-string columns
------------------------------------------------------------------------
The +\--table+ and +\--export-dir+ arguments are required. These The +\--table+ and +\--export-dir+ arguments are required. These
specify the table to populate in the database, and the specify the table to populate in the database, and the
@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the
to specify this codepath. This may be to specify this codepath. This may be
higher-performance than the standard JDBC codepath. higher-performance than the standard JDBC codepath.
The +\--input-null-string+ and +\--input-null-non-string+ arguments are
optional. If +\--input-null-string+ is not specified, then the string
"null" will be interpreted as null for string-type columns.
If +\--input-null-non-string+ is not specified, then both the string
"null" and the empty string will be interpreted as null for non-string
columns. Note that the empty string will always be interpreted as null
for non-string columns, in addition to any other string specified by
+\--input-null-non-string+.
Inserts vs. Updates Inserts vs. Updates
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~

View File

@ -49,9 +49,9 @@ include::connecting.txt[]
.Import control arguments: .Import control arguments:
[grid="all"] [grid="all"]
`-----------------------------`-------------------------------------- `---------------------------------`--------------------------------------
Argument Description Argument Description
--------------------------------------------------------------------- -------------------------------------------------------------------------
+\--append+ Append data to an existing dataset\ +\--append+ Append data to an existing dataset\
in HDFS in HDFS
+\--as-sequencefile+ Imports data to SequenceFiles +\--as-sequencefile+ Imports data to SequenceFiles
@ -70,9 +70,14 @@ Argument Description
+\--warehouse-dir <dir>+ HDFS parent for table destination +\--warehouse-dir <dir>+ HDFS parent for table destination
+\--where <where clause>+ WHERE clause to use during import +\--where <where clause>+ WHERE clause to use during import
+-z,\--compress+ Enable compression +-z,\--compress+ Enable compression
--------------------------------------------------------------------- +\--null-string <null-string>+ The string to be written for a null\
value for string columns
+\--null-non-string <null-string>+ The string to be written for a null\
value for non-string columns
-------------------------------------------------------------------------
The +\--null-string+ and +\--null-non-string+ arguments are optional.
If either is not specified, then the string "null" will be used for it.
Selecting the Data to Import Selecting the Data to Import
^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -114,6 +114,12 @@ public enum IncrementalMode {
// used. If so, it is stored as 'db.password'. // used. If so, it is stored as 'db.password'.
private String password; private String password;
@StoredAsProperty("null.string") private String nullStringValue;
@StoredAsProperty("input.null.string") private String inNullStringValue;
@StoredAsProperty("null.non-string") private String nullNonStringValue;
@StoredAsProperty("input.null.non-string")
private String inNullNonStringValue;
@StoredAsProperty("codegen.output.dir") private String codeOutputDir; @StoredAsProperty("codegen.output.dir") private String codeOutputDir;
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir; @StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or // Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
@ -1597,5 +1603,37 @@ public String getConnManagerClassName() {
return connManagerClassName; return connManagerClassName;
} }
/**
 * Sets the string written in place of a null value for string columns
 * (supplied on the command line via --null-string).
 *
 * @param nullString the null representation to store
 */
public void setNullStringValue(String nullString) {
this.nullStringValue = nullString;
}
/**
 * @return the string written in place of a null value for string columns,
 * as last stored in the nullStringValue field
 */
public String getNullStringValue() {
return nullStringValue;
}
/**
 * Sets the input string that is interpreted as null for string columns
 * (supplied on the command line via --input-null-string).
 *
 * @param inNullString the input null representation to store
 */
public void setInNullStringValue(String inNullString) {
this.inNullStringValue = inNullString;
}
/**
 * @return the input string interpreted as null for string columns,
 * as last stored in the inNullStringValue field
 */
public String getInNullStringValue() {
return inNullStringValue;
}
/**
 * Sets the string written in place of a null value for non-string columns
 * (supplied on the command line via --null-non-string).
 *
 * @param nullNonString the null representation to store
 */
public void setNullNonStringValue(String nullNonString) {
this.nullNonStringValue = nullNonString;
}
/**
 * @return the string written in place of a null value for non-string
 * columns, as last stored in the nullNonStringValue field
 */
public String getNullNonStringValue() {
return nullNonStringValue;
}
/**
 * Sets the input string that is interpreted as null for non-string columns
 * (supplied on the command line via --input-null-non-string).
 *
 * @param inNullNonString the input null representation to store
 */
public void setInNullNonStringValue(String inNullNonString) {
this.inNullNonStringValue = inNullNonString;
}
/**
 * @return the input string interpreted as null for non-string columns,
 * as last stored in the inNullNonStringValue field
 */
public String getInNullNonStringValue() {
return inNullNonStringValue;
}
} }

View File

@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) {
private String stringifierForType(String javaType, String colName) { private String stringifierForType(String javaType, String colName) {
if (javaType.equals("String")) { if (javaType.equals("String")) {
return colName; // Check if it is null, and write the null representation in such case
String r = colName + "==null?\"" + this.options.getNullStringValue()
+ "\":" + colName;
return r;
} else { } else {
// This is an object type -- just call its toString() in a null-safe way. // This is an object type -- just call its toString() in a null-safe way.
return "\"\" + " + colName; // Also check if it is null, and instead write the null representation
// in such case
String r = colName + "==null?\"" + this.options.getNullNonStringValue()
+ "\":" + "\"\" + " + colName;
return r;
} }
} }
@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) {
} }
/** /**
* Helper method for parseColumn(). Interpret the string 'null' as a null * Helper method for parseColumn(). Interpret the string null representation
* for a particular column. * for a particular column.
*/ */
private void parseNullVal(String javaType, String colName, StringBuilder sb) { private void parseNullVal(String javaType, String colName, StringBuilder sb) {
if (javaType.equals("String")) { if (javaType.equals("String")) {
sb.append(" if (__cur_str.equals(\"null\")) { this."); sb.append(" if (__cur_str.equals(\""
+ this.options.getInNullStringValue() + "\")) { this.");
sb.append(colName); sb.append(colName);
sb.append(" = null; } else {\n"); sb.append(" = null; } else {\n");
} else { } else {
sb.append(" if (__cur_str.equals(\"null\")"); sb.append(" if (__cur_str.equals(\""
sb.append(" || __cur_str.length() == 0) { this."); + this.options.getInNullNonStringValue());
sb.append("\") || __cur_str.length() == 0) { this.");
sb.append(colName); sb.append(colName);
sb.append(" = null; } else {\n"); sb.append(" = null; } else {\n");
} }

View File

@ -77,6 +77,10 @@ public abstract class BaseSqoopTool extends SqoopTool {
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir"; public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
public static final String TARGET_DIR_ARG = "target-dir"; public static final String TARGET_DIR_ARG = "target-dir";
public static final String APPEND_ARG = "append"; public static final String APPEND_ARG = "append";
public static final String NULL_STRING = "null-string";
public static final String INPUT_NULL_STRING = "input-null-string";
public static final String NULL_NON_STRING = "null-non-string";
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile"; public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
public static final String FMT_TEXTFILE_ARG = "as-textfile"; public static final String FMT_TEXTFILE_ARG = "as-textfile";
@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
.withDescription("Put auto-generated classes in this package") .withDescription("Put auto-generated classes in this package")
.withLongOpt(PACKAGE_NAME_ARG) .withLongOpt(PACKAGE_NAME_ARG)
.create()); .create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-string")
.hasArg()
.withDescription("Null string representation")
.withLongOpt(NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string")
.hasArg()
.withDescription("Input null string representation")
.withLongOpt(INPUT_NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string")
.hasArg()
.withDescription("Null non-string representation")
.withLongOpt(NULL_NON_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string")
.hasArg()
.withDescription("Input null non-string representation")
.withLongOpt(INPUT_NULL_NON_STRING)
.create());
if (!multiTable) { if (!multiTable) {
codeGenOpts.addOption(OptionBuilder.withArgName("name") codeGenOpts.addOption(OptionBuilder.withArgName("name")
.hasArg() .hasArg()
@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out)
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME)); out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
} }
if (in.hasOption(NULL_STRING)) {
out.setNullStringValue(in.getOptionValue(NULL_STRING));
}
if (in.hasOption(INPUT_NULL_STRING)) {
out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
}
if (in.hasOption(NULL_NON_STRING)) {
out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
}
if (in.hasOption(INPUT_NULL_NON_STRING)) {
out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
}
if (in.hasOption(DRIVER_ARG)) { if (in.hasOption(DRIVER_ARG)) {
out.setDriverClassName(in.getOptionValue(DRIVER_ARG)); out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
} }