5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-03 21:49:40 +08:00

SQOOP-12. Alternate NULL formats.

This fix allows the user to optionally specify different null
representations. It addresses both the import and export use
cases, in addition to both string and non-string column types.

From: Ahmed Radwan <ahmed@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:04:23 +00:00
parent 55cce082c2
commit 2eaa878ff0
7 changed files with 155 additions and 39 deletions

View File

@ -43,6 +43,12 @@ Export control options
--update-key (col-name)::
Anchor column to use for updates
--input-null-string::
The string to be interpreted as null for string columns
--input-null-non-string::
The string to be interpreted as null for non-string columns
include::input-args.txt[]
include::output-args.txt[]

View File

@ -75,6 +75,11 @@ Import control options
-z::
Uses gzip to compress data as it is written to HDFS
--null-string::
The string to be written for a null value for string columns
--null-non-string::
The string to be written for a null value for non-string columns
include::output-args.txt[]

View File

@ -43,15 +43,20 @@ include::common-args.txt[]
.Export control arguments:
[grid="all"]
`---------------------------`------------------------------------------
Argument Description
-----------------------------------------------------------------------
+\--direct+ Use direct export fast path
+\--export-dir <dir>+ HDFS source path for the export
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in parallel
+\--table <table-name>+ Table to populate
+\--update-key <col-name>+ Anchor column to use for updates
-----------------------------------------------------------------------
`----------------------------------------`------------------------------
Argument Description
------------------------------------------------------------------------
+\--direct+ Use direct export fast path
+\--export-dir <dir>+ HDFS source path for the export
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in\
parallel
+\--table <table-name>+ Table to populate
+\--update-key <col-name>+ Anchor column to use for updates
+\--input-null-string <null-string>+ The string to be interpreted as\
null for string columns
+\--input-null-non-string <null-string>+ The string to be interpreted as\
null for non-string columns
------------------------------------------------------------------------
The +\--table+ and +\--export-dir+ arguments are required. These
specify the table to populate in the database, and the
@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the
to specify this codepath. This may be
higher-performance than the standard JDBC codepath.
The +\--input-null-string+ and +\--input-null-non-string+ arguments are
optional. If +\--input-null-string+ is not specified, then the string
"null" will be interpreted as null for string-type columns.
If +\--input-null-non-string+ is not specified, then both the string
"null" and the empty string will be interpreted as null for non-string
columns. Note that the empty string will always be interpreted as null
for non-string columns, in addition to any other string specified by
+\--input-null-non-string+.
Inserts vs. Updates
~~~~~~~~~~~~~~~~~~~

View File

@ -49,30 +49,35 @@ include::connecting.txt[]
.Import control arguments:
[grid="all"]
`-----------------------------`--------------------------------------
Argument Description
---------------------------------------------------------------------
+\--append+ Append data to an existing dataset\
in HDFS
+\--as-sequencefile+ Imports data to SequenceFiles
+\--as-textfile+ Imports data as plain text (default)
+\--columns <col,col,col...>+ Columns to import from table
+\--direct+ Use direct import fast path
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
when importing in direct mode
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
+-e,\--query <statement>+ Import the results of '+statement+'.
+\--split-by <column-name>+ Column of the table used to split work\
units
+\--table <table-name>+ Table to read
+\--target-dir <dir>+ HDFS destination dir
+\--warehouse-dir <dir>+ HDFS parent for table destination
+\--where <where clause>+ WHERE clause to use during import
+-z,\--compress+ Enable compression
---------------------------------------------------------------------
`---------------------------------`--------------------------------------
Argument Description
-------------------------------------------------------------------------
+\--append+ Append data to an existing dataset\
in HDFS
+\--as-sequencefile+ Imports data to SequenceFiles
+\--as-textfile+ Imports data as plain text (default)
+\--columns <col,col,col...>+ Columns to import from table
+\--direct+ Use direct import fast path
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
when importing in direct mode
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
+-e,\--query <statement>+ Import the results of '+statement+'.
+\--split-by <column-name>+ Column of the table used to split work\
units
+\--table <table-name>+ Table to read
+\--target-dir <dir>+ HDFS destination dir
+\--warehouse-dir <dir>+ HDFS parent for table destination
+\--where <where clause>+ WHERE clause to use during import
+-z,\--compress+ Enable compression
+--null-string <null-string>+ The string to be written for a null\
value for string columns
+--null-non-string <null-string>+ The string to be written for a null\
value for non-string columns
-------------------------------------------------------------------------
The +\--null-string+ and +\--null-non-string+ arguments are optional.\
If not specified, then the string "null" will be used.
Selecting the Data to Import
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -114,6 +114,12 @@ public enum IncrementalMode {
// used. If so, it is stored as 'db.password'.
private String password;
@StoredAsProperty("null.string") private String nullStringValue;
@StoredAsProperty("input.null.string") private String inNullStringValue;
@StoredAsProperty("null.non-string") private String nullNonStringValue;
@StoredAsProperty("input.null.non-string")
private String inNullNonStringValue;
@StoredAsProperty("codegen.output.dir") private String codeOutputDir;
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
@ -1597,5 +1603,37 @@ public String getConnManagerClassName() {
return connManagerClassName;
}
public void setNullStringValue(String nullString) {
this.nullStringValue = nullString;
}
public String getNullStringValue() {
return nullStringValue;
}
public void setInNullStringValue(String inNullString) {
this.inNullStringValue = inNullString;
}
public String getInNullStringValue() {
return inNullStringValue;
}
public void setNullNonStringValue(String nullNonString) {
this.nullNonStringValue = nullNonString;
}
public String getNullNonStringValue() {
return nullNonStringValue;
}
public void setInNullNonStringValue(String inNullNonString) {
this.inNullNonStringValue = inNullNonString;
}
public String getInNullNonStringValue() {
return inNullNonStringValue;
}
}

View File

@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) {
private String stringifierForType(String javaType, String colName) {
if (javaType.equals("String")) {
return colName;
// Check if it is null, and write the null representation in such case
String r = colName + "==null?\"" + this.options.getNullStringValue()
+ "\":" + colName;
return r;
} else {
// This is an object type -- just call its toString() in a null-safe way.
return "\"\" + " + colName;
// Also check if it is null, and instead write the null representation
// in such case
String r = colName + "==null?\"" + this.options.getNullNonStringValue()
+ "\":" + "\"\" + " + colName;
return r;
}
}
@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) {
}
/**
* Helper method for parseColumn(). Interpret the string 'null' as a null
* Helper method for parseColumn(). Interpret the string null representation
* for a particular column.
*/
private void parseNullVal(String javaType, String colName, StringBuilder sb) {
if (javaType.equals("String")) {
sb.append(" if (__cur_str.equals(\"null\")) { this.");
sb.append(" if (__cur_str.equals(\""
+ this.options.getInNullStringValue() + "\")) { this.");
sb.append(colName);
sb.append(" = null; } else {\n");
} else {
sb.append(" if (__cur_str.equals(\"null\")");
sb.append(" || __cur_str.length() == 0) { this.");
sb.append(" if (__cur_str.equals(\""
+ this.options.getInNullNonStringValue());
sb.append("\") || __cur_str.length() == 0) { this.");
sb.append(colName);
sb.append(" = null; } else {\n");
}

View File

@ -76,7 +76,11 @@ public abstract class BaseSqoopTool extends SqoopTool {
public static final String HIVE_HOME_ARG = "hive-home";
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
public static final String TARGET_DIR_ARG = "target-dir";
public static final String APPEND_ARG = "append";
public static final String APPEND_ARG = "append";
public static final String NULL_STRING = "null-string";
public static final String INPUT_NULL_STRING = "input-null-string";
public static final String NULL_NON_STRING = "null-non-string";
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
public static final String FMT_TEXTFILE_ARG = "as-textfile";
@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
.withDescription("Put auto-generated classes in this package")
.withLongOpt(PACKAGE_NAME_ARG)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-string")
.hasArg()
.withDescription("Null string representation")
.withLongOpt(NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string")
.hasArg()
.withDescription("Input null string representation")
.withLongOpt(INPUT_NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string")
.hasArg()
.withDescription("Null non-string representation")
.withLongOpt(NULL_NON_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string")
.hasArg()
.withDescription("Input null non-string representation")
.withLongOpt(INPUT_NULL_NON_STRING)
.create());
if (!multiTable) {
codeGenOpts.addOption(OptionBuilder.withArgName("name")
.hasArg()
@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out)
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
}
if (in.hasOption(NULL_STRING)) {
out.setNullStringValue(in.getOptionValue(NULL_STRING));
}
if (in.hasOption(INPUT_NULL_STRING)) {
out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
}
if (in.hasOption(NULL_NON_STRING)) {
out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
}
if (in.hasOption(INPUT_NULL_NON_STRING)) {
out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
}
if (in.hasOption(DRIVER_ARG)) {
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
}