mirror of
https://github.com/apache/sqoop.git
synced 2025-05-04 14:32:46 +08:00
SQOOP-12. Alternate NULL formats.
This fix allows the user to optionally specify different null representations. It addresses both the import and export use cases, in addition to both string and non-string column types. From: Ahmed Radwan <ahmed@cloudera.com> git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55cce082c2
commit
2eaa878ff0
@ -43,6 +43,12 @@ Export control options
|
|||||||
--update-key (col-name)::
|
--update-key (col-name)::
|
||||||
Anchor column to use for updates
|
Anchor column to use for updates
|
||||||
|
|
||||||
|
--input-null-string::
|
||||||
|
The string to be interpreted as null for string columns
|
||||||
|
|
||||||
|
--input-null-non-string::
|
||||||
|
The string to be interpreted as null for non-string columns
|
||||||
|
|
||||||
include::input-args.txt[]
|
include::input-args.txt[]
|
||||||
|
|
||||||
include::output-args.txt[]
|
include::output-args.txt[]
|
||||||
|
@ -75,6 +75,11 @@ Import control options
|
|||||||
-z::
|
-z::
|
||||||
Uses gzip to compress data as it is written to HDFS
|
Uses gzip to compress data as it is written to HDFS
|
||||||
|
|
||||||
|
--null-string::
|
||||||
|
The string to be written for a null value for string columns
|
||||||
|
|
||||||
|
--null-non-string::
|
||||||
|
The string to be written for a null value for non-string columns
|
||||||
|
|
||||||
include::output-args.txt[]
|
include::output-args.txt[]
|
||||||
|
|
||||||
|
@ -43,15 +43,20 @@ include::common-args.txt[]
|
|||||||
|
|
||||||
.Export control arguments:
|
.Export control arguments:
|
||||||
[grid="all"]
|
[grid="all"]
|
||||||
`---------------------------`------------------------------------------
|
`----------------------------------------`------------------------------
|
||||||
Argument Description
|
Argument Description
|
||||||
-----------------------------------------------------------------------
|
------------------------------------------------------------------------
|
||||||
+\--direct+ Use direct export fast path
|
+\--direct+ Use direct export fast path
|
||||||
+\--export-dir <dir>+ HDFS source path for the export
|
+\--export-dir <dir>+ HDFS source path for the export
|
||||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in parallel
|
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in\
|
||||||
+\--table <table-name>+ Table to populate
|
parallel
|
||||||
+\--update-key <col-name>+ Anchor column to use for updates
|
+\--table <table-name>+ Table to populate
|
||||||
-----------------------------------------------------------------------
|
+\--update-key <col-name>+ Anchor column to use for updates
|
||||||
|
+\--input-null-string <null-string>+ The string to be interpreted as\
|
||||||
|
null for string columns
|
||||||
|
+\--input-null-non-string <null-string>+ The string to be interpreted as\
|
||||||
|
null for non-string columns
|
||||||
|
------------------------------------------------------------------------
|
||||||
|
|
||||||
The +\--table+ and +\--export-dir+ arguments are required. These
|
The +\--table+ and +\--export-dir+ arguments are required. These
|
||||||
specify the table to populate in the database, and the
|
specify the table to populate in the database, and the
|
||||||
@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the
|
|||||||
to specify this codepath. This may be
|
to specify this codepath. This may be
|
||||||
higher-performance than the standard JDBC codepath.
|
higher-performance than the standard JDBC codepath.
|
||||||
|
|
||||||
|
The +\--input-null-string+ and +\--input-null-non-string+ arguments are
|
||||||
|
optional. If +\--input-null-string+ is not specified, then the string
|
||||||
|
"null" will be interpreted as null for string-type columns.
|
||||||
|
If +\--input-null-non-string+ is not specified, then both the string
|
||||||
|
"null" and the empty string will be interpreted as null for non-string
|
||||||
|
columns. Note that the empty string will always be interpreted as null
|
||||||
|
for non-string columns, in addition to any other string specified by
|
||||||
|
+\--input-null-non-string+.
|
||||||
|
|
||||||
Inserts vs. Updates
|
Inserts vs. Updates
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -49,30 +49,35 @@ include::connecting.txt[]
|
|||||||
|
|
||||||
.Import control arguments:
|
.Import control arguments:
|
||||||
[grid="all"]
|
[grid="all"]
|
||||||
`-----------------------------`--------------------------------------
|
`---------------------------------`--------------------------------------
|
||||||
Argument Description
|
Argument Description
|
||||||
---------------------------------------------------------------------
|
-------------------------------------------------------------------------
|
||||||
+\--append+ Append data to an existing dataset\
|
+\--append+ Append data to an existing dataset\
|
||||||
in HDFS
|
in HDFS
|
||||||
+\--as-sequencefile+ Imports data to SequenceFiles
|
+\--as-sequencefile+ Imports data to SequenceFiles
|
||||||
+\--as-textfile+ Imports data as plain text (default)
|
+\--as-textfile+ Imports data as plain text (default)
|
||||||
+\--columns <col,col,col...>+ Columns to import from table
|
+\--columns <col,col,col...>+ Columns to import from table
|
||||||
+\--direct+ Use direct import fast path
|
+\--direct+ Use direct import fast path
|
||||||
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
|
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
|
||||||
when importing in direct mode
|
when importing in direct mode
|
||||||
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
|
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
|
||||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
|
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
|
||||||
+-e,\--query <statement>+ Import the results of '+statement+'.
|
+-e,\--query <statement>+ Import the results of '+statement+'.
|
||||||
+\--split-by <column-name>+ Column of the table used to split work\
|
+\--split-by <column-name>+ Column of the table used to split work\
|
||||||
units
|
units
|
||||||
+\--table <table-name>+ Table to read
|
+\--table <table-name>+ Table to read
|
||||||
+\--target-dir <dir>+ HDFS destination dir
|
+\--target-dir <dir>+ HDFS destination dir
|
||||||
+\--warehouse-dir <dir>+ HDFS parent for table destination
|
+\--warehouse-dir <dir>+ HDFS parent for table destination
|
||||||
+\--where <where clause>+ WHERE clause to use during import
|
+\--where <where clause>+ WHERE clause to use during import
|
||||||
+-z,\--compress+ Enable compression
|
+-z,\--compress+ Enable compression
|
||||||
---------------------------------------------------------------------
|
+\--null-string <null-string>+ The string to be written for a null\
|
||||||
|
value for string columns
|
||||||
|
+\--null-non-string <null-string>+ The string to be written for a null\
|
||||||
|
value for non-string columns
|
||||||
|
-------------------------------------------------------------------------
|
||||||
|
|
||||||
|
The +\--null-string+ and +\--null-non-string+ arguments are optional.
|
||||||
|
If not specified, then the string "null" will be used.
|
||||||
|
|
||||||
Selecting the Data to Import
|
Selecting the Data to Import
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
@ -114,6 +114,12 @@ public enum IncrementalMode {
|
|||||||
// used. If so, it is stored as 'db.password'.
|
// used. If so, it is stored as 'db.password'.
|
||||||
private String password;
|
private String password;
|
||||||
|
|
||||||
|
@StoredAsProperty("null.string") private String nullStringValue;
|
||||||
|
@StoredAsProperty("input.null.string") private String inNullStringValue;
|
||||||
|
@StoredAsProperty("null.non-string") private String nullNonStringValue;
|
||||||
|
@StoredAsProperty("input.null.non-string")
|
||||||
|
private String inNullNonStringValue;
|
||||||
|
|
||||||
@StoredAsProperty("codegen.output.dir") private String codeOutputDir;
|
@StoredAsProperty("codegen.output.dir") private String codeOutputDir;
|
||||||
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
|
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
|
||||||
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
|
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
|
||||||
@ -1597,5 +1603,37 @@ public String getConnManagerClassName() {
|
|||||||
return connManagerClassName;
|
return connManagerClassName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setNullStringValue(String nullString) {
|
||||||
|
this.nullStringValue = nullString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getNullStringValue() {
|
||||||
|
return nullStringValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInNullStringValue(String inNullString) {
|
||||||
|
this.inNullStringValue = inNullString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getInNullStringValue() {
|
||||||
|
return inNullStringValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNullNonStringValue(String nullNonString) {
|
||||||
|
this.nullNonStringValue = nullNonString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getNullNonStringValue() {
|
||||||
|
return nullNonStringValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setInNullNonStringValue(String inNullNonString) {
|
||||||
|
this.inNullNonStringValue = inNullNonString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getInNullNonStringValue() {
|
||||||
|
return inNullNonStringValue;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) {
|
|||||||
|
|
||||||
private String stringifierForType(String javaType, String colName) {
|
private String stringifierForType(String javaType, String colName) {
|
||||||
if (javaType.equals("String")) {
|
if (javaType.equals("String")) {
|
||||||
return colName;
|
// Check if it is null, and write the null representation in such case
|
||||||
|
String r = colName + "==null?\"" + this.options.getNullStringValue()
|
||||||
|
+ "\":" + colName;
|
||||||
|
return r;
|
||||||
} else {
|
} else {
|
||||||
// This is an object type -- just call its toString() in a null-safe way.
|
// This is an object type -- just call its toString() in a null-safe way.
|
||||||
return "\"\" + " + colName;
|
// Also check if it is null, and instead write the null representation
|
||||||
|
// in such case
|
||||||
|
String r = colName + "==null?\"" + this.options.getNullNonStringValue()
|
||||||
|
+ "\":" + "\"\" + " + colName;
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method for parseColumn(). Interpret the string 'null' as a null
|
* Helper method for parseColumn(). Interpret the string null representation
|
||||||
* for a particular column.
|
* for a particular column.
|
||||||
*/
|
*/
|
||||||
private void parseNullVal(String javaType, String colName, StringBuilder sb) {
|
private void parseNullVal(String javaType, String colName, StringBuilder sb) {
|
||||||
if (javaType.equals("String")) {
|
if (javaType.equals("String")) {
|
||||||
sb.append(" if (__cur_str.equals(\"null\")) { this.");
|
sb.append(" if (__cur_str.equals(\""
|
||||||
|
+ this.options.getInNullStringValue() + "\")) { this.");
|
||||||
sb.append(colName);
|
sb.append(colName);
|
||||||
sb.append(" = null; } else {\n");
|
sb.append(" = null; } else {\n");
|
||||||
} else {
|
} else {
|
||||||
sb.append(" if (__cur_str.equals(\"null\")");
|
sb.append(" if (__cur_str.equals(\""
|
||||||
sb.append(" || __cur_str.length() == 0) { this.");
|
+ this.options.getInNullNonStringValue());
|
||||||
|
sb.append("\") || __cur_str.length() == 0) { this.");
|
||||||
sb.append(colName);
|
sb.append(colName);
|
||||||
sb.append(" = null; } else {\n");
|
sb.append(" = null; } else {\n");
|
||||||
}
|
}
|
||||||
|
@ -77,6 +77,10 @@ public abstract class BaseSqoopTool extends SqoopTool {
|
|||||||
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
|
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
|
||||||
public static final String TARGET_DIR_ARG = "target-dir";
|
public static final String TARGET_DIR_ARG = "target-dir";
|
||||||
public static final String APPEND_ARG = "append";
|
public static final String APPEND_ARG = "append";
|
||||||
|
public static final String NULL_STRING = "null-string";
|
||||||
|
public static final String INPUT_NULL_STRING = "input-null-string";
|
||||||
|
public static final String NULL_NON_STRING = "null-non-string";
|
||||||
|
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
|
||||||
|
|
||||||
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
|
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
|
||||||
public static final String FMT_TEXTFILE_ARG = "as-textfile";
|
public static final String FMT_TEXTFILE_ARG = "as-textfile";
|
||||||
@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
|
|||||||
.withDescription("Put auto-generated classes in this package")
|
.withDescription("Put auto-generated classes in this package")
|
||||||
.withLongOpt(PACKAGE_NAME_ARG)
|
.withLongOpt(PACKAGE_NAME_ARG)
|
||||||
.create());
|
.create());
|
||||||
|
codeGenOpts.addOption(OptionBuilder.withArgName("null-string")
|
||||||
|
.hasArg()
|
||||||
|
.withDescription("Null string representation")
|
||||||
|
.withLongOpt(NULL_STRING)
|
||||||
|
.create());
|
||||||
|
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string")
|
||||||
|
.hasArg()
|
||||||
|
.withDescription("Input null string representation")
|
||||||
|
.withLongOpt(INPUT_NULL_STRING)
|
||||||
|
.create());
|
||||||
|
codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string")
|
||||||
|
.hasArg()
|
||||||
|
.withDescription("Null non-string representation")
|
||||||
|
.withLongOpt(NULL_NON_STRING)
|
||||||
|
.create());
|
||||||
|
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string")
|
||||||
|
.hasArg()
|
||||||
|
.withDescription("Input null non-string representation")
|
||||||
|
.withLongOpt(INPUT_NULL_NON_STRING)
|
||||||
|
.create());
|
||||||
if (!multiTable) {
|
if (!multiTable) {
|
||||||
codeGenOpts.addOption(OptionBuilder.withArgName("name")
|
codeGenOpts.addOption(OptionBuilder.withArgName("name")
|
||||||
.hasArg()
|
.hasArg()
|
||||||
@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out)
|
|||||||
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
|
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (in.hasOption(NULL_STRING)) {
|
||||||
|
out.setNullStringValue(in.getOptionValue(NULL_STRING));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in.hasOption(INPUT_NULL_STRING)) {
|
||||||
|
out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in.hasOption(NULL_NON_STRING)) {
|
||||||
|
out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in.hasOption(INPUT_NULL_NON_STRING)) {
|
||||||
|
out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
|
||||||
|
}
|
||||||
|
|
||||||
if (in.hasOption(DRIVER_ARG)) {
|
if (in.hasOption(DRIVER_ARG)) {
|
||||||
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
|
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user