mirror of
https://github.com/apache/sqoop.git
synced 2025-05-03 21:49:40 +08:00
SQOOP-12. Alternate NULL formats.
This fix allows the user to optionally specify different null representations. It addresses both the import and export use cases, in addition to both string and non-string column types. From: Ahmed Radwan <ahmed@cloudera.com> git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55cce082c2
commit
2eaa878ff0
@ -43,6 +43,12 @@ Export control options
|
||||
--update-key (col-name)::
|
||||
Anchor column to use for updates
|
||||
|
||||
--input-null-string::
|
||||
The string to be interpreted as null for string columns
|
||||
|
||||
--input-null-non-string::
|
||||
The string to be interpreted as null for non-string columns
|
||||
|
||||
include::input-args.txt[]
|
||||
|
||||
include::output-args.txt[]
|
||||
|
@ -75,6 +75,11 @@ Import control options
|
||||
-z::
|
||||
Uses gzip to compress data as it is written to HDFS
|
||||
|
||||
--null-string::
|
||||
The string to be written for a null value for string columns
|
||||
|
||||
--null-non-string::
|
||||
The string to be written for a null value for non-string columns
|
||||
|
||||
include::output-args.txt[]
|
||||
|
||||
|
@ -43,15 +43,20 @@ include::common-args.txt[]
|
||||
|
||||
.Export control arguments:
|
||||
[grid="all"]
|
||||
`---------------------------`------------------------------------------
|
||||
Argument Description
|
||||
-----------------------------------------------------------------------
|
||||
+\--direct+ Use direct export fast path
|
||||
+\--export-dir <dir>+ HDFS source path for the export
|
||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in parallel
|
||||
+\--table <table-name>+ Table to populate
|
||||
+\--update-key <col-name>+ Anchor column to use for updates
|
||||
-----------------------------------------------------------------------
|
||||
`----------------------------------------`------------------------------
|
||||
Argument Description
|
||||
------------------------------------------------------------------------
|
||||
+\--direct+ Use direct export fast path
|
||||
+\--export-dir <dir>+ HDFS source path for the export
|
||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to export in\
|
||||
parallel
|
||||
+\--table <table-name>+ Table to populate
|
||||
+\--update-key <col-name>+ Anchor column to use for updates
|
||||
+\--input-null-string <null-string>+ The string to be interpreted as\
|
||||
null for string columns
|
||||
+\--input-null-non-string <null-string>+ The string to be interpreted as\
|
||||
null for non-string columns
|
||||
------------------------------------------------------------------------
|
||||
|
||||
The +\--table+ and +\--export-dir+ arguments are required. These
|
||||
specify the table to populate in the database, and the
|
||||
@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the
|
||||
to specify this codepath. This may be
|
||||
higher-performance than the standard JDBC codepath.
|
||||
|
||||
The +\--input-null-string+ and +\--input-null-non-string+ arguments are
|
||||
optional. If +\--input-null-string+ is not specified, then the string
|
||||
"null" will be interpreted as null for string-type columns.
|
||||
If +\--input-null-non-string+ is not specified, then both the string
|
||||
"null" and the empty string will be interpreted as null for non-string
|
||||
columns. Note that the empty string will always be interpreted as null
|
||||
for non-string columns, in addition to any other string specified by
|
||||
+\--input-null-non-string+.
|
||||
|
||||
Inserts vs. Updates
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
|
@ -49,30 +49,35 @@ include::connecting.txt[]
|
||||
|
||||
.Import control arguments:
|
||||
[grid="all"]
|
||||
`-----------------------------`--------------------------------------
|
||||
Argument Description
|
||||
---------------------------------------------------------------------
|
||||
+\--append+ Append data to an existing dataset\
|
||||
in HDFS
|
||||
+\--as-sequencefile+ Imports data to SequenceFiles
|
||||
+\--as-textfile+ Imports data as plain text (default)
|
||||
+\--columns <col,col,col...>+ Columns to import from table
|
||||
+\--direct+ Use direct import fast path
|
||||
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
|
||||
when importing in direct mode
|
||||
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
|
||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
|
||||
+-e,\--query <statement>+ Import the results of '+statement+'.
|
||||
+\--split-by <column-name>+ Column of the table used to split work\
|
||||
units
|
||||
+\--table <table-name>+ Table to read
|
||||
+\--target-dir <dir>+ HDFS destination dir
|
||||
+\--warehouse-dir <dir>+ HDFS parent for table destination
|
||||
+\--where <where clause>+ WHERE clause to use during import
|
||||
+-z,\--compress+ Enable compression
|
||||
---------------------------------------------------------------------
|
||||
|
||||
`---------------------------------`--------------------------------------
|
||||
Argument Description
|
||||
-------------------------------------------------------------------------
|
||||
+\--append+ Append data to an existing dataset\
|
||||
in HDFS
|
||||
+\--as-sequencefile+ Imports data to SequenceFiles
|
||||
+\--as-textfile+ Imports data as plain text (default)
|
||||
+\--columns <col,col,col...>+ Columns to import from table
|
||||
+\--direct+ Use direct import fast path
|
||||
+\--direct-split-size <n>+ Split the input stream every 'n' bytes\
|
||||
when importing in direct mode
|
||||
+\--inline-lob-limit <n>+ Set the maximum size for an inline LOB
|
||||
+-m,\--num-mappers <n>+ Use 'n' map tasks to import in parallel
|
||||
+-e,\--query <statement>+ Import the results of '+statement+'.
|
||||
+\--split-by <column-name>+ Column of the table used to split work\
|
||||
units
|
||||
+\--table <table-name>+ Table to read
|
||||
+\--target-dir <dir>+ HDFS destination dir
|
||||
+\--warehouse-dir <dir>+ HDFS parent for table destination
|
||||
+\--where <where clause>+ WHERE clause to use during import
|
||||
+-z,\--compress+ Enable compression
|
||||
+\--null-string <null-string>+ The string to be written for a null\
|
||||
value for string columns
|
||||
+\--null-non-string <null-string>+ The string to be written for a null\
|
||||
value for non-string columns
|
||||
-------------------------------------------------------------------------
|
||||
|
||||
The +\--null-string+ and +\--null-non-string+ arguments are optional.
|
||||
If not specified, then the string "null" will be used.
|
||||
|
||||
Selecting the Data to Import
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
@ -114,6 +114,12 @@ public enum IncrementalMode {
|
||||
// used. If so, it is stored as 'db.password'.
|
||||
private String password;
|
||||
|
||||
@StoredAsProperty("null.string") private String nullStringValue;
|
||||
@StoredAsProperty("input.null.string") private String inNullStringValue;
|
||||
@StoredAsProperty("null.non-string") private String nullNonStringValue;
|
||||
@StoredAsProperty("input.null.non-string")
|
||||
private String inNullNonStringValue;
|
||||
|
||||
@StoredAsProperty("codegen.output.dir") private String codeOutputDir;
|
||||
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
|
||||
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
|
||||
@ -1597,5 +1603,37 @@ public String getConnManagerClassName() {
|
||||
return connManagerClassName;
|
||||
}
|
||||
|
||||
/**
 * Sets the string that is written in place of a null value for string
 * columns (the value of the --null-string option, persisted under the
 * "null.string" property).
 *
 * @param nullString the null representation to store; stored as given
 */
public void setNullStringValue(String nullString) {
|
||||
this.nullStringValue = nullString;
|
||||
}
|
||||
|
||||
/**
 * Returns the string that is written in place of a null value for string
 * columns.
 *
 * @return the stored null representation for string columns
 */
public String getNullStringValue() {
|
||||
return nullStringValue;
|
||||
}
|
||||
|
||||
/**
 * Sets the input string that is interpreted as null for string columns
 * (the value of the --input-null-string option, persisted under the
 * "input.null.string" property).
 *
 * @param inNullString the input null representation to store; stored as given
 */
public void setInNullStringValue(String inNullString) {
|
||||
this.inNullStringValue = inNullString;
|
||||
}
|
||||
|
||||
/**
 * Returns the input string that is interpreted as null for string columns.
 *
 * @return the stored input null representation for string columns
 */
public String getInNullStringValue() {
|
||||
return inNullStringValue;
|
||||
}
|
||||
|
||||
/**
 * Sets the string that is written in place of a null value for non-string
 * columns (the value of the --null-non-string option, persisted under the
 * "null.non-string" property).
 *
 * @param nullNonString the null representation to store; stored as given
 */
public void setNullNonStringValue(String nullNonString) {
|
||||
this.nullNonStringValue = nullNonString;
|
||||
}
|
||||
|
||||
/**
 * Returns the string that is written in place of a null value for
 * non-string columns.
 *
 * @return the stored null representation for non-string columns
 */
public String getNullNonStringValue() {
|
||||
return nullNonStringValue;
|
||||
}
|
||||
|
||||
/**
 * Sets the input string that is interpreted as null for non-string columns
 * (the value of the --input-null-non-string option, persisted under the
 * "input.null.non-string" property).
 *
 * @param inNullNonString the input null representation to store; stored as
 *     given
 */
public void setInNullNonStringValue(String inNullNonString) {
|
||||
this.inNullNonStringValue = inNullNonString;
|
||||
}
|
||||
|
||||
/**
 * Returns the input string that is interpreted as null for non-string
 * columns.
 *
 * @return the stored input null representation for non-string columns
 */
public String getInNullNonStringValue() {
|
||||
return inNullNonStringValue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) {
|
||||
|
||||
private String stringifierForType(String javaType, String colName) {
|
||||
if (javaType.equals("String")) {
|
||||
return colName;
|
||||
// Check if it is null, and write the null representation in such case
|
||||
String r = colName + "==null?\"" + this.options.getNullStringValue()
|
||||
+ "\":" + colName;
|
||||
return r;
|
||||
} else {
|
||||
// This is an object type -- just call its toString() in a null-safe way.
|
||||
return "\"\" + " + colName;
|
||||
// Also check if it is null, and instead write the null representation
|
||||
// in such case
|
||||
String r = colName + "==null?\"" + this.options.getNullNonStringValue()
|
||||
+ "\":" + "\"\" + " + colName;
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method for parseColumn(). Interpret the string 'null' as a null
|
||||
* Helper method for parseColumn(). Interpret the string null representation
|
||||
* for a particular column.
|
||||
*/
|
||||
private void parseNullVal(String javaType, String colName, StringBuilder sb) {
|
||||
if (javaType.equals("String")) {
|
||||
sb.append(" if (__cur_str.equals(\"null\")) { this.");
|
||||
sb.append(" if (__cur_str.equals(\""
|
||||
+ this.options.getInNullStringValue() + "\")) { this.");
|
||||
sb.append(colName);
|
||||
sb.append(" = null; } else {\n");
|
||||
} else {
|
||||
sb.append(" if (__cur_str.equals(\"null\")");
|
||||
sb.append(" || __cur_str.length() == 0) { this.");
|
||||
sb.append(" if (__cur_str.equals(\""
|
||||
+ this.options.getInNullNonStringValue());
|
||||
sb.append("\") || __cur_str.length() == 0) { this.");
|
||||
sb.append(colName);
|
||||
sb.append(" = null; } else {\n");
|
||||
}
|
||||
|
@ -76,7 +76,11 @@ public abstract class BaseSqoopTool extends SqoopTool {
|
||||
public static final String HIVE_HOME_ARG = "hive-home";
|
||||
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
|
||||
public static final String TARGET_DIR_ARG = "target-dir";
|
||||
public static final String APPEND_ARG = "append";
|
||||
public static final String APPEND_ARG = "append";
|
||||
public static final String NULL_STRING = "null-string";
|
||||
public static final String INPUT_NULL_STRING = "input-null-string";
|
||||
public static final String NULL_NON_STRING = "null-non-string";
|
||||
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
|
||||
|
||||
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
|
||||
public static final String FMT_TEXTFILE_ARG = "as-textfile";
|
||||
@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
|
||||
.withDescription("Put auto-generated classes in this package")
|
||||
.withLongOpt(PACKAGE_NAME_ARG)
|
||||
.create());
|
||||
codeGenOpts.addOption(OptionBuilder.withArgName("null-string")
|
||||
.hasArg()
|
||||
.withDescription("Null string representation")
|
||||
.withLongOpt(NULL_STRING)
|
||||
.create());
|
||||
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string")
|
||||
.hasArg()
|
||||
.withDescription("Input null string representation")
|
||||
.withLongOpt(INPUT_NULL_STRING)
|
||||
.create());
|
||||
codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string")
|
||||
.hasArg()
|
||||
.withDescription("Null non-string representation")
|
||||
.withLongOpt(NULL_NON_STRING)
|
||||
.create());
|
||||
codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string")
|
||||
.hasArg()
|
||||
.withDescription("Input null non-string representation")
|
||||
.withLongOpt(INPUT_NULL_NON_STRING)
|
||||
.create());
|
||||
if (!multiTable) {
|
||||
codeGenOpts.addOption(OptionBuilder.withArgName("name")
|
||||
.hasArg()
|
||||
@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out)
|
||||
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
|
||||
}
|
||||
|
||||
if (in.hasOption(NULL_STRING)) {
|
||||
out.setNullStringValue(in.getOptionValue(NULL_STRING));
|
||||
}
|
||||
|
||||
if (in.hasOption(INPUT_NULL_STRING)) {
|
||||
out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
|
||||
}
|
||||
|
||||
if (in.hasOption(NULL_NON_STRING)) {
|
||||
out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
|
||||
}
|
||||
|
||||
if (in.hasOption(INPUT_NULL_NON_STRING)) {
|
||||
out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
|
||||
}
|
||||
|
||||
if (in.hasOption(DRIVER_ARG)) {
|
||||
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user