diff --git a/src/docs/man/sqoop-export.txt b/src/docs/man/sqoop-export.txt
index 3ee36a4b..5c967172 100644
--- a/src/docs/man/sqoop-export.txt
+++ b/src/docs/man/sqoop-export.txt
@@ -43,6 +43,12 @@ Export control options
--update-key (col-name)::
Anchor column to use for updates
+--input-null-string::
+ The string to be interpreted as null for string columns
+
+--input-null-non-string::
+ The string to be interpreted as null for non-string columns
+
include::input-args.txt[]
include::output-args.txt[]
diff --git a/src/docs/man/sqoop-import.txt b/src/docs/man/sqoop-import.txt
index dc4fc8ac..735738f9 100644
--- a/src/docs/man/sqoop-import.txt
+++ b/src/docs/man/sqoop-import.txt
@@ -75,6 +75,11 @@ Import control options
-z::
Uses gzip to compress data as it is written to HDFS
+--null-string::
+ The string to be written for a null value for string columns
+
+--null-non-string::
+ The string to be written for a null value for non-string columns
include::output-args.txt[]
diff --git a/src/docs/user/export.txt b/src/docs/user/export.txt
index 3789d689..b26553d7 100644
--- a/src/docs/user/export.txt
+++ b/src/docs/user/export.txt
@@ -43,15 +43,20 @@ include::common-args.txt[]
.Export control arguments:
[grid="all"]
-`---------------------------`------------------------------------------
-Argument Description
------------------------------------------------------------------------
-+\--direct+ Use direct export fast path
-+\--export-dir
+ HDFS source path for the export
-+-m,\--num-mappers + Use 'n' map tasks to export in parallel
-+\--table + Table to populate
-+\--update-key + Anchor column to use for updates
------------------------------------------------------------------------
+`----------------------------------------`------------------------------
+Argument Description
+------------------------------------------------------------------------
++\--direct+ Use direct export fast path
++\--export-dir + HDFS source path for the export
++-m,\--num-mappers + Use 'n' map tasks to export in\
+ parallel
++\--table + Table to populate
++\--update-key + Anchor column to use for updates
++\--input-null-string + The string to be interpreted as\
+ null for string columns
++\--input-null-non-string + The string to be interpreted as\
+ null for non-string columns
+------------------------------------------------------------------------
The +\--table+ and +\--export-dir+ arguments are required. These
specify the table to populate in the database, and the
@@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the
to specify this codepath. This may be
higher-performance than the standard JDBC codepath.
+The +\--input-null-string+ and +\--input-null-non-string+ arguments are
+optional. If +\--input-null-string+ is not specified, then the string
+"null" will be interpreted as null for string-type columns.
+If +\--input-null-non-string+ is not specified, then both the string
+"null" and the empty string will be interpreted as null for non-string
+columns. Note that the empty string will always be interpreted as null
+for non-string columns, in addition to any other string specified by
++\--input-null-non-string+.
Inserts vs. Updates
~~~~~~~~~~~~~~~~~~~
diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt
index bbc0c852..9c60c5a2 100644
--- a/src/docs/user/import.txt
+++ b/src/docs/user/import.txt
@@ -49,30 +49,35 @@ include::connecting.txt[]
.Import control arguments:
[grid="all"]
-`-----------------------------`--------------------------------------
-Argument Description
----------------------------------------------------------------------
-+\--append+ Append data to an existing dataset\
- in HDFS
-+\--as-sequencefile+ Imports data to SequenceFiles
-+\--as-textfile+ Imports data as plain text (default)
-+\--columns + Columns to import from table
-+\--direct+ Use direct import fast path
-+\--direct-split-size + Split the input stream every 'n' bytes\
- when importing in direct mode
-+\--inline-lob-limit + Set the maximum size for an inline LOB
-+-m,\--num-mappers + Use 'n' map tasks to import in parallel
-+-e,\--query + Import the results of '+statement+'.
-+\--split-by + Column of the table used to split work\
- units
-+\--table + Table to read
-+\--target-dir + HDFS destination dir
-+\--warehouse-dir + HDFS parent for table destination
-+\--where + WHERE clause to use during import
-+-z,\--compress+ Enable compression
----------------------------------------------------------------------
-
+`---------------------------------`--------------------------------------
+Argument Description
+-------------------------------------------------------------------------
++\--append+ Append data to an existing dataset\
+ in HDFS
++\--as-sequencefile+ Imports data to SequenceFiles
++\--as-textfile+ Imports data as plain text (default)
++\--columns + Columns to import from table
++\--direct+ Use direct import fast path
++\--direct-split-size + Split the input stream every 'n' bytes\
+ when importing in direct mode
++\--inline-lob-limit + Set the maximum size for an inline LOB
++-m,\--num-mappers + Use 'n' map tasks to import in parallel
++-e,\--query + Import the results of '+statement+'.
++\--split-by + Column of the table used to split work\
+ units
++\--table + Table to read
++\--target-dir + HDFS destination dir
++\--warehouse-dir + HDFS parent for table destination
++\--where + WHERE clause to use during import
++-z,\--compress+ Enable compression
++\--null-string + The string to be written for a null\
+ value for string columns
++\--null-non-string + The string to be written for a null\
+ value for non-string columns
+-------------------------------------------------------------------------
+The +\--null-string+ and +\--null-non-string+ arguments are optional.
+If not specified, then the string "null" will be used.
Selecting the Data to Import
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/src/java/com/cloudera/sqoop/SqoopOptions.java b/src/java/com/cloudera/sqoop/SqoopOptions.java
index a9209dff..c246324a 100644
--- a/src/java/com/cloudera/sqoop/SqoopOptions.java
+++ b/src/java/com/cloudera/sqoop/SqoopOptions.java
@@ -114,6 +114,12 @@ public enum IncrementalMode {
// used. If so, it is stored as 'db.password'.
private String password;
+ @StoredAsProperty("null.string") private String nullStringValue;
+ @StoredAsProperty("input.null.string") private String inNullStringValue;
+ @StoredAsProperty("null.non-string") private String nullNonStringValue;
+ @StoredAsProperty("input.null.non-string")
+ private String inNullNonStringValue;
+
@StoredAsProperty("codegen.output.dir") private String codeOutputDir;
@StoredAsProperty("codegen.compile.dir") private String jarOutputDir;
// Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or
@@ -1597,5 +1603,37 @@ public String getConnManagerClassName() {
return connManagerClassName;
}
+ public void setNullStringValue(String nullString) {
+ this.nullStringValue = nullString;
+ }
+
+ public String getNullStringValue() {
+ return nullStringValue;
+ }
+
+ public void setInNullStringValue(String inNullString) {
+ this.inNullStringValue = inNullString;
+ }
+
+ public String getInNullStringValue() {
+ return inNullStringValue;
+ }
+
+  public void setNullNonStringValue(String nullNonString) {
+    this.nullNonStringValue = nullNonString;
+  }
+
+  public String getNullNonStringValue() {
+    return nullNonStringValue;
+  }
+
+  public void setInNullNonStringValue(String inNullNonString) {
+    this.inNullNonStringValue = inNullNonString;
+  }
+
+  public String getInNullNonStringValue() {
+    return inNullNonStringValue;
+  }
+
}
diff --git a/src/java/com/cloudera/sqoop/orm/ClassWriter.java b/src/java/com/cloudera/sqoop/orm/ClassWriter.java
index db2e8507..59a1973d 100644
--- a/src/java/com/cloudera/sqoop/orm/ClassWriter.java
+++ b/src/java/com/cloudera/sqoop/orm/ClassWriter.java
@@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) {
private String stringifierForType(String javaType, String colName) {
if (javaType.equals("String")) {
- return colName;
+ // Check if it is null, and write the null representation in such case
+ String r = colName + "==null?\"" + this.options.getNullStringValue()
+ + "\":" + colName;
+ return r;
} else {
// This is an object type -- just call its toString() in a null-safe way.
- return "\"\" + " + colName;
+ // Also check if it is null, and instead write the null representation
+ // in such case
+ String r = colName + "==null?\"" + this.options.getNullNonStringValue()
+ + "\":" + "\"\" + " + colName;
+ return r;
}
}
@@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) {
}
/**
- * Helper method for parseColumn(). Interpret the string 'null' as a null
+ * Helper method for parseColumn(). Interpret the string null representation
* for a particular column.
*/
private void parseNullVal(String javaType, String colName, StringBuilder sb) {
if (javaType.equals("String")) {
- sb.append(" if (__cur_str.equals(\"null\")) { this.");
+ sb.append(" if (__cur_str.equals(\""
+ + this.options.getInNullStringValue() + "\")) { this.");
sb.append(colName);
sb.append(" = null; } else {\n");
} else {
- sb.append(" if (__cur_str.equals(\"null\")");
- sb.append(" || __cur_str.length() == 0) { this.");
+ sb.append(" if (__cur_str.equals(\""
+ + this.options.getInNullNonStringValue());
+ sb.append("\") || __cur_str.length() == 0) { this.");
sb.append(colName);
sb.append(" = null; } else {\n");
}
diff --git a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java
index 5bc54be7..bf63bf23 100644
--- a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java
+++ b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java
@@ -76,7 +76,11 @@ public abstract class BaseSqoopTool extends SqoopTool {
public static final String HIVE_HOME_ARG = "hive-home";
public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
public static final String TARGET_DIR_ARG = "target-dir";
- public static final String APPEND_ARG = "append";
+ public static final String APPEND_ARG = "append";
+ public static final String NULL_STRING = "null-string";
+ public static final String INPUT_NULL_STRING = "input-null-string";
+ public static final String NULL_NON_STRING = "null-non-string";
+ public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
public static final String FMT_TEXTFILE_ARG = "as-textfile";
@@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
.withDescription("Put auto-generated classes in this package")
.withLongOpt(PACKAGE_NAME_ARG)
.create());
+ codeGenOpts.addOption(OptionBuilder.withArgName("null-string")
+ .hasArg()
+ .withDescription("Null string representation")
+ .withLongOpt(NULL_STRING)
+ .create());
+ codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string")
+ .hasArg()
+ .withDescription("Input null string representation")
+ .withLongOpt(INPUT_NULL_STRING)
+ .create());
+ codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string")
+ .hasArg()
+ .withDescription("Null non-string representation")
+ .withLongOpt(NULL_NON_STRING)
+ .create());
+ codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string")
+ .hasArg()
+ .withDescription("Input null non-string representation")
+ .withLongOpt(INPUT_NULL_NON_STRING)
+ .create());
if (!multiTable) {
codeGenOpts.addOption(OptionBuilder.withArgName("name")
.hasArg()
@@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out)
out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME));
}
+ if (in.hasOption(NULL_STRING)) {
+ out.setNullStringValue(in.getOptionValue(NULL_STRING));
+ }
+
+ if (in.hasOption(INPUT_NULL_STRING)) {
+ out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING));
+ }
+
+ if (in.hasOption(NULL_NON_STRING)) {
+ out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING));
+ }
+
+ if (in.hasOption(INPUT_NULL_NON_STRING)) {
+ out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING));
+ }
+
if (in.hasOption(DRIVER_ARG)) {
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
}