diff --git a/src/docs/man/sqoop-export.txt b/src/docs/man/sqoop-export.txt index 3ee36a4b..5c967172 100644 --- a/src/docs/man/sqoop-export.txt +++ b/src/docs/man/sqoop-export.txt @@ -43,6 +43,12 @@ Export control options --update-key (col-name):: Anchor column to use for updates +--input-null-string:: + The string to be interpreted as null for string columns + +--input-null-non-string:: + The string to be interpreted as null for non-string columns + include::input-args.txt[] include::output-args.txt[] diff --git a/src/docs/man/sqoop-import.txt b/src/docs/man/sqoop-import.txt index dc4fc8ac..735738f9 100644 --- a/src/docs/man/sqoop-import.txt +++ b/src/docs/man/sqoop-import.txt @@ -75,6 +75,11 @@ Import control options -z:: Uses gzip to compress data as it is written to HDFS +--null-string:: + The string to be written for a null value for string columns + +--null-non-string:: + The string to be written for a null value for non-string columns include::output-args.txt[] diff --git a/src/docs/user/export.txt b/src/docs/user/export.txt index 3789d689..b26553d7 100644 --- a/src/docs/user/export.txt +++ b/src/docs/user/export.txt @@ -43,15 +43,20 @@ include::common-args.txt[] .Export control arguments: [grid="all"] -`---------------------------`------------------------------------------ -Argument Description ------------------------------------------------------------------------ -+\--direct+ Use direct export fast path -+\--export-dir + HDFS source path for the export -+-m,\--num-mappers + Use 'n' map tasks to export in parallel -+\--table + Table to populate -+\--update-key + Anchor column to use for updates ------------------------------------------------------------------------ +`----------------------------------------`------------------------------ +Argument Description +------------------------------------------------------------------------ ++\--direct+ Use direct export fast path ++\--export-dir + HDFS source path for the export ++-m,\--num-mappers + Use 'n' map 
tasks to export in\ + parallel ++\--table + Table to populate ++\--update-key + Anchor column to use for updates ++\--input-null-string + The string to be interpreted as\ + null for string columns ++\--input-null-non-string + The string to be interpreted as\ + null for non-string columns +------------------------------------------------------------------------ The +\--table+ and +\--export-dir+ arguments are required. These specify the table to populate in the database, and the @@ -73,6 +78,14 @@ MySQL provides a direct mode for exports as well, using the to specify this codepath. This may be higher-performance than the standard JDBC codepath. +The +\--input-null-string+ and +\--input-null-non-string+ arguments are +optional. If +\--input-null-string+ is not specified, then the string +"null" will be interpreted as null for string-type columns. +If +\--input-null-non-string+ is not specified, then both the string +"null" and the empty string will be interpreted as null for non-string +columns. Note that the empty string will always be interpreted as null +for non-string columns, in addition to any other string specified by ++\--input-null-non-string+. Inserts vs.
Updates ~~~~~~~~~~~~~~~~~~~ diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt index bbc0c852..9c60c5a2 100644 --- a/src/docs/user/import.txt +++ b/src/docs/user/import.txt @@ -49,30 +49,35 @@ include::connecting.txt[] .Import control arguments: [grid="all"] -`-----------------------------`-------------------------------------- -Argument Description ---------------------------------------------------------------------- -+\--append+ Append data to an existing dataset\ - in HDFS -+\--as-sequencefile+ Imports data to SequenceFiles -+\--as-textfile+ Imports data as plain text (default) -+\--columns + Columns to import from table -+\--direct+ Use direct import fast path -+\--direct-split-size + Split the input stream every 'n' bytes\ - when importing in direct mode -+\--inline-lob-limit + Set the maximum size for an inline LOB -+-m,\--num-mappers + Use 'n' map tasks to import in parallel -+-e,\--query + Import the results of '+statement+'. -+\--split-by + Column of the table used to split work\ - units -+\--table + Table to read -+\--target-dir + HDFS destination dir -+\--warehouse-dir + HDFS parent for table destination -+\--where + WHERE clause to use during import -+-z,\--compress+ Enable compression ---------------------------------------------------------------------- - +`---------------------------------`-------------------------------------- +Argument Description +------------------------------------------------------------------------- ++\--append+ Append data to an existing dataset\ + in HDFS ++\--as-sequencefile+ Imports data to SequenceFiles ++\--as-textfile+ Imports data as plain text (default) ++\--columns + Columns to import from table ++\--direct+ Use direct import fast path ++\--direct-split-size + Split the input stream every 'n' bytes\ + when importing in direct mode ++\--inline-lob-limit + Set the maximum size for an inline LOB ++-m,\--num-mappers + Use 'n' map tasks to import in parallel ++-e,\--query + Import the results of 
'+statement+'. ++\--split-by + Column of the table used to split work\ + units ++\--table + Table to read ++\--target-dir + HDFS destination dir ++\--warehouse-dir + HDFS parent for table destination ++\--where + WHERE clause to use during import ++-z,\--compress+ Enable compression ++\--null-string + The string to be written for a null\ + value for string columns ++\--null-non-string + The string to be written for a null\ + value for non-string columns +------------------------------------------------------------------------- +The +\--null-string+ and +\--null-non-string+ arguments are optional. +If not specified, then the string "null" will be used. Selecting the Data to Import ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/java/com/cloudera/sqoop/SqoopOptions.java b/src/java/com/cloudera/sqoop/SqoopOptions.java index a9209dff..c246324a 100644 --- a/src/java/com/cloudera/sqoop/SqoopOptions.java +++ b/src/java/com/cloudera/sqoop/SqoopOptions.java @@ -114,6 +114,12 @@ public enum IncrementalMode { // used. If so, it is stored as 'db.password'.
private String password; + @StoredAsProperty("null.string") private String nullStringValue; + @StoredAsProperty("input.null.string") private String inNullStringValue; + @StoredAsProperty("null.non-string") private String nullNonStringValue; + @StoredAsProperty("input.null.non-string") + private String inNullNonStringValue; + @StoredAsProperty("codegen.output.dir") private String codeOutputDir; @StoredAsProperty("codegen.compile.dir") private String jarOutputDir; // Boolean specifying whether jarOutputDir is a nonce tmpdir (true), or @@ -1597,5 +1603,37 @@ public String getConnManagerClassName() { return connManagerClassName; } + public void setNullStringValue(String nullString) { + this.nullStringValue = nullString; + } + + public String getNullStringValue() { + return nullStringValue; + } + + public void setInNullStringValue(String inNullString) { + this.inNullStringValue = inNullString; + } + + public String getInNullStringValue() { + return inNullStringValue; + } + +public void setNullNonStringValue(String nullNonString) { + this.nullNonStringValue = nullNonString; +} + +public String getNullNonStringValue() { + return nullNonStringValue; +} + +public void setInNullNonStringValue(String inNullNonString) { + this.inNullNonStringValue = inNullNonString; +} + +public String getInNullNonStringValue() { + return inNullNonStringValue; +} + } diff --git a/src/java/com/cloudera/sqoop/orm/ClassWriter.java b/src/java/com/cloudera/sqoop/orm/ClassWriter.java index db2e8507..59a1973d 100644 --- a/src/java/com/cloudera/sqoop/orm/ClassWriter.java +++ b/src/java/com/cloudera/sqoop/orm/ClassWriter.java @@ -272,10 +272,17 @@ private String dbSetterForType(String javaType) { private String stringifierForType(String javaType, String colName) { if (javaType.equals("String")) { - return colName; + // Check if it is null, and write the null representation in such case + String r = colName + "==null?\"" + this.options.getNullStringValue() + + "\":" + colName; + return r; } else { // 
This is an object type -- just call its toString() in a null-safe way. - return "\"\" + " + colName; + // Also check if it is null, and instead write the null representation + // in such case + String r = colName + "==null?\"" + this.options.getNullNonStringValue() + + "\":" + "\"\" + " + colName; + return r; } } @@ -731,17 +738,19 @@ private void generateParseMethod(String typ, StringBuilder sb) { } /** - * Helper method for parseColumn(). Interpret the string 'null' as a null + * Helper method for parseColumn(). Interpret the string null representation * for a particular column. */ private void parseNullVal(String javaType, String colName, StringBuilder sb) { if (javaType.equals("String")) { - sb.append(" if (__cur_str.equals(\"null\")) { this."); + sb.append(" if (__cur_str.equals(\"" + + this.options.getInNullStringValue() + "\")) { this."); sb.append(colName); sb.append(" = null; } else {\n"); } else { - sb.append(" if (__cur_str.equals(\"null\")"); - sb.append(" || __cur_str.length() == 0) { this."); + sb.append(" if (__cur_str.equals(\"" + + this.options.getInNullNonStringValue()); + sb.append("\") || __cur_str.length() == 0) { this."); sb.append(colName); sb.append(" = null; } else {\n"); } diff --git a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java index 5bc54be7..bf63bf23 100644 --- a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java +++ b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java @@ -76,7 +76,11 @@ public abstract class BaseSqoopTool extends SqoopTool { public static final String HIVE_HOME_ARG = "hive-home"; public static final String WAREHOUSE_DIR_ARG = "warehouse-dir"; public static final String TARGET_DIR_ARG = "target-dir"; - public static final String APPEND_ARG = "append"; + public static final String APPEND_ARG = "append"; + public static final String NULL_STRING = "null-string"; + public static final String INPUT_NULL_STRING = "input-null-string"; + public static final String 
NULL_NON_STRING = "null-non-string"; + public static final String INPUT_NULL_NON_STRING = "input-null-non-string"; public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile"; public static final String FMT_TEXTFILE_ARG = "as-textfile"; @@ -493,6 +497,26 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) { .withDescription("Put auto-generated classes in this package") .withLongOpt(PACKAGE_NAME_ARG) .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-string") + .hasArg() + .withDescription("Null string representation") + .withLongOpt(NULL_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("input-null-string") + .hasArg() + .withDescription("Input null string representation") + .withLongOpt(INPUT_NULL_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-non-string") + .hasArg() + .withDescription("Null non-string representation") + .withLongOpt(NULL_NON_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("input-null-non-string") + .hasArg() + .withDescription("Input null non-string representation") + .withLongOpt(INPUT_NULL_NON_STRING) + .create()); if (!multiTable) { codeGenOpts.addOption(OptionBuilder.withArgName("name") .hasArg() @@ -563,6 +587,22 @@ protected void applyCommonOptions(CommandLine in, SqoopOptions out) out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME)); } + if (in.hasOption(NULL_STRING)) { + out.setNullStringValue(in.getOptionValue(NULL_STRING)); + } + + if (in.hasOption(INPUT_NULL_STRING)) { + out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING)); + } + + if (in.hasOption(NULL_NON_STRING)) { + out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING)); + } + + if (in.hasOption(INPUT_NULL_NON_STRING)) { + out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING)); + } + if (in.hasOption(DRIVER_ARG)) { out.setDriverClassName(in.getOptionValue(DRIVER_ARG)); }