diff --git a/src/java/com/cloudera/sqoop/SqoopOptions.java b/src/java/com/cloudera/sqoop/SqoopOptions.java index 2e534ce3..6e740592 100644 --- a/src/java/com/cloudera/sqoop/SqoopOptions.java +++ b/src/java/com/cloudera/sqoop/SqoopOptions.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.lib.LargeObjectLoader; /** @@ -115,18 +116,8 @@ public enum FileLayout { // HDFS path to read from when performing an export private String exportDir; - private char inputFieldDelim; - private char inputRecordDelim; - private char inputEnclosedBy; - private char inputEscapedBy; - private boolean inputMustBeEnclosed; - - private char outputFieldDelim; - private char outputRecordDelim; - private char outputEnclosedBy; - private char outputEscapedBy; - private boolean outputMustBeEnclosed; - + private DelimiterSet inputDelimiters; + private DelimiterSet outputDelimiters; private boolean areDelimsManuallySet; private Configuration conf; @@ -252,6 +243,11 @@ private void initDefaults(Configuration baseConfiguration) { this.hiveHome = System.getenv("HIVE_HOME"); this.hiveHome = System.getProperty("hive.home", this.hiveHome); + this.inputDelimiters = new DelimiterSet( + DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, + DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false); + this.outputDelimiters = new DelimiterSet(); + // Set this to cwd, but -Dsqoop.src.dir can override. this.codeOutputDir = System.getProperty("sqoop.src.dir", "."); @@ -264,18 +260,6 @@ private void initDefaults(Configuration baseConfiguration) { this.jarOutputDir = tmpDir + "sqoop/compile"; this.layout = FileLayout.TextFile; - this.inputFieldDelim = '\000'; - this.inputRecordDelim = '\000'; - this.inputEnclosedBy = '\000'; - this.inputEscapedBy = '\000'; - this.inputMustBeEnclosed = false; - - this.outputFieldDelim = ','; - this.outputRecordDelim = '\n'; - this.outputEnclosedBy = '\000'; - this.outputEscapedBy = '\000'; - this.outputMustBeEnclosed = false; - this.areDelimsManuallySet = false; this.numMappers = DEFAULT_NUM_MAPPERS; @@ -329,7 +313,7 @@ public static char toChar(String charish) throws InvalidOptionsException { } else if (charish.startsWith("\\0")) { if (charish.equals("\\0")) { // it's just '\0', which we can take as shorthand for nul. - return '\000'; + return DelimiterSet.NULL_CHAR; } else { // it's an octal value. String valStr = charish.substring(2); @@ -651,15 +635,19 @@ public void setFileLayout(FileLayout fileLayout) { * field delim to use when printing lines. */ public char getInputFieldDelim() { - if (inputFieldDelim == '\000') { - return this.outputFieldDelim; + char f = inputDelimiters.getFieldsTerminatedBy(); + if (f == DelimiterSet.NULL_CHAR) { + return this.outputDelimiters.getFieldsTerminatedBy(); } else { - return this.inputFieldDelim; + return f; } } + /** + * Set the field delimiter to use when parsing lines. + */ public void setInputFieldsTerminatedBy(char c) { - this.inputFieldDelim = c; + this.inputDelimiters.setFieldsTerminatedBy(c); } /** @@ -667,15 +655,19 @@ public void setInputFieldsTerminatedBy(char c) { * record delim to use when printing lines. */ public char getInputRecordDelim() { - if (inputRecordDelim == '\000') { - return this.outputRecordDelim; + char r = inputDelimiters.getLinesTerminatedBy(); + if (r == DelimiterSet.NULL_CHAR) { + return this.outputDelimiters.getLinesTerminatedBy(); } else { - return this.inputRecordDelim; + return r; } } + /** + * Set the record delimiter to use when parsing lines. + */ public void setInputLinesTerminatedBy(char c) { - this.inputRecordDelim = c; + this.inputDelimiters.setLinesTerminatedBy(c); } /** @@ -683,15 +675,19 @@ public void setInputLinesTerminatedBy(char c) { * Defaults to the enclosing-char to use when printing lines. */ public char getInputEnclosedBy() { - if (inputEnclosedBy == '\000') { - return this.outputEnclosedBy; + char c = inputDelimiters.getEnclosedBy(); + if (c == DelimiterSet.NULL_CHAR) { + return this.outputDelimiters.getEnclosedBy(); } else { - return this.inputEnclosedBy; + return c; } } + /** + * Set the enclosed-by character to use when parsing lines. + */ public void setInputEnclosedBy(char c) { - this.inputEnclosedBy = c; + this.inputDelimiters.setEnclosedBy(c); } /** @@ -699,15 +695,19 @@ public void setInputEnclosedBy(char c) { * escape character used when printing lines. */ public char getInputEscapedBy() { - if (inputEscapedBy == '\000') { - return this.outputEscapedBy; + char c = inputDelimiters.getEscapedBy(); + if (c == DelimiterSet.NULL_CHAR) { + return this.outputDelimiters.getEscapedBy(); } else { - return this.inputEscapedBy; + return c; } } + /** + * Set the escaped-by character to use when parsing lines. + */ public void setInputEscapedBy(char c) { - this.inputEscapedBy = c; + this.inputDelimiters.setEscapedBy(c); } /** @@ -716,15 +716,20 @@ public void setInputEscapedBy(char c) { * used. */ public boolean isInputEncloseRequired() { - if (inputEnclosedBy == '\000') { - return this.outputMustBeEnclosed; + char c = this.inputDelimiters.getEnclosedBy(); + if (c == DelimiterSet.NULL_CHAR) { + return this.outputDelimiters.isEncloseRequired(); } else { - return this.inputMustBeEnclosed; + return this.inputDelimiters.isEncloseRequired(); } } + /** + * If true, then all input fields are expected to be enclosed by the + * enclosed-by character when parsing. + */ public void setInputEncloseRequired(boolean required) { - this.inputMustBeEnclosed = required; + this.inputDelimiters.setEncloseRequired(required); } /** @@ -732,11 +737,14 @@ public void setInputEncloseRequired(boolean required) { * text. */ public char getOutputFieldDelim() { - return this.outputFieldDelim; + return this.outputDelimiters.getFieldsTerminatedBy(); } + /** + * Set the field delimiter to use when formatting lines. + */ public void setFieldsTerminatedBy(char c) { - this.outputFieldDelim = c; + this.outputDelimiters.setFieldsTerminatedBy(c); } @@ -745,11 +753,14 @@ public void setFieldsTerminatedBy(char c) { * text. */ public char getOutputRecordDelim() { - return this.outputRecordDelim; + return this.outputDelimiters.getLinesTerminatedBy(); } + /** + * Set the record delimiter to use when formatting lines. + */ public void setLinesTerminatedBy(char c) { - this.outputRecordDelim = c; + this.outputDelimiters.setLinesTerminatedBy(c); } /** @@ -757,11 +768,14 @@ public void setLinesTerminatedBy(char c) { * imported to text. */ public char getOutputEnclosedBy() { - return this.outputEnclosedBy; + return this.outputDelimiters.getEnclosedBy(); } + /** + * Set the enclosed-by character to use when formatting lines. + */ public void setEnclosedBy(char c) { - this.outputEnclosedBy = c; + this.outputDelimiters.setEnclosedBy(c); } /** @@ -769,11 +783,14 @@ public void setEnclosedBy(char c) { * text. */ public char getOutputEscapedBy() { - return this.outputEscapedBy; + return this.outputDelimiters.getEscapedBy(); } + /** + * Set the escaped-by character to use when formatting lines. + */ public void setEscapedBy(char c) { - this.outputEscapedBy = c; + this.outputDelimiters.setEscapedBy(c); } /** @@ -782,11 +799,42 @@ public void setEscapedBy(char c) { * instead of --optionally-enclosed-by. */ public boolean isOutputEncloseRequired() { - return this.outputMustBeEnclosed; + return this.outputDelimiters.isEncloseRequired(); } + /** + * If true, then the enclosed-by character will be applied to all fields, + * even if internal characters do not need enclosed-by protection. + */ public void setOutputEncloseRequired(boolean required) { - this.outputMustBeEnclosed = required; + this.outputDelimiters.setEncloseRequired(required); + } + + /** + * @return the set of delimiters used for formatting output records. + */ + public DelimiterSet getOutputDelimiters() { + return this.outputDelimiters.copy(); + } + + /** + * Set the complete set of delimiters to use for output formatting. + */ + public void setOutputDelimiters(DelimiterSet delimiters) { + this.outputDelimiters = delimiters.copy(); + } + + /** + * @return the set of delimiters used for parsing the input. + * This may include values implicitly set by the output delimiters. + */ + public DelimiterSet getInputDelimiters() { + return new DelimiterSet( + getInputFieldDelim(), + getInputRecordDelim(), + getInputEnclosedBy(), + getInputEscapedBy(), + isInputEncloseRequired()); } /** diff --git a/src/java/com/cloudera/sqoop/lib/DelimiterSet.java b/src/java/com/cloudera/sqoop/lib/DelimiterSet.java new file mode 100644 index 00000000..3f92401e --- /dev/null +++ b/src/java/com/cloudera/sqoop/lib/DelimiterSet.java @@ -0,0 +1,233 @@ +/** + * Licensed to Cloudera, Inc. under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Cloudera, Inc. licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.cloudera.sqoop.lib; + +/** + * Encapsulates a set of delimiters used to encode a record. + */ +public class DelimiterSet implements Cloneable { + + public static final char NULL_CHAR = '\000'; + + private char fieldDelim; // fields terminated by this. + private char recordDelim; // records terminated by this. + + // If these next two fields are '\000', then they are ignored. + private char enclosedBy; + private char escapedBy; + + // If true, then the enclosed-by character is applied to every + // field, not just ones containing embedded delimiters. + private boolean encloseRequired; + + /** + * Create a delimiter set with the default delimiters + * (comma for fields, newline for records). + */ + public DelimiterSet() { + this(',', '\n', NULL_CHAR, NULL_CHAR, false); + } + + /** + * Create a delimiter set with the specified delimiters. + * @param field the fields-terminated-by delimiter + * @param record the lines-terminated-by delimiter + * @param enclose the enclosed-by character + * @param escape the escaped-by character + * @param isEncloseRequired If true, enclosed-by is applied to all + * fields. If false, only applied to fields that embed delimiters. + */ + public DelimiterSet(char field, char record, char enclose, char escape, + boolean isEncloseRequired) { + this.fieldDelim = field; + this.recordDelim = record; + this.enclosedBy = enclose; + this.escapedBy = escape; + this.encloseRequired = isEncloseRequired; + } + + /** + * Sets the fields-terminated-by character. + */ + public void setFieldsTerminatedBy(char f) { + this.fieldDelim = f; + } + + /** + * @return the fields-terminated-by character. + */ + public char getFieldsTerminatedBy() { + return this.fieldDelim; + } + + /** + * Sets the end-of-record lines-terminated-by character. + */ + public void setLinesTerminatedBy(char r) { + this.recordDelim = r; + } + + /** + * @return the end-of-record (lines-terminated-by) character. + */ + public char getLinesTerminatedBy() { + return this.recordDelim; + } + + /** + * Sets the enclosed-by character. + * @param e the enclosed-by character, or '\000' for no enclosing character. + */ + public void setEnclosedBy(char e) { + this.enclosedBy = e; + } + + /** + * @return the enclosed-by character, or '\000' for none. + */ + public char getEnclosedBy() { + return this.enclosedBy; + } + + /** + * Sets the escaped-by character. + * @param e the escaped-by character, or '\000' for no escape character. + */ + public void setEscapedBy(char e) { + this.escapedBy = e; + } + + /** + * @return the escaped-by character, or '\000' for none. + */ + public char getEscapedBy() { + return this.escapedBy; + } + + /** + * Set whether the enclosed-by character must be applied to all fields, + * or only fields with embedded delimiters. + */ + public void setEncloseRequired(boolean required) { + this.encloseRequired = required; + } + + /** + * @return true if the enclosed-by character must be applied to all fields, + * or false if it's only used for fields with embedded delimiters. + */ + public boolean isEncloseRequired() { + return this.encloseRequired; + } + + @Override + /** + * @return a string representation of the delimiters. + */ + public String toString() { + return "fields=" + this.fieldDelim + + " records=" + this.recordDelim + + " escape=" + this.escapedBy + + " enclose=" + this.enclosedBy + + " required=" + this.encloseRequired; + } + + /** + * Format this set of delimiters as a call to the constructor for + * this object, that would generate identical delimiters. + * @return a String that can be embedded in generated code that + * provides this set of delimiters. + */ + public String formatConstructor() { + return "new DelimiterSet((char) " + (int) this.fieldDelim + ", " + + "(char) " + (int) this.recordDelim + ", " + + "(char) " + (int) this.enclosedBy + ", " + + "(char) " + (int) this.escapedBy + ", " + + this.encloseRequired + ")"; + } + + @Override + /** + * @return a hash code for this set of delimiters. + */ + public int hashCode() { + return (int) this.fieldDelim + + (((int) this.recordDelim) << 4) + + (((int) this.escapedBy) << 8) + + (((int) this.enclosedBy) << 12) + + (((int) this.recordDelim) << 16) + + (this.encloseRequired ? 0xFEFE : 0x7070); + } + + @Override + /** + * @return true if this delimiter set is the same as another set of + * delimiters. + */ + public boolean equals(Object other) { + if (null == other) { + return false; + } else if (!other.getClass().equals(getClass())) { + return false; + } + + DelimiterSet set = (DelimiterSet) other; + return this.fieldDelim == set.fieldDelim + && this.recordDelim == set.recordDelim + && this.escapedBy == set.escapedBy + && this.enclosedBy == set.enclosedBy + && this.encloseRequired == set.encloseRequired; + } + + @Override + /** + * @return a new copy of this same set of delimiters. + */ + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * Identical to clone() but does not throw spurious exceptions. + * @return a new copy of this same set of delimiters. + */ + public DelimiterSet copy() { + try { + return (DelimiterSet) clone(); + } catch (CloneNotSupportedException cnse) { + // Should never happen for DelimiterSet. + return null; + } + } + + // Static delimiter sets for the commonly-used delimiter arrangements. + + public static final DelimiterSet DEFAULT_DELIMITERS; + public static final DelimiterSet HIVE_DELIMITERS; + public static final DelimiterSet MYSQL_DELIMITERS; + + static { + DEFAULT_DELIMITERS = new DelimiterSet(',', '\n', NULL_CHAR, NULL_CHAR, + false); + MYSQL_DELIMITERS = new DelimiterSet(',', '\n', '\'', '\\', false); + HIVE_DELIMITERS = new DelimiterSet('\001', '\n', + NULL_CHAR, NULL_CHAR, false); + } +} + diff --git a/src/java/com/cloudera/sqoop/lib/FieldFormatter.java b/src/java/com/cloudera/sqoop/lib/FieldFormatter.java index 1397d687..992ba571 100644 --- a/src/java/com/cloudera/sqoop/lib/FieldFormatter.java +++ b/src/java/com/cloudera/sqoop/lib/FieldFormatter.java @@ -18,7 +18,6 @@ package com.cloudera.sqoop.lib; - /** * Static helper class that will help format data with quotes and escape chars. */ @@ -35,28 +34,25 @@ private FieldFormatter() { } * The field is enclosed only if: * enclose != '\000', and: * encloseRequired is true, or - * one of the characters in the mustEscapeFor list is present - * in the string. + * one of the fields-terminated-by or lines-terminated-by characters is + * present in the string. * * Escaping is not performed if the escape char is '\000'. * * @param str - The user's string to escape and enclose - * @param escape - What string to use as the escape sequence. If "" or null, - * then don't escape. - * @param enclose - The string to use to enclose str e.g. "quoted". If "" or - * null, then don't enclose. - * @param mustEncloseFor - A list of characters; if one is present in 'str', - * then str must be enclosed. - * @param encloseRequired - If true, then always enclose, regardless of - * mustEscapeFor. + * @param delimiters - The DelimiterSet to use identifying the escape and + * enclose semantics. If the specified escape or enclose characters are + * '\000', those operations are not performed. * @return the escaped, enclosed version of 'str'. */ - public static String escapeAndEnclose(String str, String escape, - String enclose, char [] mustEncloseFor, boolean encloseRequired) { + public static String escapeAndEnclose(String str, DelimiterSet delimiters) { + + char escape = delimiters.getEscapedBy(); + char enclose = delimiters.getEnclosedBy(); + boolean encloseRequired = delimiters.isEncloseRequired(); // true if we can use an escape character. - boolean escapingLegal = (null != escape - && escape.length() > 0 && !escape.equals("\000")); + boolean escapingLegal = DelimiterSet.NULL_CHAR != escape; String withEscapes; if (null == str) { @@ -65,13 +61,13 @@ public static String escapeAndEnclose(String str, String escape, if (escapingLegal) { // escaping is legal. Escape any instances of the escape char itself. - withEscapes = str.replace(escape, escape + escape); + withEscapes = str.replace("" + escape, "" + escape + escape); } else { // no need to double-escape withEscapes = str; } - if (null == enclose || enclose.length() == 0 || enclose.equals("\000")) { + if (DelimiterSet.NULL_CHAR == enclose) { // The enclose-with character was left unset, so we can't enclose items. // We're done. return withEscapes; @@ -80,12 +76,15 @@ public static String escapeAndEnclose(String str, String escape, // if we have an enclosing character, and escaping is legal, then the // encloser must always be escaped. if (escapingLegal) { - withEscapes = withEscapes.replace(enclose, escape + enclose); + withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose); } boolean actuallyDoEnclose = encloseRequired; - if (!actuallyDoEnclose && mustEncloseFor != null) { - // check if the string requires enclosing + if (!actuallyDoEnclose) { + // check if the string requires enclosing. + char [] mustEncloseFor = new char[2]; + mustEncloseFor[0] = delimiters.getFieldsTerminatedBy(); + mustEncloseFor[1] = delimiters.getLinesTerminatedBy(); for (char reason : mustEncloseFor) { if (str.indexOf(reason) != -1) { actuallyDoEnclose = true; @@ -95,7 +94,7 @@ public static String escapeAndEnclose(String str, String escape, } if (actuallyDoEnclose) { - return enclose + withEscapes + enclose; + return "" + enclose + withEscapes + enclose; } else { return withEscapes; } diff --git a/src/java/com/cloudera/sqoop/lib/LargeObjectLoader.java b/src/java/com/cloudera/sqoop/lib/LargeObjectLoader.java index 8ebf3f75..56ad7a93 100644 --- a/src/java/com/cloudera/sqoop/lib/LargeObjectLoader.java +++ b/src/java/com/cloudera/sqoop/lib/LargeObjectLoader.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.cloudera.sqoop.io.LobFile; +import com.cloudera.sqoop.util.TaskId; /** * Contains a set of methods which can read db columns from a ResultSet into diff --git a/src/java/com/cloudera/sqoop/lib/RecordParser.java b/src/java/com/cloudera/sqoop/lib/RecordParser.java index 99a469f4..2864dd3b 100644 --- a/src/java/com/cloudera/sqoop/lib/RecordParser.java +++ b/src/java/com/cloudera/sqoop/lib/RecordParser.java @@ -86,21 +86,11 @@ public ParseError(final Throwable cause) { } } - private char fieldDelim; - private char recordDelim; - private char enclosingChar; - private char escapeChar; - private boolean enclosingRequired; + private DelimiterSet delimiters; private ArrayList outputs; - public RecordParser(final char field, final char record, final char enclose, - final char escape, final boolean mustEnclose) { - this.fieldDelim = field; - this.recordDelim = record; - this.enclosingChar = enclose; - this.escapeChar = escape; - this.enclosingRequired = mustEnclose; - + public RecordParser(final DelimiterSet delimitersIn) { + this.delimiters = delimitersIn.copy(); this.outputs = new ArrayList(); } @@ -215,13 +205,19 @@ record sep halts processing. add charater literal to current string, return to UNENCLOSED_FIELD */ - char curChar = '\000'; + char curChar = DelimiterSet.NULL_CHAR; ParseState state = ParseState.FIELD_START; int len = input.length(); StringBuilder sb = null; outputs.clear(); + char enclosingChar = delimiters.getEnclosedBy(); + char fieldDelim = delimiters.getFieldsTerminatedBy(); + char recordDelim = delimiters.getLinesTerminatedBy(); + char escapeChar = delimiters.getEscapedBy(); + boolean enclosingRequired = delimiters.isEncloseRequired(); + for (int pos = 0; pos < len; pos++) { curChar = input.get(); switch (state) { @@ -233,14 +229,14 @@ record sep halts processing. } sb = new StringBuilder(); - if (this.enclosingChar == curChar) { + if (enclosingChar == curChar) { // got an opening encloser. state = ParseState.ENCLOSED_FIELD; - } else if (this.escapeChar == curChar) { + } else if (escapeChar == curChar) { state = ParseState.UNENCLOSED_ESCAPE; - } else if (this.fieldDelim == curChar) { + } else if (fieldDelim == curChar) { // we have a zero-length field. This is a no-op. - } else if (this.recordDelim == curChar) { + } else if (recordDelim == curChar) { // we have a zero-length field, that ends processing. pos = len; } else { @@ -248,7 +244,7 @@ record sep halts processing. state = ParseState.UNENCLOSED_FIELD; sb.append(curChar); - if (this.enclosingRequired) { + if (enclosingRequired) { throw new ParseError( "Opening field-encloser expected at position " + pos); } @@ -257,10 +253,10 @@ record sep halts processing. break; case ENCLOSED_FIELD: - if (this.escapeChar == curChar) { + if (escapeChar == curChar) { // the next character is escaped. Treat it literally. state = ParseState.ENCLOSED_ESCAPE; - } else if (this.enclosingChar == curChar) { + } else if (enclosingChar == curChar) { // we're at the end of the enclosing field. Expect an EOF or EOR char. state = ParseState.ENCLOSED_EXPECT_DELIMITER; } else { @@ -272,13 +268,13 @@ record sep halts processing. break; case UNENCLOSED_FIELD: - if (this.escapeChar == curChar) { + if (escapeChar == curChar) { // the next character is escaped. Treat it literally. state = ParseState.UNENCLOSED_ESCAPE; - } else if (this.fieldDelim == curChar) { + } else if (fieldDelim == curChar) { // we're at the end of this field; may be the start of another one. state = ParseState.FIELD_START; - } else if (this.recordDelim == curChar) { + } else if (recordDelim == curChar) { pos = len; // terminate processing immediately. } else { // this is a regular char. Add to the current field string, @@ -298,10 +294,10 @@ record sep halts processing. case ENCLOSED_EXPECT_DELIMITER: // We were in an enclosed field, but got the final encloser. Now we // expect either an end-of-field or an end-of-record. - if (this.fieldDelim == curChar) { + if (fieldDelim == curChar) { // end of one field is the beginning of the next. state = ParseState.FIELD_START; - } else if (this.recordDelim == curChar) { + } else if (recordDelim == curChar) { // stop processing. pos = len; } else { @@ -323,7 +319,7 @@ record sep halts processing. } } - if (state == ParseState.FIELD_START && curChar == this.fieldDelim) { + if (state == ParseState.FIELD_START && curChar == fieldDelim) { // we hit an EOF/EOR as the last legal character and we need to mark // that string as recorded. This if block is outside the for-loop since // we don't have a physical 'epsilon' token in our string. @@ -342,19 +338,17 @@ record sep halts processing. return outputs; } - public boolean isEnclosingRequired() { - return enclosingRequired; + return delimiters.isEncloseRequired(); } @Override public String toString() { - return "RecordParser[" + fieldDelim + ',' + recordDelim + ',' - + enclosingChar + ',' + escapeChar + ',' + enclosingRequired + "]"; + return "RecordParser[" + delimiters.toString() + "]"; } @Override public int hashCode() { - return this.toString().hashCode(); + return this.delimiters.hashCode(); } } diff --git a/src/java/com/cloudera/sqoop/lib/SqoopRecord.java b/src/java/com/cloudera/sqoop/lib/SqoopRecord.java index c506ae73..1b8be9bd 100644 --- a/src/java/com/cloudera/sqoop/lib/SqoopRecord.java +++ b/src/java/com/cloudera/sqoop/lib/SqoopRecord.java @@ -31,22 +31,41 @@ /** * Interface implemented by the classes generated by sqoop's orm.ClassWriter. */ -public interface SqoopRecord extends Cloneable, DBWritable, Writable { - void parse(CharSequence s) throws RecordParser.ParseError; - void parse(Text s) throws RecordParser.ParseError; - void parse(byte [] s) throws RecordParser.ParseError; - void parse(char [] s) throws RecordParser.ParseError; - void parse(ByteBuffer s) throws RecordParser.ParseError; - void parse(CharBuffer s) throws RecordParser.ParseError; - void loadLargeObjects(LargeObjectLoader objLoader) +public abstract class SqoopRecord implements Cloneable, DBWritable, Writable { + + public SqoopRecord() { + } + + public abstract void parse(CharSequence s) throws RecordParser.ParseError; + public abstract void parse(Text s) throws RecordParser.ParseError; + public abstract void parse(byte [] s) throws RecordParser.ParseError; + public abstract void parse(char [] s) throws RecordParser.ParseError; + public abstract void parse(ByteBuffer s) throws RecordParser.ParseError; + public abstract void parse(CharBuffer s) throws RecordParser.ParseError; + public abstract void loadLargeObjects(LargeObjectLoader objLoader) throws SQLException, IOException, InterruptedException; - Object clone() throws CloneNotSupportedException; /** * Inserts the data in this object into the PreparedStatement, starting * at parameter 'offset'. * @return the number of fields written to the statement. */ - int write(PreparedStatement stmt, int offset) throws SQLException; + public abstract int write(PreparedStatement stmt, int offset) + throws SQLException; + + public abstract String toString(DelimiterSet delimiters); + + @Override + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + + /** + * Returns an integer specifying which API format version the + * generated class conforms to. Used by internal APIs for backwards + * compatibility. + * @return the API version this class was generated against. + */ + public abstract int getClassFormatVersion(); } diff --git a/src/java/com/cloudera/sqoop/manager/MySQLUtils.java b/src/java/com/cloudera/sqoop/manager/MySQLUtils.java index a5c13ff7..19bc1025 100644 --- a/src/java/com/cloudera/sqoop/manager/MySQLUtils.java +++ b/src/java/com/cloudera/sqoop/manager/MySQLUtils.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import static com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR; import com.cloudera.sqoop.shims.HadoopShim; import com.cloudera.sqoop.util.DirectImportUtils; @@ -76,10 +77,10 @@ private MySQLUtils() { * escape: \\ */ public static boolean outputDelimsAreMySQL(Configuration conf) { - return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, '\000') - && '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, '\000') - && '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, '\000') - && '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, '\000') + return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, NULL_CHAR) + && '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, NULL_CHAR) + && '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, NULL_CHAR) + && '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, NULL_CHAR) && !conf.getBoolean(OUTPUT_ENCLOSE_REQUIRED_KEY, false); } diff --git a/src/java/com/cloudera/sqoop/mapreduce/MySQLDumpMapper.java b/src/java/com/cloudera/sqoop/mapreduce/MySQLDumpMapper.java index 521d85fe..a158388c 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/MySQLDumpMapper.java +++ b/src/java/com/cloudera/sqoop/mapreduce/MySQLDumpMapper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Mapper; +import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.lib.FieldFormatter; import com.cloudera.sqoop.lib.RecordParser; import com.cloudera.sqoop.manager.MySQLUtils; @@ -194,9 +195,7 @@ private static class ReparsingStreamThread extends ErrorableThread { static { // build a record parser for mysqldump's format - MYSQLDUMP_PARSER = new RecordParser(MYSQL_FIELD_DELIM, - MYSQL_RECORD_DELIM, MYSQL_ENCLOSE_CHAR, MYSQL_ESCAPE_CHAR, - MYSQL_ENCLOSE_REQUIRED); + MYSQLDUMP_PARSER = new RecordParser(DelimiterSet.MYSQL_DELIMITERS); } public void run() { @@ -205,22 +204,30 @@ public void run() { try { r = new BufferedReader(new InputStreamReader(this.stream)); + // Configure the output with the user's delimiters. char outputFieldDelim = (char) conf.getInt( - MySQLUtils.OUTPUT_FIELD_DELIM_KEY, '\000'); + MySQLUtils.OUTPUT_FIELD_DELIM_KEY, + DelimiterSet.NULL_CHAR); String outputFieldDelimStr = "" + outputFieldDelim; char outputRecordDelim = (char) conf.getInt( - MySQLUtils.OUTPUT_RECORD_DELIM_KEY, '\000'); + MySQLUtils.OUTPUT_RECORD_DELIM_KEY, + DelimiterSet.NULL_CHAR); String outputRecordDelimStr = "" + outputRecordDelim; char outputEnclose = (char) conf.getInt( MySQLUtils.OUTPUT_ENCLOSED_BY_KEY, - '\000'); - String outputEncloseStr = "" + outputEnclose; + DelimiterSet.NULL_CHAR); char outputEscape = (char) conf.getInt( - MySQLUtils.OUTPUT_ESCAPED_BY_KEY, '\000'); - String outputEscapeStr = "" + outputEscape; + MySQLUtils.OUTPUT_ESCAPED_BY_KEY, + DelimiterSet.NULL_CHAR); boolean outputEncloseRequired = conf.getBoolean( MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, false); - char [] encloseFor = { outputFieldDelim, outputRecordDelim }; + + DelimiterSet delimiters = new DelimiterSet( + outputFieldDelim, + outputRecordDelim, + outputEnclose, + outputEscape, + outputEncloseRequired); // Actually do the read/write transfer loop here. int preambleLen = -1; // set to this for "undefined" @@ -268,8 +275,7 @@ public void run() { } String fieldStr = FieldFormatter.escapeAndEnclose(field, - outputEscapeStr, outputEncloseStr, - encloseFor, outputEncloseRequired); + delimiters); context.write(fieldStr, null); recordLen += fieldStr.length(); } diff --git a/src/java/com/cloudera/sqoop/mapreduce/MySQLExportMapper.java b/src/java/com/cloudera/sqoop/mapreduce/MySQLExportMapper.java index 008fc172..0c316983 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/MySQLExportMapper.java +++ b/src/java/com/cloudera/sqoop/mapreduce/MySQLExportMapper.java @@ -32,13 +32,13 @@ import org.apache.hadoop.util.Shell; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.db.DBConfiguration; -import com.cloudera.sqoop.lib.TaskId; import com.cloudera.sqoop.manager.MySQLUtils; import com.cloudera.sqoop.shims.HadoopShim; import com.cloudera.sqoop.util.AsyncSink; import com.cloudera.sqoop.util.JdbcUrl; import com.cloudera.sqoop.util.LoggingAsyncSink; import com.cloudera.sqoop.util.NullAsyncSink; +import com.cloudera.sqoop.util.TaskId; /** * Mapper that starts a 'mysqlimport' process and uses that to export rows from diff --git a/src/java/com/cloudera/sqoop/orm/ClassWriter.java b/src/java/com/cloudera/sqoop/orm/ClassWriter.java index 57dc02ed..e6a88649 100644 --- a/src/java/com/cloudera/sqoop/orm/ClassWriter.java +++ b/src/java/com/cloudera/sqoop/orm/ClassWriter.java @@ -22,6 +22,7 @@ import com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.manager.ConnManager; import com.cloudera.sqoop.lib.BigDecimalSerializer; +import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.lib.FieldFormatter; import com.cloudera.sqoop.lib.JdbcWritableBridge; import com.cloudera.sqoop.lib.LargeObjectLoader; @@ -101,9 +102,11 @@ public class ClassWriter { * which version of the ClassWriter's output format was used to generate the * class. * - * If the way that we generate classes, bump this number. + * If the way that we generate classes changes, bump this number. + * This number is retrieved by the SqoopRecord.getClassFormatVersion() + * method. */ - public static final int CLASS_WRITER_VERSION = 2; + public static final int CLASS_WRITER_VERSION = 3; private SqoopOptions options; private ConnManager connManager; @@ -642,31 +645,20 @@ private void generateCloneMethod(Map columnTypes, private void generateToString(Map columnTypes, String [] colNames, StringBuilder sb) { - // Embed the delimiters into the class, as characters... - sb.append(" private static final char __OUTPUT_FIELD_DELIM_CHAR = " - + (int)options.getOutputFieldDelim() + ";\n"); - sb.append(" private static final char __OUTPUT_RECORD_DELIM_CHAR = " - + (int)options.getOutputRecordDelim() + ";\n"); + // Save the delimiters to the class. + sb.append(" private final DelimiterSet __outputDelimiters = "); + sb.append(options.getOutputDelimiters().formatConstructor() + ";\n"); - // as strings... - sb.append(" private static final String __OUTPUT_FIELD_DELIM = " - + "\"\" + (char) " + (int) options.getOutputFieldDelim() + ";\n"); - sb.append(" private static final String __OUTPUT_RECORD_DELIM = " - + "\"\" + (char) " + (int) options.getOutputRecordDelim() + ";\n"); - sb.append(" private static final String __OUTPUT_ENCLOSED_BY = " - + "\"\" + (char) " + (int) options.getOutputEnclosedBy() + ";\n"); - sb.append(" private static final String __OUTPUT_ESCAPED_BY = " - + "\"\" + (char) " + (int) options.getOutputEscapedBy() + ";\n"); - - // and some more options. - sb.append(" private static final boolean __OUTPUT_ENCLOSE_REQUIRED = " - + options.isOutputEncloseRequired() + ";\n"); - sb.append(" private static final char [] __OUTPUT_DELIMITER_LIST = { " - + "__OUTPUT_FIELD_DELIM_CHAR, __OUTPUT_RECORD_DELIM_CHAR };\n\n"); - - // The actual toString() method itself follows. + // The default toString() method itself follows. This just calls + // the delimiter-specific toString() with the default delimiters. sb.append(" public String toString() {\n"); + sb.append(" return toString(__outputDelimiters);\n"); + sb.append(" }\n"); + + // This toString() variant, though, accepts delimiters as arguments. + sb.append(" public String toString(DelimiterSet delimiters) {\n"); sb.append(" StringBuilder __sb = new StringBuilder();\n"); + sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n"); boolean first = true; for (String col : colNames) { @@ -679,7 +671,7 @@ private void generateToString(Map columnTypes, if (!first) { // print inter-field tokens. - sb.append(" __sb.append(__OUTPUT_FIELD_DELIM);\n"); + sb.append(" __sb.append(fieldDelim);\n"); } first = false; @@ -691,12 +683,10 @@ private void generateToString(Map columnTypes, } sb.append(" __sb.append(FieldFormatter.escapeAndEnclose(" + stringExpr - + ", __OUTPUT_ESCAPED_BY, __OUTPUT_ENCLOSED_BY, " - + "__OUTPUT_DELIMITER_LIST, __OUTPUT_ENCLOSE_REQUIRED));\n"); - + + ", delimiters));\n"); } - sb.append(" __sb.append(__OUTPUT_RECORD_DELIM);\n"); + sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); sb.append(" return __sb.toString();\n"); sb.append(" }\n"); } @@ -711,11 +701,7 @@ private void generateParseMethod(String typ, StringBuilder sb) { sb.append(" public void parse(" + typ + " __record) " + "throws RecordParser.ParseError {\n"); sb.append(" if (null == this.__parser) {\n"); - sb.append(" this.__parser = new RecordParser(" - + "__INPUT_FIELD_DELIM_CHAR, "); - sb.append("__INPUT_RECORD_DELIM_CHAR, __INPUT_ENCLOSED_BY_CHAR, " - + "__INPUT_ESCAPED_BY_CHAR, "); - sb.append("__INPUT_ENCLOSE_REQUIRED);\n"); + sb.append(" this.__parser = new RecordParser(__inputDelimiters);\n"); sb.append(" }\n"); sb.append(" List __fields = " + "this.__parser.parseRecord(__record);\n"); @@ -795,17 +781,8 @@ private void generateParser(Map columnTypes, // records. Note that these can differ from the delims to use as output // via toString(), if the user wants to use this class to convert one // format to another. - sb.append(" private static final char __INPUT_FIELD_DELIM_CHAR = " - + (int)options.getInputFieldDelim() + ";\n"); - sb.append(" private static final char __INPUT_RECORD_DELIM_CHAR = " - + (int)options.getInputRecordDelim() + ";\n"); - sb.append(" private static final char __INPUT_ENCLOSED_BY_CHAR = " - + (int)options.getInputEnclosedBy() + ";\n"); - sb.append(" private static final char __INPUT_ESCAPED_BY_CHAR = " - + (int)options.getInputEscapedBy() + ";\n"); - sb.append(" private static final boolean __INPUT_ENCLOSE_REQUIRED = " - + options.isInputEncloseRequired() + ";\n"); - + sb.append(" private final DelimiterSet __inputDelimiters = "); + sb.append(options.getInputDelimiters().formatConstructor() + ";\n"); // The parser object which will do the heavy lifting for field splitting. sb.append(" private RecordParser __parser;\n"); @@ -976,6 +953,7 @@ public StringBuilder generateClassForColumns(Map columnTypes, sb.append("import org.apache.hadoop.io.Writable;\n"); sb.append("import org.apache.hadoop.mapred.lib.db.DBWritable;\n"); sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n"); + sb.append("import " + DelimiterSet.class.getCanonicalName() + ";\n"); sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n"); sb.append("import " + RecordParser.class.getCanonicalName() + ";\n"); sb.append("import " + BlobRef.class.getCanonicalName() + ";\n"); @@ -999,10 +977,12 @@ public StringBuilder generateClassForColumns(Map columnTypes, sb.append("\n"); String className = tableNameInfo.getShortClassForTable(tableName); - sb.append("public class " + className - + " implements DBWritable, SqoopRecord, Writable {\n"); - sb.append(" public static final int PROTOCOL_VERSION = " + sb.append("public class " + className + " extends SqoopRecord " + + " implements DBWritable, Writable {\n"); + sb.append(" private final int PROTOCOL_VERSION = " + CLASS_WRITER_VERSION + ";\n"); + sb.append( + " public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n"); sb.append(" protected ResultSet __cur_result_set;\n"); generateFields(columnTypes, colNames, sb); generateDbRead(columnTypes, colNames, sb); diff --git a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java index fd0d8ab2..9bef2c55 100644 --- a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java +++ b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java @@ -36,6 +36,7 @@ import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; import com.cloudera.sqoop.cli.RelatedOptions; import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.manager.ConnManager; import com.cloudera.sqoop.shims.ShimLoader; @@ -603,19 +604,15 @@ protected void validateOutputFormatOptions(SqoopOptions options) // straight to Hive. Use Hive-style delimiters. LOG.info("Using Hive-specific delimiters for output. You can override"); LOG.info("delimiters with --fields-terminated-by, etc."); - options.setFieldsTerminatedBy((char) 0x1); // ^A - options.setLinesTerminatedBy('\n'); - options.setEnclosedBy('\000'); // no enclosing in Hive. - options.setEscapedBy('\000'); // no escaping in Hive. - options.setOutputEncloseRequired(false); + options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS); } - if (options.getOutputEscapedBy() != '\000') { + if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) { LOG.warn("Hive does not support escape characters in fields;"); LOG.warn("parse errors in Hive may result from using --escaped-by."); } - if (options.getOutputEnclosedBy() != '\000') { + if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) { LOG.warn("Hive does not support quoted strings; parse errors"); LOG.warn("in Hive may result from using --enclosed-by."); } @@ -627,7 +624,5 @@ protected void validateHiveOptions(SqoopOptions options) { // is reserved for future constraints on Hive options. } - - } diff --git a/src/java/com/cloudera/sqoop/lib/TaskId.java b/src/java/com/cloudera/sqoop/util/TaskId.java similarity index 97% rename from src/java/com/cloudera/sqoop/lib/TaskId.java rename to src/java/com/cloudera/sqoop/util/TaskId.java index 74580e70..5f57a85d 100644 --- a/src/java/com/cloudera/sqoop/lib/TaskId.java +++ b/src/java/com/cloudera/sqoop/util/TaskId.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package com.cloudera.sqoop.lib; +package com.cloudera.sqoop.util; import org.apache.hadoop.conf.Configuration; diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java index dd639c69..0ae87be9 100644 --- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java +++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java @@ -20,6 +20,7 @@ import junit.framework.TestCase; +import com.cloudera.sqoop.lib.DelimiterSet; import com.cloudera.sqoop.tool.ImportTool; @@ -109,19 +110,19 @@ public void testUnknownEscape2() throws Exception { } public void testEscapeNul1() throws Exception { - assertEquals('\000', SqoopOptions.toChar("\\0")); + assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0")); } public void testEscapeNul2() throws Exception { - assertEquals('\000', SqoopOptions.toChar("\\00")); + assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\00")); } public void testEscapeNul3() throws Exception { - assertEquals('\000', SqoopOptions.toChar("\\0000")); + assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0000")); } public void testEscapeNul4() throws Exception { - assertEquals('\000', SqoopOptions.toChar("\\0x0")); + assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0x0")); } public void testOctalChar1() throws Exception { diff --git a/src/test/com/cloudera/sqoop/lib/TestFieldFormatter.java b/src/test/com/cloudera/sqoop/lib/TestFieldFormatter.java index c87d8057..0c7e0314 100644 --- a/src/test/com/cloudera/sqoop/lib/TestFieldFormatter.java +++ b/src/test/com/cloudera/sqoop/lib/TestFieldFormatter.java @@ -27,86 +27,59 @@ public class TestFieldFormatter extends TestCase { public void testAllEmpty() { - char [] chars = new char[0]; - String result = FieldFormatter.escapeAndEnclose("", "", "", chars, false); + String result = FieldFormatter.escapeAndEnclose("", + new DelimiterSet(DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, + DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false)); assertEquals("", result); } public void testNullArgs() { - String result = FieldFormatter.escapeAndEnclose("", null, null, null, - false); - assertEquals("", result); - - char [] encloseFor = { '\"' }; - assertNull(FieldFormatter.escapeAndEnclose(null, "\\", "\"", encloseFor, - false)); + assertNull(FieldFormatter.escapeAndEnclose(null, + new DelimiterSet('\"', DelimiterSet.NULL_CHAR, '\"', '\\', false))); } public void testBasicStr() { - String result = FieldFormatter.escapeAndEnclose("foo", null, null, null, - false); + String result = FieldFormatter.escapeAndEnclose("foo", + DelimiterSet.DEFAULT_DELIMITERS); assertEquals("foo", result); } public void testEscapeSlash() { - String result = FieldFormatter.escapeAndEnclose("foo\\bar", "\\", "\"", - null, false); + String result = FieldFormatter.escapeAndEnclose("foo\\bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("foo\\\\bar", result); } public void testMustEnclose() { - String result = FieldFormatter.escapeAndEnclose("foo", null, "\"", - null, true); + String result = FieldFormatter.escapeAndEnclose("foo", + new DelimiterSet(',', '\n', '\"', DelimiterSet.NULL_CHAR, true)); assertEquals("\"foo\"", result); } public void testEncloseComma1() { - char [] chars = { ',' }; - - String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"", - chars, false); + String result = FieldFormatter.escapeAndEnclose("foo,bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("\"foo,bar\"", result); } public void testEncloseComma2() { - char [] chars = { '\n', ',' }; - - String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"", - chars, false); - assertEquals("\"foo,bar\"", result); - } - - public void testEncloseComma3() { - char [] chars = { ',', '\n' }; - - String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"", - chars, false); + String result = FieldFormatter.escapeAndEnclose("foo,bar", + new DelimiterSet(',', ',', '\"', '\\', false)); assertEquals("\"foo,bar\"", result); } public void testNoNeedToEnclose() { - char [] chars = { ',', '\n' }; - String result = FieldFormatter.escapeAndEnclose( - "just another string", "\\", "\"", chars, false); + "just another string", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("just another string", result); } - public void testCannotEnclose1() { - char [] chars = { ',', '\n' }; + public void testCannotEnclose() { + // can't enclose because encloser is nul + String result = FieldFormatter.escapeAndEnclose("foo,bar", + new DelimiterSet(',', '\n', DelimiterSet.NULL_CHAR, '\\', false)); - // can't enclose because encloser is "" - String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "", - chars, false); - assertEquals("foo,bar", result); - } - - public void testCannotEnclose2() { - char [] chars = { ',', '\n' }; - - // can't enclose because encloser is null - String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", null, - chars, false); assertEquals("foo,bar", result); } @@ -114,48 +87,44 @@ public void testEmptyCharToEscapeString() { // test what happens when the escape char is null. It should encode the // null char. - char nul = '\000'; + char nul = DelimiterSet.NULL_CHAR; String s = "" + nul; assertEquals("\000", s); } public void testEscapeCentralQuote() { - String result = FieldFormatter.escapeAndEnclose("foo\"bar", "\\", "\"", - null, false); + String result = FieldFormatter.escapeAndEnclose("foo\"bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("foo\\\"bar", result); } public void testEscapeMultiCentralQuote() { - String result = FieldFormatter.escapeAndEnclose("foo\"\"bar", "\\", "\"", - null, false); + String result = FieldFormatter.escapeAndEnclose("foo\"\"bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("foo\\\"\\\"bar", result); } public void testDoubleEscape() { - String result = FieldFormatter.escapeAndEnclose("foo\\\"bar", "\\", "\"", - null, false); + String result = FieldFormatter.escapeAndEnclose("foo\\\"bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("foo\\\\\\\"bar", result); } public void testReverseEscape() { - String result = FieldFormatter.escapeAndEnclose("foo\"\\bar", "\\", "\"", - null, false); + String result = FieldFormatter.escapeAndEnclose("foo\"\\bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("foo\\\"\\\\bar", result); } public void testQuotedEncloser() { - char [] chars = { ',', '\n' }; - - String result = FieldFormatter.escapeAndEnclose("foo\",bar", "\\", "\"", - chars, false); + String result = FieldFormatter.escapeAndEnclose("foo\",bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("\"foo\\\",bar\"", result); } public void testQuotedEscape() { - char [] chars = { ',', '\n' }; - - String result = FieldFormatter.escapeAndEnclose("foo\\,bar", "\\", "\"", - chars, false); + String result = FieldFormatter.escapeAndEnclose("foo\\,bar", + new DelimiterSet(',', '\n', '\"', '\\', false)); assertEquals("\"foo\\\\,bar\"", result); } } diff --git a/src/test/com/cloudera/sqoop/lib/TestRecordParser.java b/src/test/com/cloudera/sqoop/lib/TestRecordParser.java index d516f70a..721ec416 100644 --- a/src/test/com/cloudera/sqoop/lib/TestRecordParser.java +++ b/src/test/com/cloudera/sqoop/lib/TestRecordParser.java @@ -103,7 +103,8 @@ private List list(String [] items) { public void testEmptyLine() throws RecordParser.ParseError { // an empty line should return no fields. - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { }; assertListsEqual(null, list(strings), parser.parseRecord("")); } @@ -111,32 +112,37 @@ public void testEmptyLine() throws RecordParser.ParseError { public void testJustEOR() throws RecordParser.ParseError { // a line with just a newline char should return a single zero-length field. - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "" }; assertListsEqual(null, list(strings), parser.parseRecord("\n")); } public void testOneField() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the field" }; assertListsEqual(null, list(strings), parser.parseRecord("the field")); } public void testOneField2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the field" }; assertListsEqual(null, list(strings), parser.parseRecord("the field\n")); } public void testQuotedField1() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the field" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the field\"\n")); } public void testQuotedField2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the field" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the field\"")); @@ -144,7 +150,8 @@ public void testQuotedField2() throws RecordParser.ParseError { public void testQuotedField3() throws RecordParser.ParseError { // quoted containing EOF - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the ,field" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the ,field\"")); @@ -152,7 +159,8 @@ public void testQuotedField3() throws RecordParser.ParseError { public void testQuotedField4() throws RecordParser.ParseError { // quoted containing multiple EOFs - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the ,,field" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the ,,field\"")); @@ -160,7 +168,8 @@ public void testQuotedField4() throws RecordParser.ParseError { public void testQuotedField5() throws RecordParser.ParseError { // quoted containing EOF and EOR - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the ,\nfield" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the ,\nfield\"")); @@ -168,7 +177,8 @@ public void testQuotedField5() throws RecordParser.ParseError { public void testQuotedField6() throws RecordParser.ParseError { // quoted containing EOR - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the \nfield" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the \nfield\"")); @@ -176,7 +186,8 @@ public void testQuotedField6() throws RecordParser.ParseError { public void testQuotedField7() throws RecordParser.ParseError { // quoted containing multiple EORs - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the \n\nfield" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the \n\nfield\"")); @@ -184,7 +195,8 @@ public void testQuotedField7() throws RecordParser.ParseError { public void testQuotedField8() throws RecordParser.ParseError { // quoted containing escaped quoted char - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the \"field" }; assertListsEqual(null, list(strings), parser.parseRecord("\"the \\\"field\"")); @@ -192,68 +204,78 @@ public void testQuotedField8() throws RecordParser.ParseError { public void testUnquotedEscape1() throws RecordParser.ParseError { // field without quotes with an escaped EOF char. - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the ,field" }; assertListsEqual(null, list(strings), parser.parseRecord("the \\,field")); } public void testUnquotedEscape2() throws RecordParser.ParseError { // field without quotes with an escaped escape char. - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "the \\field" }; assertListsEqual(null, list(strings), parser.parseRecord("the \\\\field")); } public void testTwoFields1() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("field1,field2")); } public void testTwoFields2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("field1,field2\n")); } public void testTwoFields3() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("\"field1\",field2\n")); } public void testTwoFields4() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("field1,\"field2\"\n")); } public void testTwoFields5() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("field1,\"field2\"")); } public void testRequiredQuotes0() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("\"field1\",\"field2\"\n")); } public void testRequiredQuotes1() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); String [] strings = { "field1", "field2" }; assertListsEqual(null, list(strings), parser.parseRecord("\"field1\",\"field2\"")); } public void testRequiredQuotes2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); try { parser.parseRecord("\"field1\",field2"); fail("Expected parse error for required quotes"); @@ -263,7 +285,8 @@ public void testRequiredQuotes2() throws RecordParser.ParseError { } public void testRequiredQuotes3() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); try { parser.parseRecord("field1,\"field2\""); fail("Expected parse error for required quotes"); @@ -273,7 +296,8 @@ public void testRequiredQuotes3() throws RecordParser.ParseError { } public void testRequiredQuotes4() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); try { parser.parseRecord("field1,\"field2\"\n"); fail("Expected parse error for required quotes"); @@ -283,7 +307,8 @@ public void testRequiredQuotes4() throws RecordParser.ParseError { } public void testNull() { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', true)); String input = null; try { parser.parseRecord(input); @@ -295,75 +320,87 @@ public void testNull() { public void testEmptyFields1() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "", ""}; assertListsEqual(null, list(strings), parser.parseRecord(",")); } public void testEmptyFields2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "", "" }; assertListsEqual(null, list(strings), parser.parseRecord(",\n")); } public void testEmptyFields3() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "", "", "" }; assertListsEqual(null, list(strings), parser.parseRecord(",,\n")); } public void testEmptyFields4() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "", "foo", "" }; assertListsEqual(null, list(strings), parser.parseRecord(",foo,\n")); } public void testEmptyFields5() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "", "foo", "" }; assertListsEqual(null, list(strings), parser.parseRecord(",foo,")); } public void testEmptyFields6() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "foo", "" }; assertListsEqual(null, list(strings), parser.parseRecord("foo,")); } public void testTrailingText() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "foo", "bar" }; assertListsEqual(null, list(strings), parser.parseRecord("foo,bar\nbaz")); } public void testTrailingText2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "" }; assertListsEqual(null, list(strings), parser.parseRecord("\nbaz")); } public void testLeadingEscape() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', '\n', '\"', '\\', false)); String [] strings = { "\nbaz" }; assertListsEqual(null, list(strings), parser.parseRecord("\\\nbaz")); } public void testEofIsEor() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', ',', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', ',', '\"', '\\', false)); String [] strings = { "three", "different", "fields" }; assertListsEqual(null, list(strings), parser.parseRecord("three,different,fields")); } public void testEofIsEor2() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', ',', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', ',', '\"', '\\', false)); String [] strings = { "three", "different", "fields" }; assertListsEqual(null, list(strings), parser.parseRecord("three,\"different\",fields")); } public void testRepeatedParse() throws RecordParser.ParseError { - RecordParser parser = new RecordParser(',', ',', '\"', '\\', false); + RecordParser parser = new RecordParser( + new DelimiterSet(',', ',', '\"', '\\', false)); String [] strings = { "three", "different", "fields" }; assertListsEqual(null, list(strings), parser.parseRecord("three,\"different\",fields")); diff --git a/src/test/findbugsExcludeFile.xml b/src/test/findbugsExcludeFile.xml index a5a8a7dc..d6284550 100644 --- a/src/test/findbugsExcludeFile.xml +++ b/src/test/findbugsExcludeFile.xml @@ -45,22 +45,22 @@ not need to be rigidly upheld. --> - + - + - + - +