5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-03 18:19:23 +08:00

SIP-4. Refactor classes for public API 1.0.0.

Moves TaskId to com.cloudera.sqoop.util.
Add com.cloudera.sqoop.lib.DelimiterSet.
Rewrite FieldFormatter, RecordParser, to use DelimiterSet.
Add generated class version id to SqoopRecord.

From: Aaron Kimball <aaron@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149907 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:03:54 +00:00
parent 42875119dd
commit 3ab9ebd354
16 changed files with 586 additions and 303 deletions

View File

@ -29,6 +29,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.LargeObjectLoader;
/**
@ -115,18 +116,8 @@ public enum FileLayout {
// HDFS path to read from when performing an export
private String exportDir;
private char inputFieldDelim;
private char inputRecordDelim;
private char inputEnclosedBy;
private char inputEscapedBy;
private boolean inputMustBeEnclosed;
private char outputFieldDelim;
private char outputRecordDelim;
private char outputEnclosedBy;
private char outputEscapedBy;
private boolean outputMustBeEnclosed;
private DelimiterSet inputDelimiters;
private DelimiterSet outputDelimiters;
private boolean areDelimsManuallySet;
private Configuration conf;
@ -252,6 +243,11 @@ private void initDefaults(Configuration baseConfiguration) {
this.hiveHome = System.getenv("HIVE_HOME");
this.hiveHome = System.getProperty("hive.home", this.hiveHome);
this.inputDelimiters = new DelimiterSet(
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR,
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false);
this.outputDelimiters = new DelimiterSet();
// Set this to cwd, but -Dsqoop.src.dir can override.
this.codeOutputDir = System.getProperty("sqoop.src.dir", ".");
@ -264,18 +260,6 @@ private void initDefaults(Configuration baseConfiguration) {
this.jarOutputDir = tmpDir + "sqoop/compile";
this.layout = FileLayout.TextFile;
this.inputFieldDelim = '\000';
this.inputRecordDelim = '\000';
this.inputEnclosedBy = '\000';
this.inputEscapedBy = '\000';
this.inputMustBeEnclosed = false;
this.outputFieldDelim = ',';
this.outputRecordDelim = '\n';
this.outputEnclosedBy = '\000';
this.outputEscapedBy = '\000';
this.outputMustBeEnclosed = false;
this.areDelimsManuallySet = false;
this.numMappers = DEFAULT_NUM_MAPPERS;
@ -329,7 +313,7 @@ public static char toChar(String charish) throws InvalidOptionsException {
} else if (charish.startsWith("\\0")) {
if (charish.equals("\\0")) {
// it's just '\0', which we can take as shorthand for nul.
return '\000';
return DelimiterSet.NULL_CHAR;
} else {
// it's an octal value.
String valStr = charish.substring(2);
@ -651,15 +635,19 @@ public void setFileLayout(FileLayout fileLayout) {
* field delim to use when printing lines.
*/
public char getInputFieldDelim() {
if (inputFieldDelim == '\000') {
return this.outputFieldDelim;
char f = inputDelimiters.getFieldsTerminatedBy();
if (f == DelimiterSet.NULL_CHAR) {
return this.outputDelimiters.getFieldsTerminatedBy();
} else {
return this.inputFieldDelim;
return f;
}
}
/**
* Set the field delimiter to use when parsing lines.
*/
public void setInputFieldsTerminatedBy(char c) {
this.inputFieldDelim = c;
this.inputDelimiters.setFieldsTerminatedBy(c);
}
/**
@ -667,15 +655,19 @@ public void setInputFieldsTerminatedBy(char c) {
* record delim to use when printing lines.
*/
public char getInputRecordDelim() {
if (inputRecordDelim == '\000') {
return this.outputRecordDelim;
char r = inputDelimiters.getLinesTerminatedBy();
if (r == DelimiterSet.NULL_CHAR) {
return this.outputDelimiters.getLinesTerminatedBy();
} else {
return this.inputRecordDelim;
return r;
}
}
/**
* Set the record delimiter to use when parsing lines.
*/
public void setInputLinesTerminatedBy(char c) {
this.inputRecordDelim = c;
this.inputDelimiters.setLinesTerminatedBy(c);
}
/**
@ -683,15 +675,19 @@ public void setInputLinesTerminatedBy(char c) {
* Defaults to the enclosing-char to use when printing lines.
*/
public char getInputEnclosedBy() {
if (inputEnclosedBy == '\000') {
return this.outputEnclosedBy;
char c = inputDelimiters.getEnclosedBy();
if (c == DelimiterSet.NULL_CHAR) {
return this.outputDelimiters.getEnclosedBy();
} else {
return this.inputEnclosedBy;
return c;
}
}
/**
* Set the enclosed-by character to use when parsing lines.
*/
public void setInputEnclosedBy(char c) {
this.inputEnclosedBy = c;
this.inputDelimiters.setEnclosedBy(c);
}
/**
@ -699,15 +695,19 @@ public void setInputEnclosedBy(char c) {
* escape character used when printing lines.
*/
public char getInputEscapedBy() {
if (inputEscapedBy == '\000') {
return this.outputEscapedBy;
char c = inputDelimiters.getEscapedBy();
if (c == DelimiterSet.NULL_CHAR) {
return this.outputDelimiters.getEscapedBy();
} else {
return this.inputEscapedBy;
return c;
}
}
/**
* Set the escaped-by character to use when parsing lines.
*/
public void setInputEscapedBy(char c) {
this.inputEscapedBy = c;
this.inputDelimiters.setEscapedBy(c);
}
/**
@ -716,15 +716,20 @@ public void setInputEscapedBy(char c) {
* used.
*/
public boolean isInputEncloseRequired() {
if (inputEnclosedBy == '\000') {
return this.outputMustBeEnclosed;
char c = this.inputDelimiters.getEnclosedBy();
if (c == DelimiterSet.NULL_CHAR) {
return this.outputDelimiters.isEncloseRequired();
} else {
return this.inputMustBeEnclosed;
return this.inputDelimiters.isEncloseRequired();
}
}
/**
* If true, then all input fields are expected to be enclosed by the
* enclosed-by character when parsing.
*/
public void setInputEncloseRequired(boolean required) {
this.inputMustBeEnclosed = required;
this.inputDelimiters.setEncloseRequired(required);
}
/**
@ -732,11 +737,14 @@ public void setInputEncloseRequired(boolean required) {
* text.
*/
public char getOutputFieldDelim() {
return this.outputFieldDelim;
return this.outputDelimiters.getFieldsTerminatedBy();
}
/**
* Set the field delimiter to use when formatting lines.
*/
public void setFieldsTerminatedBy(char c) {
this.outputFieldDelim = c;
this.outputDelimiters.setFieldsTerminatedBy(c);
}
@ -745,11 +753,14 @@ public void setFieldsTerminatedBy(char c) {
* text.
*/
public char getOutputRecordDelim() {
return this.outputRecordDelim;
return this.outputDelimiters.getLinesTerminatedBy();
}
/**
* Set the record delimiter to use when formatting lines.
*/
public void setLinesTerminatedBy(char c) {
this.outputRecordDelim = c;
this.outputDelimiters.setLinesTerminatedBy(c);
}
/**
@ -757,11 +768,14 @@ public void setLinesTerminatedBy(char c) {
* imported to text.
*/
public char getOutputEnclosedBy() {
return this.outputEnclosedBy;
return this.outputDelimiters.getEnclosedBy();
}
/**
* Set the enclosed-by character to use when formatting lines.
*/
public void setEnclosedBy(char c) {
this.outputEnclosedBy = c;
this.outputDelimiters.setEnclosedBy(c);
}
/**
@ -769,11 +783,14 @@ public void setEnclosedBy(char c) {
* text.
*/
public char getOutputEscapedBy() {
return this.outputEscapedBy;
return this.outputDelimiters.getEscapedBy();
}
/**
* Set the escaped-by character to use when formatting lines.
*/
public void setEscapedBy(char c) {
this.outputEscapedBy = c;
this.outputDelimiters.setEscapedBy(c);
}
/**
@ -782,11 +799,42 @@ public void setEscapedBy(char c) {
* instead of --optionally-enclosed-by.
*/
public boolean isOutputEncloseRequired() {
return this.outputMustBeEnclosed;
return this.outputDelimiters.isEncloseRequired();
}
/**
* If true, then the enclosed-by character will be applied to all fields,
* even if internal characters do not need enclosed-by protection.
*/
public void setOutputEncloseRequired(boolean required) {
this.outputMustBeEnclosed = required;
this.outputDelimiters.setEncloseRequired(required);
}
/**
* @return the set of delimiters used for formatting output records.
*/
public DelimiterSet getOutputDelimiters() {
return this.outputDelimiters.copy();
}
/**
* Set the complete set of delimiters to use for output formatting.
*/
public void setOutputDelimiters(DelimiterSet delimiters) {
this.outputDelimiters = delimiters.copy();
}
/**
* @return the set of delimiters used for parsing the input.
* This may include values implicitly set by the output delimiters.
*/
public DelimiterSet getInputDelimiters() {
return new DelimiterSet(
getInputFieldDelim(),
getInputRecordDelim(),
getInputEnclosedBy(),
getInputEscapedBy(),
isInputEncloseRequired());
}
/**

View File

@ -0,0 +1,233 @@
/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
/**
 * Encapsulates a set of delimiters used to encode a record: the field and
 * record terminators, the optional enclosing and escape characters, and
 * whether enclosing is applied to every field.
 *
 * <p>Instances are mutable. The shared constants declared at the bottom of
 * this class are ordinary mutable instances as well, so obtain a private
 * instance with {@link #copy()} before calling any setter on them.</p>
 */
public class DelimiterSet implements Cloneable {

  /** Sentinel meaning "no character" for the enclosed-by / escaped-by slots. */
  public static final char NULL_CHAR = '\000';

  private char fieldDelim; // fields terminated by this.
  private char recordDelim; // records terminated by this.

  // If these next two fields are '\000', then they are ignored.
  private char enclosedBy;
  private char escapedBy;

  // If true, then the enclosed-by character is applied to every
  // field, not just ones containing embedded delimiters.
  private boolean encloseRequired;

  /**
   * Create a delimiter set with the default delimiters
   * (comma for fields, newline for records).
   */
  public DelimiterSet() {
    this(',', '\n', NULL_CHAR, NULL_CHAR, false);
  }

  /**
   * Create a delimiter set with the specified delimiters.
   * @param field the fields-terminated-by delimiter
   * @param record the lines-terminated-by delimiter
   * @param enclose the enclosed-by character
   * @param escape the escaped-by character
   * @param isEncloseRequired If true, enclosed-by is applied to all
   * fields. If false, only applied to fields that embed delimiters.
   */
  public DelimiterSet(char field, char record, char enclose, char escape,
      boolean isEncloseRequired) {
    this.fieldDelim = field;
    this.recordDelim = record;
    this.enclosedBy = enclose;
    this.escapedBy = escape;
    this.encloseRequired = isEncloseRequired;
  }

  /**
   * Sets the fields-terminated-by character.
   * @param f the new fields-terminated-by character.
   */
  public void setFieldsTerminatedBy(char f) {
    this.fieldDelim = f;
  }

  /**
   * @return the fields-terminated-by character.
   */
  public char getFieldsTerminatedBy() {
    return this.fieldDelim;
  }

  /**
   * Sets the end-of-record lines-terminated-by character.
   * @param r the new lines-terminated-by character.
   */
  public void setLinesTerminatedBy(char r) {
    this.recordDelim = r;
  }

  /**
   * @return the end-of-record (lines-terminated-by) character.
   */
  public char getLinesTerminatedBy() {
    return this.recordDelim;
  }

  /**
   * Sets the enclosed-by character.
   * @param e the enclosed-by character, or '\000' for no enclosing character.
   */
  public void setEnclosedBy(char e) {
    this.enclosedBy = e;
  }

  /**
   * @return the enclosed-by character, or '\000' for none.
   */
  public char getEnclosedBy() {
    return this.enclosedBy;
  }

  /**
   * Sets the escaped-by character.
   * @param e the escaped-by character, or '\000' for no escape character.
   */
  public void setEscapedBy(char e) {
    this.escapedBy = e;
  }

  /**
   * @return the escaped-by character, or '\000' for none.
   */
  public char getEscapedBy() {
    return this.escapedBy;
  }

  /**
   * Set whether the enclosed-by character must be applied to all fields,
   * or only fields with embedded delimiters.
   * @param required true to enclose every field.
   */
  public void setEncloseRequired(boolean required) {
    this.encloseRequired = required;
  }

  /**
   * @return true if the enclosed-by character must be applied to all fields,
   * or false if it's only used for fields with embedded delimiters.
   */
  public boolean isEncloseRequired() {
    return this.encloseRequired;
  }

  /**
   * The delimiter characters are embedded verbatim, so the result may
   * contain NUL or newline characters.
   * @return a string representation of the delimiters.
   */
  @Override
  public String toString() {
    return "fields=" + this.fieldDelim
        + " records=" + this.recordDelim
        + " escape=" + this.escapedBy
        + " enclose=" + this.enclosedBy
        + " required=" + this.encloseRequired;
  }

  /**
   * Format this set of delimiters as a call to the constructor for
   * this object, that would generate identical delimiters.
   * Characters are emitted as numeric casts so that unprintable
   * delimiters survive being written into source text.
   * @return a String that can be embedded in generated code that
   * provides this set of delimiters.
   */
  public String formatConstructor() {
    return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
        + "(char) " + (int) this.recordDelim + ", "
        + "(char) " + (int) this.enclosedBy + ", "
        + "(char) " + (int) this.escapedBy + ", "
        + this.encloseRequired + ")";
  }

  /**
   * Combines exactly the five properties compared by
   * {@link #equals(Object)}, each contributing once.
   * @return a hash code for this set of delimiters.
   */
  @Override
  public int hashCode() {
    int hash = this.fieldDelim;
    hash = 31 * hash + this.recordDelim;
    hash = 31 * hash + this.enclosedBy;
    hash = 31 * hash + this.escapedBy;
    // Same constants java.lang.Boolean uses for its own hash code.
    hash = 31 * hash + (this.encloseRequired ? 1231 : 1237);
    return hash;
  }

  /**
   * @param other the object to compare against; may be null.
   * @return true if this delimiter set is the same as another set of
   * delimiters. Objects of a different class are never equal.
   */
  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    } else if (null == other) {
      return false;
    } else if (!other.getClass().equals(getClass())) {
      return false;
    }

    DelimiterSet set = (DelimiterSet) other;
    return this.fieldDelim == set.fieldDelim
        && this.recordDelim == set.recordDelim
        && this.escapedBy == set.escapedBy
        && this.enclosedBy == set.enclosedBy
        && this.encloseRequired == set.encloseRequired;
  }

  /**
   * @return a new copy of this same set of delimiters.
   * @throws CloneNotSupportedException declared for the benefit of
   * subclasses; this class itself implements Cloneable.
   */
  @Override
  public Object clone() throws CloneNotSupportedException {
    return super.clone();
  }

  /**
   * Identical to clone() but does not throw spurious exceptions.
   * @return a new copy of this same set of delimiters; never null.
   */
  public DelimiterSet copy() {
    try {
      return (DelimiterSet) clone();
    } catch (CloneNotSupportedException cnse) {
      // This class implements Cloneable, so reaching this point means a
      // subclass broke clone(). Fail loudly rather than hand back a null
      // that would surface later as an unrelated NullPointerException.
      throw new AssertionError(cnse);
    }
  }

  // Static delimiter sets for the commonly-used delimiter arrangements.
  // These are shared, mutable instances: copy() them before modifying.

  /** Comma-separated fields, newline-terminated records, no enclose/escape. */
  public static final DelimiterSet DEFAULT_DELIMITERS =
      new DelimiterSet(',', '\n', NULL_CHAR, NULL_CHAR, false);

  /** Ctrl-A ('\001') separated fields, newline-terminated records. */
  public static final DelimiterSet HIVE_DELIMITERS =
      new DelimiterSet('\001', '\n', NULL_CHAR, NULL_CHAR, false);

  /**
   * Comma-separated fields, newline-terminated records, optionally enclosed
   * by single quotes and escaped by backslash.
   */
  public static final DelimiterSet MYSQL_DELIMITERS =
      new DelimiterSet(',', '\n', '\'', '\\', false);
}

View File

@ -18,7 +18,6 @@
package com.cloudera.sqoop.lib;
/**
* Static helper class that will help format data with quotes and escape chars.
*/
@ -35,28 +34,25 @@ private FieldFormatter() { }
* The field is enclosed only if:
* enclose != '\000', and:
* encloseRequired is true, or
* one of the characters in the mustEscapeFor list is present
* in the string.
* one of the fields-terminated-by or lines-terminated-by characters is
* present in the string.
*
* Escaping is not performed if the escape char is '\000'.
*
* @param str - The user's string to escape and enclose
* @param escape - What string to use as the escape sequence. If "" or null,
* then don't escape.
* @param enclose - The string to use to enclose str e.g. "quoted". If "" or
* null, then don't enclose.
* @param mustEncloseFor - A list of characters; if one is present in 'str',
* then str must be enclosed.
* @param encloseRequired - If true, then always enclose, regardless of
* mustEscapeFor.
* @param delimiters - The DelimiterSet to use identifying the escape and
* enclose semantics. If the specified escape or enclose characters are
* '\000', those operations are not performed.
* @return the escaped, enclosed version of 'str'.
*/
public static String escapeAndEnclose(String str, String escape,
String enclose, char [] mustEncloseFor, boolean encloseRequired) {
public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
char escape = delimiters.getEscapedBy();
char enclose = delimiters.getEnclosedBy();
boolean encloseRequired = delimiters.isEncloseRequired();
// true if we can use an escape character.
boolean escapingLegal = (null != escape
&& escape.length() > 0 && !escape.equals("\000"));
boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
String withEscapes;
if (null == str) {
@ -65,13 +61,13 @@ public static String escapeAndEnclose(String str, String escape,
if (escapingLegal) {
// escaping is legal. Escape any instances of the escape char itself.
withEscapes = str.replace(escape, escape + escape);
withEscapes = str.replace("" + escape, "" + escape + escape);
} else {
// no need to double-escape
withEscapes = str;
}
if (null == enclose || enclose.length() == 0 || enclose.equals("\000")) {
if (DelimiterSet.NULL_CHAR == enclose) {
// The enclose-with character was left unset, so we can't enclose items.
// We're done.
return withEscapes;
@ -80,12 +76,15 @@ public static String escapeAndEnclose(String str, String escape,
// if we have an enclosing character, and escaping is legal, then the
// encloser must always be escaped.
if (escapingLegal) {
withEscapes = withEscapes.replace(enclose, escape + enclose);
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
}
boolean actuallyDoEnclose = encloseRequired;
if (!actuallyDoEnclose && mustEncloseFor != null) {
// check if the string requires enclosing
if (!actuallyDoEnclose) {
// check if the string requires enclosing.
char [] mustEncloseFor = new char[2];
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
for (char reason : mustEncloseFor) {
if (str.indexOf(reason) != -1) {
actuallyDoEnclose = true;
@ -95,7 +94,7 @@ public static String escapeAndEnclose(String str, String escape,
}
if (actuallyDoEnclose) {
return enclose + withEscapes + enclose;
return "" + enclose + withEscapes + enclose;
} else {
return withEscapes;
}

View File

@ -34,6 +34,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.util.TaskId;
/**
* Contains a set of methods which can read db columns from a ResultSet into

View File

@ -86,21 +86,11 @@ public ParseError(final Throwable cause) {
}
}
private char fieldDelim;
private char recordDelim;
private char enclosingChar;
private char escapeChar;
private boolean enclosingRequired;
private DelimiterSet delimiters;
private ArrayList<String> outputs;
public RecordParser(final char field, final char record, final char enclose,
final char escape, final boolean mustEnclose) {
this.fieldDelim = field;
this.recordDelim = record;
this.enclosingChar = enclose;
this.escapeChar = escape;
this.enclosingRequired = mustEnclose;
public RecordParser(final DelimiterSet delimitersIn) {
this.delimiters = delimitersIn.copy();
this.outputs = new ArrayList<String>();
}
@ -215,13 +205,19 @@ record sep halts processing.
add character literal to current string, return to UNENCLOSED_FIELD
*/
char curChar = '\000';
char curChar = DelimiterSet.NULL_CHAR;
ParseState state = ParseState.FIELD_START;
int len = input.length();
StringBuilder sb = null;
outputs.clear();
char enclosingChar = delimiters.getEnclosedBy();
char fieldDelim = delimiters.getFieldsTerminatedBy();
char recordDelim = delimiters.getLinesTerminatedBy();
char escapeChar = delimiters.getEscapedBy();
boolean enclosingRequired = delimiters.isEncloseRequired();
for (int pos = 0; pos < len; pos++) {
curChar = input.get();
switch (state) {
@ -233,14 +229,14 @@ record sep halts processing.
}
sb = new StringBuilder();
if (this.enclosingChar == curChar) {
if (enclosingChar == curChar) {
// got an opening encloser.
state = ParseState.ENCLOSED_FIELD;
} else if (this.escapeChar == curChar) {
} else if (escapeChar == curChar) {
state = ParseState.UNENCLOSED_ESCAPE;
} else if (this.fieldDelim == curChar) {
} else if (fieldDelim == curChar) {
// we have a zero-length field. This is a no-op.
} else if (this.recordDelim == curChar) {
} else if (recordDelim == curChar) {
// we have a zero-length field, that ends processing.
pos = len;
} else {
@ -248,7 +244,7 @@ record sep halts processing.
state = ParseState.UNENCLOSED_FIELD;
sb.append(curChar);
if (this.enclosingRequired) {
if (enclosingRequired) {
throw new ParseError(
"Opening field-encloser expected at position " + pos);
}
@ -257,10 +253,10 @@ record sep halts processing.
break;
case ENCLOSED_FIELD:
if (this.escapeChar == curChar) {
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.ENCLOSED_ESCAPE;
} else if (this.enclosingChar == curChar) {
} else if (enclosingChar == curChar) {
// we're at the end of the enclosing field. Expect an EOF or EOR char.
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
} else {
@ -272,13 +268,13 @@ record sep halts processing.
break;
case UNENCLOSED_FIELD:
if (this.escapeChar == curChar) {
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.UNENCLOSED_ESCAPE;
} else if (this.fieldDelim == curChar) {
} else if (fieldDelim == curChar) {
// we're at the end of this field; may be the start of another one.
state = ParseState.FIELD_START;
} else if (this.recordDelim == curChar) {
} else if (recordDelim == curChar) {
pos = len; // terminate processing immediately.
} else {
// this is a regular char. Add to the current field string,
@ -298,10 +294,10 @@ record sep halts processing.
case ENCLOSED_EXPECT_DELIMITER:
// We were in an enclosed field, but got the final encloser. Now we
// expect either an end-of-field or an end-of-record.
if (this.fieldDelim == curChar) {
if (fieldDelim == curChar) {
// end of one field is the beginning of the next.
state = ParseState.FIELD_START;
} else if (this.recordDelim == curChar) {
} else if (recordDelim == curChar) {
// stop processing.
pos = len;
} else {
@ -323,7 +319,7 @@ record sep halts processing.
}
}
if (state == ParseState.FIELD_START && curChar == this.fieldDelim) {
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
// we hit an EOF/EOR as the last legal character and we need to mark
// that string as recorded. This if block is outside the for-loop since
// we don't have a physical 'epsilon' token in our string.
@ -342,19 +338,17 @@ record sep halts processing.
return outputs;
}
public boolean isEnclosingRequired() {
return enclosingRequired;
return delimiters.isEncloseRequired();
}
@Override
public String toString() {
return "RecordParser[" + fieldDelim + ',' + recordDelim + ','
+ enclosingChar + ',' + escapeChar + ',' + enclosingRequired + "]";
return "RecordParser[" + delimiters.toString() + "]";
}
@Override
public int hashCode() {
return this.toString().hashCode();
return this.delimiters.hashCode();
}
}

View File

@ -31,22 +31,41 @@
/**
* Interface implemented by the classes generated by sqoop's orm.ClassWriter.
*/
public interface SqoopRecord extends Cloneable, DBWritable, Writable {
void parse(CharSequence s) throws RecordParser.ParseError;
void parse(Text s) throws RecordParser.ParseError;
void parse(byte [] s) throws RecordParser.ParseError;
void parse(char [] s) throws RecordParser.ParseError;
void parse(ByteBuffer s) throws RecordParser.ParseError;
void parse(CharBuffer s) throws RecordParser.ParseError;
void loadLargeObjects(LargeObjectLoader objLoader)
public abstract class SqoopRecord implements Cloneable, DBWritable, Writable {
public SqoopRecord() {
}
public abstract void parse(CharSequence s) throws RecordParser.ParseError;
public abstract void parse(Text s) throws RecordParser.ParseError;
public abstract void parse(byte [] s) throws RecordParser.ParseError;
public abstract void parse(char [] s) throws RecordParser.ParseError;
public abstract void parse(ByteBuffer s) throws RecordParser.ParseError;
public abstract void parse(CharBuffer s) throws RecordParser.ParseError;
public abstract void loadLargeObjects(LargeObjectLoader objLoader)
throws SQLException, IOException, InterruptedException;
Object clone() throws CloneNotSupportedException;
/**
* Inserts the data in this object into the PreparedStatement, starting
* at parameter 'offset'.
* @return the number of fields written to the statement.
*/
int write(PreparedStatement stmt, int offset) throws SQLException;
public abstract int write(PreparedStatement stmt, int offset)
throws SQLException;
public abstract String toString(DelimiterSet delimiters);
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
/**
* Returns an integer specifying which API format version the
* generated class conforms to. Used by internal APIs for backwards
* compatibility.
* @return the API version this class was generated against.
*/
public abstract int getClassFormatVersion();
}

View File

@ -27,6 +27,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import static com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
import com.cloudera.sqoop.shims.HadoopShim;
import com.cloudera.sqoop.util.DirectImportUtils;
@ -76,10 +77,10 @@ private MySQLUtils() {
* escape: \\
*/
public static boolean outputDelimsAreMySQL(Configuration conf) {
return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, '\000')
&& '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, '\000')
&& '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, '\000')
&& '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, '\000')
return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, NULL_CHAR)
&& '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, NULL_CHAR)
&& '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, NULL_CHAR)
&& '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, NULL_CHAR)
&& !conf.getBoolean(OUTPUT_ENCLOSE_REQUIRED_KEY, false);
}

View File

@ -32,6 +32,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;
import com.cloudera.sqoop.lib.RecordParser;
import com.cloudera.sqoop.manager.MySQLUtils;
@ -194,9 +195,7 @@ private static class ReparsingStreamThread extends ErrorableThread {
static {
// build a record parser for mysqldump's format
MYSQLDUMP_PARSER = new RecordParser(MYSQL_FIELD_DELIM,
MYSQL_RECORD_DELIM, MYSQL_ENCLOSE_CHAR, MYSQL_ESCAPE_CHAR,
MYSQL_ENCLOSE_REQUIRED);
MYSQLDUMP_PARSER = new RecordParser(DelimiterSet.MYSQL_DELIMITERS);
}
public void run() {
@ -205,22 +204,30 @@ public void run() {
try {
r = new BufferedReader(new InputStreamReader(this.stream));
// Configure the output with the user's delimiters.
char outputFieldDelim = (char) conf.getInt(
MySQLUtils.OUTPUT_FIELD_DELIM_KEY, '\000');
MySQLUtils.OUTPUT_FIELD_DELIM_KEY,
DelimiterSet.NULL_CHAR);
String outputFieldDelimStr = "" + outputFieldDelim;
char outputRecordDelim = (char) conf.getInt(
MySQLUtils.OUTPUT_RECORD_DELIM_KEY, '\000');
MySQLUtils.OUTPUT_RECORD_DELIM_KEY,
DelimiterSet.NULL_CHAR);
String outputRecordDelimStr = "" + outputRecordDelim;
char outputEnclose = (char) conf.getInt(
MySQLUtils.OUTPUT_ENCLOSED_BY_KEY,
'\000');
String outputEncloseStr = "" + outputEnclose;
DelimiterSet.NULL_CHAR);
char outputEscape = (char) conf.getInt(
MySQLUtils.OUTPUT_ESCAPED_BY_KEY, '\000');
String outputEscapeStr = "" + outputEscape;
MySQLUtils.OUTPUT_ESCAPED_BY_KEY,
DelimiterSet.NULL_CHAR);
boolean outputEncloseRequired = conf.getBoolean(
MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, false);
char [] encloseFor = { outputFieldDelim, outputRecordDelim };
DelimiterSet delimiters = new DelimiterSet(
outputFieldDelim,
outputRecordDelim,
outputEnclose,
outputEscape,
outputEncloseRequired);
// Actually do the read/write transfer loop here.
int preambleLen = -1; // set to this for "undefined"
@ -268,8 +275,7 @@ public void run() {
}
String fieldStr = FieldFormatter.escapeAndEnclose(field,
outputEscapeStr, outputEncloseStr,
encloseFor, outputEncloseRequired);
delimiters);
context.write(fieldStr, null);
recordLen += fieldStr.length();
}

View File

@ -32,13 +32,13 @@
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import com.cloudera.sqoop.lib.TaskId;
import com.cloudera.sqoop.manager.MySQLUtils;
import com.cloudera.sqoop.shims.HadoopShim;
import com.cloudera.sqoop.util.AsyncSink;
import com.cloudera.sqoop.util.JdbcUrl;
import com.cloudera.sqoop.util.LoggingAsyncSink;
import com.cloudera.sqoop.util.NullAsyncSink;
import com.cloudera.sqoop.util.TaskId;
/**
* Mapper that starts a 'mysqlimport' process and uses that to export rows from

View File

@ -22,6 +22,7 @@
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ConnManager;
import com.cloudera.sqoop.lib.BigDecimalSerializer;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;
import com.cloudera.sqoop.lib.JdbcWritableBridge;
import com.cloudera.sqoop.lib.LargeObjectLoader;
@ -101,9 +102,11 @@ public class ClassWriter {
* which version of the ClassWriter's output format was used to generate the
* class.
*
* If the way that we generate classes, bump this number.
* If the way that we generate classes changes, bump this number.
* This number is retrieved by the SqoopRecord.getClassFormatVersion()
* method.
*/
public static final int CLASS_WRITER_VERSION = 2;
public static final int CLASS_WRITER_VERSION = 3;
private SqoopOptions options;
private ConnManager connManager;
@ -642,31 +645,20 @@ private void generateCloneMethod(Map<String, Integer> columnTypes,
private void generateToString(Map<String, Integer> columnTypes,
String [] colNames, StringBuilder sb) {
// Embed the delimiters into the class, as characters...
sb.append(" private static final char __OUTPUT_FIELD_DELIM_CHAR = "
+ (int)options.getOutputFieldDelim() + ";\n");
sb.append(" private static final char __OUTPUT_RECORD_DELIM_CHAR = "
+ (int)options.getOutputRecordDelim() + ";\n");
// Save the delimiters to the class.
sb.append(" private final DelimiterSet __outputDelimiters = ");
sb.append(options.getOutputDelimiters().formatConstructor() + ";\n");
// as strings...
sb.append(" private static final String __OUTPUT_FIELD_DELIM = "
+ "\"\" + (char) " + (int) options.getOutputFieldDelim() + ";\n");
sb.append(" private static final String __OUTPUT_RECORD_DELIM = "
+ "\"\" + (char) " + (int) options.getOutputRecordDelim() + ";\n");
sb.append(" private static final String __OUTPUT_ENCLOSED_BY = "
+ "\"\" + (char) " + (int) options.getOutputEnclosedBy() + ";\n");
sb.append(" private static final String __OUTPUT_ESCAPED_BY = "
+ "\"\" + (char) " + (int) options.getOutputEscapedBy() + ";\n");
// and some more options.
sb.append(" private static final boolean __OUTPUT_ENCLOSE_REQUIRED = "
+ options.isOutputEncloseRequired() + ";\n");
sb.append(" private static final char [] __OUTPUT_DELIMITER_LIST = { "
+ "__OUTPUT_FIELD_DELIM_CHAR, __OUTPUT_RECORD_DELIM_CHAR };\n\n");
// The actual toString() method itself follows.
// The default toString() method itself follows. This just calls
// the delimiter-specific toString() with the default delimiters.
sb.append(" public String toString() {\n");
sb.append(" return toString(__outputDelimiters);\n");
sb.append(" }\n");
// This toString() variant, though, accepts delimiters as arguments.
sb.append(" public String toString(DelimiterSet delimiters) {\n");
sb.append(" StringBuilder __sb = new StringBuilder();\n");
sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n");
boolean first = true;
for (String col : colNames) {
@ -679,7 +671,7 @@ private void generateToString(Map<String, Integer> columnTypes,
if (!first) {
// print inter-field tokens.
sb.append(" __sb.append(__OUTPUT_FIELD_DELIM);\n");
sb.append(" __sb.append(fieldDelim);\n");
}
first = false;
@ -691,12 +683,10 @@ private void generateToString(Map<String, Integer> columnTypes,
}
sb.append(" __sb.append(FieldFormatter.escapeAndEnclose(" + stringExpr
+ ", __OUTPUT_ESCAPED_BY, __OUTPUT_ENCLOSED_BY, "
+ "__OUTPUT_DELIMITER_LIST, __OUTPUT_ENCLOSE_REQUIRED));\n");
+ ", delimiters));\n");
}
sb.append(" __sb.append(__OUTPUT_RECORD_DELIM);\n");
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n");
sb.append(" return __sb.toString();\n");
sb.append(" }\n");
}
@ -711,11 +701,7 @@ private void generateParseMethod(String typ, StringBuilder sb) {
sb.append(" public void parse(" + typ + " __record) "
+ "throws RecordParser.ParseError {\n");
sb.append(" if (null == this.__parser) {\n");
sb.append(" this.__parser = new RecordParser("
+ "__INPUT_FIELD_DELIM_CHAR, ");
sb.append("__INPUT_RECORD_DELIM_CHAR, __INPUT_ENCLOSED_BY_CHAR, "
+ "__INPUT_ESCAPED_BY_CHAR, ");
sb.append("__INPUT_ENCLOSE_REQUIRED);\n");
sb.append(" this.__parser = new RecordParser(__inputDelimiters);\n");
sb.append(" }\n");
sb.append(" List<String> __fields = "
+ "this.__parser.parseRecord(__record);\n");
@ -795,17 +781,8 @@ private void generateParser(Map<String, Integer> columnTypes,
// records. Note that these can differ from the delims to use as output
// via toString(), if the user wants to use this class to convert one
// format to another.
sb.append(" private static final char __INPUT_FIELD_DELIM_CHAR = "
+ (int)options.getInputFieldDelim() + ";\n");
sb.append(" private static final char __INPUT_RECORD_DELIM_CHAR = "
+ (int)options.getInputRecordDelim() + ";\n");
sb.append(" private static final char __INPUT_ENCLOSED_BY_CHAR = "
+ (int)options.getInputEnclosedBy() + ";\n");
sb.append(" private static final char __INPUT_ESCAPED_BY_CHAR = "
+ (int)options.getInputEscapedBy() + ";\n");
sb.append(" private static final boolean __INPUT_ENCLOSE_REQUIRED = "
+ options.isInputEncloseRequired() + ";\n");
sb.append(" private final DelimiterSet __inputDelimiters = ");
sb.append(options.getInputDelimiters().formatConstructor() + ";\n");
// The parser object which will do the heavy lifting for field splitting.
sb.append(" private RecordParser __parser;\n");
@ -976,6 +953,7 @@ public StringBuilder generateClassForColumns(Map<String, Integer> columnTypes,
sb.append("import org.apache.hadoop.io.Writable;\n");
sb.append("import org.apache.hadoop.mapred.lib.db.DBWritable;\n");
sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n");
sb.append("import " + DelimiterSet.class.getCanonicalName() + ";\n");
sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n");
sb.append("import " + RecordParser.class.getCanonicalName() + ";\n");
sb.append("import " + BlobRef.class.getCanonicalName() + ";\n");
@ -999,10 +977,12 @@ public StringBuilder generateClassForColumns(Map<String, Integer> columnTypes,
sb.append("\n");
String className = tableNameInfo.getShortClassForTable(tableName);
sb.append("public class " + className
+ " implements DBWritable, SqoopRecord, Writable {\n");
sb.append(" public static final int PROTOCOL_VERSION = "
sb.append("public class " + className + " extends SqoopRecord "
+ " implements DBWritable, Writable {\n");
sb.append(" private final int PROTOCOL_VERSION = "
+ CLASS_WRITER_VERSION + ";\n");
sb.append(
" public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
sb.append(" protected ResultSet __cur_result_set;\n");
generateFields(columnTypes, colNames, sb);
generateDbRead(columnTypes, colNames, sb);

View File

@ -36,6 +36,7 @@
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.cli.RelatedOptions;
import com.cloudera.sqoop.cli.ToolOptions;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.manager.ConnManager;
import com.cloudera.sqoop.shims.ShimLoader;
@ -603,19 +604,15 @@ protected void validateOutputFormatOptions(SqoopOptions options)
// straight to Hive. Use Hive-style delimiters.
LOG.info("Using Hive-specific delimiters for output. You can override");
LOG.info("delimiters with --fields-terminated-by, etc.");
options.setFieldsTerminatedBy((char) 0x1); // ^A
options.setLinesTerminatedBy('\n');
options.setEnclosedBy('\000'); // no enclosing in Hive.
options.setEscapedBy('\000'); // no escaping in Hive.
options.setOutputEncloseRequired(false);
options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS);
}
if (options.getOutputEscapedBy() != '\000') {
if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) {
LOG.warn("Hive does not support escape characters in fields;");
LOG.warn("parse errors in Hive may result from using --escaped-by.");
}
if (options.getOutputEnclosedBy() != '\000') {
if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) {
LOG.warn("Hive does not support quoted strings; parse errors");
LOG.warn("in Hive may result from using --enclosed-by.");
}
@ -627,7 +624,5 @@ protected void validateHiveOptions(SqoopOptions options) {
// is reserved for future constraints on Hive options.
}
}

View File

@ -16,7 +16,7 @@
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
package com.cloudera.sqoop.util;
import org.apache.hadoop.conf.Configuration;

View File

@ -20,6 +20,7 @@
import junit.framework.TestCase;
import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.tool.ImportTool;
@ -109,19 +110,19 @@ public void testUnknownEscape2() throws Exception {
}
public void testEscapeNul1() throws Exception {
assertEquals('\000', SqoopOptions.toChar("\\0"));
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0"));
}
public void testEscapeNul2() throws Exception {
assertEquals('\000', SqoopOptions.toChar("\\00"));
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\00"));
}
public void testEscapeNul3() throws Exception {
assertEquals('\000', SqoopOptions.toChar("\\0000"));
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0000"));
}
public void testEscapeNul4() throws Exception {
assertEquals('\000', SqoopOptions.toChar("\\0x0"));
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0x0"));
}
public void testOctalChar1() throws Exception {

View File

@ -27,86 +27,59 @@
public class TestFieldFormatter extends TestCase {
public void testAllEmpty() {
char [] chars = new char[0];
String result = FieldFormatter.escapeAndEnclose("", "", "", chars, false);
String result = FieldFormatter.escapeAndEnclose("",
new DelimiterSet(DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR,
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false));
assertEquals("", result);
}
public void testNullArgs() {
String result = FieldFormatter.escapeAndEnclose("", null, null, null,
false);
assertEquals("", result);
char [] encloseFor = { '\"' };
assertNull(FieldFormatter.escapeAndEnclose(null, "\\", "\"", encloseFor,
false));
assertNull(FieldFormatter.escapeAndEnclose(null,
new DelimiterSet('\"', DelimiterSet.NULL_CHAR, '\"', '\\', false)));
}
public void testBasicStr() {
String result = FieldFormatter.escapeAndEnclose("foo", null, null, null,
false);
String result = FieldFormatter.escapeAndEnclose("foo",
DelimiterSet.DEFAULT_DELIMITERS);
assertEquals("foo", result);
}
public void testEscapeSlash() {
String result = FieldFormatter.escapeAndEnclose("foo\\bar", "\\", "\"",
null, false);
String result = FieldFormatter.escapeAndEnclose("foo\\bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("foo\\\\bar", result);
}
public void testMustEnclose() {
String result = FieldFormatter.escapeAndEnclose("foo", null, "\"",
null, true);
String result = FieldFormatter.escapeAndEnclose("foo",
new DelimiterSet(',', '\n', '\"', DelimiterSet.NULL_CHAR, true));
assertEquals("\"foo\"", result);
}
public void testEncloseComma1() {
char [] chars = { ',' };
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
chars, false);
String result = FieldFormatter.escapeAndEnclose("foo,bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("\"foo,bar\"", result);
}
public void testEncloseComma2() {
char [] chars = { '\n', ',' };
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
chars, false);
assertEquals("\"foo,bar\"", result);
}
public void testEncloseComma3() {
char [] chars = { ',', '\n' };
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
chars, false);
String result = FieldFormatter.escapeAndEnclose("foo,bar",
new DelimiterSet(',', ',', '\"', '\\', false));
assertEquals("\"foo,bar\"", result);
}
public void testNoNeedToEnclose() {
char [] chars = { ',', '\n' };
String result = FieldFormatter.escapeAndEnclose(
"just another string", "\\", "\"", chars, false);
"just another string",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("just another string", result);
}
public void testCannotEnclose1() {
char [] chars = { ',', '\n' };
public void testCannotEnclose() {
// can't enclose because encloser is nul
String result = FieldFormatter.escapeAndEnclose("foo,bar",
new DelimiterSet(',', '\n', DelimiterSet.NULL_CHAR, '\\', false));
// can't enclose because encloser is ""
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "",
chars, false);
assertEquals("foo,bar", result);
}
public void testCannotEnclose2() {
char [] chars = { ',', '\n' };
// can't enclose because encloser is null
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", null,
chars, false);
assertEquals("foo,bar", result);
}
@ -114,48 +87,44 @@ public void testEmptyCharToEscapeString() {
// test what happens when the escape char is null. It should encode the
// null char.
char nul = '\000';
char nul = DelimiterSet.NULL_CHAR;
String s = "" + nul;
assertEquals("\000", s);
}
public void testEscapeCentralQuote() {
String result = FieldFormatter.escapeAndEnclose("foo\"bar", "\\", "\"",
null, false);
String result = FieldFormatter.escapeAndEnclose("foo\"bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("foo\\\"bar", result);
}
public void testEscapeMultiCentralQuote() {
String result = FieldFormatter.escapeAndEnclose("foo\"\"bar", "\\", "\"",
null, false);
String result = FieldFormatter.escapeAndEnclose("foo\"\"bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("foo\\\"\\\"bar", result);
}
public void testDoubleEscape() {
String result = FieldFormatter.escapeAndEnclose("foo\\\"bar", "\\", "\"",
null, false);
String result = FieldFormatter.escapeAndEnclose("foo\\\"bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("foo\\\\\\\"bar", result);
}
public void testReverseEscape() {
String result = FieldFormatter.escapeAndEnclose("foo\"\\bar", "\\", "\"",
null, false);
String result = FieldFormatter.escapeAndEnclose("foo\"\\bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("foo\\\"\\\\bar", result);
}
public void testQuotedEncloser() {
char [] chars = { ',', '\n' };
String result = FieldFormatter.escapeAndEnclose("foo\",bar", "\\", "\"",
chars, false);
String result = FieldFormatter.escapeAndEnclose("foo\",bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("\"foo\\\",bar\"", result);
}
public void testQuotedEscape() {
char [] chars = { ',', '\n' };
String result = FieldFormatter.escapeAndEnclose("foo\\,bar", "\\", "\"",
chars, false);
String result = FieldFormatter.escapeAndEnclose("foo\\,bar",
new DelimiterSet(',', '\n', '\"', '\\', false));
assertEquals("\"foo\\\\,bar\"", result);
}
}

View File

@ -103,7 +103,8 @@ private List<String> list(String [] items) {
public void testEmptyLine() throws RecordParser.ParseError {
// an empty line should return no fields.
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { };
assertListsEqual(null, list(strings), parser.parseRecord(""));
}
@ -111,32 +112,37 @@ public void testEmptyLine() throws RecordParser.ParseError {
public void testJustEOR() throws RecordParser.ParseError {
// a line with just a newline char should return a single zero-length field.
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "" };
assertListsEqual(null, list(strings), parser.parseRecord("\n"));
}
public void testOneField() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the field" };
assertListsEqual(null, list(strings), parser.parseRecord("the field"));
}
public void testOneField2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the field" };
assertListsEqual(null, list(strings), parser.parseRecord("the field\n"));
}
public void testQuotedField1() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the field" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the field\"\n"));
}
public void testQuotedField2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the field" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the field\""));
@ -144,7 +150,8 @@ public void testQuotedField2() throws RecordParser.ParseError {
public void testQuotedField3() throws RecordParser.ParseError {
// quoted containing EOF
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the ,field" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the ,field\""));
@ -152,7 +159,8 @@ public void testQuotedField3() throws RecordParser.ParseError {
public void testQuotedField4() throws RecordParser.ParseError {
// quoted containing multiple EOFs
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the ,,field" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the ,,field\""));
@ -160,7 +168,8 @@ public void testQuotedField4() throws RecordParser.ParseError {
public void testQuotedField5() throws RecordParser.ParseError {
// quoted containing EOF and EOR
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the ,\nfield" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the ,\nfield\""));
@ -168,7 +177,8 @@ public void testQuotedField5() throws RecordParser.ParseError {
public void testQuotedField6() throws RecordParser.ParseError {
// quoted containing EOR
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the \nfield" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the \nfield\""));
@ -176,7 +186,8 @@ public void testQuotedField6() throws RecordParser.ParseError {
public void testQuotedField7() throws RecordParser.ParseError {
// quoted containing multiple EORs
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the \n\nfield" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the \n\nfield\""));
@ -184,7 +195,8 @@ public void testQuotedField7() throws RecordParser.ParseError {
public void testQuotedField8() throws RecordParser.ParseError {
// quoted containing escaped quoted char
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the \"field" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"the \\\"field\""));
@ -192,68 +204,78 @@ public void testQuotedField8() throws RecordParser.ParseError {
public void testUnquotedEscape1() throws RecordParser.ParseError {
// field without quotes with an escaped EOF char.
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the ,field" };
assertListsEqual(null, list(strings), parser.parseRecord("the \\,field"));
}
public void testUnquotedEscape2() throws RecordParser.ParseError {
// field without quotes with an escaped escape char.
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "the \\field" };
assertListsEqual(null, list(strings), parser.parseRecord("the \\\\field"));
}
public void testTwoFields1() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings), parser.parseRecord("field1,field2"));
}
public void testTwoFields2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("field1,field2\n"));
}
public void testTwoFields3() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"field1\",field2\n"));
}
public void testTwoFields4() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("field1,\"field2\"\n"));
}
public void testTwoFields5() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("field1,\"field2\""));
}
public void testRequiredQuotes0() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"field1\",\"field2\"\n"));
}
public void testRequiredQuotes1() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
String [] strings = { "field1", "field2" };
assertListsEqual(null, list(strings),
parser.parseRecord("\"field1\",\"field2\""));
}
public void testRequiredQuotes2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
try {
parser.parseRecord("\"field1\",field2");
fail("Expected parse error for required quotes");
@ -263,7 +285,8 @@ public void testRequiredQuotes2() throws RecordParser.ParseError {
}
public void testRequiredQuotes3() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
try {
parser.parseRecord("field1,\"field2\"");
fail("Expected parse error for required quotes");
@ -273,7 +296,8 @@ public void testRequiredQuotes3() throws RecordParser.ParseError {
}
public void testRequiredQuotes4() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
try {
parser.parseRecord("field1,\"field2\"\n");
fail("Expected parse error for required quotes");
@ -283,7 +307,8 @@ public void testRequiredQuotes4() throws RecordParser.ParseError {
}
public void testNull() {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', true));
String input = null;
try {
parser.parseRecord(input);
@ -295,75 +320,87 @@ public void testNull() {
public void testEmptyFields1() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "", ""};
assertListsEqual(null, list(strings), parser.parseRecord(","));
}
public void testEmptyFields2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "", "" };
assertListsEqual(null, list(strings), parser.parseRecord(",\n"));
}
public void testEmptyFields3() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "", "", "" };
assertListsEqual(null, list(strings), parser.parseRecord(",,\n"));
}
public void testEmptyFields4() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "", "foo", "" };
assertListsEqual(null, list(strings), parser.parseRecord(",foo,\n"));
}
public void testEmptyFields5() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "", "foo", "" };
assertListsEqual(null, list(strings), parser.parseRecord(",foo,"));
}
public void testEmptyFields6() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "foo", "" };
assertListsEqual(null, list(strings), parser.parseRecord("foo,"));
}
public void testTrailingText() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "foo", "bar" };
assertListsEqual(null, list(strings), parser.parseRecord("foo,bar\nbaz"));
}
public void testTrailingText2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "" };
assertListsEqual(null, list(strings), parser.parseRecord("\nbaz"));
}
public void testLeadingEscape() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', '\n', '\"', '\\', false));
String [] strings = { "\nbaz" };
assertListsEqual(null, list(strings), parser.parseRecord("\\\nbaz"));
}
public void testEofIsEor() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', ',', '\"', '\\', false));
String [] strings = { "three", "different", "fields" };
assertListsEqual(null, list(strings),
parser.parseRecord("three,different,fields"));
}
public void testEofIsEor2() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', ',', '\"', '\\', false));
String [] strings = { "three", "different", "fields" };
assertListsEqual(null, list(strings),
parser.parseRecord("three,\"different\",fields"));
}
public void testRepeatedParse() throws RecordParser.ParseError {
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
RecordParser parser = new RecordParser(
new DelimiterSet(',', ',', '\"', '\\', false));
String [] strings = { "three", "different", "fields" };
assertListsEqual(null, list(strings),
parser.parseRecord("three,\"different\",fields"));

View File

@ -45,22 +45,22 @@
not need to be rigidly upheld. -->
<Match>
<!-- Performance warnings are ignored in test code. -->
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
<Bug category="PERFORMANCE" />
</Match>
<Match>
<!-- More performance warnings to suppress in tests. -->
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
<Bug pattern="SBSC_USE_STRINGBUFFER_CONCATENATION" />
</Match>
<Match>
<!-- Security warnings are ignored in test code. -->
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
<Bug category="SECURITY" />
</Match>
<Match>
<!-- Ok to use methods to generate SQL statements in tests. -->
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
<Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING" />
</Match>