mirror of
https://github.com/apache/sqoop.git
synced 2025-05-03 18:19:23 +08:00
SIP-4. Refactor classes for public API 1.0.0.
Move TaskId to com.cloudera.sqoop.util. Add com.cloudera.sqoop.lib.DelimiterSet. Rewrite FieldFormatter and RecordParser to use DelimiterSet. Add generated class version id to SqoopRecord. From: Aaron Kimball <aaron@cloudera.com> git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149907 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
42875119dd
commit
3ab9ebd354
@ -29,6 +29,7 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import com.cloudera.sqoop.lib.DelimiterSet;
|
||||
import com.cloudera.sqoop.lib.LargeObjectLoader;
|
||||
|
||||
/**
|
||||
@ -115,18 +116,8 @@ public enum FileLayout {
|
||||
// HDFS path to read from when performing an export
|
||||
private String exportDir;
|
||||
|
||||
private char inputFieldDelim;
|
||||
private char inputRecordDelim;
|
||||
private char inputEnclosedBy;
|
||||
private char inputEscapedBy;
|
||||
private boolean inputMustBeEnclosed;
|
||||
|
||||
private char outputFieldDelim;
|
||||
private char outputRecordDelim;
|
||||
private char outputEnclosedBy;
|
||||
private char outputEscapedBy;
|
||||
private boolean outputMustBeEnclosed;
|
||||
|
||||
private DelimiterSet inputDelimiters;
|
||||
private DelimiterSet outputDelimiters;
|
||||
private boolean areDelimsManuallySet;
|
||||
|
||||
private Configuration conf;
|
||||
@ -252,6 +243,11 @@ private void initDefaults(Configuration baseConfiguration) {
|
||||
this.hiveHome = System.getenv("HIVE_HOME");
|
||||
this.hiveHome = System.getProperty("hive.home", this.hiveHome);
|
||||
|
||||
this.inputDelimiters = new DelimiterSet(
|
||||
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR,
|
||||
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false);
|
||||
this.outputDelimiters = new DelimiterSet();
|
||||
|
||||
// Set this to cwd, but -Dsqoop.src.dir can override.
|
||||
this.codeOutputDir = System.getProperty("sqoop.src.dir", ".");
|
||||
|
||||
@ -264,18 +260,6 @@ private void initDefaults(Configuration baseConfiguration) {
|
||||
this.jarOutputDir = tmpDir + "sqoop/compile";
|
||||
this.layout = FileLayout.TextFile;
|
||||
|
||||
this.inputFieldDelim = '\000';
|
||||
this.inputRecordDelim = '\000';
|
||||
this.inputEnclosedBy = '\000';
|
||||
this.inputEscapedBy = '\000';
|
||||
this.inputMustBeEnclosed = false;
|
||||
|
||||
this.outputFieldDelim = ',';
|
||||
this.outputRecordDelim = '\n';
|
||||
this.outputEnclosedBy = '\000';
|
||||
this.outputEscapedBy = '\000';
|
||||
this.outputMustBeEnclosed = false;
|
||||
|
||||
this.areDelimsManuallySet = false;
|
||||
|
||||
this.numMappers = DEFAULT_NUM_MAPPERS;
|
||||
@ -329,7 +313,7 @@ public static char toChar(String charish) throws InvalidOptionsException {
|
||||
} else if (charish.startsWith("\\0")) {
|
||||
if (charish.equals("\\0")) {
|
||||
// it's just '\0', which we can take as shorthand for nul.
|
||||
return '\000';
|
||||
return DelimiterSet.NULL_CHAR;
|
||||
} else {
|
||||
// it's an octal value.
|
||||
String valStr = charish.substring(2);
|
||||
@ -651,15 +635,19 @@ public void setFileLayout(FileLayout fileLayout) {
|
||||
* field delim to use when printing lines.
|
||||
*/
|
||||
public char getInputFieldDelim() {
|
||||
if (inputFieldDelim == '\000') {
|
||||
return this.outputFieldDelim;
|
||||
char f = inputDelimiters.getFieldsTerminatedBy();
|
||||
if (f == DelimiterSet.NULL_CHAR) {
|
||||
return this.outputDelimiters.getFieldsTerminatedBy();
|
||||
} else {
|
||||
return this.inputFieldDelim;
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the field delimiter to use when parsing lines.
|
||||
*/
|
||||
public void setInputFieldsTerminatedBy(char c) {
|
||||
this.inputFieldDelim = c;
|
||||
this.inputDelimiters.setFieldsTerminatedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -667,15 +655,19 @@ public void setInputFieldsTerminatedBy(char c) {
|
||||
* record delim to use when printing lines.
|
||||
*/
|
||||
public char getInputRecordDelim() {
|
||||
if (inputRecordDelim == '\000') {
|
||||
return this.outputRecordDelim;
|
||||
char r = inputDelimiters.getLinesTerminatedBy();
|
||||
if (r == DelimiterSet.NULL_CHAR) {
|
||||
return this.outputDelimiters.getLinesTerminatedBy();
|
||||
} else {
|
||||
return this.inputRecordDelim;
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the record delimiter to use when parsing lines.
|
||||
*/
|
||||
public void setInputLinesTerminatedBy(char c) {
|
||||
this.inputRecordDelim = c;
|
||||
this.inputDelimiters.setLinesTerminatedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -683,15 +675,19 @@ public void setInputLinesTerminatedBy(char c) {
|
||||
* Defaults to the enclosing-char to use when printing lines.
|
||||
*/
|
||||
public char getInputEnclosedBy() {
|
||||
if (inputEnclosedBy == '\000') {
|
||||
return this.outputEnclosedBy;
|
||||
char c = inputDelimiters.getEnclosedBy();
|
||||
if (c == DelimiterSet.NULL_CHAR) {
|
||||
return this.outputDelimiters.getEnclosedBy();
|
||||
} else {
|
||||
return this.inputEnclosedBy;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the enclosed-by character to use when parsing lines.
|
||||
*/
|
||||
public void setInputEnclosedBy(char c) {
|
||||
this.inputEnclosedBy = c;
|
||||
this.inputDelimiters.setEnclosedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -699,15 +695,19 @@ public void setInputEnclosedBy(char c) {
|
||||
* escape character used when printing lines.
|
||||
*/
|
||||
public char getInputEscapedBy() {
|
||||
if (inputEscapedBy == '\000') {
|
||||
return this.outputEscapedBy;
|
||||
char c = inputDelimiters.getEscapedBy();
|
||||
if (c == DelimiterSet.NULL_CHAR) {
|
||||
return this.outputDelimiters.getEscapedBy();
|
||||
} else {
|
||||
return this.inputEscapedBy;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the escaped-by character to use when parsing lines.
|
||||
*/
|
||||
public void setInputEscapedBy(char c) {
|
||||
this.inputEscapedBy = c;
|
||||
this.inputDelimiters.setEscapedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -716,15 +716,20 @@ public void setInputEscapedBy(char c) {
|
||||
* used.
|
||||
*/
|
||||
public boolean isInputEncloseRequired() {
|
||||
if (inputEnclosedBy == '\000') {
|
||||
return this.outputMustBeEnclosed;
|
||||
char c = this.inputDelimiters.getEnclosedBy();
|
||||
if (c == DelimiterSet.NULL_CHAR) {
|
||||
return this.outputDelimiters.isEncloseRequired();
|
||||
} else {
|
||||
return this.inputMustBeEnclosed;
|
||||
return this.inputDelimiters.isEncloseRequired();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then all input fields are expected to be enclosed by the
|
||||
* enclosed-by character when parsing.
|
||||
*/
|
||||
public void setInputEncloseRequired(boolean required) {
|
||||
this.inputMustBeEnclosed = required;
|
||||
this.inputDelimiters.setEncloseRequired(required);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -732,11 +737,14 @@ public void setInputEncloseRequired(boolean required) {
|
||||
* text.
|
||||
*/
|
||||
public char getOutputFieldDelim() {
|
||||
return this.outputFieldDelim;
|
||||
return this.outputDelimiters.getFieldsTerminatedBy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the field delimiter to use when formatting lines.
|
||||
*/
|
||||
public void setFieldsTerminatedBy(char c) {
|
||||
this.outputFieldDelim = c;
|
||||
this.outputDelimiters.setFieldsTerminatedBy(c);
|
||||
}
|
||||
|
||||
|
||||
@ -745,11 +753,14 @@ public void setFieldsTerminatedBy(char c) {
|
||||
* text.
|
||||
*/
|
||||
public char getOutputRecordDelim() {
|
||||
return this.outputRecordDelim;
|
||||
return this.outputDelimiters.getLinesTerminatedBy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the record delimiter to use when formatting lines.
|
||||
*/
|
||||
public void setLinesTerminatedBy(char c) {
|
||||
this.outputRecordDelim = c;
|
||||
this.outputDelimiters.setLinesTerminatedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -757,11 +768,14 @@ public void setLinesTerminatedBy(char c) {
|
||||
* imported to text.
|
||||
*/
|
||||
public char getOutputEnclosedBy() {
|
||||
return this.outputEnclosedBy;
|
||||
return this.outputDelimiters.getEnclosedBy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the enclosed-by character to use when formatting lines.
|
||||
*/
|
||||
public void setEnclosedBy(char c) {
|
||||
this.outputEnclosedBy = c;
|
||||
this.outputDelimiters.setEnclosedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -769,11 +783,14 @@ public void setEnclosedBy(char c) {
|
||||
* text.
|
||||
*/
|
||||
public char getOutputEscapedBy() {
|
||||
return this.outputEscapedBy;
|
||||
return this.outputDelimiters.getEscapedBy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the escaped-by character to use when formatting lines.
|
||||
*/
|
||||
public void setEscapedBy(char c) {
|
||||
this.outputEscapedBy = c;
|
||||
this.outputDelimiters.setEscapedBy(c);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -782,11 +799,42 @@ public void setEscapedBy(char c) {
|
||||
* instead of --optionally-enclosed-by.
|
||||
*/
|
||||
public boolean isOutputEncloseRequired() {
|
||||
return this.outputMustBeEnclosed;
|
||||
return this.outputDelimiters.isEncloseRequired();
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then the enclosed-by character will be applied to all fields,
|
||||
* even if internal characters do not need enclosed-by protection.
|
||||
*/
|
||||
public void setOutputEncloseRequired(boolean required) {
|
||||
this.outputMustBeEnclosed = required;
|
||||
this.outputDelimiters.setEncloseRequired(required);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the set of delimiters used for formatting output records.
|
||||
*/
|
||||
public DelimiterSet getOutputDelimiters() {
|
||||
return this.outputDelimiters.copy();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the complete set of delimiters to use for output formatting.
|
||||
*/
|
||||
public void setOutputDelimiters(DelimiterSet delimiters) {
|
||||
this.outputDelimiters = delimiters.copy();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the set of delimiters used for parsing the input.
|
||||
* This may include values implicitly set by the output delimiters.
|
||||
*/
|
||||
public DelimiterSet getInputDelimiters() {
|
||||
return new DelimiterSet(
|
||||
getInputFieldDelim(),
|
||||
getInputRecordDelim(),
|
||||
getInputEnclosedBy(),
|
||||
getInputEscapedBy(),
|
||||
isInputEncloseRequired());
|
||||
}
|
||||
|
||||
/**
|
||||
|
233
src/java/com/cloudera/sqoop/lib/DelimiterSet.java
Normal file
233
src/java/com/cloudera/sqoop/lib/DelimiterSet.java
Normal file
@ -0,0 +1,233 @@
|
||||
/**
|
||||
* Licensed to Cloudera, Inc. under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Cloudera, Inc. licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.cloudera.sqoop.lib;
|
||||
|
||||
/**
|
||||
* Encapsulates a set of delimiters used to encode a record.
|
||||
*/
|
||||
public class DelimiterSet implements Cloneable {
|
||||
|
||||
public static final char NULL_CHAR = '\000';
|
||||
|
||||
private char fieldDelim; // fields terminated by this.
|
||||
private char recordDelim; // records terminated by this.
|
||||
|
||||
// If these next two fields are '\000', then they are ignored.
|
||||
private char enclosedBy;
|
||||
private char escapedBy;
|
||||
|
||||
// If true, then the enclosed-by character is applied to every
|
||||
// field, not just ones containing embedded delimiters.
|
||||
private boolean encloseRequired;
|
||||
|
||||
/**
|
||||
* Create a delimiter set with the default delimiters
|
||||
* (comma for fields, newline for records).
|
||||
*/
|
||||
public DelimiterSet() {
|
||||
this(',', '\n', NULL_CHAR, NULL_CHAR, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a delimiter set with the specified delimiters.
|
||||
* @param field the fields-terminated-by delimiter
|
||||
* @param record the lines-terminated-by delimiter
|
||||
* @param enclose the enclosed-by character
|
||||
* @param escape the escaped-by character
|
||||
* @param isEncloseRequired If true, enclosed-by is applied to all
|
||||
* fields. If false, only applied to fields that embed delimiters.
|
||||
*/
|
||||
public DelimiterSet(char field, char record, char enclose, char escape,
|
||||
boolean isEncloseRequired) {
|
||||
this.fieldDelim = field;
|
||||
this.recordDelim = record;
|
||||
this.enclosedBy = enclose;
|
||||
this.escapedBy = escape;
|
||||
this.encloseRequired = isEncloseRequired;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the fields-terminated-by character.
|
||||
*/
|
||||
public void setFieldsTerminatedBy(char f) {
|
||||
this.fieldDelim = f;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the fields-terminated-by character.
|
||||
*/
|
||||
public char getFieldsTerminatedBy() {
|
||||
return this.fieldDelim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the end-of-record lines-terminated-by character.
|
||||
*/
|
||||
public void setLinesTerminatedBy(char r) {
|
||||
this.recordDelim = r;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the end-of-record (lines-terminated-by) character.
|
||||
*/
|
||||
public char getLinesTerminatedBy() {
|
||||
return this.recordDelim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the enclosed-by character.
|
||||
* @param e the enclosed-by character, or '\000' for no enclosing character.
|
||||
*/
|
||||
public void setEnclosedBy(char e) {
|
||||
this.enclosedBy = e;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the enclosed-by character, or '\000' for none.
|
||||
*/
|
||||
public char getEnclosedBy() {
|
||||
return this.enclosedBy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the escaped-by character.
|
||||
* @param e the escaped-by character, or '\000' for no escape character.
|
||||
*/
|
||||
public void setEscapedBy(char e) {
|
||||
this.escapedBy = e;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the escaped-by character, or '\000' for none.
|
||||
*/
|
||||
public char getEscapedBy() {
|
||||
return this.escapedBy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whether the enclosed-by character must be applied to all fields,
|
||||
* or only fields with embedded delimiters.
|
||||
*/
|
||||
public void setEncloseRequired(boolean required) {
|
||||
this.encloseRequired = required;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the enclosed-by character must be applied to all fields,
|
||||
* or false if it's only used for fields with embedded delimiters.
|
||||
*/
|
||||
public boolean isEncloseRequired() {
|
||||
return this.encloseRequired;
|
||||
}
|
||||
|
||||
@Override
|
||||
/**
|
||||
* @return a string representation of the delimiters.
|
||||
*/
|
||||
public String toString() {
|
||||
return "fields=" + this.fieldDelim
|
||||
+ " records=" + this.recordDelim
|
||||
+ " escape=" + this.escapedBy
|
||||
+ " enclose=" + this.enclosedBy
|
||||
+ " required=" + this.encloseRequired;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format this set of delimiters as a call to the constructor for
|
||||
* this object, that would generate identical delimiters.
|
||||
* @return a String that can be embedded in generated code that
|
||||
* provides this set of delimiters.
|
||||
*/
|
||||
public String formatConstructor() {
|
||||
return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
|
||||
+ "(char) " + (int) this.recordDelim + ", "
|
||||
+ "(char) " + (int) this.enclosedBy + ", "
|
||||
+ "(char) " + (int) this.escapedBy + ", "
|
||||
+ this.encloseRequired + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
/**
|
||||
* @return a hash code for this set of delimiters.
|
||||
*/
|
||||
public int hashCode() {
|
||||
return (int) this.fieldDelim
|
||||
+ (((int) this.recordDelim) << 4)
|
||||
+ (((int) this.escapedBy) << 8)
|
||||
+ (((int) this.enclosedBy) << 12)
|
||||
+ (((int) this.recordDelim) << 16)
|
||||
+ (this.encloseRequired ? 0xFEFE : 0x7070);
|
||||
}
|
||||
|
||||
@Override
|
||||
/**
|
||||
* @return true if this delimiter set is the same as another set of
|
||||
* delimiters.
|
||||
*/
|
||||
public boolean equals(Object other) {
|
||||
if (null == other) {
|
||||
return false;
|
||||
} else if (!other.getClass().equals(getClass())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DelimiterSet set = (DelimiterSet) other;
|
||||
return this.fieldDelim == set.fieldDelim
|
||||
&& this.recordDelim == set.recordDelim
|
||||
&& this.escapedBy == set.escapedBy
|
||||
&& this.enclosedBy == set.enclosedBy
|
||||
&& this.encloseRequired == set.encloseRequired;
|
||||
}
|
||||
|
||||
@Override
|
||||
/**
|
||||
* @return a new copy of this same set of delimiters.
|
||||
*/
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to clone() but does not throw spurious exceptions.
|
||||
* @return a new copy of this same set of delimiters.
|
||||
*/
|
||||
public DelimiterSet copy() {
|
||||
try {
|
||||
return (DelimiterSet) clone();
|
||||
} catch (CloneNotSupportedException cnse) {
|
||||
// Should never happen for DelimiterSet.
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Static delimiter sets for the commonly-used delimiter arrangements.
|
||||
|
||||
public static final DelimiterSet DEFAULT_DELIMITERS;
|
||||
public static final DelimiterSet HIVE_DELIMITERS;
|
||||
public static final DelimiterSet MYSQL_DELIMITERS;
|
||||
|
||||
static {
|
||||
DEFAULT_DELIMITERS = new DelimiterSet(',', '\n', NULL_CHAR, NULL_CHAR,
|
||||
false);
|
||||
MYSQL_DELIMITERS = new DelimiterSet(',', '\n', '\'', '\\', false);
|
||||
HIVE_DELIMITERS = new DelimiterSet('\001', '\n',
|
||||
NULL_CHAR, NULL_CHAR, false);
|
||||
}
|
||||
}
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
package com.cloudera.sqoop.lib;
|
||||
|
||||
|
||||
/**
|
||||
* Static helper class that will help format data with quotes and escape chars.
|
||||
*/
|
||||
@ -35,28 +34,25 @@ private FieldFormatter() { }
|
||||
* The field is enclosed only if:
|
||||
* enclose != '\000', and:
|
||||
* encloseRequired is true, or
|
||||
* one of the characters in the mustEscapeFor list is present
|
||||
* in the string.
|
||||
* one of the fields-terminated-by or lines-terminated-by characters is
|
||||
* present in the string.
|
||||
*
|
||||
* Escaping is not performed if the escape char is '\000'.
|
||||
*
|
||||
* @param str - The user's string to escape and enclose
|
||||
* @param escape - What string to use as the escape sequence. If "" or null,
|
||||
* then don't escape.
|
||||
* @param enclose - The string to use to enclose str e.g. "quoted". If "" or
|
||||
* null, then don't enclose.
|
||||
* @param mustEncloseFor - A list of characters; if one is present in 'str',
|
||||
* then str must be enclosed.
|
||||
* @param encloseRequired - If true, then always enclose, regardless of
|
||||
* mustEscapeFor.
|
||||
* @param delimiters - The DelimiterSet to use identifying the escape and
|
||||
* enclose semantics. If the specified escape or enclose characters are
|
||||
* '\000', those operations are not performed.
|
||||
* @return the escaped, enclosed version of 'str'.
|
||||
*/
|
||||
public static String escapeAndEnclose(String str, String escape,
|
||||
String enclose, char [] mustEncloseFor, boolean encloseRequired) {
|
||||
public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
|
||||
|
||||
char escape = delimiters.getEscapedBy();
|
||||
char enclose = delimiters.getEnclosedBy();
|
||||
boolean encloseRequired = delimiters.isEncloseRequired();
|
||||
|
||||
// true if we can use an escape character.
|
||||
boolean escapingLegal = (null != escape
|
||||
&& escape.length() > 0 && !escape.equals("\000"));
|
||||
boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
|
||||
String withEscapes;
|
||||
|
||||
if (null == str) {
|
||||
@ -65,13 +61,13 @@ public static String escapeAndEnclose(String str, String escape,
|
||||
|
||||
if (escapingLegal) {
|
||||
// escaping is legal. Escape any instances of the escape char itself.
|
||||
withEscapes = str.replace(escape, escape + escape);
|
||||
withEscapes = str.replace("" + escape, "" + escape + escape);
|
||||
} else {
|
||||
// no need to double-escape
|
||||
withEscapes = str;
|
||||
}
|
||||
|
||||
if (null == enclose || enclose.length() == 0 || enclose.equals("\000")) {
|
||||
if (DelimiterSet.NULL_CHAR == enclose) {
|
||||
// The enclose-with character was left unset, so we can't enclose items.
|
||||
// We're done.
|
||||
return withEscapes;
|
||||
@ -80,12 +76,15 @@ public static String escapeAndEnclose(String str, String escape,
|
||||
// if we have an enclosing character, and escaping is legal, then the
|
||||
// encloser must always be escaped.
|
||||
if (escapingLegal) {
|
||||
withEscapes = withEscapes.replace(enclose, escape + enclose);
|
||||
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
|
||||
}
|
||||
|
||||
boolean actuallyDoEnclose = encloseRequired;
|
||||
if (!actuallyDoEnclose && mustEncloseFor != null) {
|
||||
// check if the string requires enclosing
|
||||
if (!actuallyDoEnclose) {
|
||||
// check if the string requires enclosing.
|
||||
char [] mustEncloseFor = new char[2];
|
||||
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
|
||||
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
|
||||
for (char reason : mustEncloseFor) {
|
||||
if (str.indexOf(reason) != -1) {
|
||||
actuallyDoEnclose = true;
|
||||
@ -95,7 +94,7 @@ public static String escapeAndEnclose(String str, String escape,
|
||||
}
|
||||
|
||||
if (actuallyDoEnclose) {
|
||||
return enclose + withEscapes + enclose;
|
||||
return "" + enclose + withEscapes + enclose;
|
||||
} else {
|
||||
return withEscapes;
|
||||
}
|
||||
|
@ -34,6 +34,7 @@
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import com.cloudera.sqoop.io.LobFile;
|
||||
import com.cloudera.sqoop.util.TaskId;
|
||||
|
||||
/**
|
||||
* Contains a set of methods which can read db columns from a ResultSet into
|
||||
|
@ -86,21 +86,11 @@ public ParseError(final Throwable cause) {
|
||||
}
|
||||
}
|
||||
|
||||
private char fieldDelim;
|
||||
private char recordDelim;
|
||||
private char enclosingChar;
|
||||
private char escapeChar;
|
||||
private boolean enclosingRequired;
|
||||
private DelimiterSet delimiters;
|
||||
private ArrayList<String> outputs;
|
||||
|
||||
public RecordParser(final char field, final char record, final char enclose,
|
||||
final char escape, final boolean mustEnclose) {
|
||||
this.fieldDelim = field;
|
||||
this.recordDelim = record;
|
||||
this.enclosingChar = enclose;
|
||||
this.escapeChar = escape;
|
||||
this.enclosingRequired = mustEnclose;
|
||||
|
||||
public RecordParser(final DelimiterSet delimitersIn) {
|
||||
this.delimiters = delimitersIn.copy();
|
||||
this.outputs = new ArrayList<String>();
|
||||
}
|
||||
|
||||
@ -215,13 +205,19 @@ record sep halts processing.
|
||||
add character literal to current string, return to UNENCLOSED_FIELD
|
||||
*/
|
||||
|
||||
char curChar = '\000';
|
||||
char curChar = DelimiterSet.NULL_CHAR;
|
||||
ParseState state = ParseState.FIELD_START;
|
||||
int len = input.length();
|
||||
StringBuilder sb = null;
|
||||
|
||||
outputs.clear();
|
||||
|
||||
char enclosingChar = delimiters.getEnclosedBy();
|
||||
char fieldDelim = delimiters.getFieldsTerminatedBy();
|
||||
char recordDelim = delimiters.getLinesTerminatedBy();
|
||||
char escapeChar = delimiters.getEscapedBy();
|
||||
boolean enclosingRequired = delimiters.isEncloseRequired();
|
||||
|
||||
for (int pos = 0; pos < len; pos++) {
|
||||
curChar = input.get();
|
||||
switch (state) {
|
||||
@ -233,14 +229,14 @@ record sep halts processing.
|
||||
}
|
||||
|
||||
sb = new StringBuilder();
|
||||
if (this.enclosingChar == curChar) {
|
||||
if (enclosingChar == curChar) {
|
||||
// got an opening encloser.
|
||||
state = ParseState.ENCLOSED_FIELD;
|
||||
} else if (this.escapeChar == curChar) {
|
||||
} else if (escapeChar == curChar) {
|
||||
state = ParseState.UNENCLOSED_ESCAPE;
|
||||
} else if (this.fieldDelim == curChar) {
|
||||
} else if (fieldDelim == curChar) {
|
||||
// we have a zero-length field. This is a no-op.
|
||||
} else if (this.recordDelim == curChar) {
|
||||
} else if (recordDelim == curChar) {
|
||||
// we have a zero-length field, that ends processing.
|
||||
pos = len;
|
||||
} else {
|
||||
@ -248,7 +244,7 @@ record sep halts processing.
|
||||
state = ParseState.UNENCLOSED_FIELD;
|
||||
sb.append(curChar);
|
||||
|
||||
if (this.enclosingRequired) {
|
||||
if (enclosingRequired) {
|
||||
throw new ParseError(
|
||||
"Opening field-encloser expected at position " + pos);
|
||||
}
|
||||
@ -257,10 +253,10 @@ record sep halts processing.
|
||||
break;
|
||||
|
||||
case ENCLOSED_FIELD:
|
||||
if (this.escapeChar == curChar) {
|
||||
if (escapeChar == curChar) {
|
||||
// the next character is escaped. Treat it literally.
|
||||
state = ParseState.ENCLOSED_ESCAPE;
|
||||
} else if (this.enclosingChar == curChar) {
|
||||
} else if (enclosingChar == curChar) {
|
||||
// we're at the end of the enclosing field. Expect an EOF or EOR char.
|
||||
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
|
||||
} else {
|
||||
@ -272,13 +268,13 @@ record sep halts processing.
|
||||
break;
|
||||
|
||||
case UNENCLOSED_FIELD:
|
||||
if (this.escapeChar == curChar) {
|
||||
if (escapeChar == curChar) {
|
||||
// the next character is escaped. Treat it literally.
|
||||
state = ParseState.UNENCLOSED_ESCAPE;
|
||||
} else if (this.fieldDelim == curChar) {
|
||||
} else if (fieldDelim == curChar) {
|
||||
// we're at the end of this field; may be the start of another one.
|
||||
state = ParseState.FIELD_START;
|
||||
} else if (this.recordDelim == curChar) {
|
||||
} else if (recordDelim == curChar) {
|
||||
pos = len; // terminate processing immediately.
|
||||
} else {
|
||||
// this is a regular char. Add to the current field string,
|
||||
@ -298,10 +294,10 @@ record sep halts processing.
|
||||
case ENCLOSED_EXPECT_DELIMITER:
|
||||
// We were in an enclosed field, but got the final encloser. Now we
|
||||
// expect either an end-of-field or an end-of-record.
|
||||
if (this.fieldDelim == curChar) {
|
||||
if (fieldDelim == curChar) {
|
||||
// end of one field is the beginning of the next.
|
||||
state = ParseState.FIELD_START;
|
||||
} else if (this.recordDelim == curChar) {
|
||||
} else if (recordDelim == curChar) {
|
||||
// stop processing.
|
||||
pos = len;
|
||||
} else {
|
||||
@ -323,7 +319,7 @@ record sep halts processing.
|
||||
}
|
||||
}
|
||||
|
||||
if (state == ParseState.FIELD_START && curChar == this.fieldDelim) {
|
||||
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
|
||||
// we hit an EOF/EOR as the last legal character and we need to mark
|
||||
// that string as recorded. This if block is outside the for-loop since
|
||||
// we don't have a physical 'epsilon' token in our string.
|
||||
@ -342,19 +338,17 @@ record sep halts processing.
|
||||
return outputs;
|
||||
}
|
||||
|
||||
|
||||
public boolean isEnclosingRequired() {
|
||||
return enclosingRequired;
|
||||
return delimiters.isEncloseRequired();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "RecordParser[" + fieldDelim + ',' + recordDelim + ','
|
||||
+ enclosingChar + ',' + escapeChar + ',' + enclosingRequired + "]";
|
||||
return "RecordParser[" + delimiters.toString() + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.toString().hashCode();
|
||||
return this.delimiters.hashCode();
|
||||
}
|
||||
}
|
||||
|
@ -31,22 +31,41 @@
|
||||
/**
|
||||
* Interface implemented by the classes generated by sqoop's orm.ClassWriter.
|
||||
*/
|
||||
public interface SqoopRecord extends Cloneable, DBWritable, Writable {
|
||||
void parse(CharSequence s) throws RecordParser.ParseError;
|
||||
void parse(Text s) throws RecordParser.ParseError;
|
||||
void parse(byte [] s) throws RecordParser.ParseError;
|
||||
void parse(char [] s) throws RecordParser.ParseError;
|
||||
void parse(ByteBuffer s) throws RecordParser.ParseError;
|
||||
void parse(CharBuffer s) throws RecordParser.ParseError;
|
||||
void loadLargeObjects(LargeObjectLoader objLoader)
|
||||
public abstract class SqoopRecord implements Cloneable, DBWritable, Writable {
|
||||
|
||||
public SqoopRecord() {
|
||||
}
|
||||
|
||||
public abstract void parse(CharSequence s) throws RecordParser.ParseError;
|
||||
public abstract void parse(Text s) throws RecordParser.ParseError;
|
||||
public abstract void parse(byte [] s) throws RecordParser.ParseError;
|
||||
public abstract void parse(char [] s) throws RecordParser.ParseError;
|
||||
public abstract void parse(ByteBuffer s) throws RecordParser.ParseError;
|
||||
public abstract void parse(CharBuffer s) throws RecordParser.ParseError;
|
||||
public abstract void loadLargeObjects(LargeObjectLoader objLoader)
|
||||
throws SQLException, IOException, InterruptedException;
|
||||
Object clone() throws CloneNotSupportedException;
|
||||
|
||||
/**
|
||||
* Inserts the data in this object into the PreparedStatement, starting
|
||||
* at parameter 'offset'.
|
||||
* @return the number of fields written to the statement.
|
||||
*/
|
||||
int write(PreparedStatement stmt, int offset) throws SQLException;
|
||||
public abstract int write(PreparedStatement stmt, int offset)
|
||||
throws SQLException;
|
||||
|
||||
public abstract String toString(DelimiterSet delimiters);
|
||||
|
||||
@Override
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an integer specifying which API format version the
|
||||
* generated class conforms to. Used by internal APIs for backwards
|
||||
* compatibility.
|
||||
* @return the API version this class was generated against.
|
||||
*/
|
||||
public abstract int getClassFormatVersion();
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import static com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
|
||||
import com.cloudera.sqoop.shims.HadoopShim;
|
||||
import com.cloudera.sqoop.util.DirectImportUtils;
|
||||
|
||||
@ -76,10 +77,10 @@ private MySQLUtils() {
|
||||
* escape: \\
|
||||
*/
|
||||
public static boolean outputDelimsAreMySQL(Configuration conf) {
|
||||
return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, '\000')
|
||||
&& '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, '\000')
|
||||
&& '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, '\000')
|
||||
&& '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, '\000')
|
||||
return ',' == (char) conf.getInt(OUTPUT_FIELD_DELIM_KEY, NULL_CHAR)
|
||||
&& '\n' == (char) conf.getInt(OUTPUT_RECORD_DELIM_KEY, NULL_CHAR)
|
||||
&& '\'' == (char) conf.getInt(OUTPUT_ENCLOSED_BY_KEY, NULL_CHAR)
|
||||
&& '\\' == (char) conf.getInt(OUTPUT_ESCAPED_BY_KEY, NULL_CHAR)
|
||||
&& !conf.getBoolean(OUTPUT_ENCLOSE_REQUIRED_KEY, false);
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import com.cloudera.sqoop.lib.DelimiterSet;
|
||||
import com.cloudera.sqoop.lib.FieldFormatter;
|
||||
import com.cloudera.sqoop.lib.RecordParser;
|
||||
import com.cloudera.sqoop.manager.MySQLUtils;
|
||||
@ -194,9 +195,7 @@ private static class ReparsingStreamThread extends ErrorableThread {
|
||||
|
||||
static {
|
||||
// build a record parser for mysqldump's format
|
||||
MYSQLDUMP_PARSER = new RecordParser(MYSQL_FIELD_DELIM,
|
||||
MYSQL_RECORD_DELIM, MYSQL_ENCLOSE_CHAR, MYSQL_ESCAPE_CHAR,
|
||||
MYSQL_ENCLOSE_REQUIRED);
|
||||
MYSQLDUMP_PARSER = new RecordParser(DelimiterSet.MYSQL_DELIMITERS);
|
||||
}
|
||||
|
||||
public void run() {
|
||||
@ -205,22 +204,30 @@ public void run() {
|
||||
try {
|
||||
r = new BufferedReader(new InputStreamReader(this.stream));
|
||||
|
||||
// Configure the output with the user's delimiters.
|
||||
char outputFieldDelim = (char) conf.getInt(
|
||||
MySQLUtils.OUTPUT_FIELD_DELIM_KEY, '\000');
|
||||
MySQLUtils.OUTPUT_FIELD_DELIM_KEY,
|
||||
DelimiterSet.NULL_CHAR);
|
||||
String outputFieldDelimStr = "" + outputFieldDelim;
|
||||
char outputRecordDelim = (char) conf.getInt(
|
||||
MySQLUtils.OUTPUT_RECORD_DELIM_KEY, '\000');
|
||||
MySQLUtils.OUTPUT_RECORD_DELIM_KEY,
|
||||
DelimiterSet.NULL_CHAR);
|
||||
String outputRecordDelimStr = "" + outputRecordDelim;
|
||||
char outputEnclose = (char) conf.getInt(
|
||||
MySQLUtils.OUTPUT_ENCLOSED_BY_KEY,
|
||||
'\000');
|
||||
String outputEncloseStr = "" + outputEnclose;
|
||||
DelimiterSet.NULL_CHAR);
|
||||
char outputEscape = (char) conf.getInt(
|
||||
MySQLUtils.OUTPUT_ESCAPED_BY_KEY, '\000');
|
||||
String outputEscapeStr = "" + outputEscape;
|
||||
MySQLUtils.OUTPUT_ESCAPED_BY_KEY,
|
||||
DelimiterSet.NULL_CHAR);
|
||||
boolean outputEncloseRequired = conf.getBoolean(
|
||||
MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY, false);
|
||||
char [] encloseFor = { outputFieldDelim, outputRecordDelim };
|
||||
|
||||
DelimiterSet delimiters = new DelimiterSet(
|
||||
outputFieldDelim,
|
||||
outputRecordDelim,
|
||||
outputEnclose,
|
||||
outputEscape,
|
||||
outputEncloseRequired);
|
||||
|
||||
// Actually do the read/write transfer loop here.
|
||||
int preambleLen = -1; // set to this for "undefined"
|
||||
@ -268,8 +275,7 @@ public void run() {
|
||||
}
|
||||
|
||||
String fieldStr = FieldFormatter.escapeAndEnclose(field,
|
||||
outputEscapeStr, outputEncloseStr,
|
||||
encloseFor, outputEncloseRequired);
|
||||
delimiters);
|
||||
context.write(fieldStr, null);
|
||||
recordLen += fieldStr.length();
|
||||
}
|
||||
|
@ -32,13 +32,13 @@
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
|
||||
import com.cloudera.sqoop.lib.TaskId;
|
||||
import com.cloudera.sqoop.manager.MySQLUtils;
|
||||
import com.cloudera.sqoop.shims.HadoopShim;
|
||||
import com.cloudera.sqoop.util.AsyncSink;
|
||||
import com.cloudera.sqoop.util.JdbcUrl;
|
||||
import com.cloudera.sqoop.util.LoggingAsyncSink;
|
||||
import com.cloudera.sqoop.util.NullAsyncSink;
|
||||
import com.cloudera.sqoop.util.TaskId;
|
||||
|
||||
/**
|
||||
* Mapper that starts a 'mysqlimport' process and uses that to export rows from
|
||||
|
@ -22,6 +22,7 @@
|
||||
import com.cloudera.sqoop.SqoopOptions;
|
||||
import com.cloudera.sqoop.manager.ConnManager;
|
||||
import com.cloudera.sqoop.lib.BigDecimalSerializer;
|
||||
import com.cloudera.sqoop.lib.DelimiterSet;
|
||||
import com.cloudera.sqoop.lib.FieldFormatter;
|
||||
import com.cloudera.sqoop.lib.JdbcWritableBridge;
|
||||
import com.cloudera.sqoop.lib.LargeObjectLoader;
|
||||
@ -101,9 +102,11 @@ public class ClassWriter {
|
||||
* which version of the ClassWriter's output format was used to generate the
|
||||
* class.
|
||||
*
|
||||
* If the way that we generate classes, bump this number.
|
||||
* If the way that we generate classes changes, bump this number.
|
||||
* This number is retrieved by the SqoopRecord.getClassFormatVersion()
|
||||
* method.
|
||||
*/
|
||||
public static final int CLASS_WRITER_VERSION = 2;
|
||||
public static final int CLASS_WRITER_VERSION = 3;
|
||||
|
||||
private SqoopOptions options;
|
||||
private ConnManager connManager;
|
||||
@ -642,31 +645,20 @@ private void generateCloneMethod(Map<String, Integer> columnTypes,
|
||||
private void generateToString(Map<String, Integer> columnTypes,
|
||||
String [] colNames, StringBuilder sb) {
|
||||
|
||||
// Embed the delimiters into the class, as characters...
|
||||
sb.append(" private static final char __OUTPUT_FIELD_DELIM_CHAR = "
|
||||
+ (int)options.getOutputFieldDelim() + ";\n");
|
||||
sb.append(" private static final char __OUTPUT_RECORD_DELIM_CHAR = "
|
||||
+ (int)options.getOutputRecordDelim() + ";\n");
|
||||
// Save the delimiters to the class.
|
||||
sb.append(" private final DelimiterSet __outputDelimiters = ");
|
||||
sb.append(options.getOutputDelimiters().formatConstructor() + ";\n");
|
||||
|
||||
// as strings...
|
||||
sb.append(" private static final String __OUTPUT_FIELD_DELIM = "
|
||||
+ "\"\" + (char) " + (int) options.getOutputFieldDelim() + ";\n");
|
||||
sb.append(" private static final String __OUTPUT_RECORD_DELIM = "
|
||||
+ "\"\" + (char) " + (int) options.getOutputRecordDelim() + ";\n");
|
||||
sb.append(" private static final String __OUTPUT_ENCLOSED_BY = "
|
||||
+ "\"\" + (char) " + (int) options.getOutputEnclosedBy() + ";\n");
|
||||
sb.append(" private static final String __OUTPUT_ESCAPED_BY = "
|
||||
+ "\"\" + (char) " + (int) options.getOutputEscapedBy() + ";\n");
|
||||
|
||||
// and some more options.
|
||||
sb.append(" private static final boolean __OUTPUT_ENCLOSE_REQUIRED = "
|
||||
+ options.isOutputEncloseRequired() + ";\n");
|
||||
sb.append(" private static final char [] __OUTPUT_DELIMITER_LIST = { "
|
||||
+ "__OUTPUT_FIELD_DELIM_CHAR, __OUTPUT_RECORD_DELIM_CHAR };\n\n");
|
||||
|
||||
// The actual toString() method itself follows.
|
||||
// The default toString() method itself follows. This just calls
|
||||
// the delimiter-specific toString() with the default delimiters.
|
||||
sb.append(" public String toString() {\n");
|
||||
sb.append(" return toString(__outputDelimiters);\n");
|
||||
sb.append(" }\n");
|
||||
|
||||
// This toString() variant, though, accepts delimiters as arguments.
|
||||
sb.append(" public String toString(DelimiterSet delimiters) {\n");
|
||||
sb.append(" StringBuilder __sb = new StringBuilder();\n");
|
||||
sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n");
|
||||
|
||||
boolean first = true;
|
||||
for (String col : colNames) {
|
||||
@ -679,7 +671,7 @@ private void generateToString(Map<String, Integer> columnTypes,
|
||||
|
||||
if (!first) {
|
||||
// print inter-field tokens.
|
||||
sb.append(" __sb.append(__OUTPUT_FIELD_DELIM);\n");
|
||||
sb.append(" __sb.append(fieldDelim);\n");
|
||||
}
|
||||
|
||||
first = false;
|
||||
@ -691,12 +683,10 @@ private void generateToString(Map<String, Integer> columnTypes,
|
||||
}
|
||||
|
||||
sb.append(" __sb.append(FieldFormatter.escapeAndEnclose(" + stringExpr
|
||||
+ ", __OUTPUT_ESCAPED_BY, __OUTPUT_ENCLOSED_BY, "
|
||||
+ "__OUTPUT_DELIMITER_LIST, __OUTPUT_ENCLOSE_REQUIRED));\n");
|
||||
|
||||
+ ", delimiters));\n");
|
||||
}
|
||||
|
||||
sb.append(" __sb.append(__OUTPUT_RECORD_DELIM);\n");
|
||||
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n");
|
||||
sb.append(" return __sb.toString();\n");
|
||||
sb.append(" }\n");
|
||||
}
|
||||
@ -711,11 +701,7 @@ private void generateParseMethod(String typ, StringBuilder sb) {
|
||||
sb.append(" public void parse(" + typ + " __record) "
|
||||
+ "throws RecordParser.ParseError {\n");
|
||||
sb.append(" if (null == this.__parser) {\n");
|
||||
sb.append(" this.__parser = new RecordParser("
|
||||
+ "__INPUT_FIELD_DELIM_CHAR, ");
|
||||
sb.append("__INPUT_RECORD_DELIM_CHAR, __INPUT_ENCLOSED_BY_CHAR, "
|
||||
+ "__INPUT_ESCAPED_BY_CHAR, ");
|
||||
sb.append("__INPUT_ENCLOSE_REQUIRED);\n");
|
||||
sb.append(" this.__parser = new RecordParser(__inputDelimiters);\n");
|
||||
sb.append(" }\n");
|
||||
sb.append(" List<String> __fields = "
|
||||
+ "this.__parser.parseRecord(__record);\n");
|
||||
@ -795,17 +781,8 @@ private void generateParser(Map<String, Integer> columnTypes,
|
||||
// records. Note that these can differ from the delims to use as output
|
||||
// via toString(), if the user wants to use this class to convert one
|
||||
// format to another.
|
||||
sb.append(" private static final char __INPUT_FIELD_DELIM_CHAR = "
|
||||
+ (int)options.getInputFieldDelim() + ";\n");
|
||||
sb.append(" private static final char __INPUT_RECORD_DELIM_CHAR = "
|
||||
+ (int)options.getInputRecordDelim() + ";\n");
|
||||
sb.append(" private static final char __INPUT_ENCLOSED_BY_CHAR = "
|
||||
+ (int)options.getInputEnclosedBy() + ";\n");
|
||||
sb.append(" private static final char __INPUT_ESCAPED_BY_CHAR = "
|
||||
+ (int)options.getInputEscapedBy() + ";\n");
|
||||
sb.append(" private static final boolean __INPUT_ENCLOSE_REQUIRED = "
|
||||
+ options.isInputEncloseRequired() + ";\n");
|
||||
|
||||
sb.append(" private final DelimiterSet __inputDelimiters = ");
|
||||
sb.append(options.getInputDelimiters().formatConstructor() + ";\n");
|
||||
|
||||
// The parser object which will do the heavy lifting for field splitting.
|
||||
sb.append(" private RecordParser __parser;\n");
|
||||
@ -976,6 +953,7 @@ public StringBuilder generateClassForColumns(Map<String, Integer> columnTypes,
|
||||
sb.append("import org.apache.hadoop.io.Writable;\n");
|
||||
sb.append("import org.apache.hadoop.mapred.lib.db.DBWritable;\n");
|
||||
sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n");
|
||||
sb.append("import " + DelimiterSet.class.getCanonicalName() + ";\n");
|
||||
sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n");
|
||||
sb.append("import " + RecordParser.class.getCanonicalName() + ";\n");
|
||||
sb.append("import " + BlobRef.class.getCanonicalName() + ";\n");
|
||||
@ -999,10 +977,12 @@ public StringBuilder generateClassForColumns(Map<String, Integer> columnTypes,
|
||||
sb.append("\n");
|
||||
|
||||
String className = tableNameInfo.getShortClassForTable(tableName);
|
||||
sb.append("public class " + className
|
||||
+ " implements DBWritable, SqoopRecord, Writable {\n");
|
||||
sb.append(" public static final int PROTOCOL_VERSION = "
|
||||
sb.append("public class " + className + " extends SqoopRecord "
|
||||
+ " implements DBWritable, Writable {\n");
|
||||
sb.append(" private final int PROTOCOL_VERSION = "
|
||||
+ CLASS_WRITER_VERSION + ";\n");
|
||||
sb.append(
|
||||
" public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
|
||||
sb.append(" protected ResultSet __cur_result_set;\n");
|
||||
generateFields(columnTypes, colNames, sb);
|
||||
generateDbRead(columnTypes, colNames, sb);
|
||||
|
@ -36,6 +36,7 @@
|
||||
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
|
||||
import com.cloudera.sqoop.cli.RelatedOptions;
|
||||
import com.cloudera.sqoop.cli.ToolOptions;
|
||||
import com.cloudera.sqoop.lib.DelimiterSet;
|
||||
import com.cloudera.sqoop.manager.ConnManager;
|
||||
import com.cloudera.sqoop.shims.ShimLoader;
|
||||
|
||||
@ -603,19 +604,15 @@ protected void validateOutputFormatOptions(SqoopOptions options)
|
||||
// straight to Hive. Use Hive-style delimiters.
|
||||
LOG.info("Using Hive-specific delimiters for output. You can override");
|
||||
LOG.info("delimiters with --fields-terminated-by, etc.");
|
||||
options.setFieldsTerminatedBy((char) 0x1); // ^A
|
||||
options.setLinesTerminatedBy('\n');
|
||||
options.setEnclosedBy('\000'); // no enclosing in Hive.
|
||||
options.setEscapedBy('\000'); // no escaping in Hive.
|
||||
options.setOutputEncloseRequired(false);
|
||||
options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS);
|
||||
}
|
||||
|
||||
if (options.getOutputEscapedBy() != '\000') {
|
||||
if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) {
|
||||
LOG.warn("Hive does not support escape characters in fields;");
|
||||
LOG.warn("parse errors in Hive may result from using --escaped-by.");
|
||||
}
|
||||
|
||||
if (options.getOutputEnclosedBy() != '\000') {
|
||||
if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) {
|
||||
LOG.warn("Hive does not support quoted strings; parse errors");
|
||||
LOG.warn("in Hive may result from using --enclosed-by.");
|
||||
}
|
||||
@ -627,7 +624,5 @@ protected void validateHiveOptions(SqoopOptions options) {
|
||||
// is reserved for future constraints on Hive options.
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.cloudera.sqoop.lib;
|
||||
package com.cloudera.sqoop.util;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import com.cloudera.sqoop.lib.DelimiterSet;
|
||||
import com.cloudera.sqoop.tool.ImportTool;
|
||||
|
||||
|
||||
@ -109,19 +110,19 @@ public void testUnknownEscape2() throws Exception {
|
||||
}
|
||||
|
||||
public void testEscapeNul1() throws Exception {
|
||||
assertEquals('\000', SqoopOptions.toChar("\\0"));
|
||||
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0"));
|
||||
}
|
||||
|
||||
public void testEscapeNul2() throws Exception {
|
||||
assertEquals('\000', SqoopOptions.toChar("\\00"));
|
||||
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\00"));
|
||||
}
|
||||
|
||||
public void testEscapeNul3() throws Exception {
|
||||
assertEquals('\000', SqoopOptions.toChar("\\0000"));
|
||||
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0000"));
|
||||
}
|
||||
|
||||
public void testEscapeNul4() throws Exception {
|
||||
assertEquals('\000', SqoopOptions.toChar("\\0x0"));
|
||||
assertEquals(DelimiterSet.NULL_CHAR, SqoopOptions.toChar("\\0x0"));
|
||||
}
|
||||
|
||||
public void testOctalChar1() throws Exception {
|
||||
|
@ -27,86 +27,59 @@
|
||||
public class TestFieldFormatter extends TestCase {
|
||||
|
||||
public void testAllEmpty() {
|
||||
char [] chars = new char[0];
|
||||
String result = FieldFormatter.escapeAndEnclose("", "", "", chars, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("",
|
||||
new DelimiterSet(DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR,
|
||||
DelimiterSet.NULL_CHAR, DelimiterSet.NULL_CHAR, false));
|
||||
assertEquals("", result);
|
||||
}
|
||||
|
||||
public void testNullArgs() {
|
||||
String result = FieldFormatter.escapeAndEnclose("", null, null, null,
|
||||
false);
|
||||
assertEquals("", result);
|
||||
|
||||
char [] encloseFor = { '\"' };
|
||||
assertNull(FieldFormatter.escapeAndEnclose(null, "\\", "\"", encloseFor,
|
||||
false));
|
||||
assertNull(FieldFormatter.escapeAndEnclose(null,
|
||||
new DelimiterSet('\"', DelimiterSet.NULL_CHAR, '\"', '\\', false)));
|
||||
}
|
||||
|
||||
public void testBasicStr() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo", null, null, null,
|
||||
false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo",
|
||||
DelimiterSet.DEFAULT_DELIMITERS);
|
||||
assertEquals("foo", result);
|
||||
}
|
||||
|
||||
public void testEscapeSlash() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\bar", "\\", "\"",
|
||||
null, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("foo\\\\bar", result);
|
||||
}
|
||||
|
||||
public void testMustEnclose() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo", null, "\"",
|
||||
null, true);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo",
|
||||
new DelimiterSet(',', '\n', '\"', DelimiterSet.NULL_CHAR, true));
|
||||
assertEquals("\"foo\"", result);
|
||||
}
|
||||
|
||||
public void testEncloseComma1() {
|
||||
char [] chars = { ',' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
|
||||
chars, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("\"foo,bar\"", result);
|
||||
}
|
||||
|
||||
public void testEncloseComma2() {
|
||||
char [] chars = { '\n', ',' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
|
||||
chars, false);
|
||||
assertEquals("\"foo,bar\"", result);
|
||||
}
|
||||
|
||||
public void testEncloseComma3() {
|
||||
char [] chars = { ',', '\n' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "\"",
|
||||
chars, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar",
|
||||
new DelimiterSet(',', ',', '\"', '\\', false));
|
||||
assertEquals("\"foo,bar\"", result);
|
||||
}
|
||||
|
||||
public void testNoNeedToEnclose() {
|
||||
char [] chars = { ',', '\n' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose(
|
||||
"just another string", "\\", "\"", chars, false);
|
||||
"just another string",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("just another string", result);
|
||||
}
|
||||
|
||||
public void testCannotEnclose1() {
|
||||
char [] chars = { ',', '\n' };
|
||||
public void testCannotEnclose() {
|
||||
// can't enclose because encloser is nul
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar",
|
||||
new DelimiterSet(',', '\n', DelimiterSet.NULL_CHAR, '\\', false));
|
||||
|
||||
// can't enclose because encloser is ""
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", "",
|
||||
chars, false);
|
||||
assertEquals("foo,bar", result);
|
||||
}
|
||||
|
||||
public void testCannotEnclose2() {
|
||||
char [] chars = { ',', '\n' };
|
||||
|
||||
// can't enclose because encloser is null
|
||||
String result = FieldFormatter.escapeAndEnclose("foo,bar", "\\", null,
|
||||
chars, false);
|
||||
assertEquals("foo,bar", result);
|
||||
}
|
||||
|
||||
@ -114,48 +87,44 @@ public void testEmptyCharToEscapeString() {
|
||||
// test what happens when the escape char is null. It should encode the
|
||||
// null char.
|
||||
|
||||
char nul = '\000';
|
||||
char nul = DelimiterSet.NULL_CHAR;
|
||||
String s = "" + nul;
|
||||
assertEquals("\000", s);
|
||||
}
|
||||
|
||||
public void testEscapeCentralQuote() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"bar", "\\", "\"",
|
||||
null, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("foo\\\"bar", result);
|
||||
}
|
||||
|
||||
public void testEscapeMultiCentralQuote() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"\"bar", "\\", "\"",
|
||||
null, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"\"bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("foo\\\"\\\"bar", result);
|
||||
}
|
||||
|
||||
public void testDoubleEscape() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\\"bar", "\\", "\"",
|
||||
null, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\\"bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("foo\\\\\\\"bar", result);
|
||||
}
|
||||
|
||||
public void testReverseEscape() {
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"\\bar", "\\", "\"",
|
||||
null, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\"\\bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("foo\\\"\\\\bar", result);
|
||||
}
|
||||
|
||||
public void testQuotedEncloser() {
|
||||
char [] chars = { ',', '\n' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\",bar", "\\", "\"",
|
||||
chars, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\",bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("\"foo\\\",bar\"", result);
|
||||
}
|
||||
|
||||
public void testQuotedEscape() {
|
||||
char [] chars = { ',', '\n' };
|
||||
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\,bar", "\\", "\"",
|
||||
chars, false);
|
||||
String result = FieldFormatter.escapeAndEnclose("foo\\,bar",
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
assertEquals("\"foo\\\\,bar\"", result);
|
||||
}
|
||||
}
|
||||
|
@ -103,7 +103,8 @@ private List<String> list(String [] items) {
|
||||
public void testEmptyLine() throws RecordParser.ParseError {
|
||||
// an empty line should return no fields.
|
||||
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(""));
|
||||
}
|
||||
@ -111,32 +112,37 @@ public void testEmptyLine() throws RecordParser.ParseError {
|
||||
public void testJustEOR() throws RecordParser.ParseError {
|
||||
// a line with just a newline char should return a single zero-length field.
|
||||
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("\n"));
|
||||
}
|
||||
|
||||
public void testOneField() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the field" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("the field"));
|
||||
}
|
||||
|
||||
public void testOneField2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the field" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("the field\n"));
|
||||
}
|
||||
|
||||
public void testQuotedField1() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the field" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the field\"\n"));
|
||||
}
|
||||
|
||||
public void testQuotedField2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the field" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the field\""));
|
||||
@ -144,7 +150,8 @@ public void testQuotedField2() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField3() throws RecordParser.ParseError {
|
||||
// quoted field containing the end-of-field (EOF) delimiter
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the ,field" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the ,field\""));
|
||||
@ -152,7 +159,8 @@ public void testQuotedField3() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField4() throws RecordParser.ParseError {
|
||||
// quoted field containing multiple end-of-field (EOF) delimiters
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the ,,field" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the ,,field\""));
|
||||
@ -160,7 +168,8 @@ public void testQuotedField4() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField5() throws RecordParser.ParseError {
|
||||
// quoted field containing EOF (end-of-field) and EOR (end-of-record) chars
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the ,\nfield" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the ,\nfield\""));
|
||||
@ -168,7 +177,8 @@ public void testQuotedField5() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField6() throws RecordParser.ParseError {
|
||||
// quoted field containing the end-of-record (EOR) delimiter
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the \nfield" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the \nfield\""));
|
||||
@ -176,7 +186,8 @@ public void testQuotedField6() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField7() throws RecordParser.ParseError {
|
||||
// quoted field containing multiple end-of-record (EOR) delimiters
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the \n\nfield" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the \n\nfield\""));
|
||||
@ -184,7 +195,8 @@ public void testQuotedField7() throws RecordParser.ParseError {
|
||||
|
||||
public void testQuotedField8() throws RecordParser.ParseError {
|
||||
// quoted field containing an escaped quote char
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the \"field" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"the \\\"field\""));
|
||||
@ -192,68 +204,78 @@ public void testQuotedField8() throws RecordParser.ParseError {
|
||||
|
||||
public void testUnquotedEscape1() throws RecordParser.ParseError {
|
||||
// unquoted field containing an escaped end-of-field (EOF) char.
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the ,field" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("the \\,field"));
|
||||
}
|
||||
|
||||
public void testUnquotedEscape2() throws RecordParser.ParseError {
|
||||
// field without quotes with an escaped escape char.
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "the \\field" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("the \\\\field"));
|
||||
}
|
||||
|
||||
public void testTwoFields1() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("field1,field2"));
|
||||
}
|
||||
|
||||
public void testTwoFields2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("field1,field2\n"));
|
||||
}
|
||||
|
||||
public void testTwoFields3() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"field1\",field2\n"));
|
||||
}
|
||||
|
||||
public void testTwoFields4() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("field1,\"field2\"\n"));
|
||||
}
|
||||
|
||||
public void testTwoFields5() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("field1,\"field2\""));
|
||||
}
|
||||
|
||||
public void testRequiredQuotes0() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"field1\",\"field2\"\n"));
|
||||
}
|
||||
|
||||
public void testRequiredQuotes1() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
String [] strings = { "field1", "field2" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("\"field1\",\"field2\""));
|
||||
}
|
||||
|
||||
public void testRequiredQuotes2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
try {
|
||||
parser.parseRecord("\"field1\",field2");
|
||||
fail("Expected parse error for required quotes");
|
||||
@ -263,7 +285,8 @@ public void testRequiredQuotes2() throws RecordParser.ParseError {
|
||||
}
|
||||
|
||||
public void testRequiredQuotes3() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
try {
|
||||
parser.parseRecord("field1,\"field2\"");
|
||||
fail("Expected parse error for required quotes");
|
||||
@ -273,7 +296,8 @@ public void testRequiredQuotes3() throws RecordParser.ParseError {
|
||||
}
|
||||
|
||||
public void testRequiredQuotes4() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
try {
|
||||
parser.parseRecord("field1,\"field2\"\n");
|
||||
fail("Expected parse error for required quotes");
|
||||
@ -283,7 +307,8 @@ public void testRequiredQuotes4() throws RecordParser.ParseError {
|
||||
}
|
||||
|
||||
public void testNull() {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', true);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', true));
|
||||
String input = null;
|
||||
try {
|
||||
parser.parseRecord(input);
|
||||
@ -295,75 +320,87 @@ public void testNull() {
|
||||
|
||||
|
||||
public void testEmptyFields1() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "", ""};
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(","));
|
||||
}
|
||||
|
||||
public void testEmptyFields2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "", "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(",\n"));
|
||||
}
|
||||
|
||||
public void testEmptyFields3() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "", "", "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(",,\n"));
|
||||
}
|
||||
|
||||
public void testEmptyFields4() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "", "foo", "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(",foo,\n"));
|
||||
}
|
||||
|
||||
public void testEmptyFields5() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "", "foo", "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord(",foo,"));
|
||||
}
|
||||
|
||||
public void testEmptyFields6() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "foo", "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("foo,"));
|
||||
}
|
||||
|
||||
public void testTrailingText() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "foo", "bar" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("foo,bar\nbaz"));
|
||||
}
|
||||
|
||||
public void testTrailingText2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("\nbaz"));
|
||||
}
|
||||
|
||||
public void testLeadingEscape() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', '\n', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', '\n', '\"', '\\', false));
|
||||
String [] strings = { "\nbaz" };
|
||||
assertListsEqual(null, list(strings), parser.parseRecord("\\\nbaz"));
|
||||
}
|
||||
|
||||
public void testEofIsEor() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', ',', '\"', '\\', false));
|
||||
String [] strings = { "three", "different", "fields" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("three,different,fields"));
|
||||
}
|
||||
|
||||
public void testEofIsEor2() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', ',', '\"', '\\', false));
|
||||
String [] strings = { "three", "different", "fields" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("three,\"different\",fields"));
|
||||
}
|
||||
|
||||
public void testRepeatedParse() throws RecordParser.ParseError {
|
||||
RecordParser parser = new RecordParser(',', ',', '\"', '\\', false);
|
||||
RecordParser parser = new RecordParser(
|
||||
new DelimiterSet(',', ',', '\"', '\\', false));
|
||||
String [] strings = { "three", "different", "fields" };
|
||||
assertListsEqual(null, list(strings),
|
||||
parser.parseRecord("three,\"different\",fields"));
|
||||
|
@ -45,22 +45,22 @@
|
||||
not need to be rigidly upheld. -->
|
||||
<Match>
|
||||
<!-- Performance warnings are ignored in test code. -->
|
||||
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
|
||||
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
|
||||
<Bug category="PERFORMANCE" />
|
||||
</Match>
|
||||
<Match>
|
||||
<!-- More performance warnings to suppress in tests. -->
|
||||
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
|
||||
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
|
||||
<Bug pattern="SBSC_USE_STRINGBUFFER_CONCATENATION" />
|
||||
</Match>
|
||||
<Match>
|
||||
<!-- Security warnings are ignored in test code. -->
|
||||
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
|
||||
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
|
||||
<Bug category="SECURITY" />
|
||||
</Match>
|
||||
<Match>
|
||||
<!-- Ok to use methods to generate SQL statements in tests. -->
|
||||
<Class name="~org\.apache\.hadoop\.sqoop\..*Test.*" />
|
||||
<Class name="~com\.cloudera\.sqoop\..*Test.*" />
|
||||
<Bug pattern="SQL_PREPARED_STATEMENT_GENERATED_FROM_NONCONSTANT_STRING" />
|
||||
</Match>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user