mirror of
https://github.com/apache/sqoop.git
synced 2025-05-04 03:11:00 +08:00
SQOOP-187. Allow skipping end-of-record delimiter.
The SqoopRecord.toString() and SqoopRecord.toString(DelimiterSet) methods always append an end-of-record delimiter. Sqoop uses its own OutputFormat when rendering these to text files, so that the user's delimiters are preserved. Other users could use this OutputFormat when working with SqoopRecord instances in their own MapReduce code, but it would also be nice to "play nice" with TextOutputFormat in the event that the intent is newline-terminated records. This patch allows users to suppress end-of-record delimiter generation when formatting records with toString. (Aaron Kimball via Arvind Prabhakar) From: Arvind Prabhakar <arvind@cloudera.com> git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1150025 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3bbb4a6314
commit
7677d39b79
@ -55,8 +55,46 @@ public abstract void loadLargeObjects(LargeObjectLoader objLoader)
|
||||
public abstract int write(PreparedStatement stmt, int offset)
|
||||
throws SQLException;
|
||||
|
||||
/**
|
||||
* Format output data according to the specified delimiters.
|
||||
*/
|
||||
public abstract String toString(DelimiterSet delimiters);
|
||||
|
||||
/**
|
||||
* Use the default delimiters, but only append an end-of-record delimiter
|
||||
* if useRecordDelim is true.
|
||||
*/
|
||||
public String toString(boolean useRecordDelim) {
|
||||
// Method body should be overridden by generated classes in 1.3.0+
|
||||
if (useRecordDelim) {
|
||||
// This is the existing functionality.
|
||||
return toString();
|
||||
} else {
|
||||
// Setting this to false requires behavior in the generated class.
|
||||
throw new RuntimeException(
|
||||
"toString(useRecordDelim=false) requires a newer SqoopRecord. "
|
||||
+ "Please regenerate your record class to use this function.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Format the record according to the specified delimiters. An end-of-record
|
||||
* delimiter is optional, and only used if useRecordDelim is true. For
|
||||
* use with TextOutputFormat, calling this with useRecordDelim=false may
|
||||
* make more sense.
|
||||
*/
|
||||
public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
|
||||
if (useRecordDelim) {
|
||||
return toString(delimiters);
|
||||
} else {
|
||||
// Setting this to false requires behavior in the generated class.
|
||||
throw new RuntimeException(
|
||||
"toString(delimiters, useRecordDelim=false) requires a newer "
|
||||
+ "SqoopRecord. Please regenerate your record class to use this "
|
||||
+ "function.");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
return super.clone();
|
||||
|
@ -718,12 +718,29 @@ private void generateToString(Map<String, Integer> columnTypes,
|
||||
|
||||
// The default toString() method itself follows. This just calls
|
||||
// the delimiter-specific toString() with the default delimiters.
|
||||
// Also appends an end-of-record delimiter to the line.
|
||||
sb.append(" public String toString() {\n");
|
||||
sb.append(" return toString(__outputDelimiters);\n");
|
||||
sb.append(" return toString(__outputDelimiters, true);\n");
|
||||
sb.append(" }\n");
|
||||
|
||||
// This toString() variant, though, accepts delimiters as arguments.
|
||||
sb.append(" public String toString(DelimiterSet delimiters) {\n");
|
||||
sb.append(" return toString(delimiters, true);\n");
|
||||
sb.append(" }\n");
|
||||
|
||||
// This variant allows the user to specify whether or not an end-of-record
|
||||
// delimiter should be appended.
|
||||
sb.append(" public String toString(boolean useRecordDelim) {\n");
|
||||
sb.append(" return toString(__outputDelimiters, useRecordDelim);\n");
|
||||
sb.append(" }\n");
|
||||
|
||||
|
||||
// This toString() variant allows the user to specify delimiters, as well
|
||||
// as whether or not the end-of-record delimiter should be added to the
|
||||
// string. Use 'false' to do reasonable things with TextOutputFormat,
|
||||
// which appends its own newline.
|
||||
sb.append(" public String toString(DelimiterSet delimiters, ");
|
||||
sb.append("boolean useRecordDelim) {\n");
|
||||
sb.append(" StringBuilder __sb = new StringBuilder();\n");
|
||||
sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n");
|
||||
|
||||
@ -754,13 +771,13 @@ private void generateToString(Map<String, Integer> columnTypes,
|
||||
+ ", delimiters));\n");
|
||||
}
|
||||
|
||||
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n");
|
||||
sb.append(" if (useRecordDelim) {\n");
|
||||
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n");
|
||||
sb.append(" }\n");
|
||||
sb.append(" return __sb.toString();\n");
|
||||
sb.append(" }\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Helper method for generateParser(). Writes out the parse() method for one
|
||||
* particular type we support as an input string-ish type.
|
||||
|
@ -100,6 +100,13 @@ public void map(LongWritable key, Text val,
|
||||
|
||||
out.collect(new Text(userRecord.toString()), NullWritable.get());
|
||||
|
||||
if (!userRecord.toString(false).equals(val.toString())) {
|
||||
// Could not format record w/o end-of-record delimiter.
|
||||
throw new IOException("Returned string w/o EOR has value ["
|
||||
+ userRecord.toString(false) + "] when ["
|
||||
+ val.toString() + "] was expected.");
|
||||
}
|
||||
|
||||
if (!userRecord.toString().equals(val.toString() + "\n")) {
|
||||
// misparsed.
|
||||
throw new IOException("Returned string has value ["
|
||||
|
Loading…
Reference in New Issue
Block a user