5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-04 02:52:19 +08:00

SQOOP-187. Allow skipping end-of-record delimiter.

The SqoopRecord.toString() and SqoopRecord.toString(DelimiterSet) methods
always append an end-of-record delimiter. Sqoop uses its own OutputFormat
when rendering these to text files, so that the user's delimiters are
preserved.

Other users could use this OutputFormat when working with SqoopRecord
instances in their own MapReduce code, but SqoopRecord should also work
cleanly with TextOutputFormat when the intent is newline-terminated
records, since TextOutputFormat appends its own newline.

This patch allows users to suppress end-of-record delimiter generation when
formatting records with toString.

(Aaron Kimball via Arvind Prabhakar)

From: Arvind Prabhakar <arvind@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1150025 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:04:32 +00:00
parent 3bbb4a6314
commit 7677d39b79
3 changed files with 66 additions and 4 deletions

View File

@ -55,8 +55,46 @@ public abstract void loadLargeObjects(LargeObjectLoader objLoader)
public abstract int write(PreparedStatement stmt, int offset) public abstract int write(PreparedStatement stmt, int offset)
throws SQLException; throws SQLException;
/**
* Format output data according to the specified delimiters.
*/
public abstract String toString(DelimiterSet delimiters); public abstract String toString(DelimiterSet delimiters);
/**
* Use the default delimiters, but only append an end-of-record delimiter
* if useRecordDelim is true.
*/
public String toString(boolean useRecordDelim) {
  // Generated record classes (1.3.0+) are expected to override this method;
  // this base version only supports the always-delimited form.
  if (!useRecordDelim) {
    // Dropping the end-of-record delimiter needs support from the
    // generated class, so this base implementation refuses the request.
    throw new RuntimeException(
        "toString(useRecordDelim=false) requires a newer SqoopRecord. "
        + "Please regenerate your record class to use this function.");
  }

  // Same result as the pre-existing no-argument toString().
  return toString();
}
/**
* Format the record according to the specified delimiters. An end-of-record
* delimiter is optional, and only used if useRecordDelim is true. For
* use with TextOutputFormat, calling this with useRecordDelim=false may
* make more sense.
*/
public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
  if (!useRecordDelim) {
    // Omitting the end-of-record delimiter needs support from the
    // generated class, so this base implementation refuses the request.
    throw new RuntimeException(
        "toString(delimiters, useRecordDelim=false) requires a newer "
        + "SqoopRecord. Please regenerate your record class to use this "
        + "function.");
  }

  // Record delimiter requested: defer to the delimiter-only variant.
  return toString(delimiters);
}
@Override @Override
public Object clone() throws CloneNotSupportedException { public Object clone() throws CloneNotSupportedException {
return super.clone(); return super.clone();

View File

@ -718,12 +718,29 @@ private void generateToString(Map<String, Integer> columnTypes,
// The default toString() method itself follows. This just calls // The default toString() method itself follows. This just calls
// the delimiter-specific toString() with the default delimiters. // the delimiter-specific toString() with the default delimiters.
// Also appends an end-of-record delimiter to the line.
sb.append(" public String toString() {\n"); sb.append(" public String toString() {\n");
sb.append(" return toString(__outputDelimiters);\n"); sb.append(" return toString(__outputDelimiters, true);\n");
sb.append(" }\n"); sb.append(" }\n");
// This toString() variant, though, accepts delimiters as arguments. // This toString() variant, though, accepts delimiters as arguments.
sb.append(" public String toString(DelimiterSet delimiters) {\n"); sb.append(" public String toString(DelimiterSet delimiters) {\n");
sb.append(" return toString(delimiters, true);\n");
sb.append(" }\n");
// This variant allows the user to specify whether or not an end-of-record
// delimiter should be appended.
sb.append(" public String toString(boolean useRecordDelim) {\n");
sb.append(" return toString(__outputDelimiters, useRecordDelim);\n");
sb.append(" }\n");
// This toString() variant allows the user to specify delimiters, as well
// as whether or not the end-of-record delimiter should be added to the
// string. Use 'false' to do reasonable things with TextOutputFormat,
// which appends its own newline.
sb.append(" public String toString(DelimiterSet delimiters, ");
sb.append("boolean useRecordDelim) {\n");
sb.append(" StringBuilder __sb = new StringBuilder();\n"); sb.append(" StringBuilder __sb = new StringBuilder();\n");
sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n"); sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n");
@ -754,13 +771,13 @@ private void generateToString(Map<String, Integer> columnTypes,
+ ", delimiters));\n"); + ", delimiters));\n");
} }
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); sb.append(" if (useRecordDelim) {\n");
sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n");
sb.append(" }\n");
sb.append(" return __sb.toString();\n"); sb.append(" return __sb.toString();\n");
sb.append(" }\n"); sb.append(" }\n");
} }
/** /**
* Helper method for generateParser(). Writes out the parse() method for one * Helper method for generateParser(). Writes out the parse() method for one
* particular type we support as an input string-ish type. * particular type we support as an input string-ish type.

View File

@ -100,6 +100,13 @@ public void map(LongWritable key, Text val,
out.collect(new Text(userRecord.toString()), NullWritable.get()); out.collect(new Text(userRecord.toString()), NullWritable.get());
if (!userRecord.toString(false).equals(val.toString())) {
// Could not format record w/o end-of-record delimiter.
throw new IOException("Returned string w/o EOR has value ["
+ userRecord.toString(false) + "] when ["
+ val.toString() + "] was expected.");
}
if (!userRecord.toString().equals(val.toString() + "\n")) { if (!userRecord.toString().equals(val.toString() + "\n")) {
// misparsed. // misparsed.
throw new IOException("Returned string has value [" throw new IOException("Returned string has value ["