From 7677d39b79b72d5a4cde627ef08075f14833a1d5 Mon Sep 17 00:00:00 2001 From: Andrew Bayer Date: Fri, 22 Jul 2011 20:04:32 +0000 Subject: [PATCH] SQOOP-187. Allow skipping end-of-record delimiter. The SqoopRecord.toString() and SqoopRecord.toString(DelimiterSet) methods always append an end-of-record delimiter. Sqoop uses its own OutputFormat when rendering these to text files, so that the user's delimiters are preserved. Other users could use this OutputFormat when working with SqoopRecord instances in their own MapReduce code, but it would also be nice to "play nice" with TextOutputFormat in the event that the intent is newline-terminated records. This patch allows users to suppress end-of-record delimiter generation when formatting records with toString. (Aaron Kimball via Arvind Prabhakar) From: Arvind Prabhakar git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1150025 13f79535-47bb-0310-9956-ffa450edef68 --- .../com/cloudera/sqoop/lib/SqoopRecord.java | 38 +++++++++++++++++++ .../com/cloudera/sqoop/orm/ClassWriter.java | 25 ++++++++++-- .../sqoop/testutil/ReparseMapper.java | 7 ++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/src/java/com/cloudera/sqoop/lib/SqoopRecord.java b/src/java/com/cloudera/sqoop/lib/SqoopRecord.java index 507b3f1f..932d6a55 100644 --- a/src/java/com/cloudera/sqoop/lib/SqoopRecord.java +++ b/src/java/com/cloudera/sqoop/lib/SqoopRecord.java @@ -55,8 +55,46 @@ public abstract void loadLargeObjects(LargeObjectLoader objLoader) public abstract int write(PreparedStatement stmt, int offset) throws SQLException; + /** + * Format output data according to the specified delimiters. + */ public abstract String toString(DelimiterSet delimiters); + /** + * Use the default delimiters, but only append an end-of-record delimiter + * if useRecordDelim is true. + */ + public String toString(boolean useRecordDelim) { + // Method body should be overridden by generated classes in 1.3.0+ + if (useRecordDelim) { + // This is the existing functionality. + return toString(); + } else { + // Setting this to false requires behavior in the generated class. + throw new RuntimeException( + "toString(useRecordDelim=false) requires a newer SqoopRecord. " + + "Please regenerate your record class to use this function."); + } + } + + /** + * Format the record according to the specified delimiters. An end-of-record + * delimiter is optional, and only used if useRecordDelim is true. For + * use with TextOutputFormat, calling this with useRecordDelim=false may + * make more sense. + */ + public String toString(DelimiterSet delimiters, boolean useRecordDelim) { + if (useRecordDelim) { + return toString(delimiters); + } else { + // Setting this to false requires behavior in the generated class. + throw new RuntimeException( + "toString(delimiters, useRecordDelim=false) requires a newer " + + "SqoopRecord. Please regenerate your record class to use this " + + "function."); + } + } + @Override public Object clone() throws CloneNotSupportedException { return super.clone(); diff --git a/src/java/com/cloudera/sqoop/orm/ClassWriter.java b/src/java/com/cloudera/sqoop/orm/ClassWriter.java index ef0007c3..a3b8ed90 100644 --- a/src/java/com/cloudera/sqoop/orm/ClassWriter.java +++ b/src/java/com/cloudera/sqoop/orm/ClassWriter.java @@ -718,12 +718,29 @@ private void generateToString(Map columnTypes, // The default toString() method itself follows. This just calls // the delimiter-specific toString() with the default delimiters. + // Also appends an end-of-record delimiter to the line. sb.append(" public String toString() {\n"); - sb.append(" return toString(__outputDelimiters);\n"); + sb.append(" return toString(__outputDelimiters, true);\n"); sb.append(" }\n"); // This toString() variant, though, accepts delimiters as arguments. sb.append(" public String toString(DelimiterSet delimiters) {\n"); + sb.append(" return toString(delimiters, true);\n"); + sb.append(" }\n"); + + // This variant allows the user to specify whether or not an end-of-record + // delimiter should be appended. + sb.append(" public String toString(boolean useRecordDelim) {\n"); + sb.append(" return toString(__outputDelimiters, useRecordDelim);\n"); + sb.append(" }\n"); + + + // This toString() variant allows the user to specify delimiters, as well + // as whether or not the end-of-record delimiter should be added to the + // string. Use 'false' to do reasonable things with TextOutputFormat, + // which appends its own newline. + sb.append(" public String toString(DelimiterSet delimiters, "); + sb.append("boolean useRecordDelim) {\n"); sb.append(" StringBuilder __sb = new StringBuilder();\n"); sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n"); @@ -754,13 +771,13 @@ private void generateToString(Map columnTypes, + ", delimiters));\n"); } - sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); + sb.append(" if (useRecordDelim) {\n"); + sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); + sb.append(" }\n"); sb.append(" return __sb.toString();\n"); sb.append(" }\n"); } - - /** * Helper method for generateParser(). Writes out the parse() method for one * particular type we support as an input string-ish type. diff --git a/src/test/com/cloudera/sqoop/testutil/ReparseMapper.java b/src/test/com/cloudera/sqoop/testutil/ReparseMapper.java index 49340853..3e563a47 100644 --- a/src/test/com/cloudera/sqoop/testutil/ReparseMapper.java +++ b/src/test/com/cloudera/sqoop/testutil/ReparseMapper.java @@ -100,6 +100,13 @@ public void map(LongWritable key, Text val, out.collect(new Text(userRecord.toString()), NullWritable.get()); + if (!userRecord.toString(false).equals(val.toString())) { + // Could not format record w/o end-of-record delimiter. + throw new IOException("Returned string w/o EOR has value [" + + userRecord.toString(false) + "] when [" + + val.toString() + "] was expected."); + } + if (!userRecord.toString().equals(val.toString() + "\n")) { // misparsed. throw new IOException("Returned string has value ["