From 7631d293335855fd42a319c1fcb56530c27f78a4 Mon Sep 17 00:00:00 2001 From: Abraham Elmahrek Date: Mon, 12 Jan 2015 21:16:13 -0800 Subject: [PATCH] SQOOP-1936: Sqoop2: Sort by comparing IDF data in shuffle phase (Veena Basavaraj via Abraham Elmahrek) --- .../connector/idf/CSVIntermediateDataFormat.java | 7 +++---- .../connector/idf/IntermediateDataFormat.java | 16 +++++++++++++++- .../idf/JSONIntermediateDataFormat.java | 4 ++++ .../idf/TestCSVIntermediateDataFormat.java | 3 +-- .../org/apache/sqoop/job/io/SqoopWritable.java | 4 ++-- .../apache/sqoop/job/io/TestSqoopWritable.java | 9 --------- 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java index 2af6acd8..4870fae4 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java @@ -58,8 +58,7 @@ public CSVIntermediateDataFormat(Schema schema) { */ @Override public String getCSVTextData() { - // TODO:SQOOP-1936 to enable schema validation after we use compareTo - return this.data; + return super.getData(); } /** @@ -81,14 +80,14 @@ public Object[] getObjectData() { if (csvStringArray == null) { return null; } + Column[] columns = schema.getColumnsArray(); - if (csvStringArray.length != schema.getColumnsArray().length) { + if (csvStringArray.length != columns.length) { throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "The data " + getCSVTextData() + " has the wrong number of fields."); } Object[] objectArray = new Object[csvStringArray.length]; - Column[] columns = schema.getColumnsArray(); for (int i = 0; i < csvStringArray.length; i++) { if (csvStringArray[i].equals(NULL_VALUE) && !columns[i].isNullable()) { throw 
new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005, diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java index 60633201..6f945c2c 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java @@ -44,10 +44,13 @@ * Any conversion to the format dictated by the corresponding data source from the native or CSV text format * has to be done by the connector themselves both in FROM and TO * + * NOTE: we cannot use the generic for comparable, since the comparison can be arbitrary for instance, + * purely based on text format * @param <T> - Each data format may have a native representation of the * data, represented by the parameter. */ -public abstract class IntermediateDataFormat<T> { +@SuppressWarnings("rawtypes") +public abstract class IntermediateDataFormat<T> implements Comparable { protected volatile T data; @@ -203,4 +206,15 @@ public boolean equals(Object obj) { return true; } + @Override + public String toString() { + return this.data.toString(); + } + + @Override + public int compareTo(Object o) { + IntermediateDataFormat idf = (IntermediateDataFormat) o; + return toString().compareTo(idf.toString()); + } + } \ No newline at end of file diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java index 3cfd3566..c8df6e0a 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java @@ -419,4 +419,8 @@ private Object[] toObject(JSONObject 
json) { return object; } + @Override + public String toString() { + return this.data.toJSONString(); + } } diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java index 861d34eb..d2b0ae0b 100644 --- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java @@ -1143,8 +1143,7 @@ public void testNotSettingSchemaAndGetData() { dataFormat.getData(); } - //SQOOP-1936 to enable schema validation after we use compareTo - @Test + @Test(expectedExceptions = SqoopException.class) public void testNotSettingSchemaAndGetCSVData() { dataFormat = new CSVIntermediateDataFormat(); dataFormat.getCSVTextData(); diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java index 08c2031f..59ad3115 100644 --- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java +++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java @@ -68,12 +68,12 @@ public void readFields(DataInput in) throws IOException { @Override public int compareTo(SqoopWritable o) { - return toString().compareTo(o.toString()); + return toIDF.compareTo(o.toIDF); } @Override public String toString() { - return toIDF.getCSVTextData(); + return toIDF.toString(); } @Override diff --git a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java index 6a14201c..452e0857 100644 --- a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java +++ b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java @@ 
-50,15 +50,6 @@ public void setUp() { writable = new SqoopWritable(idfMock); } - @Test - public void testStringInStringOut() { - String testData = "Live Long and prosper"; - writable.setString(testData); - verify(idfMock, times(1)).setCSVTextData(testData); - writable.toString(); - verify(idfMock, times(1)).getCSVTextData(); - } - @Test public void testWrite() throws IOException { String testData = "One ring to rule them all";