From c19f9c9460b4f10466b15f20b0c2493e9ef09387 Mon Sep 17 00:00:00 2001 From: Abraham Elmahrek Date: Thu, 4 Dec 2014 13:21:10 -0800 Subject: [PATCH] SQOOP-1765: Sqoop2: Time/Timestamp format support for CSV IDF (Veena Basavaraj via Abraham Elmahrek) --- .../idf/CSVIntermediateDataFormat.java | 21 ++++-- .../idf/TestCSVIntermediateDataFormat.java | 72 ++++++++++++++++--- 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java index a075d3fe..d481ccef 100644 --- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java @@ -29,6 +29,7 @@ import org.apache.sqoop.schema.type.FloatingPoint; import org.joda.time.DateTime; import org.joda.time.LocalDate; +import org.joda.time.LocalTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.json.simple.JSONArray; @@ -83,12 +84,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat { new String(new char[] { ESCAPE_CHARACTER, '\''}) }; - // ISO-8859-1 is an 8-bit codec that is supported in every java - // implementation. + // ISO-8859-1 is an 8-bit codec that is supported in every java implementation. static final String BYTE_FIELD_CHARSET = "ISO-8859-1"; - //http://www.joda.org/joda-time/key_format.html provides details on the formatter token - static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'"); + // http://www.joda.org/joda-time/key_format.html provides details on the formatter token + static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ"); static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd"); + static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ"); private final List stringTypeColumnIndices = new ArrayList(); private final List byteTypeColumnIndices = new ArrayList(); @@ -96,6 +97,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat { private final List mapTypeColumnIndices = new ArrayList(); private final List dateTimeTypeColumnIndices = new ArrayList(); private final List dateTypeColumnIndices = new ArrayList(); + private final List timeColumnIndices = new ArrayList(); private Schema schema; @@ -138,6 +140,8 @@ public void setSchema(Schema schema) { stringTypeColumnIndices.add(i); } else if (col.getType() == ColumnType.DATE) { dateTypeColumnIndices.add(i); + } else if (col.getType() == ColumnType.TIME) { + timeColumnIndices.add(i); } else if (col.getType() == ColumnType.DATE_TIME) { dateTimeTypeColumnIndices.add(i); } else if (col.getType() == ColumnType.BINARY) { @@ -273,6 +277,9 @@ private Object parseCSVStringArrayElement(String fieldString, Column column) { case DATE: returnValue = LocalDate.parse(removeQuotes(fieldString)); break; + case TIME: + returnValue = LocalTime.parse(removeQuotes(fieldString)); + break; case DATE_TIME: // A datetime string with a space as date-time separator will not be // parsed expectedly. The expected separator is "T". See also: @@ -436,6 +443,10 @@ private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray) org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i]; stringArray[i] = encloseWithQuote(df.print(date)); } + for (int i : timeColumnIndices) { + org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i]; + stringArray[i] = encloseWithQuote(tf.print(date)); + } for (int i : byteTypeColumnIndices) { stringArray[i] = escapeByteArrays((byte[]) stringArray[i]); } @@ -560,4 +571,4 @@ private byte[] unescapeByteArray(String orig) { public String toString() { return data; } -} +} \ No newline at end of file diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java index bf15c696..b348ed8a 100644 --- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java +++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java @@ -41,6 +41,7 @@ import org.apache.sqoop.schema.type.DateTime; import org.apache.sqoop.schema.type.FixedPoint; import org.apache.sqoop.schema.type.Text; +import org.apache.sqoop.schema.type.Time; import org.junit.Before; import org.junit.Test; @@ -272,6 +273,49 @@ public void testByteArrayFullRangeOfCharacters() { assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData())); } + // **************test cases for time******************* + + @Test + public void testTimeWithCSVTextInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Time("1")); + dataFormat.setSchema(schema); + dataFormat.setTextData("'12:00:00'"); + assertEquals("'12:00:00'", dataFormat.getTextData()); + } + + @Test + public void testTimeWithCSVTextInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Time("1")); + dataFormat.setSchema(schema); + dataFormat.setTextData("'12:59:59'"); + org.joda.time.LocalTime time = new org.joda.time.LocalTime(12, 59, 59); + assertEquals(time.toString(), dataFormat.getObjectData()[0].toString()); + } + + @Test + public void testTimeWithObjectArrayInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Time("1")).addColumn(new Text("2")); + dataFormat.setSchema(schema); + org.joda.time.LocalTime time = new org.joda.time.LocalTime(15, 0, 0); + Object[] in = { time, "test" }; + dataFormat.setObjectData(in); + assertEquals("'15:00:00.000000','test'", dataFormat.getTextData()); + } + + @Test + public void testTimeWithObjectArrayInObjectArrayOut() { + Schema schema = new Schema("test"); + schema.addColumn(new Time("1")); + dataFormat.setSchema(schema); + org.joda.time.LocalTime time = new org.joda.time.LocalTime(2, 23, 33); + Object[] in = { time }; + dataFormat.setObjectData(in); + assertEquals(time.toString(), dataFormat.getObjectData()[0].toString()); + } + // **************test cases for date******************* @Test @@ -327,6 +371,16 @@ public void testDateTimeWithCSVTextInCSVTextOut() { assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData()); } + @Test + public void testDateTimeWithMilliSecsWithCSVTextInCSVTextOut() { + Schema schema = new Schema("test"); + schema.addColumn(new DateTime("1")); + dataFormat.setSchema(schema); + + dataFormat.setTextData("'2014-10-01 12:00:00.000'"); + assertEquals("'2014-10-01 12:00:00.000'", dataFormat.getTextData()); + } + @Test public void testDateTimeWithCSVTextInObjectArrayOut() { Schema schema = new Schema("test"); @@ -342,10 +396,11 @@ public void testDateTimeWithObjectInCSVTextOut() { Schema schema = new Schema("test"); schema.addColumn(new DateTime("1")); dataFormat.setSchema(schema); - org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0); + org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 1); Object[] in = { dateTime }; dataFormat.setObjectData(in); - assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); + // Note: DateTime has the timezone info + assertEquals("'2014-10-01 12:00:00.001000-0700'", dataFormat.getTextData()); } @Test @@ -353,11 +408,11 @@ public void testLocalDateTimeWithObjectInCSVTextOut() { Schema schema = new Schema("test"); schema.addColumn(new DateTime("1")); dataFormat.setSchema(schema); - org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, - 0); + org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, 2); Object[] in = { dateTime }; dataFormat.setObjectData(in); - assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); + // Note: LocalDateTime is missing the timezone info + assertEquals("'2014-10-01 12:00:00.002000'", dataFormat.getTextData()); } @Test @@ -365,11 +420,8 @@ public void testDateTimePrecisionWithCSVTextInObjectArrayOut() { Schema schema = new Schema("test"); schema.addColumn(new DateTime("1")); dataFormat.setSchema(schema); - - for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) { - dataFormat.setTextData(dateTime); - assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); - } + dataFormat.setTextData("'2014-10-01 12:00:00.000'"); + assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); } /**