5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-13 23:41:56 +08:00

SQOOP-1765: Sqoop2: Time/Timestamp format support for CSV IDF

(Veena Basavaraj via Abraham Elmahrek)
This commit is contained in:
Abraham Elmahrek 2014-12-04 13:21:10 -08:00
parent 9febdf3aed
commit c19f9c9460
2 changed files with 78 additions and 15 deletions

View File

@ -29,6 +29,7 @@
import org.apache.sqoop.schema.type.FloatingPoint; import org.apache.sqoop.schema.type.FloatingPoint;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.joda.time.LocalDate; import org.joda.time.LocalDate;
import org.joda.time.LocalTime;
import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.DateTimeFormatter;
import org.json.simple.JSONArray; import org.json.simple.JSONArray;
@ -83,12 +84,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
new String(new char[] { ESCAPE_CHARACTER, '\''}) new String(new char[] { ESCAPE_CHARACTER, '\''})
}; };
// ISO-8859-1 is an 8-bit codec that is supported in every java // ISO-8859-1 is an 8-bit codec that is supported in every java implementation.
// implementation.
static final String BYTE_FIELD_CHARSET = "ISO-8859-1"; static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
// http://www.joda.org/joda-time/key_format.html provides details on the formatter token // http://www.joda.org/joda-time/key_format.html provides details on the formatter token
static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'"); static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ");
static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd"); static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ");
private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
@ -96,6 +97,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>(); private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> timeColumnIndices = new ArrayList<Integer>();
private Schema schema; private Schema schema;
@ -138,6 +140,8 @@ public void setSchema(Schema schema) {
stringTypeColumnIndices.add(i); stringTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.DATE) { } else if (col.getType() == ColumnType.DATE) {
dateTypeColumnIndices.add(i); dateTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.TIME) {
timeColumnIndices.add(i);
} else if (col.getType() == ColumnType.DATE_TIME) { } else if (col.getType() == ColumnType.DATE_TIME) {
dateTimeTypeColumnIndices.add(i); dateTimeTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.BINARY) { } else if (col.getType() == ColumnType.BINARY) {
@ -273,6 +277,9 @@ private Object parseCSVStringArrayElement(String fieldString, Column column) {
case DATE: case DATE:
returnValue = LocalDate.parse(removeQuotes(fieldString)); returnValue = LocalDate.parse(removeQuotes(fieldString));
break; break;
case TIME:
returnValue = LocalTime.parse(removeQuotes(fieldString));
break;
case DATE_TIME: case DATE_TIME:
// A datetime string with a space as date-time separator will not be // A datetime string with a space as date-time separator will not be
// parsed expectedly. The expected separator is "T". See also: // parsed expectedly. The expected separator is "T". See also:
@ -436,6 +443,10 @@ private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray)
org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i]; org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
stringArray[i] = encloseWithQuote(df.print(date)); stringArray[i] = encloseWithQuote(df.print(date));
} }
for (int i : timeColumnIndices) {
org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i];
stringArray[i] = encloseWithQuote(tf.print(date));
}
for (int i : byteTypeColumnIndices) { for (int i : byteTypeColumnIndices) {
stringArray[i] = escapeByteArrays((byte[]) stringArray[i]); stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
} }

View File

@ -41,6 +41,7 @@
import org.apache.sqoop.schema.type.DateTime; import org.apache.sqoop.schema.type.DateTime;
import org.apache.sqoop.schema.type.FixedPoint; import org.apache.sqoop.schema.type.FixedPoint;
import org.apache.sqoop.schema.type.Text; import org.apache.sqoop.schema.type.Text;
import org.apache.sqoop.schema.type.Time;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -272,6 +273,49 @@ public void testByteArrayFullRangeOfCharacters() {
assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData())); assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
} }
// **************test cases for time*******************
/**
 * Checks that a quoted time value set as CSV text on a single TIME column
 * is handed back as the same CSV text.
 */
@Test
public void testTimeWithCSVTextInCSVTextOut() {
  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);

  // CSV text in ...
  dataFormat.setTextData("'12:00:00'");

  // ... CSV text out.
  assertEquals("'12:00:00'", dataFormat.getTextData());
}
/**
 * Checks that a quoted time value set as CSV text on a single TIME column
 * comes back in the object array with the same string form as the
 * equivalent Joda {@code LocalTime}.
 */
@Test
public void testTimeWithCSVTextInObjectArrayOut() {
  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);

  dataFormat.setTextData("'12:59:59'");

  // The comparison is done on the string representations of both values.
  org.joda.time.LocalTime expectedTime = new org.joda.time.LocalTime(12, 59, 59);
  Object actualTime = dataFormat.getObjectData()[0];
  assertEquals(expectedTime.toString(), actualTime.toString());
}
/**
 * Checks the CSV text produced for a row holding a Joda {@code LocalTime}
 * in a TIME column followed by a string in a TEXT column. The expected text
 * shows the time quoted with six fractional-second digits.
 */
@Test
public void testTimeWithObjectArrayInCSVTextOut() {
  Schema timeAndTextSchema = new Schema("test");
  timeAndTextSchema.addColumn(new Time("1")).addColumn(new Text("2"));
  dataFormat.setSchema(timeAndTextSchema);

  // Object array in ...
  Object[] row = new Object[] { new org.joda.time.LocalTime(15, 0, 0), "test" };
  dataFormat.setObjectData(row);

  // ... CSV text out.
  assertEquals("'15:00:00.000000','test'", dataFormat.getTextData());
}
/**
 * Checks that a Joda {@code LocalTime} stored through the object-array
 * setter on a single TIME column is returned by the object-array getter
 * with the same string form.
 */
@Test
public void testTimeWithObjectArrayInObjectArrayOut() {
  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);

  org.joda.time.LocalTime expectedTime = new org.joda.time.LocalTime(2, 23, 33);
  dataFormat.setObjectData(new Object[] { expectedTime });

  // The comparison is done on the string representations of both values.
  Object actualTime = dataFormat.getObjectData()[0];
  assertEquals(expectedTime.toString(), actualTime.toString());
}
// **************test cases for date******************* // **************test cases for date*******************
@Test @Test
@ -327,6 +371,16 @@ public void testDateTimeWithCSVTextInCSVTextOut() {
assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData()); assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
} }
/**
 * Checks that a quoted date-time value carrying a millisecond fraction, set
 * as CSV text on a single DATE_TIME column, is handed back as the same CSV
 * text.
 */
@Test
public void testDateTimeWithMilliSecsWithCSVTextInCSVTextOut() {
  Schema dateTimeSchema = new Schema("test");
  dateTimeSchema.addColumn(new DateTime("1"));
  dataFormat.setSchema(dateTimeSchema);

  // CSV text in ...
  dataFormat.setTextData("'2014-10-01 12:00:00.000'");

  // ... CSV text out.
  assertEquals("'2014-10-01 12:00:00.000'", dataFormat.getTextData());
}
@Test @Test
public void testDateTimeWithCSVTextInObjectArrayOut() { public void testDateTimeWithCSVTextInObjectArrayOut() {
Schema schema = new Schema("test"); Schema schema = new Schema("test");
@ -342,10 +396,11 @@ public void testDateTimeWithObjectInCSVTextOut() {
Schema schema = new Schema("test"); Schema schema = new Schema("test");
schema.addColumn(new DateTime("1")); schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema); dataFormat.setSchema(schema);
org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0); org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 1);
Object[] in = { dateTime }; Object[] in = { dateTime };
dataFormat.setObjectData(in); dataFormat.setObjectData(in);
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); // Note: DateTime has the timezone info
assertEquals("'2014-10-01 12:00:00.001000-0700'", dataFormat.getTextData());
} }
@Test @Test
@ -353,11 +408,11 @@ public void testLocalDateTimeWithObjectInCSVTextOut() {
Schema schema = new Schema("test"); Schema schema = new Schema("test");
schema.addColumn(new DateTime("1")); schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema); dataFormat.setSchema(schema);
org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, 2);
0);
Object[] in = { dateTime }; Object[] in = { dateTime };
dataFormat.setObjectData(in); dataFormat.setObjectData(in);
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData()); // Note: LocalDateTime is missing the timezone info
assertEquals("'2014-10-01 12:00:00.002000'", dataFormat.getTextData());
} }
@Test @Test
@ -365,12 +420,9 @@ public void testDateTimePrecisionWithCSVTextInObjectArrayOut() {
Schema schema = new Schema("test"); Schema schema = new Schema("test");
schema.addColumn(new DateTime("1")); schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema); dataFormat.setSchema(schema);
dataFormat.setTextData("'2014-10-01 12:00:00.000'");
for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) {
dataFormat.setTextData(dateTime);
assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString()); assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
} }
}
/** /**
* In ISO8601 "T" is used as date-time separator. Unfortunately in the real * In ISO8601 "T" is used as date-time separator. Unfortunately in the real