5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-08 13:40:28 +08:00

SQOOP-1765: Sqoop2: Time/Timestamp format support for CSV IDF

(Veena Basavaraj via Abraham Elmahrek)
This commit is contained in:
Abraham Elmahrek 2014-12-04 13:21:10 -08:00
parent 9febdf3aed
commit c19f9c9460
2 changed files with 78 additions and 15 deletions

View File

@ -29,6 +29,7 @@
import org.apache.sqoop.schema.type.FloatingPoint;
import org.joda.time.DateTime;
import org.joda.time.LocalDate;
import org.joda.time.LocalTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.json.simple.JSONArray;
@ -83,12 +84,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
new String(new char[] { ESCAPE_CHARACTER, '\''})
};
// ISO-8859-1 is an 8-bit codec that is supported in every java
// implementation.
// ISO-8859-1 is an 8-bit codec that is supported in every java implementation.
static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
//http://www.joda.org/joda-time/key_format.html provides details on the formatter token
static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'");
// See http://www.joda.org/joda-time/key_format.html for details on the formatter pattern tokens.
static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ");
static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ");
private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
@ -96,6 +97,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> timeColumnIndices = new ArrayList<Integer>();
private Schema schema;
@ -138,6 +140,8 @@ public void setSchema(Schema schema) {
stringTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.DATE) {
dateTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.TIME) {
timeColumnIndices.add(i);
} else if (col.getType() == ColumnType.DATE_TIME) {
dateTimeTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.BINARY) {
@ -273,6 +277,9 @@ private Object parseCSVStringArrayElement(String fieldString, Column column) {
case DATE:
returnValue = LocalDate.parse(removeQuotes(fieldString));
break;
case TIME:
returnValue = LocalTime.parse(removeQuotes(fieldString));
break;
case DATE_TIME:
// A datetime string with a space as date-time separator will not be
// parsed expectedly. The expected separator is "T". See also:
@ -436,6 +443,10 @@ private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray)
org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
stringArray[i] = encloseWithQuote(df.print(date));
}
for (int i : timeColumnIndices) {
org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i];
stringArray[i] = encloseWithQuote(tf.print(date));
}
for (int i : byteTypeColumnIndices) {
stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
}
@ -560,4 +571,4 @@ private byte[] unescapeByteArray(String orig) {
public String toString() {
return data;
}
}
}

View File

@ -41,6 +41,7 @@
import org.apache.sqoop.schema.type.DateTime;
import org.apache.sqoop.schema.type.FixedPoint;
import org.apache.sqoop.schema.type.Text;
import org.apache.sqoop.schema.type.Time;
import org.junit.Before;
import org.junit.Test;
@ -272,6 +273,49 @@ public void testByteArrayFullRangeOfCharacters() {
assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
}
// **************test cases for time*******************
/**
 * A TIME column populated from CSV text must hand back exactly the same
 * CSV text when it is read as text again.
 */
@Test
public void testTimeWithCSVTextInCSVTextOut() {
  final String csvTime = "'12:00:00'";

  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);

  dataFormat.setTextData(csvTime);
  assertEquals(csvTime, dataFormat.getTextData());
}
/**
 * CSV text for a TIME column is read back as an object whose string form
 * equals that of the matching Joda LocalTime (12:59:59).
 */
@Test
public void testTimeWithCSVTextInObjectArrayOut() {
  org.joda.time.LocalTime expected = new org.joda.time.LocalTime(12, 59, 59);

  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);
  dataFormat.setTextData("'12:59:59'");

  // Compared through toString(), exactly as the assertion is defined for this case.
  Object parsed = dataFormat.getObjectData()[0];
  assertEquals(expected.toString(), parsed.toString());
}
/**
 * An object row holding a Joda LocalTime and a string is rendered as CSV
 * text; the expected time text has six fractional-second digits and no
 * zone offset.
 */
@Test
public void testTimeWithObjectArrayInCSVTextOut() {
  Schema timeAndTextSchema = new Schema("test");
  timeAndTextSchema
      .addColumn(new Time("1"))
      .addColumn(new Text("2"));
  dataFormat.setSchema(timeAndTextSchema);

  Object[] row = { new org.joda.time.LocalTime(15, 0, 0), "test" };
  dataFormat.setObjectData(row);

  String csv = dataFormat.getTextData();
  assertEquals("'15:00:00.000000','test'", csv);
}
/**
 * A Joda LocalTime stored through the object-array API comes back from the
 * object-array API with the same string form.
 */
@Test
public void testTimeWithObjectArrayInObjectArrayOut() {
  Schema timeSchema = new Schema("test");
  timeSchema.addColumn(new Time("1"));
  dataFormat.setSchema(timeSchema);

  org.joda.time.LocalTime original = new org.joda.time.LocalTime(2, 23, 33);
  dataFormat.setObjectData(new Object[] { original });

  Object roundTripped = dataFormat.getObjectData()[0];
  assertEquals(original.toString(), roundTripped.toString());
}
// **************test cases for date*******************
@Test
@ -327,6 +371,16 @@ public void testDateTimeWithCSVTextInCSVTextOut() {
assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
}
/**
 * CSV text for a DATE_TIME column that includes a millisecond fraction must
 * be returned unchanged when read back as text.
 */
@Test
public void testDateTimeWithMilliSecsWithCSVTextInCSVTextOut() {
  final String csvDateTime = "'2014-10-01 12:00:00.000'";

  Schema dateTimeSchema = new Schema("test");
  dateTimeSchema.addColumn(new DateTime("1"));
  dataFormat.setSchema(dateTimeSchema);

  dataFormat.setTextData(csvDateTime);
  assertEquals(csvDateTime, dataFormat.getTextData());
}
@Test
public void testDateTimeWithCSVTextInObjectArrayOut() {
Schema schema = new Schema("test");
@ -342,10 +396,11 @@ public void testDateTimeWithObjectInCSVTextOut() {
Schema schema = new Schema("test");
schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema);
org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0);
org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 1);
Object[] in = { dateTime };
dataFormat.setObjectData(in);
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
// Note: DateTime carries timezone info; the expected -0700 offset assumes the JVM default time zone is at UTC-07:00 on this date.
assertEquals("'2014-10-01 12:00:00.001000-0700'", dataFormat.getTextData());
}
@Test
@ -353,11 +408,11 @@ public void testLocalDateTimeWithObjectInCSVTextOut() {
Schema schema = new Schema("test");
schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema);
org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0,
0);
org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, 2);
Object[] in = { dateTime };
dataFormat.setObjectData(in);
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
// Note: LocalDateTime is missing the timezone info
assertEquals("'2014-10-01 12:00:00.002000'", dataFormat.getTextData());
}
@Test
@ -365,11 +420,8 @@ public void testDateTimePrecisionWithCSVTextInObjectArrayOut() {
Schema schema = new Schema("test");
schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema);
for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) {
dataFormat.setTextData(dateTime);
assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
}
dataFormat.setTextData("'2014-10-01 12:00:00.000'");
assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
}
/**