mirror of
https://github.com/apache/sqoop.git
synced 2025-05-13 23:41:56 +08:00
SQOOP-1765: Sqoop2: Time/Timestamp format support for CSV IDF
(Veena Basavaraj via Abraham Elmahrek)
This commit is contained in:
parent
9febdf3aed
commit
c19f9c9460
@ -29,6 +29,7 @@
|
|||||||
import org.apache.sqoop.schema.type.FloatingPoint;
|
import org.apache.sqoop.schema.type.FloatingPoint;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
import org.joda.time.LocalDate;
|
import org.joda.time.LocalDate;
|
||||||
|
import org.joda.time.LocalTime;
|
||||||
import org.joda.time.format.DateTimeFormat;
|
import org.joda.time.format.DateTimeFormat;
|
||||||
import org.joda.time.format.DateTimeFormatter;
|
import org.joda.time.format.DateTimeFormatter;
|
||||||
import org.json.simple.JSONArray;
|
import org.json.simple.JSONArray;
|
||||||
@ -83,12 +84,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
|
|||||||
new String(new char[] { ESCAPE_CHARACTER, '\''})
|
new String(new char[] { ESCAPE_CHARACTER, '\''})
|
||||||
};
|
};
|
||||||
|
|
||||||
// ISO-8859-1 is an 8-bit codec that is supported in every java
|
// ISO-8859-1 is an 8-bit codec that is supported in every java implementation.
|
||||||
// implementation.
|
|
||||||
static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
|
static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
|
||||||
// http://www.joda.org/joda-time/key_format.html provides details on the formatter token
|
// http://www.joda.org/joda-time/key_format.html provides details on the formatter token
|
||||||
static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'");
|
static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ");
|
||||||
static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
|
static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
|
||||||
|
static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ");
|
||||||
|
|
||||||
private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
|
private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
|
||||||
private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
|
private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
|
||||||
@ -96,6 +97,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
|
|||||||
private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
|
private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
|
||||||
private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
|
private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
|
||||||
private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
|
private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
|
||||||
|
private final List<Integer> timeColumnIndices = new ArrayList<Integer>();
|
||||||
|
|
||||||
private Schema schema;
|
private Schema schema;
|
||||||
|
|
||||||
@ -138,6 +140,8 @@ public void setSchema(Schema schema) {
|
|||||||
stringTypeColumnIndices.add(i);
|
stringTypeColumnIndices.add(i);
|
||||||
} else if (col.getType() == ColumnType.DATE) {
|
} else if (col.getType() == ColumnType.DATE) {
|
||||||
dateTypeColumnIndices.add(i);
|
dateTypeColumnIndices.add(i);
|
||||||
|
} else if (col.getType() == ColumnType.TIME) {
|
||||||
|
timeColumnIndices.add(i);
|
||||||
} else if (col.getType() == ColumnType.DATE_TIME) {
|
} else if (col.getType() == ColumnType.DATE_TIME) {
|
||||||
dateTimeTypeColumnIndices.add(i);
|
dateTimeTypeColumnIndices.add(i);
|
||||||
} else if (col.getType() == ColumnType.BINARY) {
|
} else if (col.getType() == ColumnType.BINARY) {
|
||||||
@ -273,6 +277,9 @@ private Object parseCSVStringArrayElement(String fieldString, Column column) {
|
|||||||
case DATE:
|
case DATE:
|
||||||
returnValue = LocalDate.parse(removeQuotes(fieldString));
|
returnValue = LocalDate.parse(removeQuotes(fieldString));
|
||||||
break;
|
break;
|
||||||
|
case TIME:
|
||||||
|
returnValue = LocalTime.parse(removeQuotes(fieldString));
|
||||||
|
break;
|
||||||
case DATE_TIME:
|
case DATE_TIME:
|
||||||
// A datetime string with a space as date-time separator will not be
|
// A datetime string with a space as date-time separator will not be
|
||||||
// parsed expectedly. The expected separator is "T". See also:
|
// parsed expectedly. The expected separator is "T". See also:
|
||||||
@ -436,6 +443,10 @@ private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray)
|
|||||||
org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
|
org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
|
||||||
stringArray[i] = encloseWithQuote(df.print(date));
|
stringArray[i] = encloseWithQuote(df.print(date));
|
||||||
}
|
}
|
||||||
|
for (int i : timeColumnIndices) {
|
||||||
|
org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i];
|
||||||
|
stringArray[i] = encloseWithQuote(tf.print(date));
|
||||||
|
}
|
||||||
for (int i : byteTypeColumnIndices) {
|
for (int i : byteTypeColumnIndices) {
|
||||||
stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
|
stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
|
||||||
}
|
}
|
||||||
|
@ -41,6 +41,7 @@
|
|||||||
import org.apache.sqoop.schema.type.DateTime;
|
import org.apache.sqoop.schema.type.DateTime;
|
||||||
import org.apache.sqoop.schema.type.FixedPoint;
|
import org.apache.sqoop.schema.type.FixedPoint;
|
||||||
import org.apache.sqoop.schema.type.Text;
|
import org.apache.sqoop.schema.type.Text;
|
||||||
|
import org.apache.sqoop.schema.type.Time;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -272,6 +273,49 @@ public void testByteArrayFullRangeOfCharacters() {
|
|||||||
assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
|
assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// **************test cases for time*******************
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimeWithCSVTextInCSVTextOut() {
|
||||||
|
Schema schema = new Schema("test");
|
||||||
|
schema.addColumn(new Time("1"));
|
||||||
|
dataFormat.setSchema(schema);
|
||||||
|
dataFormat.setTextData("'12:00:00'");
|
||||||
|
assertEquals("'12:00:00'", dataFormat.getTextData());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimeWithCSVTextInObjectArrayOut() {
|
||||||
|
Schema schema = new Schema("test");
|
||||||
|
schema.addColumn(new Time("1"));
|
||||||
|
dataFormat.setSchema(schema);
|
||||||
|
dataFormat.setTextData("'12:59:59'");
|
||||||
|
org.joda.time.LocalTime time = new org.joda.time.LocalTime(12, 59, 59);
|
||||||
|
assertEquals(time.toString(), dataFormat.getObjectData()[0].toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimeWithObjectArrayInCSVTextOut() {
|
||||||
|
Schema schema = new Schema("test");
|
||||||
|
schema.addColumn(new Time("1")).addColumn(new Text("2"));
|
||||||
|
dataFormat.setSchema(schema);
|
||||||
|
org.joda.time.LocalTime time = new org.joda.time.LocalTime(15, 0, 0);
|
||||||
|
Object[] in = { time, "test" };
|
||||||
|
dataFormat.setObjectData(in);
|
||||||
|
assertEquals("'15:00:00.000000','test'", dataFormat.getTextData());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTimeWithObjectArrayInObjectArrayOut() {
|
||||||
|
Schema schema = new Schema("test");
|
||||||
|
schema.addColumn(new Time("1"));
|
||||||
|
dataFormat.setSchema(schema);
|
||||||
|
org.joda.time.LocalTime time = new org.joda.time.LocalTime(2, 23, 33);
|
||||||
|
Object[] in = { time };
|
||||||
|
dataFormat.setObjectData(in);
|
||||||
|
assertEquals(time.toString(), dataFormat.getObjectData()[0].toString());
|
||||||
|
}
|
||||||
|
|
||||||
// **************test cases for date*******************
|
// **************test cases for date*******************
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -327,6 +371,16 @@ public void testDateTimeWithCSVTextInCSVTextOut() {
|
|||||||
assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
|
assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDateTimeWithMilliSecsWithCSVTextInCSVTextOut() {
|
||||||
|
Schema schema = new Schema("test");
|
||||||
|
schema.addColumn(new DateTime("1"));
|
||||||
|
dataFormat.setSchema(schema);
|
||||||
|
|
||||||
|
dataFormat.setTextData("'2014-10-01 12:00:00.000'");
|
||||||
|
assertEquals("'2014-10-01 12:00:00.000'", dataFormat.getTextData());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDateTimeWithCSVTextInObjectArrayOut() {
|
public void testDateTimeWithCSVTextInObjectArrayOut() {
|
||||||
Schema schema = new Schema("test");
|
Schema schema = new Schema("test");
|
||||||
@ -342,10 +396,11 @@ public void testDateTimeWithObjectInCSVTextOut() {
|
|||||||
Schema schema = new Schema("test");
|
Schema schema = new Schema("test");
|
||||||
schema.addColumn(new DateTime("1"));
|
schema.addColumn(new DateTime("1"));
|
||||||
dataFormat.setSchema(schema);
|
dataFormat.setSchema(schema);
|
||||||
org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0);
|
org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 1);
|
||||||
Object[] in = { dateTime };
|
Object[] in = { dateTime };
|
||||||
dataFormat.setObjectData(in);
|
dataFormat.setObjectData(in);
|
||||||
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
|
// Note: DateTime has the timezone info
|
||||||
|
assertEquals("'2014-10-01 12:00:00.001000-0700'", dataFormat.getTextData());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -353,11 +408,11 @@ public void testLocalDateTimeWithObjectInCSVTextOut() {
|
|||||||
Schema schema = new Schema("test");
|
Schema schema = new Schema("test");
|
||||||
schema.addColumn(new DateTime("1"));
|
schema.addColumn(new DateTime("1"));
|
||||||
dataFormat.setSchema(schema);
|
dataFormat.setSchema(schema);
|
||||||
org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0,
|
org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0, 2);
|
||||||
0);
|
|
||||||
Object[] in = { dateTime };
|
Object[] in = { dateTime };
|
||||||
dataFormat.setObjectData(in);
|
dataFormat.setObjectData(in);
|
||||||
assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
|
// Note: LocalDateTime is missing the timezone info
|
||||||
|
assertEquals("'2014-10-01 12:00:00.002000'", dataFormat.getTextData());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -365,12 +420,9 @@ public void testDateTimePrecisionWithCSVTextInObjectArrayOut() {
|
|||||||
Schema schema = new Schema("test");
|
Schema schema = new Schema("test");
|
||||||
schema.addColumn(new DateTime("1"));
|
schema.addColumn(new DateTime("1"));
|
||||||
dataFormat.setSchema(schema);
|
dataFormat.setSchema(schema);
|
||||||
|
dataFormat.setTextData("'2014-10-01 12:00:00.000'");
|
||||||
for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) {
|
|
||||||
dataFormat.setTextData(dateTime);
|
|
||||||
assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
|
assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* In ISO8601 "T" is used as date-time separator. Unfortunately in the real
|
* In ISO8601 "T" is used as date-time separator. Unfortunately in the real
|
||||||
|
Loading…
Reference in New Issue
Block a user