From c865aefea62a8f0f5eabe56860b393538f03c09c Mon Sep 17 00:00:00 2001
From: Abraham Elmahrek
Date: Fri, 5 Dec 2014 15:57:39 -0800
Subject: [PATCH] SQOOP-1817: Sqoop2: Update CSVIntermediate BIT data type
 (Veena Basavaraj via Abraham Elmahrek)

---
 Review notes (free-form text between "---" and the diffstat is not part of
 the commit message):
 * testBitWithCSVTextInAndObjectArrayOut() was left without @Test when the old
   testBit() was split up, so JUnit 4 never ran it. The annotation is added;
   the test hunk grows from 118 to 119 lines and the diffstat from 106/144 to
   107/145.
 * The newly added bitTypeColumnIndices, TRUE_BIT_SET and FALSE_BIT_SET
   declarations carry explicit element types: "for (int i : bitTypeColumnIndices)"
   does not compile against a raw List. Context lines are left untouched
   because they have to match the target file.

 .../idf/CSVIntermediateDataFormat.java        |  57 +++++++---
 .../idf/IntermediateDataFormatError.java      |   9 +-
 .../idf/TestCSVIntermediateDataFormat.java    | 107 ++++++++++++++++--
 3 files changed, 145 insertions(+), 28 deletions(-)

diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
index d481ccef..daa51ebb 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
@@ -44,6 +44,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -89,9 +90,10 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat {
   // http://www.joda.org/joda-time/key_format.html provides details on the formatter token
   static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ");
   static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
-  static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ");
+  static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSS");

   private final List stringTypeColumnIndices = new ArrayList();
+  private final List<Integer> bitTypeColumnIndices = new ArrayList<Integer>();
   private final List byteTypeColumnIndices = new ArrayList();
   private final List listTypeColumnIndices = new ArrayList();
   private final List mapTypeColumnIndices = new ArrayList();
@@ -99,6 +101,11 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat {
   private final List dateTypeColumnIndices = new ArrayList();
   private final List timeColumnIndices = new ArrayList();

+  static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" };
+  static final Set<String> TRUE_BIT_SET = new HashSet<String>(Arrays.asList(TRUE_BIT_VALUES));
+  static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" };
+  static final Set<String> FALSE_BIT_SET = new HashSet<String>(Arrays.asList(FALSE_BIT_VALUES));
+
   private Schema schema;

   public CSVIntermediateDataFormat() {
@@ -138,6 +145,8 @@ public void setSchema(Schema schema) {
     for (Column col : columns) {
       if (isColumnStringType(col)) {
         stringTypeColumnIndices.add(i);
+      } else if (col.getType() == ColumnType.BIT) {
+        bitTypeColumnIndices.add(i);
       } else if (col.getType() == ColumnType.DATE) {
         dateTypeColumnIndices.add(i);
       } else if (col.getType() == ColumnType.TIME) {
@@ -288,8 +297,12 @@ private Object parseCSVStringArrayElement(String fieldString, Column column) {
       returnValue = DateTime.parse(dateTime);
       break;
     case BIT:
-      returnValue = Boolean.valueOf(fieldString.equals("1")
-          || fieldString.toLowerCase().equals("true"));
+      if ((TRUE_BIT_SET.contains(fieldString)) || (FALSE_BIT_SET.contains(fieldString))) {
+        returnValue = TRUE_BIT_SET.contains(fieldString);
+      } else {
+        // throw an exception for any unsupported value for BITs
+        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + fieldString);
+      }
       break;
     case ARRAY:
     case SET:
@@ -425,36 +438,44 @@ public int compareTo(IntermediateDataFormat o) {
   /**
    * Sanitize every element of the CSV string based on the column type
    *
-   * @param stringArray
+   * @param objectArray
    */
   @SuppressWarnings("unchecked")
-  private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray) {
+  private void encodeCSVStringElements(Object[] objectArray, Column[] columnArray) {
+    for (int i : bitTypeColumnIndices) {
+      String bitStringValue = objectArray[i].toString();
+      if ((TRUE_BIT_SET.contains(bitStringValue)) || (FALSE_BIT_SET.contains(bitStringValue))) {
+        objectArray[i] = bitStringValue;
+      } else {
+        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + objectArray[i]);
+      }
+    }
     for (int i : stringTypeColumnIndices) {
-      stringArray[i] = escapeString((String) stringArray[i]);
+      objectArray[i] = escapeString((String) objectArray[i]);
     }
     for (int i : dateTimeTypeColumnIndices) {
-      if (stringArray[i] instanceof org.joda.time.DateTime) {
-        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i]));
-      } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) {
-        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i]));
+      if (objectArray[i] instanceof org.joda.time.DateTime) {
+        objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) objectArray[i]));
+      } else if (objectArray[i] instanceof org.joda.time.LocalDateTime) {
+        objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) objectArray[i]));
       }
     }
     for (int i : dateTypeColumnIndices) {
-      org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
-      stringArray[i] = encloseWithQuote(df.print(date));
+      org.joda.time.LocalDate date = (org.joda.time.LocalDate) objectArray[i];
+      objectArray[i] = encloseWithQuote(df.print(date));
     }
     for (int i : timeColumnIndices) {
-      org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i];
-      stringArray[i] = encloseWithQuote(tf.print(date));
+      org.joda.time.LocalTime date = (org.joda.time.LocalTime) objectArray[i];
+      objectArray[i] = encloseWithQuote(tf.print(date));
     }
     for (int i : byteTypeColumnIndices) {
-      stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
+      objectArray[i] = escapeByteArrays((byte[]) objectArray[i]);
     }
     for (int i : listTypeColumnIndices) {
-      stringArray[i] = encodeList((Object[]) stringArray[i], columnArray[i]);
+      objectArray[i] = encodeList((Object[]) objectArray[i], columnArray[i]);
     }
     for (int i : mapTypeColumnIndices) {
-      stringArray[i] = encodeMap((Map) stringArray[i], columnArray[i]);
+      objectArray[i] = encodeMap((Map) objectArray[i], columnArray[i]);
     }
   }

@@ -571,4 +592,4 @@ private byte[] unescapeByteArray(String orig) {
   public String toString() {
     return data;
   }
-}
\ No newline at end of file
+}
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
index 665418d7..4b0dd881 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
@@ -36,13 +36,18 @@ public enum IntermediateDataFormatError implements ErrorCode {
   /** Column type isn't known by Intermediate Data Format. */
   INTERMEDIATE_DATA_FORMAT_0004("Unknown column type."),

-  /** Number of fields. */
-  INTERMEDIATE_DATA_FORMAT_0005("Wrong number of fields."),
+  /** Number of columns in schema does not match the data set. */
+  INTERMEDIATE_DATA_FORMAT_0005("Wrong number of columns."),

+  /** Schema is missing in the IDF. */
   INTERMEDIATE_DATA_FORMAT_0006("Schema missing."),

+  /** For arrays and maps we use JSON representation and incorrect representation results in parse exception*/
   INTERMEDIATE_DATA_FORMAT_0008("JSON parse internal error."),

+  /** Unsupported bit values */
+  INTERMEDIATE_DATA_FORMAT_0009("Unsupported bit value."),
+
   ;

   private final String message;
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index b348ed8a..8a032ef9 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -442,28 +442,119 @@ public void testDateTimeISO8601Alternative() {
     }
   }

+  // **************test cases for BIT*******************
+
   @Test
-  public void testBit() {
+  public void testBitTrueFalseWithCSVTextInAndCSVTextOut() {
     Schema schema = new Schema("test");
     schema.addColumn(new Bit("1"));
     dataFormat.setSchema(schema);

-    for (String trueBit : new String[]{
-        "true", "TRUE", "1"
-    }) {
+    for (String trueBit : new String[] { "true", "TRUE" }) {
+      dataFormat.setTextData(trueBit);
+      assertTrue(Boolean.valueOf(dataFormat.getTextData()));
+    }
+
+    for (String falseBit : new String[] { "false", "FALSE" }) {
+      dataFormat.setTextData(falseBit);
+      assertFalse(Boolean.valueOf(dataFormat.getTextData()));
+    }
+  }
+
+  @Test
+  public void testBitWithCSVTextInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1"));
+    dataFormat.setSchema(schema);
+    dataFormat.setTextData("1");
+    assertEquals("1", dataFormat.getTextData());
+    dataFormat.setTextData("0");
+    assertEquals("0", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testBitWithObjectArrayInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = Boolean.TRUE;
+    data[1] = Boolean.FALSE;
+    dataFormat.setObjectData(data);
+    assertEquals("true,false", dataFormat.getTextData());
+  }
+
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithObjectArrayInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = "1";
+    data[1] = "2";
+    dataFormat.setObjectData(data);
+    assertEquals("1,2", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testBitWithObjectArrayInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = Boolean.TRUE;
+    data[1] = Boolean.FALSE;
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+    data[0] = "1";
+    data[1] = "0";
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+
+  @Test
+  public void testBitWithCSVTextInAndObjectArrayOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1"));
+    dataFormat.setSchema(schema);
+
+    for (String trueBit : new String[] { "true", "TRUE", "1" }) {
       dataFormat.setTextData(trueBit);
       assertTrue((Boolean) dataFormat.getObjectData()[0]);
     }

-    for (String falseBit : new String[]{
-        "false", "FALSE", "0"
-    }) {
+    for (String falseBit : new String[] { "false", "FALSE", "0" }) {
       dataFormat.setTextData(falseBit);
       assertFalse((Boolean) dataFormat.getObjectData()[0]);
     }
   }

-  //**************test cases for arrays*******************
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithObjectArrayInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = "1";
+    data[1] = "2";
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithCSVTextInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    dataFormat.setTextData("1,3");
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+
+  // **************test cases for arrays*******************
   @Test
   public void testArrayOfStringWithObjectArrayInObjectArrayOut() {
     Schema schema = new Schema("test");