diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopAvroUtils.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopAvroUtils.java
index 857271a1..985149cb 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopAvroUtils.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopAvroUtils.java
@@ -74,6 +74,7 @@ public static Schema toAvroFieldType(Column column) throws IllegalArgumentExcept
     switch (column.getType()) {
     case ARRAY:
     case SET:
+      assert column instanceof AbstractComplexListType;
       AbstractComplexListType listColumn = (AbstractComplexListType) column;
       return Schema.createArray(toAvroFieldType(listColumn.getListType()));
     case UNKNOWN:
@@ -93,20 +94,21 @@ public static Schema toAvroFieldType(Column column) throws IllegalArgumentExcept
     case ENUM:
       return createEnumSchema(column);
     case FIXED_POINT:
-      Long byteSize = ((FixedPoint) column).getByteSize();
       if (SqoopIDFUtils.isInteger(column)) {
         return Schema.create(Schema.Type.INT);
       } else {
         return Schema.create(Schema.Type.LONG);
       }
     case FLOATING_POINT:
-      byteSize = ((FloatingPoint) column).getByteSize();
+      assert column instanceof FloatingPoint;
+      Long byteSize = ((FloatingPoint) column).getByteSize();
       if (byteSize != null && byteSize <= (Float.SIZE/Byte.SIZE)) {
         return Schema.create(Schema.Type.FLOAT);
       } else {
         return Schema.create(Schema.Type.DOUBLE);
       }
     case MAP:
+      assert column instanceof org.apache.sqoop.schema.type.Map;
       org.apache.sqoop.schema.type.Map mapColumn = (org.apache.sqoop.schema.type.Map) column;
       return Schema.createArray(toAvroFieldType(mapColumn.getValue()));
     case TEXT:
@@ -117,6 +119,7 @@ public static Schema toAvroFieldType(Column column) throws IllegalArgumentExcept
   }
 
   public static Schema createEnumSchema(Column column) {
+    assert column instanceof org.apache.sqoop.schema.type.Enum;
     Set options = ((org.apache.sqoop.schema.type.Enum) column).getOptions();
     List listOptions = new ArrayList(options);
     return Schema.createEnum(column.getName(), null, SQOOP_SCHEMA_NAMESPACE, listOptions);
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java
index 2a7aa1b2..52ff2b86 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java
@@ -49,7 +49,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
-import java.util.regex.Matcher;
+import java.util.Collections;
 
 /**
  * Utility methods for connectors to encode data into the sqoop expected formats
@@ -68,7 +68,7 @@ public class SqoopIDFUtils {
   // implementation.
   public static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
 
-  public static final Map ORIGINALS = new TreeMap();
+  private static final Map ORIGINALS = new TreeMap();
 
   public static final char CSV_SEPARATOR_CHARACTER = ',';
   public static final char ESCAPE_CHARACTER = '\\';
@@ -77,19 +77,19 @@
   private static final Map REPLACEMENTS = new TreeMap();
 
   static {
-    ORIGINALS.put(new Character((char)0x00), new String(new char[] { ESCAPE_CHARACTER, '0' }));
-    ORIGINALS.put(new Character((char)0x0A), new String(new char[] { ESCAPE_CHARACTER, 'n' }));
-    ORIGINALS.put(new Character((char)0x0D), new String(new char[] { ESCAPE_CHARACTER, 'r' }));
-    ORIGINALS.put(new Character((char)0x1A), new String(new char[] { ESCAPE_CHARACTER, 'Z' }));
-    ORIGINALS.put(new Character((char)0x22), new String(new char[] { ESCAPE_CHARACTER, '"' }));
-    ORIGINALS.put(new Character((char)0x27), new String(new char[] { ESCAPE_CHARACTER, '\'' }));
+    ORIGINALS.put(Character.valueOf((char)0x00), new String(new char[] { ESCAPE_CHARACTER, '0' }));
+    ORIGINALS.put(Character.valueOf((char)0x0A), new String(new char[] { ESCAPE_CHARACTER, 'n' }));
+    ORIGINALS.put(Character.valueOf((char)0x0D), new String(new char[] { ESCAPE_CHARACTER, 'r' }));
+    ORIGINALS.put(Character.valueOf((char)0x1A), new String(new char[] { ESCAPE_CHARACTER, 'Z' }));
+    ORIGINALS.put(Character.valueOf((char)0x22), new String(new char[] { ESCAPE_CHARACTER, '"' }));
+    ORIGINALS.put(Character.valueOf((char)0x27), new String(new char[] { ESCAPE_CHARACTER, '\'' }));
 
-    REPLACEMENTS.put('0', new Character((char)0x00));
-    REPLACEMENTS.put('n', new Character((char)0x0A));
-    REPLACEMENTS.put('r', new Character((char)0x0D));
-    REPLACEMENTS.put('Z', new Character((char)0x1A));
-    REPLACEMENTS.put('"', new Character((char)0x22));
-    REPLACEMENTS.put('\'', new Character((char)0x27));
+    REPLACEMENTS.put('0', Character.valueOf((char)0x00));
+    REPLACEMENTS.put('n', Character.valueOf((char)0x0A));
+    REPLACEMENTS.put('r', Character.valueOf((char)0x0D));
+    REPLACEMENTS.put('Z', Character.valueOf((char)0x1A));
+    REPLACEMENTS.put('"', Character.valueOf((char)0x22));
+    REPLACEMENTS.put('\'', Character.valueOf((char)0x27));
   }
 
   // http://www.joda.org/joda-time/key_format.html provides details on the
@@ -106,14 +106,15 @@
   public static final DateTimeFormatter tfWithFraction = DateTimeFormat.forPattern("HH:mm:ss.SSS");
   public static final DateTimeFormatter tfWithNoFraction = DateTimeFormat.forPattern("HH:mm:ss");
 
-  public static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" };
-  public static final Set TRUE_BIT_SET = new HashSet(Arrays.asList(TRUE_BIT_VALUES));
-  public static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" };
-  public static final Set FALSE_BIT_SET = new HashSet(Arrays.asList(FALSE_BIT_VALUES));
+  private static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" };
+  public static final Set TRUE_BIT_SET = Collections.unmodifiableSet(new HashSet(Arrays.asList(TRUE_BIT_VALUES)));
+  private static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" };
+  public static final Set FALSE_BIT_SET = Collections.unmodifiableSet(new HashSet(Arrays.asList(FALSE_BIT_VALUES)));
 
   // ******** Number Column Type utils***********
 
   public static boolean isInteger(Column column) {
+    assert column instanceof FixedPoint;
     Long byteSize = ((FixedPoint) column).getByteSize();
     Boolean signed = ((FixedPoint) column).isSigned();
 
@@ -129,15 +130,15 @@ public static boolean isInteger(Column column) {
   public static String toCSVFixedPoint(Object obj, Column column) {
     if (isInteger(column)) {
       if (obj instanceof Number) {
-        return new Integer(((Number)obj).intValue()).toString();
+        return Integer.toString(((Number) obj).intValue());
       } else {
-        return new Integer(obj.toString()).toString();
+        return Integer.valueOf(obj.toString()).toString();
       }
     } else {
       if (obj instanceof Number) {
-        return new Long(((Number)obj).longValue()).toString();
+        return Long.toString(((Number) obj).longValue());
       } else {
-        return new Long(obj.toString()).toString();
+        return Long.valueOf(obj.toString()).toString();
       }
     }
   }
@@ -153,6 +154,7 @@ public static Object toFixedPoint(String csvString, Column column) {
   }
 
   public static String toCSVFloatingPoint(Object obj, Column column) {
+    assert column instanceof FloatingPoint;
     Long byteSize = ((FloatingPoint) column).getByteSize();
     if (byteSize != null && byteSize <= (Float.SIZE / Byte.SIZE)) {
       return ((Float) obj).toString();
@@ -163,6 +165,7 @@ public static String toCSVFloatingPoint(Object obj, Column column) {
 
   public static Object toFloatingPoint(String csvString, Column column) {
     Object returnValue;
+    assert column instanceof FloatingPoint;
     Long byteSize = ((FloatingPoint) column).getByteSize();
     if (byteSize != null && byteSize <= (Float.SIZE / Byte.SIZE)) {
       returnValue = Float.valueOf(csvString);
@@ -177,6 +180,7 @@ public static String toCSVDecimal(Object obj) {
   }
 
   public static Object toDecimal(String csvString, Column column) {
+    assert column instanceof org.apache.sqoop.schema.type.Decimal;
     Integer precision = ((org.apache.sqoop.schema.type.Decimal) column).getPrecision();
     Integer scale = ((org.apache.sqoop.schema.type.Decimal) column).getScale();
     BigDecimal bd = null;
@@ -190,7 +194,7 @@ public static Object toDecimal(String csvString, Column column) {
       // we have decided to use the default MathContext DEFAULT_ROUNDINGMODE
       // which is RoundingMode.HALF_UP,
       // we are aware that there may be some loss
-      bd.setScale(scale, RoundingMode.HALF_UP);
+      bd = bd.setScale(scale, RoundingMode.HALF_UP);
     }
     return bd;
   }
@@ -218,11 +222,14 @@ public static Object toBit(String csvString) {
   // *********** DATE and TIME Column Type utils **********
 
   public static String toCSVDate(Object obj) {
+    assert obj instanceof org.joda.time.LocalDate;
     org.joda.time.LocalDate date = (org.joda.time.LocalDate) obj;
     return encloseWithQuotes(df.print(date));
   }
 
   public static String toCSVTime(Object obj, Column col) {
+    assert col instanceof org.apache.sqoop.schema.type.Time;
+    assert obj instanceof org.joda.time.LocalTime;
     if (((org.apache.sqoop.schema.type.Time) col).hasFraction()) {
       return encloseWithQuotes(tfWithFraction.print((org.joda.time.LocalTime) obj));
     } else {
@@ -241,6 +248,8 @@ public static Object toTime(String csvString, Column column) {
   // *********** DATE TIME Column Type utils **********
 
   public static String toCSVLocalDateTime(Object obj, Column col) {
+    assert obj instanceof org.joda.time.LocalDateTime;
+    assert col instanceof org.apache.sqoop.schema.type.DateTime;
     org.joda.time.LocalDateTime localDateTime = (org.joda.time.LocalDateTime) obj;
     org.apache.sqoop.schema.type.DateTime column = (org.apache.sqoop.schema.type.DateTime) col;
     if (column.hasFraction()) {
@@ -251,6 +260,8 @@ public static String toCSVLocalDateTime(Object obj, Column col) {
   }
 
   public static String toCSVDateTime(Object obj, Column col) {
+    assert obj instanceof org.joda.time.DateTime;
+    assert col instanceof org.apache.sqoop.schema.type.DateTime;
     org.joda.time.DateTime dateTime = (org.joda.time.DateTime) obj;
     org.apache.sqoop.schema.type.DateTime column = (org.apache.sqoop.schema.type.DateTime) col;
     if (column.hasFraction() && column.hasTimezone()) {
@@ -267,7 +278,8 @@ public static String toCSVDateTime(Object obj, Column col) {
   public static Object toDateTime(String csvString, Column column) {
     Object returnValue;
     String dateTime = removeQuotes(csvString);
-    org.apache.sqoop.schema.type.DateTime col = ((org.apache.sqoop.schema.type.DateTime) column);
+    assert column instanceof org.apache.sqoop.schema.type.DateTime;
+    org.apache.sqoop.schema.type.DateTime col = (org.apache.sqoop.schema.type.DateTime) column;
     if (col.hasFraction() && col.hasTimezone()) {
       // After calling withOffsetParsed method, a string
       // '2004-06-09T10:20:30-08:00' will create a datetime with a zone of
@@ -288,7 +300,8 @@ public static Object toDateTime(String csvString, Column column) {
   public static Long toDateTimeInMillis(String csvString, Column column) {
     long returnValue;
     String dateTime = removeQuotes(csvString);
-    org.apache.sqoop.schema.type.DateTime col = ((org.apache.sqoop.schema.type.DateTime) column);
+    assert column instanceof org.apache.sqoop.schema.type.DateTime;
+    org.apache.sqoop.schema.type.DateTime col = (org.apache.sqoop.schema.type.DateTime) column;
     if (col.hasFraction() && col.hasTimezone()) {
       // After calling withOffsetParsed method, a string
       // '2004-06-09T10:20:30-08:00' will create a datetime with a zone of
@@ -370,6 +383,7 @@ else if (value instanceof JSONObject) {
   public static String toCSVList(Object[] list, Column column) {
     List elementList = new ArrayList();
     for (int n = 0; n < list.length; n++) {
+      assert column instanceof AbstractComplexListType;
       Column listType = ((AbstractComplexListType) column).getListType();
       // 2 level nesting supported
       if (isColumnListType(listType)) {
@@ -751,7 +765,7 @@ public static Object[] fromCSV(String csvText, Schema schema) {
 
     if (csvArray.length != columns.length) {
       throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
-          "The data " + csvArray + " has the wrong number of fields.");
+          "The data " + Arrays.toString(csvArray) + " has the wrong number of fields.");
     }
 
     Object[] objectArray = new Object[csvArray.length];
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/AVROIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/AVROIntermediateDataFormat.java
index fef82026..68eb6eb9 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/AVROIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/AVROIntermediateDataFormat.java
@@ -120,6 +120,7 @@ public Object[] getObjectData() {
   public void write(DataOutput out) throws IOException {
     // do we need to write the schema?
     DatumWriter writer = new GenericDatumWriter(avroSchema);
+    assert out instanceof DataOutputStream;
     BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
     writer.write(data, encoder);
   }
@@ -130,6 +131,7 @@ public void write(DataOutput out) throws IOException {
   @Override
   public void read(DataInput in) throws IOException {
     DatumReader reader = new GenericDatumReader(avroSchema);
+    assert in instanceof InputStream;
     Decoder decoder = DecoderFactory.get().binaryDecoder((InputStream) in, null);
     data = reader.read(null, decoder);
   }
@@ -238,7 +240,7 @@ private GenericRecord toAVRO(Object[] objectArray) {
 
     if (objectArray.length != columns.length) {
       throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
-          "The data " + objectArray.toString() + " has the wrong number of fields.");
+          "The data " + Arrays.toString(objectArray) + " has the wrong number of fields.");
     }
     // get avro schema from sqoop schema
     GenericRecord avroObject = new GenericData.Record(avroSchema);
@@ -257,9 +259,6 @@ private GenericRecord toAVRO(Object[] objectArray) {
       case SET:
         avroObject.put(columns[i].getName(), toList((Object[]) objectArray[i]));
         break;
-      case MAP:
-        avroObject.put(columns[i].getName(), objectArray[i]);
-        break;
       case ENUM:
         GenericData.EnumSymbol enumValue = new GenericData.EnumSymbol(createEnumSchema(columns[i]),
             (String) objectArray[i]);
@@ -272,6 +271,7 @@ private GenericRecord toAVRO(Object[] objectArray) {
       case UNKNOWN:
         avroObject.put(columns[i].getName(), ByteBuffer.wrap((byte[]) objectArray[i]));
         break;
+      case MAP:
       case FIXED_POINT:
       case FLOATING_POINT:
         avroObject.put(columns[i].getName(), objectArray[i]);
@@ -298,7 +298,7 @@ private GenericRecord toAVRO(Object[] objectArray) {
             .getTime());
         break;
       case BIT:
-        avroObject.put(columns[i].getName(), Boolean.valueOf((Boolean) objectArray[i]));
+        avroObject.put(columns[i].getName(), Boolean.valueOf(objectArray[i].toString()));
         break;
       default:
         throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
@@ -412,13 +412,12 @@ private Object[] toObject(GenericRecord record) {
       case SET:
         object[nameIndex] = toObjectArray((List) obj);
         break;
-      case MAP:
-        object[nameIndex] = obj;
-        break;
       case ENUM:
         // stored as enum symbol
       case TEXT:
         // stored as UTF8
+      case DECIMAL:
+        // stored as string
         object[nameIndex] = obj.toString();
         break;
       case BINARY:
@@ -426,15 +425,12 @@ private Object[] toObject(GenericRecord record) {
         // stored as byte buffer
         object[nameIndex] = getBytesFromByteBuffer(obj);
         break;
+      case MAP:
       case FIXED_POINT:
       case FLOATING_POINT:
         // stored as java objects in avro as well
         object[nameIndex] = obj;
         break;
-      case DECIMAL:
-        // stored as string
-        object[nameIndex] = obj.toString();
-        break;
       case DATE:
         Long dateInMillis = (Long) obj;
         object[nameIndex] = new org.joda.time.LocalDate(dateInMillis);
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
index 0fac41aa..9708c058 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
@@ -37,6 +37,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import java.util.Arrays;
 import java.util.Map;
 import java.util.Set;
@@ -223,7 +224,7 @@ private JSONObject toJSON(Object[] objectArray) {
     Column[] columns = schema.getColumnsArray();
 
     if (objectArray.length != columns.length) {
-      throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "The data " + objectArray.toString()
+      throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001, "The data " + Arrays.toString(objectArray)
           + " has the wrong number of fields.");
     }
     JSONObject json = new JSONObject();
@@ -251,14 +252,12 @@ private JSONObject toJSON(Object[] objectArray) {
         jsonObject.putAll(map);
         json.put(columns[i].getName(), jsonObject);
         break;
-      case ENUM:
-      case TEXT:
-        json.put(columns[i].getName(), objectArray[i]);
-        break;
       case BINARY:
       case UNKNOWN:
         json.put(columns[i].getName(), Base64.encodeBase64String((byte[]) objectArray[i]));
         break;
+      case ENUM:
+      case TEXT:
       case FIXED_POINT:
       case FLOATING_POINT:
       case DECIMAL:
@@ -363,13 +362,12 @@ private Object[] toObject(JSONObject json) {
     }
 
     Column[] columns = schema.getColumnsArray();
     Object[] object = new Object[columns.length];
-
-    Set jsonKeyNames = json.keySet();
-    for (String name : jsonKeyNames) {
-      Integer nameIndex = schema.getColumnNameIndex(name);
+    Set<Map.Entry<String, Object>> entrySet = json.entrySet();
+    for (Map.Entry<String, Object> entry : entrySet) {
+      Integer nameIndex = schema.getColumnNameIndex(entry.getKey());
       Column column = columns[nameIndex];
-      Object obj = json.get(name);
+      Object obj = entry.getValue();
       // null is a possible value
       if (obj == null && !column.isNullable()) {
         throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005,
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java
index f9b676b1..aead9ef8 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java
@@ -328,16 +328,16 @@ public void testToDecimaPointReturnsDecimal() {
     Decimal col = new Decimal("dd", 4, 2);
     assertTrue(toDecimal(text, col) instanceof BigDecimal);
     BigDecimal bd = (BigDecimal) toDecimal(text, col);
-    assertEquals("23.44", toCSVDecimal(bd));
+    assertEquals(toCSVDecimal(bd), "23.44");
   }
 
   @Test
   public void testToDecimaPoint2ReturnsDecimal() {
-    String text = "23.44444444";
+    String text = "123456.44444444";
     Decimal col = new Decimal("dd", 8, 2);
     assertTrue(toDecimal(text, col) instanceof BigDecimal);
     BigDecimal bd = (BigDecimal) toDecimal(text, col);
-    assertEquals("23.444444", toCSVDecimal(bd));
+    assertEquals(toCSVDecimal(bd), "123456.44");
   }
 
   @Test
@@ -346,7 +346,7 @@ public void testToDecimaPointNoScaleNoPrecisionReturnsDecimal() {
     Decimal col = new Decimal("dd", null, null);
     assertTrue(toDecimal(text, col) instanceof BigDecimal);
     BigDecimal bd = (BigDecimal) toDecimal(text, col);
-    assertEquals("23.44444444", toCSVDecimal(bd));
+    assertEquals(toCSVDecimal(bd), "23.44444444");
   }
 
   @Test
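
Note on two of the recurring fixes in this patch, with a minimal self-contained sketch (the class name and sample values below are illustrative only, not taken from the patch). SqoopIDFUtils.toDecimal previously called bd.setScale(scale, RoundingMode.HALF_UP) and discarded the result; BigDecimal is immutable, so setScale returns a new instance and the receiver keeps its original scale, which is why the patch reassigns the returned value. Similarly, Java arrays do not override toString(), so "The data " + csvArray prints an object identity string rather than the row contents; Arrays.toString(...) prints the element values, which is what the error messages and the updated tests rely on. The remaining changes follow the same spirit: Character.valueOf/Integer.valueOf/Long.valueOf replace boxing constructors, Collections.unmodifiableSet protects the public TRUE_BIT_SET/FALSE_BIT_SET constants from being mutated by callers, and the added asserts document the expected runtime types before each cast.

    import java.math.BigDecimal;
    import java.math.RoundingMode;
    import java.util.Arrays;

    public class PatchNotesDemo {
      public static void main(String[] args) {
        BigDecimal bd = new BigDecimal("123456.44444444");

        // Old pattern: the rounded result is discarded because BigDecimal is immutable.
        bd.setScale(2, RoundingMode.HALF_UP);
        System.out.println(bd); // 123456.44444444

        // Patched pattern: reassign the new, rounded instance.
        bd = bd.setScale(2, RoundingMode.HALF_UP);
        System.out.println(bd); // 123456.44

        String[] csvArray = { "1", "'a'" };
        // Concatenating the array directly prints something like [Ljava.lang.String;@... (object identity).
        System.out.println("The data " + csvArray);
        // Arrays.toString prints the elements: The data [1, 'a']
        System.out.println("The data " + Arrays.toString(csvArray));
      }
    }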