diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java
new file mode 100644
index 00000000..48adae1c
--- /dev/null
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/common/SqoopIDFUtils.java
@@ -0,0 +1,396 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.connector.common;
+
+import org.apache.sqoop.common.SqoopException;
+import org.apache.sqoop.connector.idf.CSVIntermediateDataFormatError;
+import org.apache.sqoop.schema.type.AbstractComplexListType;
+import org.apache.sqoop.schema.type.Column;
+import org.apache.sqoop.schema.type.ColumnType;
+import org.apache.sqoop.schema.type.FixedPoint;
+import org.apache.sqoop.schema.type.FloatingPoint;
+import org.joda.time.LocalDate;
+import org.joda.time.LocalTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+
+import java.io.UnsupportedEncodingException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+
+/**
+ * Utility methods for connectors to encode data into the formats Sqoop
+ * expects, as documented at
+ * https://cwiki.apache.org/confluence/display/SQOOP/Intermediate+Data+Format+API
+ */
+
+public class SqoopIDFUtils {
+
+ // Literal written to the CSV text in place of a null field.
+ public static final String NULL_VALUE = "NULL";
+
+ // ISO-8859-1 is an 8-bit codec that is supported in every java
+ // implementation.
+ public static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
+
+ // Characters that have an escaped form: backslash, NUL, LF, CR, 0x1A,
+ // double quote and single quote. Entry i pairs with replacements[i] below.
+ // NOTE(review): the array is public and mutable; callers must not modify it.
+ public static final char[] originals = { 0x5C, 0x00, 0x0A, 0x0D, 0x1A, 0x22, 0x27 };
+
+ public static final char CSV_SEPARATOR_CHARACTER = ',';
+ public static final char ESCAPE_CHARACTER = '\\';
+ public static final char QUOTE_CHARACTER = '\'';
+
+ // string related replacements: the two-character escape sequence (escape
+ // character + marker) for the character at the same index in originals
+ private static final String[] replacements = {
+   new String(new char[] { ESCAPE_CHARACTER, '\\' }),
+   new String(new char[] { ESCAPE_CHARACTER, '0' }),
+   new String(new char[] { ESCAPE_CHARACTER, 'n' }),
+   new String(new char[] { ESCAPE_CHARACTER, 'r' }),
+   new String(new char[] { ESCAPE_CHARACTER, 'Z' }),
+   new String(new char[] { ESCAPE_CHARACTER, '\"' }),
+   new String(new char[] { ESCAPE_CHARACTER, '\'' })
+ };
+
+ // http://www.joda.org/joda-time/key_format.html provides details on the
+ // formatter tokens.
+ // A date time can carry a fraction and/or a timezone, hence four formatters.
+ // Note that "NoFractionAndTimeZone" means neither a fraction nor a timezone.
+ public static final DateTimeFormatter dtfWithFractionAndTimeZone = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSZ");
+ public static final DateTimeFormatter dtfWithNoFractionAndTimeZone = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
+ public static final DateTimeFormatter dtfWithFractionNoTimeZone = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS");
+ public static final DateTimeFormatter dtfWithNoFractionWithTimeZone = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ssZ");
+
+ // only date, no time
+ public static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
+ // time with fraction only, no timezone
+ public static final DateTimeFormatter tfWithFraction = DateTimeFormat.forPattern("HH:mm:ss.SSSSSS");
+ // time without fraction, no timezone
+ public static final DateTimeFormatter tfWithNoFraction = DateTimeFormat.forPattern("HH:mm:ss");
+
+ // Exact (case-sensitive) spellings accepted for a BIT value; the sets exist
+ // for constant-time membership checks and are typed to avoid raw collections.
+ static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" };
+ static final Set<String> TRUE_BIT_SET = new HashSet<String>(Arrays.asList(TRUE_BIT_VALUES));
+ static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" };
+ static final Set<String> FALSE_BIT_SET = new HashSet<String>(Arrays.asList(FALSE_BIT_VALUES));
+
+ // ******** Number Column Type utils***********
+
+ /**
+  * Parses the CSV text of a fixed point column into a boxed integral value.
+  *
+  * @param csvString the decimal text of the value
+  * @param column the column definition; must be a {@link FixedPoint}
+  * @return an {@link Integer} when the column declares a byte size that fits
+  *         in an int, otherwise (including when no size is declared) a
+  *         {@link Long}
+  * @throws NumberFormatException if the text is not a valid number of the
+  *         selected width
+  * @throws ClassCastException if the column is not a {@link FixedPoint}
+  */
+ public static Object toFixedPoint(String csvString, Column column) {
+   Long byteSize = ((FixedPoint) column).getByteSize();
+   // The column size is a byte count while Integer.SIZE is a bit count, so
+   // convert before comparing; otherwise 8-byte columns would be parsed as
+   // Integer and overflow.
+   if (byteSize != null && byteSize <= Integer.SIZE / Byte.SIZE) {
+     return Integer.valueOf(csvString);
+   }
+   return Long.valueOf(csvString);
+ }
+
+ /**
+  * Parses the CSV text of a floating point column into a boxed value.
+  *
+  * @param csvString the text of the value
+  * @param column the column definition; must be a {@link FloatingPoint}
+  * @return a {@link Float} when the column declares a byte size that fits in
+  *         a float, otherwise (including when no size is declared) a
+  *         {@link Double}
+  * @throws NumberFormatException if the text is not a parsable number
+  * @throws ClassCastException if the column is not a {@link FloatingPoint}
+  */
+ public static Object toFloatingPoint(String csvString, Column column) {
+   Long byteSize = ((FloatingPoint) column).getByteSize();
+   // The column size is a byte count while Float.SIZE is a bit count, so
+   // convert before comparing; otherwise 8-byte columns would be parsed as
+   // Float and silently lose precision.
+   if (byteSize != null && byteSize <= Float.SIZE / Byte.SIZE) {
+     return Float.valueOf(csvString);
+   }
+   return Double.valueOf(csvString);
+ }
+
+ public static Object toDecimal(String csvString, Column column) {
+ return new BigDecimal(csvString);
+ }
+
+ // ********** BIT Column Type utils******************
+ public static void encodeToCSVBit(Object[] objectArray, int i) {
+ String bitStringValue = objectArray[i].toString();
+ if ((TRUE_BIT_SET.contains(bitStringValue)) || (FALSE_BIT_SET.contains(bitStringValue))) {
+ objectArray[i] = bitStringValue;
+ } else {
+ throw new SqoopException(CSVIntermediateDataFormatError.CSV_INTERMEDIATE_DATA_FORMAT_0009, " given bit value: "
+ + objectArray[i]);
+ }
+ }
+
+ /**
+  * Converts the CSV text of a bit column into a {@link Boolean}.
+  *
+  * @param csvString the text of the value
+  * @param returnValue ignored; the incoming value is never returned
+  * @return {@code true} for an accepted true spelling, {@code false} for an
+  *         accepted false spelling
+  * @throws SqoopException for any other text
+  */
+ public static Object toBit(String csvString, Object returnValue) {
+   if (TRUE_BIT_SET.contains(csvString)) {
+     return Boolean.TRUE;
+   }
+   if (FALSE_BIT_SET.contains(csvString)) {
+     return Boolean.FALSE;
+   }
+   // anything outside the two accepted sets is unsupported for BITs
+   throw new SqoopException(CSVIntermediateDataFormatError.CSV_INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + csvString);
+ }
+
+ // *********** DATE and TIME Column Type utils **********
+
+ /**
+  * Replaces the Joda {@code LocalDate} at index {@code i} with its quoted
+  * {@code yyyy-MM-dd} text.
+  *
+  * @param objectArray the row being encoded; modified in place
+  * @param i index of the date field within the row
+  */
+ public static void encodeToCSVDate(Object[] objectArray, int i) {
+   String formatted = df.print((org.joda.time.LocalDate) objectArray[i]);
+   objectArray[i] = encloseWithQuote(formatted);
+ }
+
+ public static void encodeToCSVTime(Object[] objectArray, int i, Column col) {
+ if (((org.apache.sqoop.schema.type.Time) col).hasFraction()) {
+ objectArray[i] = encloseWithQuote(tfWithFraction.print((org.joda.time.LocalTime) objectArray[i]));
+ } else {
+ objectArray[i] = encloseWithQuote(tfWithNoFraction.print((org.joda.time.LocalTime) objectArray[i]));
+ }
+ }
+
+ public static Object toDate(String csvString, Column column) {
+ return LocalDate.parse(removeQuotes(csvString));
+ }
+
+ public static Object toTime(String csvString, Column column) {
+ return LocalTime.parse(removeQuotes(csvString));
+ }
+
+ // *********** DATE TIME Column Type utils **********
+
+ /**
+  * Stores the quoted text of a zone-less date time at index {@code i},
+  * printed with or without a fraction as the column dictates.
+  *
+  * @param objectArray the row being encoded; modified in place
+  * @param i index of the date time field within the row
+  * @param col the column definition; must be a Sqoop {@code DateTime} column
+  * @param localDateTime the value to print
+  */
+ public static void encodeToCSVLocalDateTime(Object[] objectArray, int i, Column col, org.joda.time.LocalDateTime localDateTime) {
+   org.apache.sqoop.schema.type.DateTime dateTimeColumn = (org.apache.sqoop.schema.type.DateTime) col;
+   DateTimeFormatter formatter =
+       dateTimeColumn.hasFraction() ? dtfWithFractionNoTimeZone : dtfWithNoFractionAndTimeZone;
+   objectArray[i] = encloseWithQuote(formatter.print(localDateTime));
+ }
+
+ /**
+  * Stores the quoted text of a date time at index {@code i}, choosing the
+  * formatter from the column's fraction and timezone flags.
+  *
+  * @param objectArray the row being encoded; modified in place
+  * @param i index of the date time field within the row
+  * @param col the column definition; must be a Sqoop {@code DateTime} column
+  * @param dateTime the value to print
+  */
+ public static void encodeToCSVDateTime(Object[] objectArray, int i, Column col, org.joda.time.DateTime dateTime) {
+   org.apache.sqoop.schema.type.DateTime dateTimeColumn = (org.apache.sqoop.schema.type.DateTime) col;
+   DateTimeFormatter formatter;
+   // fraction is checked first, then timezone, as in the flat if/else chain
+   if (dateTimeColumn.hasFraction()) {
+     formatter = dateTimeColumn.hasTimezone() ? dtfWithFractionAndTimeZone : dtfWithFractionNoTimeZone;
+   } else {
+     formatter = dateTimeColumn.hasTimezone() ? dtfWithNoFractionWithTimeZone : dtfWithNoFractionAndTimeZone;
+   }
+   objectArray[i] = encloseWithQuote(formatter.print(dateTime));
+ }
+
+ /**
+  * Parses the CSV text of a date time column after stripping its quotes.
+  *
+  * @param fieldString the quoted date time text
+  * @param column the column definition; must be a Sqoop {@code DateTime}
+  *        column
+  * @return a Joda {@code DateTime} (keeping the parsed offset) when the column
+  *         has a timezone, otherwise a Joda {@code LocalDateTime}
+  */
+ public static Object toDateTime(String fieldString, Column column) {
+   String text = removeQuotes(fieldString);
+   org.apache.sqoop.schema.type.DateTime dateTimeColumn = (org.apache.sqoop.schema.type.DateTime) column;
+   if (dateTimeColumn.hasFraction()) {
+     if (dateTimeColumn.hasTimezone()) {
+       // withOffsetParsed keeps the offset found in the text, e.g.
+       // '2004-06-09T10:20:30-08:00' yields a datetime with a fixed zone of
+       // -08:00 (no daylight savings rules)
+       return dtfWithFractionAndTimeZone.withOffsetParsed().parseDateTime(text);
+     }
+     // local date time is used explicitly so that no timezone is attached
+     return dtfWithFractionNoTimeZone.parseLocalDateTime(text);
+   }
+   if (dateTimeColumn.hasTimezone()) {
+     return dtfWithNoFractionWithTimeZone.withOffsetParsed().parseDateTime(text);
+   }
+   // local date time is used explicitly so that no timezone is attached
+   return dtfWithNoFractionAndTimeZone.parseLocalDateTime(text);
+ }
+
+ // ************ MAP Column Type utils*********
+
+ @SuppressWarnings("unchecked")
+ public static String encodeToCSVMap(Map
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java
new file mode 100644
index 00000000..0dde2e7c
--- /dev/null
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/common/TestSqoopIDFUtils.java
@@ -0,0 +1,162 @@
+package org.apache.sqoop.connector.common;
+
+import static org.junit.Assert.*;
+import static org.apache.sqoop.connector.common.SqoopIDFUtils.*;
+
+import org.apache.sqoop.schema.type.AbstractComplexListType;
+import org.apache.sqoop.schema.type.Array;
+import org.apache.sqoop.schema.type.Text;
+import org.junit.Test;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Unit tests for the static encode/decode helpers in {@code SqoopIDFUtils}.
+ * Assertions follow JUnit's (expected, actual) argument order so that failure
+ * messages read correctly.
+ */
+public class TestSqoopIDFUtils {
+
+  /**
+   * Builds the single-quoted text for a binary field by decoding the bytes
+   * with {@code BYTE_FIELD_CHARSET}. Shared with other test classes.
+   *
+   * @param byteFieldData raw bytes of the field
+   * @return the quoted string, or {@code null} if the charset is unavailable
+   */
+  public static String getByteFieldString(byte[] byteFieldData) {
+    try {
+      return "'" + new String(byteFieldData, BYTE_FIELD_CHARSET) + "'";
+    } catch (UnsupportedEncodingException e) {
+      // Should never get to this point because ISO-8859-1 is a standard codec.
+      return null;
+    }
+  }
+
+  @Test
+  public void testEncloseStringWithQuotes() {
+    String test = "test";
+    String quotedText = encloseWithQuote(test);
+    assertEquals("'test'", quotedText);
+  }
+
+  @Test
+  public void testStringWithQuotesToEncloseStringWithQuotes() {
+    String test = "'test'";
+    String quotedText = encloseWithQuote(test);
+    assertEquals("''test''", quotedText);
+  }
+
+  @Test
+  public void testRemoveQuotes() {
+    String test = "'test'";
+    String unquotedText = removeQuotes(test);
+    assertEquals("test", unquotedText);
+  }
+
+  @Test
+  public void testStringWithNoQuotesRemoveQuotes() {
+    String test = "test";
+    String unquotedText = removeQuotes(test);
+    assertEquals("test", unquotedText);
+  }
+
+  // NOTE(review): same scenario as testStringWithNoQuotesRemoveQuotes; kept
+  // so that the set of test names is unchanged.
+  @Test
+  public void testStingWithNoQuotesRemoveQuotes() {
+    String test = "test";
+    String unquotedText = removeQuotes(test);
+    assertEquals("test", unquotedText);
+  }
+
+  @Test
+  public void testExample1EncodeToCSVString() {
+    String test = "test";
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'test'", encodedText);
+  }
+
+  @Test
+  public void testExample2EncodeToCSVString() {
+    String test = "test,test1";
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'test,test1'", encodedText);
+  }
+
+  @Test
+  public void testExample3EncodeToCSVString() {
+    String test = "test,'test1";
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'test,\\'test1'", encodedText);
+  }
+
+  @Test
+  public void testExample4EncodeToCSVString() {
+    String test = "test,\"test1";
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'test,\\\"test1'", encodedText);
+  }
+
+  @Test
+  public void testExample4ToString() {
+    String test = "'test,\\\"test1'";
+    String expectedString = "test,\"test1";
+    String decodedText = toText(test);
+    assertEquals(expectedString, decodedText);
+  }
+
+  // The next three tests previously lacked @Test and were never executed.
+  @Test
+  public void testExample5EncodeToCSVString() {
+    String test = new String(new char[] { 0x0A });
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'\\n'", encodedText);
+  }
+
+  @Test
+  public void testExample5ToString() {
+    String test = "'\\n'";
+    String expectedString = new String(new char[] { 0x0A });
+    String decodedText = toText(test);
+    assertEquals(expectedString, decodedText);
+  }
+
+  @Test
+  public void testExample6EncodeToCSVString() {
+    String test = new String(new char[] { 0x0D });
+    String encodedText = encodeToCSVString(test);
+    assertEquals("'\\r'", encodedText);
+  }
+
+  @Test
+  public void testEncodeByteToCSVString() {
+    // byte[0] = \r, byte[1] = -112, byte[2] = 54 - 2's complements
+    byte[] bytes = new byte[] { (byte) 0x0D, (byte) -112, (byte) 54 };
+    String encodedText = encodeToCSVByteArray(bytes);
+    // the carriage return is expected in its escaped form
+    String expectedText = getByteFieldString(bytes).replaceAll("\r", "\\\\r");
+    assertEquals(expectedText, encodedText);
+  }
+
+  @Test
+  public void testEncodeArrayIntegersToCSVString() {
+    List<Integer> list = new ArrayList<Integer>();
+    list.add(1);
+    list.add(2);
+    AbstractComplexListType array = new Array("a", new Text("t"));
+    String encodedText = encodeToCSVList(list.toArray(), array);
+    assertEquals("'[1,2]'", encodedText);
+  }
+
+  @Test
+  public void testEncodeArrayStringsToCSVString() {
+    List<String> list = new ArrayList<String>();
+    list.add("A");
+    list.add("B");
+    AbstractComplexListType array = new Array("a", new Text("t"));
+    String encodedText = encodeToCSVList(list.toArray(), array);
+    assertEquals("'[\"A\",\"B\"]'", encodedText);
+  }
+
+  // The map stays a raw type because the exact generic signature of
+  // encodeToCSVMap is not fixed by this test.
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  @Test
+  public void testEncodeMapToCSVString() {
+    List<String> list = new ArrayList<String>();
+    list.add("A");
+    list.add("B");
+    Map map = new HashMap();
+    map.put("A", list);
+    org.apache.sqoop.schema.type.Map mapCol = new org.apache.sqoop.schema.type.Map("a", new Text("t"), new Array("r", new Text(
+        "tr")));
+    String encodedText = encodeToCSVMap(map, mapCol);
+    assertEquals("'{\"A\":[\"A\",\"B\"]}'", encodedText);
+  }
+
+}
\ No newline at end of file
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index 1a2a96fb..83a95ec6 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -22,8 +22,9 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.apache.sqoop.connector.common.SqoopIDFUtils.*;
+import static org.apache.sqoop.connector.common.TestSqoopIDFUtils.getByteFieldString;
-import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -57,16 +58,6 @@ public void setUp() {
dataFormat = new CSVIntermediateDataFormat();
}
- private String getByteFieldString(byte[] byteFieldData) {
- try {
- return new StringBuilder("'")
- .append(new String(byteFieldData, CSVIntermediateDataFormat.BYTE_FIELD_CHARSET))
- .append("'").toString();
- } catch (UnsupportedEncodingException e) {
- // Should never get to this point because ISO-8859-1 is a standard codec.
- return null;
- }
- }
//**************test cases for null and empty input*******************
@@ -114,7 +105,7 @@ public void testNullValueAsObjectArrayInAndCSVTextOut() {
String csvText = dataFormat.getCSVTextData();
String[] textValues = csvText.split(",");
for (String text : textValues) {
- assertEquals(text, CSVIntermediateDataFormat.NULL_VALUE);
+ assertEquals(text, NULL_VALUE);
}
}
@@ -183,7 +174,7 @@ public void testNullValueAsCSVTextInAndCSVTextOut() {
String csvText = dataFormat.getCSVTextData();
String[] textValues = csvText.split(",");
for (String text : textValues) {
- assertEquals(text, CSVIntermediateDataFormat.NULL_VALUE);
+ assertEquals(text, NULL_VALUE);
}
}
@@ -252,8 +243,8 @@ public void testInputAsObjectArayInCSVTextOut() {
dataFormat.setObjectData(in);
//byte[0] = \r byte[1] = -112, byte[1] = 54 - 2's complements
- String testData = "10,34,'54','random data'," +
- getByteFieldString(byteFieldData).replaceAll("\r", "\\\\r") + ",'\\n','TEST_ENUM'";
+ String testData = "10,34,'54','random data'," + getByteFieldString(byteFieldData).replaceAll("\r", "\\\\r")
+ + ",'\\n','TEST_ENUM'";
assertEquals(testData, dataFormat.getCSVTextData());
}
@@ -315,8 +306,8 @@ public void testObjectArrayWithNullInCSVTextOut() {
dataFormat.setObjectData(in);
//byte[0] = \r byte[1] = -112, byte[1] = 54 - 2's complements
- String testData = "10,34,NULL,'random data'," +
- getByteFieldString(byteFieldData).replaceAll("\r", "\\\\r") + ",'\\n','TEST_ENUM'";
+ String testData = "10,34,NULL,'random data'," + getByteFieldString(byteFieldData).replaceAll("\r", "\\\\r")
+ + ",'\\n','TEST_ENUM'";
assertEquals(testData, dataFormat.getCSVTextData());
}
@@ -521,7 +512,7 @@ public void testCurrentDateTime2WithFractionNoTimezoneWithCSVTextInObjectArrayOu
dataFormat.setSchema(schema);
// current date time
org.joda.time.DateTime dateTime = new org.joda.time.DateTime();
- String dateTimeString = CSVIntermediateDataFormat.dtfWithFractionNoTimeZone.print(dateTime);
+ String dateTimeString = dtfWithFractionNoTimeZone.print(dateTime);
dataFormat.setCSVTextData("'" + dateTimeString + "'");
assertEquals(dateTimeString.replace(" ", "T"), dataFormat.getObjectData()[0].toString());
}