out
.
+ *
+ * @param out <code>DataOutput</code> to serialize this object into.
+ * @throws IOException
+ */
+ public abstract void write(DataOutput out) throws IOException;
+
+ /**
+ * Deserialize the fields of this object from <code>in</code>.
+ *
+ * <p>For efficiency, implementations should attempt to re-use storage in the
+ * existing object where possible.</p>
+ *
+ * @param in <code>DataInput</code> to deserialize this object from.
+ * @throws IOException
+ */
+ public abstract void read(DataInput in) throws IOException;
+}
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
new file mode 100644
index 00000000..92190744
--- /dev/null
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sqoop.connector.idf;
+
+import org.apache.sqoop.common.ErrorCode;
+
+/** Error codes of the intermediate data format package; each constant carries a fixed message. */
+public enum IntermediateDataFormatError implements ErrorCode {
+  /** An unknown error has occurred. */
+  INTERMEDIATE_DATA_FORMAT_0000("An unknown error has occurred."),
+
+  /** An encoding is missing in the Java native libraries. */
+  INTERMEDIATE_DATA_FORMAT_0001("Native character set error."),
+
+  /** Error while escaping a row. */
+  INTERMEDIATE_DATA_FORMAT_0002("An error has occurred while escaping a row."),
+
+  /** Error while unescaping a row. */
+  INTERMEDIATE_DATA_FORMAT_0003("An error has occurred while unescaping a row."),
+
+  /** Column type isn't known by Intermediate Data Format. */
+  INTERMEDIATE_DATA_FORMAT_0004("Unknown column type."),
+
+  /** The number of fields is wrong. */
+  INTERMEDIATE_DATA_FORMAT_0005("Wrong number of fields.");
+
+  /** Fixed, human-readable description attached to the constant. */
+  private final String message;
+
+  IntermediateDataFormatError(String message) {
+    this.message = message;
+  }
+
+  @Override public String getCode() {
+    return name(); // the constant's own name doubles as the error code
+  }
+
+  @Override public String getMessage() {
+    return message;
+  }
+}
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormatTest.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormatTest.java
new file mode 100644
index 00000000..df6d30f2
--- /dev/null
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormatTest.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sqoop.connector.idf;
+
+import org.apache.sqoop.common.SqoopException;
+import org.apache.sqoop.schema.Schema;
+import org.apache.sqoop.schema.type.Binary;
+import org.apache.sqoop.schema.type.FixedPoint;
+import org.apache.sqoop.schema.type.Text;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Round-trip tests for {@link CSVIntermediateDataFormat} between CSV text and object rows.
+ */
+public class CSVIntermediateDataFormatTest {
+
+  /** Single-byte charset used to turn raw bytes into text one character per byte. */
+  private static final String BYTE_FIELD_ENCODING = "ISO-8859-1";
+
+  /** Format under test; replaced with a fresh instance before every test. */
+  private IntermediateDataFormat<?> data;
+
+  @Before
+  public void setUp() {
+    data = new CSVIntermediateDataFormat();
+  }
+
+  /**
+   * Renders a byte array the way these tests write a binary field in CSV text:
+   * wrapped in single quotes, with the bytes decoded as ISO-8859-1 characters.
+   */
+  private String getByteFieldString(byte[] byteFieldData) {
+    try {
+      return "'" + new String(byteFieldData, BYTE_FIELD_ENCODING) + "'";
+    } catch (UnsupportedEncodingException e) {
+      // ISO-8859-1 is a standard charset, so this should never happen; fail loudly instead
+      // of returning null, which the callers would concatenate as the text "null".
+      throw new IllegalStateException("Missing charset " + BYTE_FIELD_ENCODING, e);
+    }
+  }
+
+  /**
+   * Builds the schema shared by the mixed-type tests: two fixed-point columns, two text
+   * columns, one binary column and a final text column, named "1" through "6".
+   */
+  private static Schema mixedSchema() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new FixedPoint("1"))
+        .addColumn(new FixedPoint("2"))
+        .addColumn(new Text("3"))
+        .addColumn(new Text("4"))
+        .addColumn(new Binary("5"))
+        .addColumn(new Text("6"));
+    return schema;
+  }
+
+  /**
+   * Text handed to the format through setTextData() must come back from getTextData()
+   * unchanged; no schema is set for this test.
+   */
+  @Test
+  public void testStringInStringOut() {
+    // NOTE(review): String.valueOf(0x0A) yields the text "10", not a newline -- confirm intent.
+    String testData = "10,34,'54','random data',"
+        + getByteFieldString(new byte[] {(byte) -112, (byte) 54})
+        + ",'" + String.valueOf(0x0A) + "'";
+    data.setTextData(testData);
+    assertEquals(testData, data.getTextData());
+  }
+
+  /** A null text row must convert to a null object array. */
+  @Test
+  public void testNullStringInObjectOut() {
+    data.setSchema(mixedSchema());
+    data.setTextData(null);
+
+    assertNull(data.getObjectData());
+  }
+
+  /** Converting an empty text row to objects is expected to throw SqoopException. */
+  @Test(expected = SqoopException.class)
+  public void testEmptyStringInObjectOut() {
+    data.setSchema(mixedSchema());
+    data.setTextData("");
+
+    data.getObjectData();
+  }
+
+  /** CSV text is parsed into typed objects, including the escaped newline in the last field. */
+  @Test
+  public void testStringInObjectOut() {
+    // Binary field bytes: [0] = -112, [1] = 54 (two's complement).
+    String testData = "10,34,'54','random data',"
+        + getByteFieldString(new byte[] {(byte) -112, (byte) 54})
+        + ",'\\n'";
+    data.setSchema(mixedSchema());
+    data.setTextData(testData);
+
+    Object[] out = data.getObjectData();
+
+    assertEquals(Long.valueOf(10L), out[0]);
+    assertEquals(Long.valueOf(34L), out[1]);
+    assertEquals("54", out[2]);
+    assertEquals("random data", out[3]);
+    assertEquals(-112, ((byte[]) out[4])[0]);
+    assertEquals(54, ((byte[]) out[4])[1]);
+    assertEquals("\n", out[5].toString());
+  }
+
+  /** Objects are rendered as CSV text with carriage return and newline escaped. */
+  @Test
+  public void testObjectInStringOut() {
+    data.setSchema(mixedSchema());
+
+    byte[] byteFieldData = new byte[] {(byte) 0x0D, (byte) -112, (byte) 54};
+    Object[] in = new Object[6];
+    in[0] = Long.valueOf(10L);
+    in[1] = Long.valueOf(34L);
+    in[2] = "54";
+    in[3] = "random data";
+    in[4] = byteFieldData;
+    in[5] = new String(new char[] {0x0A});
+
+    data.setObjectData(in);
+
+    // Binary field bytes: [0] = \r, [1] = -112, [2] = 54 (two's complement); the \r is
+    // expected to show up escaped as the two characters backslash and 'r'.
+    String testData = "10,34,'54','random data',"
+        + getByteFieldString(byteFieldData).replaceAll("\r", "\\\\r") + ",'\\n'";
+    assertEquals(testData, data.getTextData());
+  }
+
+  /** Objects stored in the format come back equal, escapable characters included. */
+  @Test
+  public void testObjectInObjectOut() {
+    data.setSchema(mixedSchema());
+
+    Object[] in = new Object[6];
+    in[0] = Long.valueOf(10L);
+    in[1] = Long.valueOf(34L);
+    in[2] = "54";
+    in[3] = "random data";
+    in[4] = new byte[] {(byte) -112, (byte) 54}; // two's complement bytes
+    in[5] = new String(new char[] {0x0A}); // newline, an escapable character
+    Object[] inCopy = new Object[6];
+    System.arraycopy(in, 0, inCopy, 0, in.length);
+
+    // Modifies the input array, so we use the copy to confirm
+    data.setObjectData(in);
+
+    assertTrue(Arrays.deepEquals(inCopy, data.getObjectData()));
+  }
+
+  /** A text value holding every char from 0 to 255 survives the object round trip. */
+  @Test
+  public void testStringFullRangeOfCharacters() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Text("1"));
+    data.setSchema(schema);
+
+    char[] allCharArr = new char[256];
+    for (int i = 0; i < allCharArr.length; ++i) {
+      allCharArr[i] = (char) i;
+    }
+    String strData = new String(allCharArr);
+
+    Object[] in = {strData};
+    Object[] inCopy = new Object[1];
+    System.arraycopy(in, 0, inCopy, 0, in.length);
+
+    // Modifies the input array, so we use the copy to confirm
+    data.setObjectData(in);
+
+    assertEquals(strData, data.getObjectData()[0]);
+    assertTrue(Arrays.deepEquals(inCopy, data.getObjectData()));
+  }
+
+  /** A binary value holding every possible byte value survives the object round trip. */
+  @Test
+  public void testByteArrayFullRangeOfCharacters() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Binary("1"));
+    data.setSchema(schema);
+
+    byte[] allCharByteArr = new byte[256];
+    for (int i = 0; i < allCharByteArr.length; ++i) {
+      allCharByteArr[i] = (byte) i;
+    }
+
+    Object[] in = {allCharByteArr};
+    Object[] inCopy = new Object[1];
+    System.arraycopy(in, 0, inCopy, 0, in.length);
+
+    // Modifies the input array, so we use the copy to confirm
+    data.setObjectData(in);
+    assertTrue(Arrays.deepEquals(inCopy, data.getObjectData()));
+  }
+}
diff --git a/core/src/main/java/org/apache/sqoop/framework/JobManager.java b/core/src/main/java/org/apache/sqoop/framework/JobManager.java
index e0525846..1700432c 100644
--- a/core/src/main/java/org/apache/sqoop/framework/JobManager.java
+++ b/core/src/main/java/org/apache/sqoop/framework/JobManager.java
@@ -22,6 +22,7 @@
import org.apache.sqoop.common.SqoopException;
import org.apache.sqoop.connector.ConnectorManager;
import org.apache.sqoop.request.HttpEventContext;
+import org.apache.sqoop.connector.idf.IntermediateDataFormat;
import org.apache.sqoop.connector.spi.SqoopConnector;
import org.apache.sqoop.core.Reconfigurable;
import org.apache.sqoop.core.SqoopConfiguration;
@@ -327,6 +328,10 @@ public MSubmission submit(long jobId, HttpEventContext ctx) {
request.setJobName(job.getName());
request.setJobId(job.getPersistenceId());
request.setNotificationUrl(notificationBaseUrl + jobId);
+    Class<? extends IntermediateDataFormat<?>> dataFormatClass =
+        connector.getIntermediateDataFormat();
+    request.setIntermediateDataFormat(dataFormatClass); // reuse the class fetched above
+ // Create request object
// Let's register all important jars
// sqoop-common
@@ -343,6 +348,9 @@ public MSubmission submit(long jobId, HttpEventContext ctx) {
// Extra libraries that Sqoop code requires
request.addJarForClass(JSONValue.class);
+ // The IDF is used in the ETL process.
+ request.addJarForClass(dataFormatClass);
+
// Get connector callbacks
switch (job.getType()) {
case IMPORT:
diff --git a/core/src/main/java/org/apache/sqoop/framework/SubmissionRequest.java b/core/src/main/java/org/apache/sqoop/framework/SubmissionRequest.java
index a138db54..7900eee0 100644
--- a/core/src/main/java/org/apache/sqoop/framework/SubmissionRequest.java
+++ b/core/src/main/java/org/apache/sqoop/framework/SubmissionRequest.java
@@ -18,6 +18,7 @@
package org.apache.sqoop.framework;
import org.apache.sqoop.common.MutableMapContext;
+import org.apache.sqoop.connector.idf.IntermediateDataFormat;
import org.apache.sqoop.connector.spi.SqoopConnector;
import org.apache.sqoop.job.etl.CallbackBase;
import org.apache.sqoop.model.MJob;
@@ -107,6 +108,11 @@ public class SubmissionRequest {
*/
Integer loaders;
+  /**
+   * The intermediate data format this submission should use.
+   */
+  Class<? extends IntermediateDataFormat> intermediateDataFormat;
+
public SubmissionRequest() {
this.jars = new LinkedList