SQOOP-3224: Mainframe FTP transfer should have an option to use binary mode for transfer
(Chris Teoh via Szabolcs Vasas)
commit 0d6c455e5b
parent f5eda1e208

build.xml (18 changed lines):
@@ -253,6 +253,15 @@
   <property name="sqoop.test.mainframe.ftp.dataset.gdg" value="TSODIQ1.GDGTEXT" />
   <property name="sqoop.test.mainframe.ftp.dataset.gdg.filename" value="G0001V43" />
   <property name="sqoop.test.mainframe.ftp.dataset.gdg.md5" value="f0d0d171fdb8a03dbc1266ed179d7093" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.gdg" value="TSODIQ1.FOLDER" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.gdg.filename" value="G0002V45" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.gdg.md5" value="43eefbe34e466dd3f65a3e867a60809a" />
+  <property name="sqoop.test.mainframe.ftp.dataset.seq" value="TSODIQ1.GDGTEXT.G0001V43" />
+  <property name="sqoop.test.mainframe.ftp.dataset.seq.filename" value="G0001V43" />
+  <property name="sqoop.test.mainframe.ftp.dataset.seq.md5" value="f0d0d171fdb8a03dbc1266ed179d7093" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.seq" value="TSODIQ1.FOLDER.FOLDERTXT" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.seq.filename" value="FOLDERTXT" />
+  <property name="sqoop.test.mainframe.ftp.binary.dataset.seq.md5" value="1591c0fcc718fda7e9c1f3561d232b2b" />
   <property name="s3.bucket.url" value="" />
   <property name="s3.generator.command" value="" />
@@ -890,10 +899,19 @@
       <sysproperty key="sqoop.test.mainframe.ftp.dataset.gdg" value="${sqoop.test.mainframe.ftp.dataset.gdg}" />
       <sysproperty key="sqoop.test.mainframe.ftp.dataset.gdg.filename" value="${sqoop.test.mainframe.ftp.dataset.gdg.filename}" />
       <sysproperty key="sqoop.test.mainframe.ftp.dataset.gdg.md5" value="${sqoop.test.mainframe.ftp.dataset.gdg.md5}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.gdg" value="${sqoop.test.mainframe.ftp.binary.dataset.gdg}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.gdg.filename" value="${sqoop.test.mainframe.ftp.binary.dataset.gdg.filename}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.gdg.md5" value="${sqoop.test.mainframe.ftp.binary.dataset.gdg.md5}" />
       <sysproperty key="s3.bucket.url" value="${s3.bucket.url}" />
       <sysproperty key="s3.generator.command" value="${s3.generator.command}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.dataset.seq" value="${sqoop.test.mainframe.ftp.dataset.seq}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.dataset.seq.filename" value="${sqoop.test.mainframe.ftp.dataset.seq.filename}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.dataset.seq.md5" value="${sqoop.test.mainframe.ftp.dataset.seq.md5}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.seq" value="${sqoop.test.mainframe.ftp.binary.dataset.seq}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.seq.filename" value="${sqoop.test.mainframe.ftp.binary.dataset.seq.filename}" />
+      <sysproperty key="sqoop.test.mainframe.ftp.binary.dataset.seq.md5" value="${sqoop.test.mainframe.ftp.binary.dataset.seq.md5}" />
       <!-- Location of Hive logs -->
       <!--<sysproperty key="hive.log.dir"
                    value="${test.build.data}/sqoop/logs"/> -->
import-mainframe user documentation:
@@ -49,6 +49,7 @@ Argument                 Description
 +\--as-sequencefile+     Imports data to SequenceFiles
 +\--as-textfile+         Imports data as plain text (default)
 +\--as-parquetfile+      Imports data to Parquet Files
++\--as-binaryfile+       Imports data as binary files
 +\--delete-target-dir+   Delete the import target directory\
                          if it exists
 +-m,\--num-mappers <n>+  Use 'n' map tasks to import in parallel
@@ -193,6 +194,26 @@ $ sqoop import-mainframe --dataset SomePDS --jar-file mydatatypes.jar \
 
 This command will load the +SomePDSType+ class out of +mydatatypes.jar+.
 
+Generation Data Group and sequential data sets are also supported. The data set
+type can be specified with the --datasettype option followed by one of:
+
+'p' for a partitioned dataset (the default)
+'g' for a generation data group dataset
+'s' for a sequential dataset
+
+For generation data group datasets, Sqoop retrieves only the last (latest)
+generation.
+
+For sequential datasets, Sqoop retrieves only the file specified.
+
+Datasets stored on tape volumes are supported by specifying --tape true.
+
+By default, mainframe datasets are assumed to be plain text; attempting to
+transfer binary datasets this way will result in data corruption. Binary
+datasets are supported by specifying --as-binaryfile and, optionally,
+--buffersize followed by a buffer size in bytes. --buffersize defaults to
+32760 bytes. Altering the buffer size changes the number of records Sqoop
+reports as imported, because the binary dataset is read in chunks of
+--buffersize bytes: a larger buffer size means fewer records.
+
 Additional Import Configuration Properties
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 There are some additional properties which can be configured by modifying
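Illustrative note (not part of the patch): with the default --buffersize of 32760,
a 70000-byte sequential dataset would be reported as three imported records
(32760 + 32760 + 4480 bytes), while a --buffersize of 70000 or larger would report
it as a single record. The bytes transferred are identical in both cases.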
@@ -228,6 +249,23 @@ $ sqoop import-mainframe --connect z390 --dataset EMPLOYEES \
 Enter password: (hidden)
 ----
+
+Import of a tape-based generation data group dataset using a password alias,
+writing to an intermediate directory (--outdir) before moving it to --target-dir:
+----
+$ sqoop import-mainframe --dataset SomeGdg --connect <host> --username myuser --password-alias \
+  mypasswordalias --datasettype g --tape true --outdir /tmp/imported/sqoop \
+  --target-dir /data/imported/mainframe/SomeGdg
+----
+
+Import of a tape-based binary generation data group dataset with a buffer size of 64000,
+using a password alias and writing to an intermediate directory (--outdir) before moving
+it to --target-dir:
+----
+$ sqoop import-mainframe --dataset SomeGdg --connect <host> --username myuser --password-alias \
+  mypasswordalias --datasettype g --tape true --as-binaryfile --buffersize 64000 --outdir /tmp/imported/sqoop \
+  --target-dir /data/imported/mainframe/SomeGdg
+----
 
 Controlling the import parallelism (using 8 parallel tasks):
 
 ----
SqoopOptions.java:
@@ -87,7 +87,8 @@ public enum FileLayout {
     TextFile,
     SequenceFile,
     AvroDataFile,
-    ParquetFile
+    ParquetFile,
+    BinaryFile
   }
 
   /**
@@ -362,7 +363,12 @@ public String toString() {
   // Indicates if the data set is on tape to use different FTP parser
   @StoredAsProperty("mainframe.input.dataset.tape")
   private String mainframeInputDatasetTape;
+  // Indicates if binary or ascii FTP transfer mode should be used
+  @StoredAsProperty("mainframe.ftp.transfermode")
+  private String mainframeFtpTransferMode;
+  // Buffer size to use when using binary FTP transfer mode
+  @StoredAsProperty("mainframe.ftp.buffersize")
+  private Integer bufferSize;
 
   // Accumulo home directory
   private String accumuloHome; // not serialized to metastore.
   // Zookeeper home directory
@@ -1162,6 +1168,11 @@ private void initDefaults(Configuration baseConfiguration) {
     this.escapeColumnMappingEnabled = true;
 
     this.parquetConfiguratorImplementation = HADOOP;
+
+    // set default transfer mode to ascii
+    this.mainframeFtpTransferMode = MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_ASCII;
+    // set default buffer size for mainframe binary transfers
+    this.bufferSize = MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE;
   }
 
   /**
@@ -2499,6 +2510,23 @@ public Boolean getMainframeInputDatasetTape() {
   public void setMainframeInputDatasetTape(String txtIsFromTape) {
     mainframeInputDatasetTape = Boolean.valueOf(Boolean.parseBoolean(txtIsFromTape)).toString();
   }
 
+  // returns the buffer size set.
+  public Integer getBufferSize() {
+    return bufferSize;
+  }
+
+  public void setMainframeFtpTransferMode(String transferMode) {
+    mainframeFtpTransferMode = transferMode;
+  }
+
+  public String getMainframeFtpTransferMode() {
+    return mainframeFtpTransferMode;
+  }
+
+  // sets the binary transfer buffer size, defaults to MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE
+  public void setBufferSize(int buf) {
+    bufferSize = buf;
+  }
+
   public static String getAccumuloHomeDefault() {
     // Set this with $ACCUMULO_HOME, but -Daccumulo.home can override.
New file (42 lines): src/java/org/apache/sqoop/mapreduce/ByteKeyOutputFormat.java
@@ -0,0 +1,42 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.sqoop.mapreduce;

import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * An {@link OutputFormat} that writes binary files.
 * Only writes the key. Does not write any delimiter/newline after the key.
 */
public class ByteKeyOutputFormat<K, V> extends RawKeyTextOutputFormat<K, V> {

  // currently don't support compression
  private static final String FILE_EXTENSION = "";

  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    DataOutputStream ostream = getFSDataOutputStream(context, FILE_EXTENSION);
    return new KeyRecordWriters.BinaryKeyRecordWriter<K, V>(ostream);
  }

}
New file (97 lines): src/java/org/apache/sqoop/mapreduce/KeyRecordWriters.java
@@ -0,0 +1,97 @@
/* Apache License 2.0 header omitted here (identical to the one in ByteKeyOutputFormat.java above). */

package org.apache.sqoop.mapreduce;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import java.io.DataOutputStream;
import java.io.IOException;

public class KeyRecordWriters {

  /**
   * RecordWriter to write to plain text files.
   */
  public static class GenericRecordWriter<K, V> extends RecordWriter<K, V> {

    private static final String UTF8 = "UTF-8";

    protected DataOutputStream out;

    /**
     * Write the object to the byte stream, handling Text as a special
     * case.
     *
     * @param o the object to print
     * @param value the corresponding value for key o
     * @throws IOException if the write throws, we pass it on
     */
    protected void writeObject(Object o, Object value) throws IOException {
      if (o instanceof Text) {
        Text to = (Text) o;
        out.write(to.getBytes(), 0, to.getLength());
      } else {
        out.write(o.toString().getBytes(UTF8));
      }
    }

    @Override
    public synchronized void write(K key, V value) throws IOException, InterruptedException {
      writeObject(key, value);
    }

    @Override
    public synchronized void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
      out.close();
    }
  }

  public static class RawKeyRecordWriter<K, V> extends GenericRecordWriter<K, V> {

    public RawKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }
  }

  /**
   * RecordWriter that writes the raw bytes of a BytesWritable key to binary files.
   */
  public static class BinaryKeyRecordWriter<K, V> extends GenericRecordWriter<K, V> {

    public BinaryKeyRecordWriter(DataOutputStream out) {
      this.out = out;
    }

    /**
     * Write the object to the byte stream, handling BytesWritable as a special
     * case.
     * @param o the object to print
     * @throws IOException if the write throws, we pass it on
     */
    @Override
    protected void writeObject(Object o, Object value) throws IOException {
      if (o instanceof BytesWritable) {
        BytesWritable to = (BytesWritable) o;
        out.write(to.getBytes(), 0, to.getLength());
      }
    }
  }
}
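A minimal sketch (not part of the commit) of how the new BinaryKeyRecordWriter behaves: it
writes only the raw bytes of a BytesWritable key and appends no delimiter or newline. The
in-memory stream below is purely illustrative; in a real job the stream comes from
ByteKeyOutputFormat.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.sqoop.mapreduce.KeyRecordWriters;

public class BinaryKeyRecordWriterSketch {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);

    KeyRecordWriters.BinaryKeyRecordWriter<BytesWritable, NullWritable> writer =
        new KeyRecordWriters.BinaryKeyRecordWriter<>(out);

    // A 4-byte "record", including a CR/LF pair that must survive untouched.
    byte[] record = {0x10, 0x0d, 0x0a, 0x20};
    writer.write(new BytesWritable(record), NullWritable.get());
    writer.close(null); // GenericRecordWriter.close() ignores the context and closes the stream

    System.out.println(buffer.size()); // 4: exactly the key bytes, no trailing newline
  }
}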
RawKeyTextOutputFormat.java:
@@ -24,7 +24,6 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -38,47 +37,15 @@
  */
 public class RawKeyTextOutputFormat<K, V> extends FileOutputFormat<K, V> {
 
-  /**
-   * RecordWriter to write to plain text files.
-   */
-  public static class RawKeyRecordWriter<K, V> extends RecordWriter<K, V> {
-
-    private static final String UTF8 = "UTF-8";
-
-    protected DataOutputStream out;
-
-    public RawKeyRecordWriter(DataOutputStream out) {
-      this.out = out;
-    }
-
-    /**
-     * Write the object to the byte stream, handling Text as a special
-     * case.
-     * @param o the object to print
-     * @throws IOException if the write throws, we pass it on
-     */
-    private void writeObject(Object o) throws IOException {
-      if (o instanceof Text) {
-        Text to = (Text) o;
-        out.write(to.getBytes(), 0, to.getLength());
-      } else {
-        out.write(o.toString().getBytes(UTF8));
-      }
-    }
-
-    public synchronized void write(K key, V value) throws IOException {
-      writeObject(key);
-    }
-
-    public synchronized void close(TaskAttemptContext context)
-        throws IOException {
-      out.close();
-    }
-
-  }
-
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
-      throws IOException {
+  protected FSDataOutputStream getFSDataOutputStream(TaskAttemptContext context, String ext) throws IOException {
+    Configuration conf = context.getConfiguration();
+    Path file = getDefaultWorkFile(context, ext);
+    FileSystem fs = file.getFileSystem(conf);
+    FSDataOutputStream fileOut = fs.create(file, false);
+    return fileOut;
+  }
+
+  protected DataOutputStream getOutputStream(TaskAttemptContext context) throws IOException {
     boolean isCompressed = getCompressOutput(context);
     Configuration conf = context.getConfiguration();
     String ext = "";
@@ -93,17 +60,18 @@ public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
       ext = codec.getDefaultExtension();
     }
 
-    Path file = getDefaultWorkFile(context, ext);
-    FileSystem fs = file.getFileSystem(conf);
-    FSDataOutputStream fileOut = fs.create(file, false);
+    FSDataOutputStream fileOut = getFSDataOutputStream(context, ext);
     DataOutputStream ostream = fileOut;
 
     if (isCompressed) {
      ostream = new DataOutputStream(codec.createOutputStream(fileOut));
    }
-    return new RawKeyRecordWriter<K, V>(ostream);
+    return ostream;
  }
 
-}
+  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
+      throws IOException {
+    DataOutputStream ostream = getOutputStream(context);
+    return new KeyRecordWriters.RawKeyRecordWriter<K, V>(ostream);
+  }
+}
New file (65 lines): AbstractMainframeDatasetImportMapper.java
@@ -0,0 +1,65 @@
/* Apache License 2.0 header omitted here (identical to the one in ByteKeyOutputFormat.java above). */

package org.apache.sqoop.mapreduce.mainframe;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.sqoop.config.ConfigurationConstants;
import org.apache.sqoop.lib.SqoopRecord;
import org.apache.sqoop.mapreduce.AutoProgressMapper;

import java.io.IOException;

public abstract class AbstractMainframeDatasetImportMapper<KEY>
    extends AutoProgressMapper<LongWritable, SqoopRecord, KEY, NullWritable> {

  private MainframeDatasetInputSplit inputSplit;
  private MultipleOutputs<KEY, NullWritable> multiFileWriter;
  private long numberOfRecords;

  public void map(LongWritable key, SqoopRecord val, Context context)
      throws IOException, InterruptedException {
    String dataset = inputSplit.getCurrentDataset();
    numberOfRecords++;
    multiFileWriter.write(createOutKey(val), NullWritable.get(), dataset);
  }

  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    super.setup(context);
    inputSplit = (MainframeDatasetInputSplit) context.getInputSplit();
    multiFileWriter = new MultipleOutputs<>(context);
    numberOfRecords = 0;
  }

  @Override
  protected void cleanup(Context context)
      throws IOException, InterruptedException {
    super.cleanup(context);
    multiFileWriter.close();
    context.getCounter(
        ConfigurationConstants.COUNTER_GROUP_MAPRED_TASK_COUNTERS,
        ConfigurationConstants.COUNTER_MAP_OUTPUT_RECORDS)
        .increment(numberOfRecords);
  }

  protected abstract KEY createOutKey(SqoopRecord sqoopRecord);
}
MainframeConfiguration.java:
@@ -33,4 +33,15 @@ public class MainframeConfiguration
   public static final String MAINFRAME_INPUT_DATASET_TAPE = "mainframe.input.dataset.tape";
 
   public static final String MAINFRAME_FTP_FILE_ENTRY_PARSER_CLASSNAME = "org.apache.sqoop.mapreduce.mainframe.MainframeFTPFileEntryParser";
+
+  public static final String MAINFRAME_FTP_TRANSFER_MODE = "mainframe.ftp.transfermode";
+
+  public static final String MAINFRAME_FTP_TRANSFER_MODE_ASCII = "ascii";
+
+  public static final String MAINFRAME_FTP_TRANSFER_MODE_BINARY = "binary";
+
+  // this is the default buffer size used when doing binary ftp transfers from mainframe
+  public static final Integer MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE = 32760;
+
+  public static final String MAINFRAME_FTP_TRANSFER_BINARY_BUFFER_SIZE = "mainframe.ftp.buffersize";
 }
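A short sketch (assuming standard Hadoop Configuration semantics; not part of the commit) of
how these constants are meant to be consumed: the import job sets the transfer-mode and
buffer-size keys on the job configuration, and the record reader falls back to the
32760-byte default when no explicit buffer size is present.

import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.mapreduce.mainframe.MainframeConfiguration;

public class MainframeTransferModeConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // What MainframeImportJob does for --as-binaryfile:
    conf.set(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE,
        MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_BINARY);

    // What the record reader does when it needs a buffer size:
    int bufferSize = conf.getInt(
        MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_BUFFER_SIZE,
        MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE);

    System.out.println(bufferSize); // 32760, since no explicit buffer size was set
  }
}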
New file (37 lines): MainframeDatasetBinaryImportMapper.java
@@ -0,0 +1,37 @@
/* Apache License 2.0 header omitted here (identical to the one in ByteKeyOutputFormat.java above). */

package org.apache.sqoop.mapreduce.mainframe;

import org.apache.hadoop.io.BytesWritable;
import org.apache.sqoop.lib.SqoopRecord;

/**
 * Mapper that writes mainframe dataset records in binary format to multiple files
 * based on the key, which is the index of the datasets in the input split.
 */
public class MainframeDatasetBinaryImportMapper extends AbstractMainframeDatasetImportMapper<BytesWritable> {

  @Override
  protected BytesWritable createOutKey(SqoopRecord sqoopRecord) {
    BytesWritable result = new BytesWritable();
    byte[] bytes = (byte[]) sqoopRecord.getFieldMap().entrySet().iterator().next().getValue();
    result.set(bytes, 0, bytes.length);
    return result;
  }
}
New file (123 lines): MainframeDatasetBinaryRecord.java
@@ -0,0 +1,123 @@
/* Apache License 2.0 header omitted here (identical to the one in ByteKeyOutputFormat.java above). */

package org.apache.sqoop.mapreduce.mainframe;

import org.apache.sqoop.lib.DelimiterSet;
import org.apache.sqoop.lib.LargeObjectLoader;
import org.apache.hadoop.io.Text;
import org.apache.sqoop.lib.SqoopRecord;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;

public class MainframeDatasetBinaryRecord extends SqoopRecord {

  private byte[] field;

  public Map<String, Object> getFieldMap() {
    Map<String, Object> map = new HashMap<String, Object>();
    map.put("fieldName", field);
    return map;
  }

  public void setField(String fieldName, Object fieldVal) {
    if (fieldVal instanceof byte[]) {
      field = (byte[]) fieldVal;
    }
  }

  public void setField(final byte[] val) {
    this.field = val;
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    in.readFully(field);
  }

  @Override
  public void write(DataOutput out) throws IOException {
    out.write(field);
  }

  @Override
  public void readFields(ResultSet rs) throws SQLException {
    field = rs.getBytes(1);
  }

  @Override
  public void write(PreparedStatement s) throws SQLException {
    s.setBytes(1, field);
  }

  @Override
  public String toString() {
    return field.toString();
  }

  @Override
  public int write(PreparedStatement stmt, int offset) throws SQLException {
    return 0;
  }

  @Override
  public String toString(DelimiterSet delimiters) {
    return null;
  }

  @Override
  public int getClassFormatVersion() {
    return 0;
  }

  @Override
  public int hashCode() {
    return field.hashCode();
  }

  public void parse(CharSequence s) {
  }

  public void parse(Text s) {
  }

  public void parse(byte[] s) {
  }

  public void parse(char[] s) {
  }

  public void parse(ByteBuffer s) {
  }

  public void parse(CharBuffer s) {
  }

  @Override
  public void loadLargeObjects(LargeObjectLoader objLoader) throws SQLException, IOException, InterruptedException {

  }
}
MainframeDatasetFTPRecordReader.java:
@@ -18,9 +18,11 @@
 
 package org.apache.sqoop.mapreduce.mainframe;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -38,6 +40,7 @@ public class MainframeDatasetFTPRecordReader <T extends SqoopRecord>
     extends MainframeDatasetRecordReader<T> {
   private FTPClient ftp = null;
   private BufferedReader datasetReader = null;
+  private BufferedInputStream inputStream = null;
 
   private static final Log LOG = LogFactory.getLog(
       MainframeDatasetFTPRecordReader.class.getName());
@@ -50,21 +53,27 @@ public void initialize(InputSplit inputSplit,
 
     Configuration conf = getConfiguration();
     ftp = MainframeFTPClientUtils.getFTPConnection(conf);
+    initialize(ftp, conf);
+  }
+
+  public void initialize(FTPClient ftpClient, Configuration conf)
+      throws IOException {
+    ftp = ftpClient;
     if (ftp != null) {
       String dsName = conf.get(MainframeConfiguration.MAINFRAME_INPUT_DATASET_NAME);
       String dsType = conf.get(MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE);
       MainframeDatasetPath p = null;
       try {
         p = new MainframeDatasetPath(dsName,conf);
       } catch (Exception e) {
         LOG.error(e.getMessage());
         LOG.error("MainframeDatasetPath helper class incorrectly initialised");
         e.printStackTrace();
       }
       if (dsType != null && p != null) {
         dsName = p.getMainframeDatasetFolder();
       }
       ftp.changeWorkingDirectory("'" + dsName + "'");
     }
   }
 
@@ -80,6 +89,10 @@ public void close() throws IOException {
 
   protected boolean getNextRecord(T sqoopRecord) throws IOException {
     String line = null;
+    Configuration conf = getConfiguration();
+    if (MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_BINARY.equals(conf.get(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE))) {
+      return getNextBinaryRecord(sqoopRecord);
+    }
     try {
       do {
         if (datasetReader == null) {
@@ -112,9 +125,85 @@ protected boolean getNextRecord(T sqoopRecord) throws IOException {
     return false;
   }
 
+  protected boolean getNextBinaryRecord(T sqoopRecord) throws IOException {
+    Configuration conf = getConfiguration();
+    // typical estimated max size for mainframe record
+    int BUFFER_SIZE = MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE;
+    if (conf != null) {
+      BUFFER_SIZE = conf.getInt(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_BUFFER_SIZE, MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE);
+    }
+    byte[] buf = new byte[BUFFER_SIZE];
+    int bytesRead = -1;
+    int cumulativeBytesRead = 0;
+    try {
+      Boolean streamInited = initInputStream(BUFFER_SIZE);
+      if (!streamInited) {
+        LOG.info("No more datasets to process.");
+        return false;
+      }
+      do {
+        bytesRead = inputStream.read(buf, cumulativeBytesRead, BUFFER_SIZE - cumulativeBytesRead);
+        if (bytesRead == -1) {
+          // EOF
+          closeFtpInputStream();
+          LOG.info("Data transfer completed.");
+          return writeBytesToSqoopRecord(buf, cumulativeBytesRead, sqoopRecord);
+        }
+        cumulativeBytesRead += bytesRead;
+        if (cumulativeBytesRead == BUFFER_SIZE) {
+          return writeBytesToSqoopRecord(buf, cumulativeBytesRead, sqoopRecord);
+        }
+      } while (bytesRead != -1);
+    } catch (IOException ioe) {
+      throw new IOException("IOException during data transfer: " + ioe);
+    }
+    return false;
+  }
+
+  protected Boolean initInputStream(int bufferSize) throws IOException {
+    if (inputStream == null) {
+      String dsName = getNextDataset();
+      if (dsName == null) {
+        LOG.info("No more datasets to process. Returning.");
+        return false;
+      }
+      LOG.info("Attempting to retrieve file stream for: " + dsName);
+      LOG.info("Buffer size: " + bufferSize);
+      inputStream = new BufferedInputStream(ftp.retrieveFileStream(dsName));
+      if (inputStream == null) {
+        throw new IOException("Failed to retrieve FTP file stream.");
+      }
+    }
+    return true;
+  }
+
+  protected void closeFtpInputStream() throws IOException {
+    inputStream.close();
+    inputStream = null;
+    if (!ftp.completePendingCommand()) {
+      throw new IOException("Failed to complete ftp command. FTP Response: " + ftp.getReplyString());
+    }
+  }
+
+  protected Boolean writeBytesToSqoopRecord(byte[] buf, int cumulativeBytesRead, SqoopRecord sqoopRecord) {
+    if (cumulativeBytesRead <= 0) {
+      return false;
+    }
+    ByteBuffer buffer = ByteBuffer.allocate(cumulativeBytesRead);
+    buffer.put(buf, 0, cumulativeBytesRead);
+    convertToSqoopRecord(buffer.array(), sqoopRecord);
+    return true;
+  }
+
   private void convertToSqoopRecord(String line, SqoopRecord sqoopRecord) {
     String fieldName
       = sqoopRecord.getFieldMap().entrySet().iterator().next().getKey();
     sqoopRecord.setField(fieldName, line);
   }
+
+  private void convertToSqoopRecord(byte[] buf, SqoopRecord sqoopRecord) {
+    String fieldName
+      = sqoopRecord.getFieldMap().entrySet().iterator().next().getKey();
+    sqoopRecord.setField(fieldName, buf);
+  }
 }
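A small sketch (illustrative only, not part of the commit) of the record-count arithmetic
implied by getNextBinaryRecord(): every full buffer becomes one record, and any bytes left
over at EOF become one final, shorter record.

public class BinaryRecordCountSketch {
  public static void main(String[] args) {
    int datasetBytes = 70000; // hypothetical dataset size
    int bufferSize = 32760;   // MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE
    int records = (datasetBytes + bufferSize - 1) / bufferSize; // ceiling division
    System.out.println(records); // 3 records: 32760 + 32760 + 4480 bytes
  }
}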
MainframeDatasetImportMapper.java:
@@ -18,61 +18,19 @@
 
 package org.apache.sqoop.mapreduce.mainframe;
 
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
-
-import org.apache.sqoop.config.ConfigurationConstants;
 import org.apache.sqoop.lib.SqoopRecord;
-import org.apache.sqoop.mapreduce.AutoProgressMapper;
 
 /**
  * Mapper that writes mainframe dataset records in Text format to multiple files
  * based on the key, which is the index of the datasets in the input split.
  */
-public class MainframeDatasetImportMapper
-  extends AutoProgressMapper<LongWritable, SqoopRecord, Text, NullWritable> {
-
-  private static final Log LOG = LogFactory.getLog(
-      MainframeDatasetImportMapper.class.getName());
-
-  private MainframeDatasetInputSplit inputSplit;
-  private MultipleOutputs<Text, NullWritable> mos;
-  private long numberOfRecords;
-  private Text outkey;
-
-  public void map(LongWritable key, SqoopRecord val, Context context)
-    throws IOException, InterruptedException {
-    String dataset = inputSplit.getCurrentDataset();
-    outkey.set(val.toString());
-    numberOfRecords++;
-    mos.write(outkey, NullWritable.get(), dataset);
-  }
+public class MainframeDatasetImportMapper extends AbstractMainframeDatasetImportMapper<Text> {
 
   @Override
-  protected void setup(Context context)
-    throws IOException, InterruptedException {
-    super.setup(context);
-    inputSplit = (MainframeDatasetInputSplit)context.getInputSplit();
-    mos = new MultipleOutputs<Text, NullWritable>(context);
-    numberOfRecords = 0;
-    outkey = new Text();
-  }
-
-  @Override
-  protected void cleanup(Context context)
-    throws IOException, InterruptedException {
-    super.cleanup(context);
-    mos.close();
-    context.getCounter(
-        ConfigurationConstants.COUNTER_GROUP_MAPRED_TASK_COUNTERS,
-        ConfigurationConstants.COUNTER_MAP_OUTPUT_RECORDS)
-        .increment(numberOfRecords);
-  }
+  protected Text createOutKey(SqoopRecord sqoopRecord) {
+    Text result = new Text();
+    result.set(sqoopRecord.toString());
+    return result;
+  }
 }
MainframeImportJob.java:
@@ -22,14 +22,19 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
 
 import org.apache.sqoop.SqoopOptions;
 import org.apache.sqoop.manager.ImportJobContext;
+import org.apache.sqoop.mapreduce.DBWritable;
 import org.apache.sqoop.mapreduce.DataDrivenImportJob;
+import org.apache.sqoop.mapreduce.RawKeyTextOutputFormat;
+import org.apache.sqoop.mapreduce.ByteKeyOutputFormat;
 import org.apache.sqoop.mapreduce.parquet.ParquetImportJobConfigurator;
 
 /**
@@ -46,7 +51,10 @@ public MainframeImportJob(final SqoopOptions opts, ImportJobContext context, Par
 
   @Override
   protected Class<? extends Mapper> getMapperClass() {
-    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
+    if (SqoopOptions.FileLayout.BinaryFile.equals(options.getFileLayout())) {
+      LOG.debug("Using MainframeDatasetBinaryImportMapper");
+      return MainframeDatasetBinaryImportMapper.class;
+    } else if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
       return MainframeDatasetImportMapper.class;
     } else {
       return super.getMapperClass();
@@ -66,13 +74,58 @@ protected void configureInputFormat(Job job, String tableName,
     job.getConfiguration().set(
         MainframeConfiguration.MAINFRAME_INPUT_DATASET_TAPE,
         options.getMainframeInputDatasetTape().toString());
+    if (SqoopOptions.FileLayout.BinaryFile == options.getFileLayout()) {
+      job.getConfiguration().set(
+          MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE,
+          MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_BINARY);
+      job.getConfiguration().setInt(
+          MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_BUFFER_SIZE,
+          options.getBufferSize()
+      );
+    } else {
+      job.getConfiguration().set(
+          MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE,
+          MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_ASCII);
+    }
   }
 
   @Override
   protected void configureOutputFormat(Job job, String tableName,
       String tableClassName) throws ClassNotFoundException, IOException {
     super.configureOutputFormat(job, tableName, tableClassName);
+    job.getConfiguration().set(
+        MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE,
+        options.getMainframeFtpTransferMode());
+    // use the default outputformat
     LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
   }
+
+  @Override
+  protected void configureMapper(Job job, String tableName,
+      String tableClassName) throws IOException {
+    super.configureMapper(job, tableName, tableClassName);
+    if (SqoopOptions.FileLayout.BinaryFile == options.getFileLayout()) {
+      job.setOutputKeyClass(BytesWritable.class);
+      job.setOutputValueClass(NullWritable.class);
+
+      // this is required as code generated class assumes setField method takes String
+      // and will fail with ClassCastException when a byte array is passed instead
+      // java.lang.ClassCastException: [B cannot be cast to java.lang.String
+      Configuration conf = job.getConfiguration();
+      conf.setClass(org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_CLASS_PROPERTY, MainframeDatasetBinaryRecord.class,
+          DBWritable.class);
+    }
+  }
+
+  @Override
+  protected Class<? extends OutputFormat> getOutputFormatClass() {
+    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
+      return RawKeyTextOutputFormat.class;
+    } else if (options.getFileLayout()
+        == SqoopOptions.FileLayout.BinaryFile) {
+      return ByteKeyOutputFormat.class;
+    }
+    return null;
+  }
 }
BaseSqoopTool.java:
@@ -114,6 +114,7 @@ public abstract class BaseSqoopTool extends org.apache.sqoop.tool.SqoopTool {
   public static final String FMT_TEXTFILE_ARG = "as-textfile";
   public static final String FMT_AVRODATAFILE_ARG = "as-avrodatafile";
   public static final String FMT_PARQUETFILE_ARG = "as-parquetfile";
+  public static final String FMT_BINARYFILE_ARG = "as-binaryfile";
   public static final String HIVE_IMPORT_ARG = "hive-import";
   public static final String HIVE_TABLE_ARG = "hive-table";
   public static final String HIVE_DATABASE_ARG = "hive-database";
@@ -744,6 +744,10 @@ protected RelatedOptions getImportOptions() {
         .withDescription("Imports data to Parquet files")
         .withLongOpt(BaseSqoopTool.FMT_PARQUETFILE_ARG)
         .create());
+    importOpts.addOption(OptionBuilder
+        .withDescription("Imports data to Binary files")
+        .withLongOpt(BaseSqoopTool.FMT_BINARYFILE_ARG)
+        .create());
     importOpts.addOption(OptionBuilder.withArgName("n")
         .hasArg().withDescription("Use 'n' map tasks to import in parallel")
         .withLongOpt(NUM_MAPPERS_ARG)
MainframeImportTool.java:
@@ -20,6 +20,7 @@
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.ToolRunner;
@@ -40,6 +41,7 @@ public class MainframeImportTool extends ImportTool {
   public static final String DS_ARG = "dataset";
   public static final String DS_TYPE_ARG = "datasettype";
   public static final String DS_TAPE_ARG = "tape";
+  public static final String BUFFERSIZE_ARG = "buffersize";
 
   public MainframeImportTool() {
     super("import-mainframe", false);
@@ -82,6 +84,14 @@ protected RelatedOptions getImportOptions() {
         .withDescription("Imports data as plain text (default)")
         .withLongOpt(FMT_TEXTFILE_ARG)
         .create());
+    importOpts.addOption(OptionBuilder
+        .withDescription("Imports data as binary")
+        .withLongOpt(FMT_BINARYFILE_ARG)
+        .create());
+    importOpts.addOption(OptionBuilder
+        .hasArg().withDescription("Sets buffer size for binary import in bytes (default=32kB)")
+        .withLongOpt(BUFFERSIZE_ARG)
+        .create());
     importOpts.addOption(OptionBuilder.withArgName("n")
         .hasArg().withDescription("Use 'n' map tasks to import in parallel")
         .withLongOpt(NUM_MAPPERS_ARG)
@@ -168,6 +178,28 @@ public void applyOptions(CommandLine in, SqoopOptions out)
       // set default tape value to false
       out.setMainframeInputDatasetTape("false");
     }
+    if (in.hasOption(FMT_BINARYFILE_ARG)) {
+      out.setMainframeFtpTransferMode(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_BINARY);
+      out.setFileLayout(SqoopOptions.FileLayout.BinaryFile);
+    } else {
+      // set default transfer mode to ascii
+      out.setMainframeFtpTransferMode(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_ASCII);
+      out.setFileLayout(SqoopOptions.FileLayout.TextFile);
+    }
+
+    if (in.hasOption(BUFFERSIZE_ARG)) {
+      // if we specify --buffersize set the buffer size
+      int bufSize = Integer.valueOf(in.getOptionValue(BUFFERSIZE_ARG));
+      if (bufSize > 0) {
+        out.setBufferSize(bufSize);
+      } else {
+        out.setBufferSize(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE);
+      }
+    } else {
+      // set the default buffer size to 32kB
+      out.setBufferSize(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE);
+    }
   }
 
   @Override
@@ -190,6 +222,17 @@ protected void validateImportOptions(SqoopOptions options)
       throw new InvalidOptionsException(
           "--" + DS_TAPE_ARG + " specified is invalid. " + HELP_STR);
     }
+    /* only allow FileLayout.BinaryFile to be selected for mainframe import */
+    if (SqoopOptions.FileLayout.BinaryFile.equals(options.getFileLayout()) && StringUtils.isEmpty(options.getMainframeInputDatasetName())) {
+      throw new InvalidOptionsException("--as-binaryfile should only be used with import-mainframe module.");
+    }
+
+    // only allow buffer size to be set different to default when binary file is selected
+    // in any case, if --as-binaryfile isn't selected, --buffersize parameter is harmless
+    if (!SqoopOptions.FileLayout.BinaryFile.equals(options.getFileLayout()) && !MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE.equals(options.getBufferSize())) {
+      throw new InvalidOptionsException("--buffersize should only be used with --as-binaryfile parameter.");
+    }
+
     super.validateImportOptions(options);
   }
 }
|
@ -23,6 +23,7 @@
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.net.PrintCommandListener;
|
import org.apache.commons.net.PrintCommandListener;
|
||||||
import org.apache.commons.net.ftp.FTP;
|
import org.apache.commons.net.ftp.FTP;
|
||||||
import org.apache.commons.net.ftp.FTPClient;
|
import org.apache.commons.net.ftp.FTPClient;
|
@@ -207,8 +208,18 @@ public static FTPClient getFTPConnection(Configuration conf)
         throw new IOException("Could not login to server " + server
             + ":" + ftp.getReplyString());
       }
-      // set ASCII transfer mode
-      ftp.setFileType(FTP.ASCII_FILE_TYPE);
+      // set transfer mode
+      String transferMode = conf.get(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE);
+      if (StringUtils.equalsIgnoreCase(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_MODE_BINARY,transferMode)) {
+        LOG.info("Setting FTP transfer mode to binary");
+        // ftp.setFileTransferMode(FTP.BINARY_FILE_TYPE) doesn't work for MVS, it throws a syntax error
+        ftp.sendCommand("TYPE I");
+        // this is IMPORTANT - otherwise it will convert 0x0d0a to 0x0a = dropping bytes
+        ftp.setFileType(FTP.BINARY_FILE_TYPE);
+      } else {
+        LOG.info("Defaulting FTP transfer mode to ascii");
+        ftp.setFileTransferMode(FTP.ASCII_FILE_TYPE);
+      }
       // Use passive mode as default.
       ftp.enterLocalPassiveMode();
       LOG.info("System type detected: " + ftp.getSystemType());
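The binary branch above takes two steps because of the MVS quirk called out in its comments: the explicit TYPE I command switches the server into image mode, while FTP.BINARY_FILE_TYPE on the client side keeps commons-net from rewriting 0x0d0a as 0x0a. A hedged, standalone sketch of the same sequence against a plain commons-net FTPClient (host, credentials, and error handling are placeholders, not Sqoop code):

    import java.io.IOException;

    import org.apache.commons.net.ftp.FTP;
    import org.apache.commons.net.ftp.FTPClient;

    public class BinaryModeFtpSketch {
      public static void main(String[] args) throws IOException {
        FTPClient ftp = new FTPClient();
        ftp.connect("mainframe.example.com");   // placeholder host
        ftp.login("user", "secret");            // placeholder credentials

        // Ask the server for image (binary) mode explicitly, as the patch does for MVS.
        ftp.sendCommand("TYPE I");
        // Also tell commons-net the stream is binary so CRLF bytes are not translated locally.
        ftp.setFileType(FTP.BINARY_FILE_TYPE);

        ftp.enterLocalPassiveMode();
        // ... retrieveFileStream(...) and completePendingCommand() as needed ...

        ftp.logout();
        ftp.disconnect();
      }
    }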
@@ -113,6 +113,42 @@ public void testImportGdgText() throws IOException {
     doImportAndVerify(MainframeTestUtil.GDG_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_GDG, files);
   }
 
+  @Test
+  public void testImportGdgBinary() throws IOException {
+    HashMap<String,String> files = new HashMap<String,String>();
+    files.put(MainframeTestUtil.GDG_BINARY_DATASET_FILENAME, MainframeTestUtil.EXPECTED_GDG_BINARY_DATASET_MD5);
+    doImportAndVerify(MainframeTestUtil.GDG_BINARY_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_GDG, files, "--as-binaryfile");
+  }
+
+  @Test
+  public void testImportGdgBinaryWithBufferSize() throws IOException {
+    HashMap<String,String> files = new HashMap<String,String>();
+    files.put(MainframeTestUtil.GDG_BINARY_DATASET_FILENAME, MainframeTestUtil.EXPECTED_GDG_BINARY_DATASET_MD5);
+    doImportAndVerify(MainframeTestUtil.GDG_BINARY_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_GDG, files, "--as-binaryfile", "--buffersize", "64000");
+  }
+
+  @Test
+  public void testImportSequential() throws IOException {
+    // can reuse the same dataset as binary as the dataset is plain text
+    HashMap<String,String> files = new HashMap<String,String>();
+    files.put(MainframeTestUtil.SEQ_DATASET_FILENAME, MainframeTestUtil.EXPECTED_SEQ_DATASET_MD5);
+    doImportAndVerify(MainframeTestUtil.SEQ_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_SEQUENTIAL, files);
+  }
+
+  @Test
+  public void testImportSequentialBinary() throws IOException {
+    HashMap<String,String> files = new HashMap<String,String>();
+    files.put(MainframeTestUtil.SEQ_BINARY_DATASET_FILENAME, MainframeTestUtil.EXPECTED_SEQ_BINARY_DATASET_MD5);
+    doImportAndVerify(MainframeTestUtil.SEQ_BINARY_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_SEQUENTIAL, files, "--as-binaryfile");
+  }
+
+  @Test
+  public void testImportSequentialBinaryWithBufferSize() throws IOException {
+    HashMap<String,String> files = new HashMap<String,String>();
+    files.put(MainframeTestUtil.SEQ_BINARY_DATASET_FILENAME, MainframeTestUtil.EXPECTED_SEQ_BINARY_DATASET_MD5);
+    doImportAndVerify(MainframeTestUtil.SEQ_BINARY_DATASET_NAME, MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE_SEQUENTIAL, files, "--as-binaryfile", "--buffersize", "64000");
+  }
+
   private String [] getArgv(String datasetName, String datasetType, String ... extraArgs) {
     ArrayList<String> args = new ArrayList<String>();
 
@@ -130,7 +166,6 @@ public void testImportGdgText() throws IOException {
     args.add(datasetType);
 
     if (extraArgs.length > 0) {
-      args.add("--");
       for (String arg : extraArgs) {
         args.add(arg);
       }
@@ -41,4 +41,31 @@ public class MainframeTestUtil {
   public static final String EXPECTED_GDG_DATASET_MD5 = System.getProperty(
       "sqoop.test.mainframe.ftp.dataset.gdg.md5",
       "f0d0d171fdb8a03dbc1266ed179d7093");
+  public static final String GDG_BINARY_DATASET_NAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.gdg",
+      "TSODIQ1.FOLDER");
+  public static final String GDG_BINARY_DATASET_FILENAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.gdg.filename",
+      "G0002V45");
+  public static final String EXPECTED_GDG_BINARY_DATASET_MD5 = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.gdg.md5",
+      "43eefbe34e466dd3f65a3e867a60809a");
+  public static final String SEQ_DATASET_NAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.dataset.seq",
+      "TSODIQ1.GDGTEXT.G0001V43");
+  public static final String SEQ_DATASET_FILENAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.dataset.seq.filename",
+      "G0001V43");
+  public static final String EXPECTED_SEQ_DATASET_MD5 = System.getProperty(
+      "sqoop.test.mainframe.ftp.dataset.seq.md5",
+      "f0d0d171fdb8a03dbc1266ed179d7093");
+  public static final String SEQ_BINARY_DATASET_NAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.seq",
+      "TSODIQ1.FOLDER.FOLDERTXT");
+  public static final String SEQ_BINARY_DATASET_FILENAME = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.seq.filename",
+      "FOLDERTXT");
+  public static final String EXPECTED_SEQ_BINARY_DATASET_MD5 = System.getProperty(
+      "sqoop.test.mainframe.ftp.binary.dataset.seq.md5",
+      "1591c0fcc718fda7e9c1f3561d232b2b");
 }
@@ -0,0 +1,164 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.mainframe;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.net.ftp.FTPClient;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.doReturn;
+
+public class TestMainframeDatasetBinaryRecord {
+
+  private MainframeDatasetFTPRecordReader ftpRecordReader;
+  private InputStream is;
+  private FTPClient ftp;
+  private final String DATASET_NAME = "dummy.ds";
+  private final String DATASET_TYPE = "g";
+  private static final Log LOG = LogFactory.getLog(
+      TestMainframeDatasetBinaryRecord.class.getName());
+
+  @Before
+  public void setUp() throws IOException, InterruptedException {
+    MainframeDatasetFTPRecordReader rdr = new MainframeDatasetFTPRecordReader();
+    Configuration conf;
+    MainframeDatasetInputSplit split;
+    TaskAttemptContext context;
+    ftpRecordReader = spy(rdr);
+    is = mock(InputStream.class);
+    ftp = mock(FTPClient.class);
+    split = mock(MainframeDatasetInputSplit.class);
+    context = mock(TaskAttemptContext.class);
+    conf = new Configuration();
+    when(ftp.retrieveFileStream(any(String.class))).thenReturn(is);
+    when(ftp.changeWorkingDirectory(any(String.class))).thenReturn(true);
+    doReturn("file1").when(ftpRecordReader).getNextDataset();
+    when(split.getNextDataset()).thenReturn(DATASET_NAME);
+    when(ftpRecordReader.getNextDataset()).thenReturn(DATASET_NAME);
+    conf.set(MainframeConfiguration.MAINFRAME_INPUT_DATASET_NAME,DATASET_NAME);
+    conf.set(MainframeConfiguration.MAINFRAME_INPUT_DATASET_TYPE,DATASET_TYPE);
+    conf.setInt(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_BUFFER_SIZE,MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE);
+    ftpRecordReader.initialize(ftp, conf);
+  }
+
+  // Mock the inputstream.read method and manipulate the function parameters
+  protected Answer returnSqoopRecord(final int byteLength) {
+    return new Answer() {
+      public Object answer(InvocationOnMock invocation) {
+        return byteLength;
+      }
+    };
+  }
+
+  @Test
+  public void testGetNextBinaryRecordForFullRecord() {
+
+    MainframeDatasetBinaryRecord record = new MainframeDatasetBinaryRecord();
+    try {
+      when(is.read(any(byte[].class),anyInt(),anyInt()))
+        .thenAnswer(returnSqoopRecord(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE))
+        .thenReturn(-1);
+      when(ftp.completePendingCommand()).thenReturn(true);
+      Assert.assertTrue(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertFalse(record.getFieldMap().values().isEmpty());
+      Assert.assertEquals(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE.intValue(),((byte[])record.getFieldMap().values().iterator().next()).length);
+    } catch (IOException ioe) {
+      LOG.error("Issue with reading 1 full binary buffer record", ioe);
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  @Test
+  public void testGetNextBinaryRecordForPartialRecord() {
+    int expectedBytesRead = 10;
+    MainframeDatasetBinaryRecord record = new MainframeDatasetBinaryRecord();
+    try {
+      when(is.read(any(byte[].class),anyInt(),anyInt()))
+        .thenAnswer(returnSqoopRecord(10))
+        .thenReturn(-1);
+      when(ftp.completePendingCommand()).thenReturn(true);
+      Assert.assertTrue(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertFalse(record.getFieldMap().values().isEmpty());
+      Assert.assertEquals(expectedBytesRead,(((byte[])record.getFieldMap().values().iterator().next()).length));
+    } catch (IOException ioe) {
+      LOG.error("Issue with reading 10 byte binary record", ioe);
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  @Test
+  public void testGetNextBinaryRecordFor2Records() {
+    // test 1 full record, and 1 partial
+    int expectedBytesRead = 10;
+    MainframeDatasetBinaryRecord record = new MainframeDatasetBinaryRecord();
+    try {
+      when(is.read(any(byte[].class),anyInt(),anyInt()))
+        .thenAnswer(returnSqoopRecord(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE))
+        .thenAnswer(returnSqoopRecord(10))
+        .thenReturn(-1);
+      when(ftp.completePendingCommand()).thenReturn(true);
+      Assert.assertTrue(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertFalse(record.getFieldMap().values().isEmpty());
+      Assert.assertTrue(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE.equals((((byte[])record.getFieldMap().values().iterator().next()).length)));
+      record = new MainframeDatasetBinaryRecord();
+      Assert.assertTrue(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertFalse(record.getFieldMap().values().isEmpty());
+      Assert.assertEquals(expectedBytesRead,(((byte[])record.getFieldMap().values().iterator().next()).length));
+    } catch (IOException ioe) {
+      LOG.error("Issue with reading 1 full binary buffer record followed by 1 partial binary buffer record", ioe);
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  @Test
+  public void testGetNextBinaryRecordForMultipleReads() {
+    // test reading 1 record where the stream returns less than a full buffer
+    MainframeDatasetBinaryRecord record = new MainframeDatasetBinaryRecord();
+    try {
+      when(is.read(any(byte[].class),anyInt(),anyInt()))
+        .thenAnswer(returnSqoopRecord(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE /2))
+        .thenAnswer(returnSqoopRecord(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE /2))
+        .thenReturn(-1);
+      when(ftp.completePendingCommand()).thenReturn(true);
+      Assert.assertTrue(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertFalse(record.getFieldMap().values().isEmpty());
+      Assert.assertEquals(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE.intValue(),((byte[])record.getFieldMap().values().iterator().next()).length);
+      record = new MainframeDatasetBinaryRecord();
+      Assert.assertFalse(ftpRecordReader.getNextBinaryRecord(record));
+      Assert.assertNull((((byte[])record.getFieldMap().values().iterator().next())));
+    } catch (IOException ioe) {
+      LOG.error("Issue with verifying reading partial buffer binary records", ioe);
+      throw new RuntimeException(ioe);
+    }
+  }
+}
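The tests above simulate the FTP data stream by stubbing InputStream.read(byte[], int, int) with consecutive results: one stubbed length per chunk the record reader should see, then -1 for end-of-stream. A minimal, self-contained sketch of that Mockito pattern (buffer and chunk sizes here are illustrative, not Sqoop's constants):

    import static org.mockito.Matchers.any;
    import static org.mockito.Matchers.anyInt;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import java.io.InputStream;

    public class ConsecutiveReadStubSketch {
      public static void main(String[] args) throws Exception {
        InputStream is = mock(InputStream.class);

        // First read reports a full 1024-byte chunk, the next a 10-byte tail, then EOF.
        when(is.read(any(byte[].class), anyInt(), anyInt()))
            .thenReturn(1024)
            .thenReturn(10)
            .thenReturn(-1);

        byte[] buf = new byte[1024];
        System.out.println(is.read(buf, 0, buf.length)); // 1024
        System.out.println(is.read(buf, 0, buf.length)); // 10
        System.out.println(is.read(buf, 0, buf.length)); // -1
      }
    }

The patch's tests wrap the same idea in an Answer (returnSqoopRecord) so the stubbed length can be parameterized per call; thenReturn is the fixed-value equivalent shown here.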
@@ -22,18 +22,18 @@
 import java.lang.reflect.Method;
 
 import org.apache.commons.cli.ParseException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.sqoop.cli.RelatedOptions;
 import org.apache.sqoop.mapreduce.mainframe.MainframeConfiguration;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
 
 import org.apache.sqoop.SqoopOptions;
 import org.apache.sqoop.SqoopOptions.InvalidOptionsException;
 import org.apache.sqoop.cli.ToolOptions;
 import org.apache.sqoop.testutil.BaseSqoopTestCase;
+import org.junit.rules.ExpectedException;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -43,15 +43,16 @@
 
 public class TestMainframeImportTool extends BaseSqoopTestCase {
 
-  private static final Log LOG = LogFactory.getLog(TestMainframeImportTool.class
-      .getName());
-
   private MainframeImportTool mfImportTool;
+  private ToolOptions toolOptions;
+  private SqoopOptions sqoopOption;
 
   @Before
   public void setUp() {
 
     mfImportTool = new MainframeImportTool();
+    toolOptions = new ToolOptions();
+    sqoopOption = new SqoopOptions();
   }
 
   @After
@@ -183,4 +184,58 @@ public void testTapeOptionInvalidReturnsFalse() throws ParseException, InvalidOp
     Boolean isTape = sqoopOption.getMainframeInputDatasetTape();
     assert(isTape != null && isTape.toString().equals("false"));
   }
+
+  @Test
+  public void testFtpTransferModeAscii() throws ParseException, InvalidOptionsException {
+    String[] args = new String[] { "--dataset", "mydatasetname", "--as-textfile" };
+    configureAndValidateOptions(args);
+    assertEquals(SqoopOptions.FileLayout.TextFile,sqoopOption.getFileLayout());
+  }
+  @Test
+  public void testFtpTransferModeDefaultsToAscii() throws ParseException, InvalidOptionsException {
+    String[] args = new String[] { "--dataset", "mydatasetname" };
+    configureAndValidateOptions(args);
+    assertEquals(SqoopOptions.FileLayout.TextFile,sqoopOption.getFileLayout());
+  }
+
+  @Test
+  public void testAsBinaryFileSetsCorrectFileLayoutAndDefaultBufferSize() throws ParseException, InvalidOptionsException {
+    String[] args = new String[] { "--dataset", "mydatasetname", "--as-binaryfile" };
+    configureAndValidateOptions(args);
+    assertEquals(SqoopOptions.FileLayout.BinaryFile,sqoopOption.getFileLayout());
+    assertEquals(MainframeConfiguration.MAINFRAME_FTP_TRANSFER_BINARY_DEFAULT_BUFFER_SIZE, sqoopOption.getBufferSize());
+  }
+
+  @Test
+  public void testSetBufferSize() throws ParseException, InvalidOptionsException {
+    final Integer EXPECTED_BUFFER = 1024;
+    String[] args = new String[] { "--dataset", "mydatasetname", "--as-binaryfile", "--buffersize", EXPECTED_BUFFER.toString() };
+    configureAndValidateOptions(args);
+    assertEquals(SqoopOptions.FileLayout.BinaryFile,sqoopOption.getFileLayout());
+    assertEquals(EXPECTED_BUFFER, sqoopOption.getBufferSize());
+  }
+
+  @Rule
+  public final ExpectedException exception = ExpectedException.none();
+
+  @Test
+  public void testBufferSizeWithoutBinaryThrowsException() throws ParseException, InvalidOptionsException {
+    final Integer EXPECTED_BUFFER = 1024;
+    String[] args = new String[] { "--buffersize", EXPECTED_BUFFER.toString() };
+    exception.expect(InvalidOptionsException.class);
+    configureAndValidateOptions(args);
+  }
+
+  @Test
+  public void testInvalidBufferSizeThrowsNumberFormatException() throws ParseException, InvalidOptionsException {
+    String[] args = new String[] { "--buffersize", "invalidinteger" };
+    exception.expect(NumberFormatException.class);
+    configureAndValidateOptions(args);
+  }
+
+  private void configureAndValidateOptions(String[] args) throws ParseException, InvalidOptionsException {
+    mfImportTool.configureOptions(toolOptions);
+    sqoopOption = mfImportTool.parseArguments(args, null, sqoopOption, false);
+    mfImportTool.validateImportOptions(sqoopOption);
+  }
 }