mirror of
https://github.com/apache/sqoop.git
synced 2025-05-05 06:29:49 +08:00
SQOOP-735 Introduce output format option to Sqoop import
(Jarek Jarcec Cecho)
This commit is contained in:
parent
8a5cd6728a
commit
cf3d71049d
@ -28,5 +28,7 @@ public class OutputForm {
|
|||||||
|
|
||||||
@Input public StorageType storageType;
|
@Input public StorageType storageType;
|
||||||
|
|
||||||
|
@Input public OutputFormat outputFormat;
|
||||||
|
|
||||||
@Input(size = 25) public String outputDirectory;
|
@Input(size = 25) public String outputDirectory;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,33 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.sqoop.framework.configuration;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Various supported formats on disk
|
||||||
|
*/
|
||||||
|
public enum OutputFormat {
|
||||||
|
/**
|
||||||
|
* Comma-separated text file
|
||||||
|
*/
|
||||||
|
TEXT_FILE,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sequence file
|
||||||
|
*/
|
||||||
|
SEQUENCE_FILE,
|
||||||
|
}
|
@ -35,6 +35,9 @@ output.help = You must supply the information requested in order to \
|
|||||||
output.storageType.label = Storage type
|
output.storageType.label = Storage type
|
||||||
output.storageType.help = Target on Hadoop ecosystem where to store data
|
output.storageType.help = Target on Hadoop ecosystem where to store data
|
||||||
|
|
||||||
|
output.outputFormat.label = Output format
|
||||||
|
output.outputFormat.help = Format in which data should be serialized
|
||||||
|
|
||||||
output.outputDirectory.label = Output directory
|
output.outputDirectory.label = Output directory
|
||||||
output.outputDirectory.help = Output directory for final data
|
output.outputDirectory.help = Output directory for final data
|
||||||
|
|
||||||
|
@ -19,9 +19,14 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.io.NullWritable;
|
import org.apache.hadoop.io.NullWritable;
|
||||||
import org.apache.sqoop.common.MutableMapContext;
|
import org.apache.sqoop.common.MutableMapContext;
|
||||||
|
import org.apache.sqoop.common.SqoopException;
|
||||||
import org.apache.sqoop.framework.ExecutionEngine;
|
import org.apache.sqoop.framework.ExecutionEngine;
|
||||||
import org.apache.sqoop.framework.SubmissionRequest;
|
import org.apache.sqoop.framework.SubmissionRequest;
|
||||||
|
import org.apache.sqoop.framework.configuration.ImportJobConfiguration;
|
||||||
|
import org.apache.sqoop.framework.configuration.OutputFormat;
|
||||||
import org.apache.sqoop.job.JobConstants;
|
import org.apache.sqoop.job.JobConstants;
|
||||||
|
import org.apache.sqoop.job.MapreduceExecutionError;
|
||||||
|
import org.apache.sqoop.job.etl.HdfsSequenceImportLoader;
|
||||||
import org.apache.sqoop.job.etl.HdfsTextImportLoader;
|
import org.apache.sqoop.job.etl.HdfsTextImportLoader;
|
||||||
import org.apache.sqoop.job.etl.Importer;
|
import org.apache.sqoop.job.etl.Importer;
|
||||||
import org.apache.sqoop.job.io.Data;
|
import org.apache.sqoop.job.io.Data;
|
||||||
@ -42,6 +47,7 @@ public SubmissionRequest createSubmissionRequest() {
|
|||||||
@Override
|
@Override
|
||||||
public void prepareImportSubmission(SubmissionRequest gRequest) {
|
public void prepareImportSubmission(SubmissionRequest gRequest) {
|
||||||
MRSubmissionRequest request = (MRSubmissionRequest) gRequest;
|
MRSubmissionRequest request = (MRSubmissionRequest) gRequest;
|
||||||
|
ImportJobConfiguration jobConf = (ImportJobConfiguration) request.getConfigFrameworkJob();
|
||||||
|
|
||||||
// Configure map-reduce classes for import
|
// Configure map-reduce classes for import
|
||||||
request.setInputFormatClass(SqoopInputFormat.class);
|
request.setInputFormatClass(SqoopInputFormat.class);
|
||||||
@ -61,6 +67,15 @@ public void prepareImportSubmission(SubmissionRequest gRequest) {
|
|||||||
context.setString(JobConstants.JOB_ETL_PARTITIONER, importer.getPartitioner().getName());
|
context.setString(JobConstants.JOB_ETL_PARTITIONER, importer.getPartitioner().getName());
|
||||||
context.setString(JobConstants.JOB_ETL_EXTRACTOR, importer.getExtractor().getName());
|
context.setString(JobConstants.JOB_ETL_EXTRACTOR, importer.getExtractor().getName());
|
||||||
context.setString(JobConstants.JOB_ETL_DESTROYER, importer.getDestroyer().getName());
|
context.setString(JobConstants.JOB_ETL_DESTROYER, importer.getDestroyer().getName());
|
||||||
context.setString(JobConstants.JOB_ETL_LOADER, HdfsTextImportLoader.class.getName());
|
|
||||||
|
// TODO: These settings should be abstracted to the core module at some point
|
||||||
|
if(jobConf.output.outputFormat == OutputFormat.TEXT_FILE) {
|
||||||
|
context.setString(JobConstants.JOB_ETL_LOADER, HdfsTextImportLoader.class.getName());
|
||||||
|
} else if(jobConf.output.outputFormat == OutputFormat.SEQUENCE_FILE) {
|
||||||
|
context.setString(JobConstants.JOB_ETL_LOADER, HdfsSequenceImportLoader.class.getName());
|
||||||
|
} else {
|
||||||
|
throw new SqoopException(MapreduceExecutionError.MAPRED_EXEC_0024,
|
||||||
|
"Format: " + jobConf.output.outputFormat);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -73,6 +73,9 @@ public enum MapreduceExecutionError implements ErrorCode {
|
|||||||
/** Unknown job type */
|
/** Unknown job type */
|
||||||
MAPRED_EXEC_0023("Unknown job type"),
|
MAPRED_EXEC_0023("Unknown job type"),
|
||||||
|
|
||||||
|
/** Unknown output format type */
|
||||||
|
MAPRED_EXEC_0024("Unknown output format type"),
|
||||||
|
|
||||||
;
|
;
|
||||||
|
|
||||||
private final String message;
|
private final String message;
|
||||||
|
Loading…
Reference in New Issue
Block a user