Mirror of https://github.com/apache/sqoop.git
Synced 2025-05-04 07:30:19 +08:00

Signed-off-by: Aaron Kimball <aaron@cloudera.com>
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149913 13f79535-47bb-0310-9956-ffa450edef68
144 lines · 5.2 KiB · Java
/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.mapreduce;

import java.io.IOException;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.ConnFactory;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ConnManager;
import com.cloudera.sqoop.manager.MySQLUtils;
import com.cloudera.sqoop.shims.ShimLoader;
import com.cloudera.sqoop.manager.ImportJobContext;

/**
 * Class that runs an import job using mysqldump in the mapper.
 */
public class MySQLDumpImportJob extends ImportJobBase {

  public static final Log LOG =
      LogFactory.getLog(MySQLDumpImportJob.class.getName());

  public MySQLDumpImportJob(final SqoopOptions opts, ImportJobContext context)
      throws ClassNotFoundException {
    super(opts, MySQLDumpMapper.class,
        (Class<? extends InputFormat>) ShimLoader.getShimClass(
            "com.cloudera.sqoop.mapreduce.MySQLDumpInputFormat"),
        (Class<? extends OutputFormat>) ShimLoader.getShimClass(
            "com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat"), context);
  }

  /**
   * Configure the inputformat to use for the job.
   */
  protected void configureInputFormat(Job job, String tableName,
      String tableClassName, String splitByCol)
      throws ClassNotFoundException, IOException {

    ConnManager mgr = new ConnFactory(options.getConf()).getManager(options);

    try {
      String username = options.getUsername();
      if (null == username || username.length() == 0) {
        DBConfiguration.configureDB(job.getConfiguration(),
            mgr.getDriverClass(), options.getConnectString());
      } else {
        DBConfiguration.configureDB(job.getConfiguration(),
            mgr.getDriverClass(), options.getConnectString(), username,
            options.getPassword());
      }

      String [] colNames = options.getColumns();
      if (null == colNames) {
        colNames = mgr.getColumnNames(tableName);
      }

      String [] sqlColNames = null;
      if (null != colNames) {
        sqlColNames = new String[colNames.length];
        for (int i = 0; i < colNames.length; i++) {
          sqlColNames[i] = mgr.escapeColName(colNames[i]);
        }
      }

      // It's ok if the where clause is null in DBInputFormat.setInput.
      String whereClause = options.getWhereClause();

      // We can't set the class properly in here, because we may not have the
      // jar loaded in this JVM. So we start by calling setInput() with
      // DBWritable and then overriding the string manually.

      // Note that mysqldump also does *not* want a quoted table name.
      DataDrivenDBInputFormat.setInput(job, DBWritable.class,
          tableName, whereClause,
          mgr.escapeColName(splitByCol), sqlColNames);

      Configuration conf = job.getConfiguration();
      conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY,
          options.getOutputFieldDelim());
      conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY,
          options.getOutputRecordDelim());
      conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY,
          options.getOutputEnclosedBy());
      conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY,
          options.getOutputEscapedBy());
      conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY,
          options.isOutputEncloseRequired());
      String [] extraArgs = options.getExtraArgs();
      if (null != extraArgs) {
        conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
      }

      LOG.debug("Using InputFormat: " + inputFormatClass);
      job.setInputFormatClass(getInputFormatClass());
    } finally {
      try {
        mgr.close();
      } catch (SQLException sqlE) {
        LOG.warn("Error closing connection: " + sqlE);
      }
    }
  }

  /**
   * Set the mapper class implementation to use in the job,
   * as well as any related configuration (e.g., map output types).
   */
  protected void configureMapper(Job job, String tableName,
      String tableClassName) throws ClassNotFoundException, IOException {
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
  }

}
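
For orientation, below is a minimal, hypothetical driver showing how this job is typically wired up. The SqoopOptions constructor and setters are real APIs of this era of Sqoop, but the ImportJobContext constructor shape varies across versions and the inherited ImportJobBase.runImport() signature is assumed here; treat this as an illustrative sketch, not canonical usage.

// Hypothetical driver for MySQLDumpImportJob. Verify the ImportJobContext
// constructor and runImport() signature against the tree before relying on
// this; some versions of ImportJobContext also take a destination Path.
import org.apache.hadoop.conf.Configuration;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ImportJobContext;
import com.cloudera.sqoop.mapreduce.MySQLDumpImportJob;

public class MySQLDumpImportDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    SqoopOptions opts = new SqoopOptions(conf);
    opts.setConnectString("jdbc:mysql://db.example.com/corp"); // example URL
    opts.setTableName("employees");                            // example table
    opts.setUsername("sqoop");

    // Ties together the table, the generated ORM jar (none is needed for a
    // raw mysqldump import, hence null), and the options. Constructor assumed.
    ImportJobContext context = new ImportJobContext("employees", null, opts);

    MySQLDumpImportJob importJob = new MySQLDumpImportJob(opts, context);

    // runImport() is inherited from ImportJobBase: it invokes the
    // configureInputFormat()/configureMapper() overrides shown above and
    // then submits the MapReduce job (signature assumed).
    importJob.runImport("employees", null, "id", conf);
  }
}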
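
A second detail worth seeing in isolation: configureInputFormat() stages the output delimiters in the job Configuration under MySQLUtils keys as int code points, and the map side reads them back the same way. A hedged sketch of that round-trip follows; the key constants are the real ones used above, but the default values and the standalone class are illustrative only, not what MySQLDumpMapper actually does.

// Sketch: the delimiter round-trip through the Hadoop Configuration.
// Key constants are real (used in configureInputFormat() above); the
// defaults and this class are illustrative assumptions.
import org.apache.hadoop.conf.Configuration;

import com.cloudera.sqoop.manager.MySQLUtils;

public final class DelimiterEcho {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Staging side: chars are stored as their int code points.
    conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, ',');
    conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, '\n');

    // Reading side: fetch the code point and cast back to char.
    char fieldDelim = (char) conf.getInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY, ',');
    char recordDelim = (char) conf.getInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY, '\n');

    System.out.println("field delimiter = '" + fieldDelim
        + "', record delimiter code point = " + (int) recordDelim);
  }
}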