5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-04 07:30:19 +08:00
sqoop/src/java/com/cloudera/sqoop/mapreduce/MySQLDumpImportJob.java
Andrew Bayer 568a827a1c Append mode import and target-dir output
Signed-off-by: Aaron Kimball <aaron@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149913 13f79535-47bb-0310-9956-ffa450edef68
2011-07-22 20:03:56 +00:00

144 lines
5.2 KiB
Java

/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.mapreduce;
import java.io.IOException;
import java.sql.SQLException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import com.cloudera.sqoop.ConnFactory;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.manager.ConnManager;
import com.cloudera.sqoop.manager.MySQLUtils;
import com.cloudera.sqoop.shims.ShimLoader;
import com.cloudera.sqoop.manager.ImportJobContext;
/**
* Class that runs an import job using mysqldump in the mapper.
*/
public class MySQLDumpImportJob extends ImportJobBase {
public static final Log LOG =
LogFactory.getLog(MySQLDumpImportJob.class.getName());
public MySQLDumpImportJob(final SqoopOptions opts, ImportJobContext context)
throws ClassNotFoundException {
super(opts, MySQLDumpMapper.class,
(Class<? extends InputFormat>) ShimLoader.getShimClass(
"com.cloudera.sqoop.mapreduce.MySQLDumpInputFormat"),
(Class<? extends OutputFormat>) ShimLoader.getShimClass(
"com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat"), context);
}
/**
* Configure the inputformat to use for the job.
*/
protected void configureInputFormat(Job job, String tableName,
String tableClassName, String splitByCol)
throws ClassNotFoundException, IOException {
ConnManager mgr = new ConnFactory(options.getConf()).getManager(options);
try {
String username = options.getUsername();
if (null == username || username.length() == 0) {
DBConfiguration.configureDB(job.getConfiguration(),
mgr.getDriverClass(), options.getConnectString());
} else {
DBConfiguration.configureDB(job.getConfiguration(),
mgr.getDriverClass(), options.getConnectString(), username,
options.getPassword());
}
String [] colNames = options.getColumns();
if (null == colNames) {
colNames = mgr.getColumnNames(tableName);
}
String [] sqlColNames = null;
if (null != colNames) {
sqlColNames = new String[colNames.length];
for (int i = 0; i < colNames.length; i++) {
sqlColNames[i] = mgr.escapeColName(colNames[i]);
}
}
// It's ok if the where clause is null in DBInputFormat.setInput.
String whereClause = options.getWhereClause();
// We can't set the class properly in here, because we may not have the
// jar loaded in this JVM. So we start by calling setInput() with
// DBWritable and then overriding the string manually.
// Note that mysqldump also does *not* want a quoted table name.
DataDrivenDBInputFormat.setInput(job, DBWritable.class,
tableName, whereClause,
mgr.escapeColName(splitByCol), sqlColNames);
Configuration conf = job.getConfiguration();
conf.setInt(MySQLUtils.OUTPUT_FIELD_DELIM_KEY,
options.getOutputFieldDelim());
conf.setInt(MySQLUtils.OUTPUT_RECORD_DELIM_KEY,
options.getOutputRecordDelim());
conf.setInt(MySQLUtils.OUTPUT_ENCLOSED_BY_KEY,
options.getOutputEnclosedBy());
conf.setInt(MySQLUtils.OUTPUT_ESCAPED_BY_KEY,
options.getOutputEscapedBy());
conf.setBoolean(MySQLUtils.OUTPUT_ENCLOSE_REQUIRED_KEY,
options.isOutputEncloseRequired());
String [] extraArgs = options.getExtraArgs();
if (null != extraArgs) {
conf.setStrings(MySQLUtils.EXTRA_ARGS_KEY, extraArgs);
}
LOG.debug("Using InputFormat: " + inputFormatClass);
job.setInputFormatClass(getInputFormatClass());
} finally {
try {
mgr.close();
} catch (SQLException sqlE) {
LOG.warn("Error closing connection: " + sqlE);
}
}
}
/**
* Set the mapper class implementation to use in the job,
* as well as any related configuration (e.g., map output types).
*/
protected void configureMapper(Job job, String tableName,
String tableClassName) throws ClassNotFoundException, IOException {
job.setMapperClass(getMapperClass());
job.setOutputKeyClass(String.class);
job.setOutputValueClass(NullWritable.class);
}
}