5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-03 21:49:40 +08:00

Support multiple MySQL export tasks per job/node

MySQLExportMapper now uses a task attempt-specific local directory to
store its FIFO objects.

From: Aaron Kimball <aaron@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:03:44 +00:00
parent 13e32e71cf
commit 6ea36a07ad
3 changed files with 63 additions and 3 deletions

View File

@ -83,8 +83,7 @@ public LargeObjectLoader(Configuration conf, Path workPath)
* @return a filename to use to put an external LOB in.
*/
private String getNextLobFileName() {
String file = "_lob/obj_" + conf.get("mapreduce.task.id",
conf.get("mapred.task.id", "unknown_task_id"))
String file = "_lob/obj_" + TaskId.get(conf, "unknown_task_id")
+ nextLobFileId;
nextLobFileId++;

View File

@ -0,0 +1,41 @@
/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.sqoop.lib;
import org.apache.hadoop.conf.Configuration;
/**
 * Utility class; returns task attempt Id of the current job
 * regardless of Hadoop version being used.
 */
public final class TaskId {

  private TaskId() {
    // Utility class: suppress instantiation.
  }

  /**
   * Looks up the current task attempt id, checking the new-style
   * ("mapreduce.task.id") property first, then the legacy
   * ("mapred.task.id") property.
   *
   * @param conf the Configuration to check for the current task attempt id.
   * @param defaultVal the value to return if a task attempt id is not set.
   * @return the current task attempt id, or the default value if one isn't set.
   */
  public static String get(Configuration conf, String defaultVal) {
    String legacyTaskId = conf.get("mapred.task.id", defaultVal);
    return conf.get("mapreduce.task.id", legacyTaskId);
  }
}

View File

@ -33,6 +33,7 @@
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.sqoop.lib.TaskId;
import org.apache.hadoop.sqoop.manager.MySQLUtils;
import org.apache.hadoop.sqoop.shims.HadoopShim;
import org.apache.hadoop.sqoop.util.AsyncSink;
@ -100,7 +101,19 @@ public class MySQLExportMapper<KEYIN, VALIN>
private void initMySQLImportProcess() throws IOException {
String tmpDir = conf.get(HadoopShim.get().getJobLocalDirProperty(),
"/tmp/");
this.fifoFile = new File(tmpDir,
// Create a local subdir specific to this task attempt.
String taskAttemptStr = TaskId.get(conf, "mysql_export");
File taskAttemptDir = new File(tmpDir, taskAttemptStr);
if (!taskAttemptDir.exists()) {
boolean createdDir = taskAttemptDir.mkdir();
if (!createdDir) {
LOG.warn("Could not create non-existent task attempt dir: "
+ taskAttemptDir.toString());
}
}
this.fifoFile = new File(taskAttemptDir,
conf.get(MySQLUtils.TABLE_NAME_KEY, "UNKNOWN_TABLE") + ".txt");
String filename = fifoFile.toString();
@ -282,6 +295,13 @@ private void closeExportHandles() throws IOException, InterruptedException {
LOG.error("Could not clean up named FIFO after completing mapper");
}
// We put the FIFO file in a one-off subdir. Remove that.
File fifoParentDir = this.fifoFile.getParentFile();
LOG.debug("Removing task attempt tmpdir");
if (!fifoParentDir.delete()) {
LOG.error("Could not clean up task dir after completing mapper");
}
this.fifoFile = null;
}