SQOOP-483. Allow target dir to be set to a different name than table name for hive import.
(Cheolsoo Park via Jarek Jarcec Cecho)
git-svn-id: https://svn.apache.org/repos/asf/sqoop/trunk@1342998 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 35365d5e21
commit 06b636aac6
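This change lets a Hive import load data from a --target-dir that differs from the source table name; previously, whenever --table was given, the Hive load path was always derived from the table name and --target-dir was ignored. As a rough illustration (a hypothetical invocation; the JDBC URL, table, and directory names are placeholders), an import such as the following should now stage the files under emp_staging and still load them into the Hive table:

    sqoop import --connect jdbc:mysql://db.example.com/corp --table employees \
        --hive-import --target-dir emp_staging

As the diff below shows, both the generated LOAD DATA INPATH statement and the post-import cleanup now use the resolved target directory.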
@@ -101,10 +101,8 @@ private String getHiveBinPath() {
    * If we used a MapReduce-based upload of the data, remove the _logs dir
    * from where we put it, before running Hive LOAD DATA INPATH.
    */
-  private void removeTempLogs(String tableName) throws IOException {
+  private void removeTempLogs(Path tablePath) throws IOException {
     FileSystem fs = FileSystem.get(configuration);
-    Path tablePath = getOutputPath(tableName);
-
     Path logsPath = new Path(tablePath, "_logs");
     if (fs.exists(logsPath)) {
       LOG.info("Removing temporary files from import process: " + logsPath);
@@ -115,26 +113,6 @@ private void removeTempLogs(String tableName) throws IOException {
     }
   }
 
-  /**
-   * Get directory where we stored job output files.
-   *
-   * @param tableName imported table name
-   * @return Path with directory where output files can be found
-   */
-  private Path getOutputPath(String tableName) {
-    if (null != tableName) {
-      String warehouseDir = options.getWarehouseDir();
-      if (warehouseDir != null) {
-        return new Path(new Path(warehouseDir), tableName);
-      } else {
-        return new Path(tableName);
-      }
-    } else {
-      // --table option is not used, so use the target dir instead
-      return new Path(options.getTargetDir());
-    }
-  }
-
   /**
    * @return true if we're just generating the DDL for the import, but
    * not actually running it (i.e., --generate-only mode). If so, don't
@@ -171,11 +149,6 @@ private File getScriptFile(String outputTableName) throws IOException {
   public void importTable(String inputTableName, String outputTableName,
       boolean createOnly) throws IOException {
 
-    if (!isGenerateOnly()) {
-      removeTempLogs(inputTableName);
-      LOG.info("Loading uploaded data into Hive");
-    }
-
     if (null == outputTableName) {
       outputTableName = inputTableName;
     }
@@ -200,17 +173,21 @@ public void importTable(String inputTableName, String outputTableName,
         configuration, !debugMode);
     String createTableStr = tableWriter.getCreateTableStmt() + ";\n";
     String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";
+    Path finalPath = tableWriter.getFinalPath();
 
     if (!isGenerateOnly()) {
+      removeTempLogs(finalPath);
+      LOG.info("Loading uploaded data into Hive");
+
       String codec = options.getCompressionCodec();
       if (codec != null && (codec.equals(CodecMap.LZOP)
           || codec.equals(CodecMap.getCodecClassName(CodecMap.LZOP)))) {
         try {
-          String finalPathStr = tableWriter.getFinalPathStr();
           Tool tool = ReflectionUtils.newInstance(Class.
               forName("com.hadoop.compression.lzo.DistributedLzoIndexer").
               asSubclass(Tool.class), configuration);
-          ToolRunner.run(configuration, tool, new String[] { finalPathStr });
+          ToolRunner.run(configuration, tool,
+              new String[] { finalPath.toString() });
         } catch (Exception ex) {
           LOG.error("Error indexing lzo files", ex);
           throw new IOException("Error indexing lzo files", ex);
@@ -250,7 +227,7 @@ public void importTable(String inputTableName, String outputTableName,
 
         LOG.info("Hive import complete.");
 
-        cleanUp(inputTableName);
+        cleanUp(finalPath);
       }
     } finally {
       if (!isGenerateOnly()) {
@@ -267,23 +244,22 @@ public void importTable(String inputTableName, String outputTableName,
   /**
    * Clean up after successful HIVE import.
    *
-   * @param table Imported table name
+   * @param outputPath path to the output directory
    * @throws IOException
    */
-  private void cleanUp(String table) throws IOException {
+  private void cleanUp(Path outputPath) throws IOException {
     FileSystem fs = FileSystem.get(configuration);
 
     // HIVE is not always removing input directory after LOAD DATA statement
     // (which is our export directory). We're removing export directory in case
     // that is blank for case that user wants to periodically populate HIVE
     // table (for example with --hive-overwrite).
-    Path outputPath = getOutputPath(table);
     try {
       if (outputPath != null && fs.exists(outputPath)) {
         FileStatus[] statuses = fs.listStatus(outputPath);
         if (statuses.length == 0) {
           LOG.info("Export directory is empty, removing it.");
-          fs.delete(getOutputPath(table));
+          fs.delete(outputPath, true);
         } else {
           LOG.info("Export directory is not empty, keeping it.");
         }
@@ -217,11 +217,11 @@ public String getCreateTableStmt() throws IOException {
    * @return the LOAD DATA statement to import the data in HDFS into hive.
    */
   public String getLoadDataStmt() throws IOException {
-    String finalPathStr = getFinalPathStr();
+    Path finalPath = getFinalPath();
 
     StringBuilder sb = new StringBuilder();
     sb.append("LOAD DATA INPATH '");
-    sb.append(finalPathStr + "'");
+    sb.append(finalPath.toString() + "'");
     if (options.doOverwriteHiveTable()) {
       sb.append(" OVERWRITE");
     }
@@ -240,7 +240,7 @@ public String getLoadDataStmt() throws IOException {
     return sb.toString();
   }
 
-  public String getFinalPathStr() throws IOException {
+  public Path getFinalPath() throws IOException {
     String warehouseDir = options.getWarehouseDir();
     if (null == warehouseDir) {
       warehouseDir = "";
@@ -248,15 +248,18 @@ public String getFinalPathStr() throws IOException {
       warehouseDir = warehouseDir + File.separator;
     }
 
-    String tablePath;
-    if (null != inputTableName) {
-      tablePath = warehouseDir + inputTableName;
+    // Final path is determined in the following order:
+    // 1. Use target dir if the user specified.
+    // 2. Use input table name.
+    String tablePath = null;
+    String targetDir = options.getTargetDir();
+    if (null != targetDir) {
+      tablePath = warehouseDir + targetDir;
     } else {
-      tablePath = options.getTargetDir();
+      tablePath = warehouseDir + inputTableName;
     }
     FileSystem fs = FileSystem.get(configuration);
-    Path finalPath = new Path(tablePath).makeQualified(fs);
-    return finalPath.toString();
+    return new Path(tablePath).makeQualified(fs);
   }
 
   /**
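The heart of the patch is the resolution order in getFinalPath() above: an explicit target dir takes precedence, otherwise the input table name is used, and either one is prefixed with the warehouse dir when it is set. A minimal standalone sketch of that order (the method name and parameters below are illustrative, not part of the patch):

    // Sketch of the order used by getFinalPath() above, not the actual Sqoop code.
    // warehouseDir may be null; targetDir is null when --target-dir was not given.
    static String resolveLoadPath(String warehouseDir, String targetDir,
        String inputTableName) {
      String prefix = "";
      if (warehouseDir != null && !warehouseDir.isEmpty()) {
        prefix = warehouseDir.endsWith(java.io.File.separator)
            ? warehouseDir : warehouseDir + java.io.File.separator;
      }
      if (targetDir != null) {
        return prefix + targetDir;        // 1. explicit target dir wins
      }
      return prefix + inputTableName;     // 2. fall back to the table name
    }

The real method then qualifies the result against the configured FileSystem (makeQualified), so the LOAD DATA statement carries a fully qualified path. The new test below exercises exactly this case.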
@@ -83,6 +83,34 @@ public void testDifferentTableNames() throws Exception {
     assertTrue(loadData.indexOf("/inputTable'") != -1);
   }
 
+  public void testDifferentTargetDirs() throws Exception {
+    String targetDir = "targetDir";
+    String inputTable = "inputTable";
+    String outputTable = "outputTable";
+
+    Configuration conf = new Configuration();
+    SqoopOptions options = new SqoopOptions();
+    // Specify a different target dir from input table name
+    options.setTargetDir(targetDir);
+    TableDefWriter writer = new TableDefWriter(options, null,
+        inputTable, outputTable, conf, false);
+
+    Map<String, Integer> colTypes = new HashMap<String, Integer>();
+    writer.setColumnTypes(colTypes);
+
+    String createTable = writer.getCreateTableStmt();
+    String loadData = writer.getLoadDataStmt();
+
+    LOG.debug("Create table stmt: " + createTable);
+    LOG.debug("Load data stmt: " + loadData);
+
+    // Assert that the statements generated have the form we expect.
+    assertTrue(createTable.indexOf(
+        "CREATE TABLE IF NOT EXISTS `" + outputTable + "`") != -1);
+    assertTrue(loadData.indexOf("INTO TABLE `" + outputTable + "`") != -1);
+    assertTrue(loadData.indexOf("/" + targetDir + "'") != -1);
+  }
+
   public void testPartitions() throws Exception {
     String[] args = {
         "--hive-partition-key", "ds",