
If --hive-import and --generate-only are specified, create a DDL script file.

From: Aaron Kimball <aaron@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149870 13f79535-47bb-0310-9956-ffa450edef68
Committed by Andrew Bayer on 2011-07-22 20:03:38 +00:00
parent df76e995e8
commit 6cbe7572e9
6 changed files with 112 additions and 32 deletions

.gitignore

@@ -18,4 +18,5 @@
 .project
 .launches
 .settings
+/tags

build.xml

@@ -162,7 +162,7 @@
     </fileset>
   </copy>
-  <!-- copy the dependency libraries from ivy into the contrib/lib dir -->
+  <!-- copy the dependency libraries from ivy into the output lib dir -->
   <mkdir dir="${dist.dir}/lib"/>
   <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
     <fileset dir="${redist.ivy.lib.dir}">
@@ -248,7 +248,7 @@
 <!-- requires fork=yes for:
   relative File paths to use the specified user.dir
-  classpath to use build/contrib/*.jar
+  classpath to use build/*.jar
 -->
 <sysproperty key="user.dir" value="${build.test}/data"/>
@@ -303,7 +303,7 @@
 <target name="checkfailure" if="tests.failed">
   <touch file="${build.dir}/testsfailed"/>
-  <fail unless="continueOnFailure">Contrib Tests failed!</fail>
+  <fail unless="continueOnFailure">Unit tests failed!</fail>
 </target>

 <target name="clean" description="Clean build target files">

Documentation (Hive import section)

@@ -54,6 +54,12 @@
 The table name used in Hive is, by default, the same as that of the
 source table. You can control the output table name with the +--hive-table+
 option.
+
+If Hive import commands are used in conjunction with the +--generate-only+
+option, then a Hive import will not occur. Instead, the DDL commands to
+perform the import from HDFS to Hive are written to a file named +_tableName_.q+,
+which you can then execute with +hive -f+ after the data is brought into
+HDFS.

 Hive's Type System
 ~~~~~~~~~~~~~~~~~~
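
For illustration, a generated script of this kind might contain statements
along the following lines. This is a sketch only: the exact CREATE TABLE and
LOAD DATA text is derived by Sqoop from the real column types and HDFS paths,
and the table, columns, and path shown here are hypothetical.

    -- Hypothetical contents of EMPLOYEES.q (illustrative, not verbatim output).
    CREATE TABLE EMPLOYEES ( ID INT, NAME STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012'
    STORED AS TEXTFILE;
    LOAD DATA INPATH 'hdfs:///user/someuser/EMPLOYEES' INTO TABLE EMPLOYEES;

Once the data files are in HDFS, running hive -f EMPLOYEES.q performs the same
table creation and load that an immediate Hive import would have executed.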

Sqoop.java

@@ -129,11 +129,14 @@ private void importTable(String tableName) throws IOException, ImportException {
       ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
       manager.importTable(context);
     }
   }

-  // If the user wants this table to be in Hive, perform that post-load.
-  if (options.doHiveImport()) {
-    hiveImport.importTable(tableName, options.getHiveTableName());
-  }
+  // If the user wants this table to be in Hive, perform that post-load.
+  // If the user is in gen-only mode, this code will generate a Hive DDL
+  // statement and write it to a file, but will not actually perform the
+  // import.
+  if (options.doHiveImport()) {
+    hiveImport.importTable(tableName, options.getHiveTableName());
+  }
 }
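
For context, a hypothetical gen-only run might be launched as shown below. The
--hive-import and --generate-only flags come from this patch and its test; the
wrapper script name, connect string, and table name are illustrative
assumptions, not part of this commit.

    sqoop --connect jdbc:hsqldb:hsql://db.example.com/sample --table EMPLOYEES \
        --hive-import --generate-only

With both flags set, options.doHiveImport() is still true, so
hiveImport.importTable() runs, but per the comment above it only writes the DDL
file instead of invoking Hive.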

HiveImport.java

@@ -104,6 +104,31 @@ private void removeTempLogs(String tableName) throws IOException {
       }
     }

+  /**
+   * @return true if we're just generating the DDL for the import, but
+   * not actually running it (i.e., --generate-only mode). If so, don't
+   * do any side-effecting actions in Hive.
+   */
+  private boolean isGenerateOnly() {
+    return options.getAction() == SqoopOptions.ControlAction.GenerateOnly;
+  }
+
+  /**
+   * @return a File object that can be used to write the DDL statement.
+   * If we're in gen-only mode, this should be a file in the outdir, named
+   * after the Hive table we're creating. If we're in import mode, this should
+   * be a one-off temporary file.
+   */
+  private File getScriptFile(String outputTableName) throws IOException {
+    if (!isGenerateOnly()) {
+      return File.createTempFile("hive-script-", ".txt",
+          new File(options.getTempDir()));
+    } else {
+      return new File(new File(options.getCodeOutputDir()),
+          outputTableName + ".q");
+    }
+  }
+
   /**
    * Perform the import of data from an HDFS path to a Hive table.
    *
@@ -112,9 +137,11 @@
    */
   public void importTable(String inputTableName, String outputTableName)
       throws IOException {

-    removeTempLogs(inputTableName);
-    LOG.info("Loading uploaded data into Hive");
+    if (!isGenerateOnly()) {
+      removeTempLogs(inputTableName);
+      LOG.info("Loading uploaded data into Hive");
+    }

     if (null == outputTableName) {
       outputTableName = inputTableName;
@@ -142,12 +169,12 @@ public void importTable(String inputTableName, String outputTableName)
     String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";

     // write them to a script file.
-    File tempFile = File.createTempFile("hive-script-", ".txt", new File(options.getTempDir()));
+    File scriptFile = getScriptFile(outputTableName);
     try {
-      String tmpFilename = tempFile.toString();
+      String filename = scriptFile.toString();
       BufferedWriter w = null;
       try {
-        FileOutputStream fos = new FileOutputStream(tempFile);
+        FileOutputStream fos = new FileOutputStream(scriptFile);
         w = new BufferedWriter(new OutputStreamWriter(fos));
         w.write(createTableStr, 0, createTableStr.length());
         if (!options.doCreateHiveTableOnly()) {
@@ -167,26 +194,31 @@ public void importTable(String inputTableName, String outputTableName)
         }
       }

-      // run Hive on the script and note the return code.
-      String hiveExec = getHiveBinPath();
-      ArrayList<String> args = new ArrayList<String>();
-      args.add(hiveExec);
-      args.add("-f");
-      args.add(tmpFilename);
-
-      LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
-      int ret = Executor.exec(args.toArray(new String[0]),
-          env.toArray(new String[0]), logSink, logSink);
-      if (0 != ret) {
-        throw new IOException("Hive exited with status " + ret);
-      }
-      LOG.info("Hive import complete.");
+      if (!isGenerateOnly()) {
+        // run Hive on the script and note the return code.
+        String hiveExec = getHiveBinPath();
+        ArrayList<String> args = new ArrayList<String>();
+        args.add(hiveExec);
+        args.add("-f");
+        args.add(filename);
+
+        LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
+        int ret = Executor.exec(args.toArray(new String[0]),
+            env.toArray(new String[0]), logSink, logSink);
+        if (0 != ret) {
+          throw new IOException("Hive exited with status " + ret);
+        }
+        LOG.info("Hive import complete.");
+      }
     } finally {
-      if (!tempFile.delete()) {
-        LOG.warn("Could not remove temporary file: " + tempFile.toString());
-        // try to delete the file later.
-        tempFile.deleteOnExit();
-      }
+      if (!isGenerateOnly()) {
+        // User isn't interested in saving the DDL. Remove the file.
+        if (!scriptFile.delete()) {
+          LOG.warn("Could not remove temporary file: " + scriptFile.toString());
+          // try to delete the file later.
+          scriptFile.deleteOnExit();
+        }
+      }
     }
   }
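
The net effect of getScriptFile() is a simple naming rule: gen-only mode yields
a persistent <outdir>/<table>.q file, while a normal import yields a throwaway
temp file that the finally block deletes. The standalone Java sketch below
mirrors that rule outside of Sqoop; the directories and table name are
hypothetical.

import java.io.File;
import java.io.IOException;

public class ScriptFileRule {
  // Mirrors the branch in getScriptFile(): keep a named .q file in gen-only
  // mode, or create a one-off temp file that is removed after Hive runs.
  static File scriptFile(boolean generateOnly, String outDir,
      String tempDir, String table) throws IOException {
    if (generateOnly) {
      return new File(new File(outDir), table + ".q"); // preserved for the user
    }
    return File.createTempFile("hive-script-", ".txt", new File(tempDir));
  }

  public static void main(String[] args) throws IOException {
    // Hypothetical paths and table name, for illustration only.
    System.out.println(scriptFile(true, "/tmp/sqoop-gen", "/tmp", "EMPLOYEES"));
    // prints: /tmp/sqoop-gen/EMPLOYEES.q
  }
}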

TestHiveImport.java

@@ -25,8 +25,9 @@
 import org.apache.commons.logging.LogFactory;
 import org.junit.Test;

 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.sqoop.SqoopOptions;
 import org.apache.hadoop.sqoop.testutil.CommonArgs;
 import org.apache.hadoop.sqoop.testutil.HsqldbTestServer;
@@ -57,8 +58,11 @@ public class TestHiveImport extends ImportJobTestCase {
     args.add("--connect");
     args.add(HsqldbTestServer.getUrl());
     args.add("--hive-import");
-    args.add("--split-by");
-    args.add(getColNames()[0]);
+    String [] colNames = getColNames();
+    if (null != colNames) {
+      args.add("--split-by");
+      args.add(colNames[0]);
+    }
     args.add("--num-mappers");
     args.add("1");
@@ -101,6 +105,40 @@ private void runImportTest(String tableName, String [] types, String [] values,
     runImport(getArgv(true, extraArgs));
   }

+  /** Test that we can generate a file containing the DDL and not import. */
+  @Test
+  public void testGenerateOnly() throws IOException {
+    final String TABLE_NAME = "GenerateOnly";
+    String [] extraArgs = { "--generate-only" };
+
+    // Figure out where our target generated .q file is going to be.
+    SqoopOptions options = getSqoopOptions(extraArgs);
+    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
+        TABLE_NAME + ".q");
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+
+    // If it's already there, remove it before running the test to ensure
+    // that it's the current test that generated the file.
+    if (fs.exists(ddlFile)) {
+      if (!fs.delete(ddlFile, false)) {
+        LOG.warn("Could not delete previous ddl file: " + ddlFile);
+      }
+    }
+
+    // Run a basic import, but specify that we're just generating definitions.
+    String [] types = { "INTEGER" };
+    String [] vals = { "42" };
+    runImportTest(TABLE_NAME, types, vals, null, extraArgs);
+
+    // Test that the generated definition file exists.
+    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));
+
+    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
+        TABLE_NAME);
+    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
+  }
+
 /** Test that strings and ints are handled in the normal fashion */
 @Test
 public void testNormalHiveImport() throws IOException {