If --hive-import and --generate-only are specified, create a ddl script file.
From: Aaron Kimball <aaron@cloudera.com>
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149870 13f79535-47bb-0310-9956-ffa450edef68
commit 6cbe7572e9
parent df76e995e8
.gitignore
@@ -18,4 +18,5 @@
 .project
 .launches
 .settings
+/tags
 
@@ -162,7 +162,7 @@
       </fileset>
     </copy>
 
-    <!-- copy the dependency libraries from ivy into the contrib/lib dir -->
+    <!-- copy the dependency libraries from ivy into the output lib dir -->
    <mkdir dir="${dist.dir}/lib"/>
    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
      <fileset dir="${redist.ivy.lib.dir}">
@@ -248,7 +248,7 @@
 
    <!-- requires fork=yes for:
        relative File paths to use the specified user.dir
-       classpath to use build/contrib/*.jar
+       classpath to use build/*.jar
    -->
    <sysproperty key="user.dir" value="${build.test}/data"/>
 
@@ -303,7 +303,7 @@
 
    <target name="checkfailure" if="tests.failed">
      <touch file="${build.dir}/testsfailed"/>
-     <fail unless="continueOnFailure">Contrib Tests failed!</fail>
+     <fail unless="continueOnFailure">Unit tests failed!</fail>
    </target>
 
    <target name="clean" description="Clean build target files">
@@ -54,6 +54,12 @@ The table name used in Hive is, by default, the same as that of the
 source table. You can control the output table name with the +--hive-table+
 option.
 
+If Hive import commands are used in conjunction with the +--generate-only+
+option, then a Hive import will not occur. Instead, the DDL commands to
+perform the import from HDFS to Hive are written to a file named +_tableName_.q+
+which you can then execute with +hive -f+ after the data is brought into
+HDFS.
+
 Hive's Type System
 ~~~~~~~~~~~~~~~~~~
 
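For illustration only (this is not part of the commit), a minimal sketch of the workflow the documentation above describes. The connection string and table name are hypothetical, the sqoop launcher stands for however Sqoop is invoked in your environment, and the generated EMPLOYEES.q is assumed to land in the code output directory (--outdir):

    # Generate the table class and the Hive DDL script, but run no import.
    $ sqoop --connect jdbc:hsqldb:hsql://localhost/sqoop --table EMPLOYEES \
        --hive-import --generate-only

    # After the table data has been brought into HDFS, run the generated DDL.
    $ hive -f EMPLOYEES.q

The generated script holds the Hive CREATE TABLE statement and, when a full import is requested, the LOAD DATA statement produced by the table writer (see the createTableStr and loadDataStmtStr lines in the hunks below).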
@@ -129,11 +129,14 @@ private void importTable(String tableName) throws IOException, ImportException {
         ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
         manager.importTable(context);
       }
     }
 
-    // If the user wants this table to be in Hive, perform that post-load.
-    if (options.doHiveImport()) {
-      hiveImport.importTable(tableName, options.getHiveTableName());
-    }
+    // If the user wants this table to be in Hive, perform that post-load.
+    // If the user is in gen-only mode, this code will generate a Hive DDL
+    // statement and write it to a file, but will not actually perform the
+    // import.
+    if (options.doHiveImport()) {
+      hiveImport.importTable(tableName, options.getHiveTableName());
+    }
   }
 
@@ -104,6 +104,31 @@ private void removeTempLogs(String tableName) throws IOException {
     }
   }
 
+  /**
+   * @return true if we're just generating the DDL for the import, but
+   * not actually running it (i.e., --generate-only mode). If so, don't
+   * do any side-effecting actions in Hive.
+   */
+  private boolean isGenerateOnly() {
+    return options.getAction() == SqoopOptions.ControlAction.GenerateOnly;
+  }
+
+  /**
+   * @return a File object that can be used to write the DDL statement.
+   * If we're in gen-only mode, this should be a file in the outdir, named
+   * after the Hive table we're creating. If we're in import mode, this should
+   * be a one-off temporary file.
+   */
+  private File getScriptFile(String outputTableName) throws IOException {
+    if (!isGenerateOnly()) {
+      return File.createTempFile("hive-script-",".txt",
+          new File(options.getTempDir()));
+    } else {
+      return new File(new File(options.getCodeOutputDir()),
+          outputTableName + ".q");
+    }
+  }
+
   /**
    * Perform the import of data from an HDFS path to a Hive table.
    *
@@ -112,9 +137,11 @@ private void removeTempLogs(String tableName) throws IOException {
    */
   public void importTable(String inputTableName, String outputTableName)
       throws IOException {
-    removeTempLogs(inputTableName);
-
-    LOG.info("Loading uploaded data into Hive");
+    if (!isGenerateOnly()) {
+      removeTempLogs(inputTableName);
+      LOG.info("Loading uploaded data into Hive");
+    }
 
     if (null == outputTableName) {
       outputTableName = inputTableName;
@@ -142,12 +169,12 @@ public void importTable(String inputTableName, String outputTableName)
     String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";
 
     // write them to a script file.
-    File tempFile = File.createTempFile("hive-script-",".txt", new File(options.getTempDir()));
+    File scriptFile = getScriptFile(outputTableName);
     try {
-      String tmpFilename = tempFile.toString();
+      String filename = scriptFile.toString();
       BufferedWriter w = null;
       try {
-        FileOutputStream fos = new FileOutputStream(tempFile);
+        FileOutputStream fos = new FileOutputStream(scriptFile);
         w = new BufferedWriter(new OutputStreamWriter(fos));
         w.write(createTableStr, 0, createTableStr.length());
         if (!options.doCreateHiveTableOnly()) {
@@ -167,26 +194,31 @@ public void importTable(String inputTableName, String outputTableName)
         }
       }
 
-      // run Hive on the script and note the return code.
-      String hiveExec = getHiveBinPath();
-      ArrayList<String> args = new ArrayList<String>();
-      args.add(hiveExec);
-      args.add("-f");
-      args.add(tmpFilename);
-
-      LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
-      int ret = Executor.exec(args.toArray(new String[0]),
-          env.toArray(new String[0]), logSink, logSink);
-      if (0 != ret) {
-        throw new IOException("Hive exited with status " + ret);
-      }
-
-      LOG.info("Hive import complete.");
+      if (!isGenerateOnly()) {
+        // run Hive on the script and note the return code.
+        String hiveExec = getHiveBinPath();
+        ArrayList<String> args = new ArrayList<String>();
+        args.add(hiveExec);
+        args.add("-f");
+        args.add(filename);
+
+        LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
+        int ret = Executor.exec(args.toArray(new String[0]),
+            env.toArray(new String[0]), logSink, logSink);
+        if (0 != ret) {
+          throw new IOException("Hive exited with status " + ret);
+        }
+
+        LOG.info("Hive import complete.");
+      }
     } finally {
-      if (!tempFile.delete()) {
-        LOG.warn("Could not remove temporary file: " + tempFile.toString());
-        // try to delete the file later.
-        tempFile.deleteOnExit();
+      if (!isGenerateOnly()) {
+        // User isn't interested in saving the DDL. Remove the file.
+        if (!scriptFile.delete()) {
+          LOG.warn("Could not remove temporary file: " + scriptFile.toString());
+          // try to delete the file later.
+          scriptFile.deleteOnExit();
+        }
       }
     }
   }
@@ -25,8 +25,9 @@
 import org.apache.commons.logging.LogFactory;
 import org.junit.Test;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
 import org.apache.hadoop.sqoop.SqoopOptions;
 import org.apache.hadoop.sqoop.testutil.CommonArgs;
 import org.apache.hadoop.sqoop.testutil.HsqldbTestServer;
@@ -57,8 +58,11 @@ public class TestHiveImport extends ImportJobTestCase {
     args.add("--connect");
     args.add(HsqldbTestServer.getUrl());
     args.add("--hive-import");
-    args.add("--split-by");
-    args.add(getColNames()[0]);
+    String [] colNames = getColNames();
+    if (null != colNames) {
+      args.add("--split-by");
+      args.add(colNames[0]);
+    }
     args.add("--num-mappers");
     args.add("1");
 
@@ -101,6 +105,40 @@ private void runImportTest(String tableName, String [] types, String [] values,
     runImport(getArgv(true, extraArgs));
   }
 
+  /** Test that we can generate a file containing the DDL and not import. */
+  @Test
+  public void testGenerateOnly() throws IOException {
+    final String TABLE_NAME = "GenerateOnly";
+    String [] extraArgs = { "--generate-only" };
+
+    // Figure out where our target generated .q file is going to be.
+    SqoopOptions options = getSqoopOptions(extraArgs);
+    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
+        TABLE_NAME + ".q");
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+
+    // If it's already there, remove it before running the test to ensure
+    // that it's the current test that generated the file.
+    if (fs.exists(ddlFile)) {
+      if (!fs.delete(ddlFile, false)) {
+        LOG.warn("Could not delete previous ddl file: " + ddlFile);
+      }
+    }
+
+    // Run a basic import, but specify that we're just generating definitions.
+    String [] types = { "INTEGER" };
+    String [] vals = { "42" };
+    runImportTest(TABLE_NAME, types, vals, null, extraArgs);
+
+    // Test that the generated definition file exists.
+    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));
+
+    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
+        TABLE_NAME);
+    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
+  }
+
+
   /** Test that strings and ints are handled in the normal fashion */
   @Test
   public void testNormalHiveImport() throws IOException {