
If --hive-import and --generate-only are specified, create a ddl script file.

From: Aaron Kimball <aaron@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149870 13f79535-47bb-0310-9956-ffa450edef68
Committed by Andrew Bayer on 2011-07-22 20:03:38 +00:00
parent df76e995e8
commit 6cbe7572e9
6 changed files with 112 additions and 32 deletions

.gitignore

@@ -18,4 +18,5 @@
 .project
 .launches
 .settings
+/tags


@@ -162,7 +162,7 @@
       </fileset>
     </copy>
-    <!-- copy the dependency libraries from ivy into the contrib/lib dir -->
+    <!-- copy the dependency libraries from ivy into the output lib dir -->
     <mkdir dir="${dist.dir}/lib"/>
     <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
       <fileset dir="${redist.ivy.lib.dir}">
@@ -248,7 +248,7 @@
     <!-- requires fork=yes for:
       relative File paths to use the specified user.dir
-      classpath to use build/contrib/*.jar
+      classpath to use build/*.jar
     -->
     <sysproperty key="user.dir" value="${build.test}/data"/>
@@ -303,7 +303,7 @@
   <target name="checkfailure" if="tests.failed">
     <touch file="${build.dir}/testsfailed"/>
-    <fail unless="continueOnFailure">Contrib Tests failed!</fail>
+    <fail unless="continueOnFailure">Unit tests failed!</fail>
   </target>

   <target name="clean" description="Clean build target files">


@@ -54,6 +54,12 @@ The table name used in Hive is, by default, the same as that of the
 source table. You can control the output table name with the +--hive-table+
 option.

+If Hive import commands are used in conjunction with the +--generate-only+
+option, then a Hive import will not occur. Instead, the DDL commands to
+perform the import from HDFS to Hive are written to a file named +_tableName_.q+
+which you can then execute with +hive -f+ after the data is brought into
+HDFS.
+
 Hive's Type System
 ~~~~~~~~~~~~~~~~~~
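To make the documented workflow concrete, here is a rough sketch of a generate-only run. It assumes a hypothetical EMPLOYEES table, a placeholder connect string, a sqoop launcher script on the PATH, and that hive is invoked from the directory holding the generated script; only the flags and the hive -f step come from the patch itself.

    $ sqoop --connect jdbc:hsqldb:hsql://db.example.com/sqoop \
        --table EMPLOYEES --hive-import --generate-only
    $ # ...bring the EMPLOYEES data into HDFS by whatever means...
    $ hive -f EMPLOYEES.q

The first command only writes EMPLOYEES.q to the code output directory; no Hive statements run until hive -f executes the script by hand.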


@@ -129,13 +129,16 @@ private void importTable(String tableName) throws IOException, ImportException {
       ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
       manager.importTable(context);
     }
+    }

     // If the user wants this table to be in Hive, perform that post-load.
+    // If the user is in gen-only mode, this code will generate a Hive DDL
+    // statement and write it to a file, but will not actually perform the
+    // import.
     if (options.doHiveImport()) {
       hiveImport.importTable(tableName, options.getHiveTableName());
     }
   }
-  }

   private void exportTable(String tableName) throws ExportException, IOException {
     String jarFile = null;


@@ -104,6 +104,31 @@ private void removeTempLogs(String tableName) throws IOException {
     }
   }

+  /**
+   * @return true if we're just generating the DDL for the import, but
+   * not actually running it (i.e., --generate-only mode). If so, don't
+   * do any side-effecting actions in Hive.
+   */
+  private boolean isGenerateOnly() {
+    return options.getAction() == SqoopOptions.ControlAction.GenerateOnly;
+  }
+
+  /**
+   * @return a File object that can be used to write the DDL statement.
+   * If we're in gen-only mode, this should be a file in the outdir, named
+   * after the Hive table we're creating. If we're in import mode, this should
+   * be a one-off temporary file.
+   */
+  private File getScriptFile(String outputTableName) throws IOException {
+    if (!isGenerateOnly()) {
+      return File.createTempFile("hive-script-",".txt",
+          new File(options.getTempDir()));
+    } else {
+      return new File(new File(options.getCodeOutputDir()),
+          outputTableName + ".q");
+    }
+  }
+
   /**
    * Perform the import of data from an HDFS path to a Hive table.
    *
@@ -112,9 +137,11 @@ private void removeTempLogs(String tableName) throws IOException {
    */
   public void importTable(String inputTableName, String outputTableName)
       throws IOException {
-    removeTempLogs(inputTableName);
-    LOG.info("Loading uploaded data into Hive");
+    if (!isGenerateOnly()) {
+      removeTempLogs(inputTableName);
+      LOG.info("Loading uploaded data into Hive");
+    }

     if (null == outputTableName) {
       outputTableName = inputTableName;
@@ -142,12 +169,12 @@ public void importTable(String inputTableName, String outputTableName)
     String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";

     // write them to a script file.
-    File tempFile = File.createTempFile("hive-script-",".txt", new File(options.getTempDir()));
+    File scriptFile = getScriptFile(outputTableName);
     try {
-      String tmpFilename = tempFile.toString();
+      String filename = scriptFile.toString();
       BufferedWriter w = null;
       try {
-        FileOutputStream fos = new FileOutputStream(tempFile);
+        FileOutputStream fos = new FileOutputStream(scriptFile);
         w = new BufferedWriter(new OutputStreamWriter(fos));
         w.write(createTableStr, 0, createTableStr.length());
         if (!options.doCreateHiveTableOnly()) {
@@ -167,12 +194,13 @@ public void importTable(String inputTableName, String outputTableName)
         }
       }

+      if (!isGenerateOnly()) {
       // run Hive on the script and note the return code.
       String hiveExec = getHiveBinPath();
       ArrayList<String> args = new ArrayList<String>();
       args.add(hiveExec);
       args.add("-f");
-      args.add(tmpFilename);
+      args.add(filename);

       LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
       int ret = Executor.exec(args.toArray(new String[0]),
@@ -182,11 +210,15 @@ public void importTable(String inputTableName, String outputTableName)
       }

       LOG.info("Hive import complete.");
+      }
     } finally {
-      if (!tempFile.delete()) {
-        LOG.warn("Could not remove temporary file: " + tempFile.toString());
-        // try to delete the file later.
-        tempFile.deleteOnExit();
+      if (!isGenerateOnly()) {
+        // User isn't interested in saving the DDL. Remove the file.
+        if (!scriptFile.delete()) {
+          LOG.warn("Could not remove temporary file: " + scriptFile.toString());
+          // try to delete the file later.
+          scriptFile.deleteOnExit();
+        }
       }
     }
   }


@@ -25,8 +25,9 @@
 import org.apache.commons.logging.LogFactory;
 import org.junit.Test;

+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.sqoop.SqoopOptions;
 import org.apache.hadoop.sqoop.testutil.CommonArgs;
 import org.apache.hadoop.sqoop.testutil.HsqldbTestServer;
@@ -57,8 +58,11 @@ public class TestHiveImport extends ImportJobTestCase {
     args.add("--connect");
     args.add(HsqldbTestServer.getUrl());
     args.add("--hive-import");
+    String [] colNames = getColNames();
+    if (null != colNames) {
       args.add("--split-by");
-      args.add(getColNames()[0]);
+      args.add(colNames[0]);
+    }
     args.add("--num-mappers");
     args.add("1");
@@ -101,6 +105,40 @@ private void runImportTest(String tableName, String [] types, String [] values,
     runImport(getArgv(true, extraArgs));
   }

+  /** Test that we can generate a file containing the DDL and not import. */
+  @Test
+  public void testGenerateOnly() throws IOException {
+    final String TABLE_NAME = "GenerateOnly";
+    String [] extraArgs = { "--generate-only" };
+
+    // Figure out where our target generated .q file is going to be.
+    SqoopOptions options = getSqoopOptions(extraArgs);
+    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
+        TABLE_NAME + ".q");
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+
+    // If it's already there, remove it before running the test to ensure
+    // that it's the current test that generated the file.
+    if (fs.exists(ddlFile)) {
+      if (!fs.delete(ddlFile, false)) {
+        LOG.warn("Could not delete previous ddl file: " + ddlFile);
+      }
+    }
+
+    // Run a basic import, but specify that we're just generating definitions.
+    String [] types = { "INTEGER" };
+    String [] vals = { "42" };
+    runImportTest(TABLE_NAME, types, vals, null, extraArgs);
+
+    // Test that the generated definition file exists.
+    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));
+
+    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
+        TABLE_NAME);
+    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
+  }
+
   /** Test that strings and ints are handled in the normal fashion */
   @Test
   public void testNormalHiveImport() throws IOException {