Mirror of https://github.com/apache/sqoop.git
commit 6cbe7572e9 (parent df76e995e8)

If --hive-import and --generate-only are specified, create a ddl script file.

From: Aaron Kimball <aaron@cloudera.com>
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149870 13f79535-47bb-0310-9956-ffa450edef68
.gitignore
@@ -18,4 +18,5 @@
 .project
 .launches
 .settings
+/tags
 
@@ -162,7 +162,7 @@
         </fileset>
       </copy>
 
-      <!-- copy the dependency libraries from ivy into the contrib/lib dir -->
+      <!-- copy the dependency libraries from ivy into the output lib dir -->
       <mkdir dir="${dist.dir}/lib"/>
       <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
         <fileset dir="${redist.ivy.lib.dir}">
@@ -248,7 +248,7 @@
 
       <!-- requires fork=yes for:
        relative File paths to use the specified user.dir
-       classpath to use build/contrib/*.jar
+       classpath to use build/*.jar
      -->
      <sysproperty key="user.dir" value="${build.test}/data"/>
 
@@ -303,7 +303,7 @@
 
  <target name="checkfailure" if="tests.failed">
    <touch file="${build.dir}/testsfailed"/>
-   <fail unless="continueOnFailure">Contrib Tests failed!</fail>
+   <fail unless="continueOnFailure">Unit tests failed!</fail>
  </target>
 
  <target name="clean" description="Clean build target files">

@@ -54,6 +54,12 @@ The table name used in Hive is, by default, the same as that of the
 source table. You can control the output table name with the +--hive-table+
 option.
 
+If Hive import commands are used in conjunction with the +--generate-only+
+option, then a Hive import will not occur. Instead, the DDL commands to
+perform the import from HDFS to Hive are written to a file named +_tableName_.q+
+which you can then execute with +hive -f+ after the data is brought into
+HDFS.
+
 Hive's Type System
 ~~~~~~~~~~~~~~~~~~
 
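For illustration, the workflow this documentation change describes might look like the following shell session. The connect string, table name, and generated file name are hypothetical; only the +--hive-import+, +--generate-only+, and +hive -f+ pieces come from this patch:

    # Write the Hive DDL (and LOAD DATA statement) to a script file instead of
    # running the Hive import; the script lands in the code output dir as EMPLOYEES.q.
    sqoop --connect jdbc:hsqldb:hsql://db.example.com/sample --table EMPLOYEES \
          --hive-import --generate-only

    # After the table data has been brought into HDFS, apply the saved script by hand.
    hive -f EMPLOYEES.q
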
@@ -129,13 +129,16 @@ private void importTable(String tableName) throws IOException, ImportException {
       ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
       manager.importTable(context);
     }
+    }
 
     // If the user wants this table to be in Hive, perform that post-load.
+    // If the user is in gen-only mode, this code will generate a Hive DDL
+    // statement and write it to a file, but will not actually perform the
+    // import.
     if (options.doHiveImport()) {
       hiveImport.importTable(tableName, options.getHiveTableName());
     }
   }
-  }
 
   private void exportTable(String tableName) throws ExportException, IOException {
     String jarFile = null;

@@ -104,6 +104,31 @@ private void removeTempLogs(String tableName) throws IOException {
     }
   }
 
+  /**
+   * @return true if we're just generating the DDL for the import, but
+   * not actually running it (i.e., --generate-only mode). If so, don't
+   * do any side-effecting actions in Hive.
+   */
+  private boolean isGenerateOnly() {
+    return options.getAction() == SqoopOptions.ControlAction.GenerateOnly;
+  }
+
+  /**
+   * @return a File object that can be used to write the DDL statement.
+   * If we're in gen-only mode, this should be a file in the outdir, named
+   * after the Hive table we're creating. If we're in import mode, this should
+   * be a one-off temporary file.
+   */
+  private File getScriptFile(String outputTableName) throws IOException {
+    if (!isGenerateOnly()) {
+      return File.createTempFile("hive-script-",".txt",
+          new File(options.getTempDir()));
+    } else {
+      return new File(new File(options.getCodeOutputDir()),
+          outputTableName + ".q");
+    }
+  }
+
   /**
    * Perform the import of data from an HDFS path to a Hive table.
    *
@@ -112,9 +137,11 @@ private void removeTempLogs(String tableName) throws IOException {
    */
   public void importTable(String inputTableName, String outputTableName)
       throws IOException {
-    removeTempLogs(inputTableName);
 
+    if (!isGenerateOnly()) {
+      removeTempLogs(inputTableName);
       LOG.info("Loading uploaded data into Hive");
+    }
 
     if (null == outputTableName) {
       outputTableName = inputTableName;
@@ -142,12 +169,12 @@ public void importTable(String inputTableName, String outputTableName)
     String loadDataStmtStr = tableWriter.getLoadDataStmt() + ";\n";
 
     // write them to a script file.
-    File tempFile = File.createTempFile("hive-script-",".txt", new File(options.getTempDir()));
+    File scriptFile = getScriptFile(outputTableName);
     try {
-      String tmpFilename = tempFile.toString();
+      String filename = scriptFile.toString();
       BufferedWriter w = null;
       try {
-        FileOutputStream fos = new FileOutputStream(tempFile);
+        FileOutputStream fos = new FileOutputStream(scriptFile);
         w = new BufferedWriter(new OutputStreamWriter(fos));
         w.write(createTableStr, 0, createTableStr.length());
         if (!options.doCreateHiveTableOnly()) {
@@ -167,12 +194,13 @@ public void importTable(String inputTableName, String outputTableName)
       }
     }
 
+    if (!isGenerateOnly()) {
       // run Hive on the script and note the return code.
       String hiveExec = getHiveBinPath();
       ArrayList<String> args = new ArrayList<String>();
       args.add(hiveExec);
       args.add("-f");
-      args.add(tmpFilename);
+      args.add(filename);
 
       LoggingAsyncSink logSink = new LoggingAsyncSink(LOG);
       int ret = Executor.exec(args.toArray(new String[0]),
@@ -182,11 +210,15 @@ public void importTable(String inputTableName, String outputTableName)
       }
 
       LOG.info("Hive import complete.");
+    }
     } finally {
-      if (!tempFile.delete()) {
-        LOG.warn("Could not remove temporary file: " + tempFile.toString());
+      if (!isGenerateOnly()) {
+        // User isn't interested in saving the DDL. Remove the file.
+        if (!scriptFile.delete()) {
+          LOG.warn("Could not remove temporary file: " + scriptFile.toString());
           // try to delete the file later.
-        tempFile.deleteOnExit();
+          scriptFile.deleteOnExit();
+        }
       }
     }
   }

@@ -25,8 +25,9 @@
 import org.apache.commons.logging.LogFactory;
 import org.junit.Test;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
 import org.apache.hadoop.sqoop.SqoopOptions;
 import org.apache.hadoop.sqoop.testutil.CommonArgs;
 import org.apache.hadoop.sqoop.testutil.HsqldbTestServer;
@@ -57,8 +58,11 @@ public class TestHiveImport extends ImportJobTestCase {
     args.add("--connect");
     args.add(HsqldbTestServer.getUrl());
     args.add("--hive-import");
+    String [] colNames = getColNames();
+    if (null != colNames) {
       args.add("--split-by");
-      args.add(getColNames()[0]);
+      args.add(colNames[0]);
+    }
     args.add("--num-mappers");
     args.add("1");
 
@@ -101,6 +105,40 @@ private void runImportTest(String tableName, String [] types, String [] values,
     runImport(getArgv(true, extraArgs));
   }
 
+  /** Test that we can generate a file containing the DDL and not import. */
+  @Test
+  public void testGenerateOnly() throws IOException {
+    final String TABLE_NAME = "GenerateOnly";
+    String [] extraArgs = { "--generate-only" };
+
+    // Figure out where our target generated .q file is going to be.
+    SqoopOptions options = getSqoopOptions(extraArgs);
+    Path ddlFile = new Path(new Path(options.getCodeOutputDir()),
+        TABLE_NAME + ".q");
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+
+    // If it's already there, remove it before running the test to ensure
+    // that it's the current test that generated the file.
+    if (fs.exists(ddlFile)) {
+      if (!fs.delete(ddlFile, false)) {
+        LOG.warn("Could not delete previous ddl file: " + ddlFile);
+      }
+    }
+
+    // Run a basic import, but specify that we're just generating definitions.
+    String [] types = { "INTEGER" };
+    String [] vals = { "42" };
+    runImportTest(TABLE_NAME, types, vals, null, extraArgs);
+
+    // Test that the generated definition file exists.
+    assertTrue("Couldn't find expected ddl file", fs.exists(ddlFile));
+
+    Path hiveImportPath = new Path(new Path(options.getWarehouseDir()),
+        TABLE_NAME);
+    assertFalse("Import actually happened!", fs.exists(hiveImportPath));
+  }
+
+
   /** Test that strings and ints are handled in the normal fashion */
   @Test
   public void testNormalHiveImport() throws IOException {