
MAPREDUCE-1341. Sqoop should have an option to create hive tables and skip the table import step. Contributed by Leonid Furman.

From: Thomas White <tomwhite@apache.org>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149856 13f79535-47bb-0310-9956-ffa450edef68
Andrew Bayer 2011-07-22 20:03:32 +00:00
parent de836d714a
commit a625fd478c
15 changed files with 81 additions and 19 deletions

View File

@@ -98,6 +98,13 @@ Import control options
--hive-import::
If set, then import the table into Hive
--hive-create-only::
Creates the table in Hive and skips the data import step
--hive-overwrite::
Overwrites the existing table in Hive.
By default an existing table is not overwritten.
--table (table-name)::
The table to import
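
For example, the new flag can be combined with *+--hive-import+* to define the table in Hive without copying any data. An illustrative invocation only (assuming the usual +sqoop --connect ... --table ...+ form; the connection string and table name below are placeholders, not taken from this change):

----
$ sqoop --connect jdbc:mysql://db.example.com/corp --table EMPLOYEES \
    --hive-import --hive-create-only
----

Adding *+--hive-overwrite+* to the same command makes the generated script emit a plain +CREATE TABLE+ instead of +CREATE TABLE IF NOT EXISTS+.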

View File

@@ -27,14 +27,18 @@ TABLE+ statement to define the data's layout in Hive. Importing data
into Hive is as simple as adding the *+--hive-import+* option to your
Sqoop command line.
After your data is imported into HDFS, Sqoop will generate a Hive
script containing a +CREATE TABLE+ operation defining your columns using
Hive's types, and a +LOAD DATA INPATH+ statement to move the data files
into Hive's warehouse directory. The script will be executed by
calling the installed copy of hive on the machine where Sqoop is run.
If you have multiple Hive installations, or +hive+ is not in your
+$PATH+ use the *+--hive-home+* option to identify the Hive installation
directory. Sqoop will use +$HIVE_HOME/bin/hive+ from here.
By default the data is imported into HDFS, but you can skip this step
by using the *+--hive-create-only+* option. Optionally, you can specify the
*+--hive-overwrite+* option to indicate that any existing table in Hive must
be replaced. After your data is imported into HDFS (or that step is
skipped), Sqoop will generate a Hive script containing a +CREATE TABLE+
operation defining your columns using Hive's types and, unless the
*+--hive-create-only+* option is given, a +LOAD DATA INPATH+ statement to
move the data files into Hive's warehouse directory. The script will be
executed by calling the installed copy of hive on the machine where Sqoop
is run. If you have multiple Hive installations, or +hive+ is not in your
+$PATH+, use the *+--hive-home+* option to identify the Hive installation
directory. Sqoop will use +$HIVE_HOME/bin/hive+ from here.
NOTE: This function is incompatible with +--as-sequencefile+.
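
For a concrete picture of the generated script, here is a minimal sketch for a hypothetical table +EMPLOYEES+ (made-up columns and an abbreviated path; the real expected outputs appear in the +.q+ test files further down in this change):

----
-- default behaviour: create the table only if it is absent, then load the data files
CREATE TABLE IF NOT EXISTS EMPLOYEES ( ID INT, NAME STRING ) ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:...' INTO TABLE EMPLOYEES;

-- with --hive-create-only the LOAD DATA INPATH statement is omitted;
-- with --hive-overwrite a plain CREATE TABLE is emitted instead of CREATE TABLE IF NOT EXISTS
----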

View File

@@ -118,9 +118,12 @@ private void importTable(String tableName) throws IOException, ImportException {
jarFile = generateORM(tableName);
if (options.getAction() == SqoopOptions.ControlAction.FullImport) {
// Proceed onward to do the import.
ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
manager.importTable(context);
// check if data import is to be performed
if (!options.doCreateHiveTableOnly()) {
// Proceed onward to do the import.
ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
manager.importTable(context);
}
// If the user wants this table to be in Hive, perform that post-load.
if (options.doHiveImport()) {

View File

@@ -100,6 +100,8 @@ public enum FileLayout {
private String tmpDir; // where temp data goes; usually /tmp
private String hiveHome;
private boolean hiveImport;
private boolean createHiveTableOnly;
private boolean overwriteHiveTable;
private String hiveTableName;
private String packageName; // package to prepend to auto-named classes.
private String className; // package+class to apply to individual table import.
@@ -204,6 +206,8 @@ private void loadFromProperties() {
this.direct = getBooleanProperty(props, "direct.import", this.direct);
this.hiveImport = getBooleanProperty(props, "hive.import", this.hiveImport);
this.createHiveTableOnly = getBooleanProperty(props, "hive.create.table.only", this.createHiveTableOnly);
this.overwriteHiveTable = getBooleanProperty(props, "hive.overwrite.table", this.overwriteHiveTable);
this.useCompression = getBooleanProperty(props, "compression", this.useCompression);
this.directSplitSize = getLongProperty(props, "direct.split.size",
this.directSplitSize);
@@ -513,6 +517,10 @@ public void parse(String [] args) throws InvalidOptionsException {
this.hiveHome = args[++i];
} else if (args[i].equals("--hive-import")) {
this.hiveImport = true;
} else if (args[i].equals("--hive-create-only")) {
this.createHiveTableOnly = true;
} else if (args[i].equals("--hive-overwrite")) {
this.overwriteHiveTable = true;
} else if (args[i].equals("--hive-table")) {
this.hiveTableName = args[++i];
} else if (args[i].equals("--num-mappers") || args[i].equals("-m")) {
@@ -779,6 +787,20 @@ public boolean doHiveImport() {
return hiveImport;
}
/**
* @return the user-specified option to create the table in Hive without loading any data
*/
public boolean doCreateHiveTableOnly() {
return createHiveTableOnly;
}
/**
* @return the user-specified option to overwrite an existing table in Hive
*/
public boolean doOverwriteHiveTable() {
return overwriteHiveTable;
}
/**
* @return location where .java files go; guaranteed to end with '/'
*/

View File

@@ -150,7 +150,9 @@ public void importTable(String inputTableName, String outputTableName)
FileOutputStream fos = new FileOutputStream(tempFile);
w = new BufferedWriter(new OutputStreamWriter(fos));
w.write(createTableStr, 0, createTableStr.length());
w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
if (!options.doCreateHiveTableOnly()) {
w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
}
} catch (IOException ioe) {
LOG.error("Error writing Hive load-in script: " + ioe.toString());
ioe.printStackTrace();

View File

@@ -121,7 +121,11 @@ public String getCreateTableStmt() throws IOException {
String [] colNames = getColumnNames();
StringBuilder sb = new StringBuilder();
sb.append("CREATE TABLE " + outputTableName + " ( ");
if (options.doOverwriteHiveTable()) {
sb.append("CREATE TABLE " + outputTableName + " ( ");
} else {
sb.append("CREATE TABLE IF NOT EXISTS " + outputTableName + " ( ");
}
boolean first = true;
for (String col : colNames) {

View File

@@ -109,6 +109,24 @@ public void testNormalHiveImport() throws IOException {
runImportTest("NORMAL_HIVE_IMPORT", types, vals, "normalImport.q", null);
}
/** Test that the table is created in Hive with no data import */
@Test
public void testCreateOnlyHiveImport() throws IOException {
String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
String [] vals = { "'test'", "42", "'somestring'" };
String [] extraArgs = {"--hive-create-only"};
runImportTest("CREATE_ONLY_HIVE_IMPORT", types, vals, "createOnlyImport.q", extraArgs);
}
/** Test that the table is created in Hive, replacing the existing table if any */
@Test
public void testCreateOverwriteHiveImport() throws IOException {
String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
String [] vals = { "'test'", "42", "'somestring'" };
String [] extraArgs = {"--hive-create-only", "--hive-overwrite"};
runImportTest("CREATE_OVERWRITE_HIVE_IMPORT", types, vals, "createOverwriteImport.q", extraArgs);
}
/** Test that dates are coerced properly to strings */
@Test
public void testDate() throws IOException {

View File

@@ -73,7 +73,7 @@ public void testDifferentTableNames() throws Exception {
LOG.debug("Load data stmt: " + loadData);
// Assert that the statements generated have the form we expect.
assertTrue(createTable.indexOf("CREATE TABLE outputTable") != -1);
assertTrue(createTable.indexOf("CREATE TABLE IF NOT EXISTS outputTable") != -1);
assertTrue(loadData.indexOf("INTO TABLE outputTable") != -1);
assertTrue(loadData.indexOf("/inputTable'") != -1);
}

View File

@@ -0,0 +1 @@
CREATE TABLE IF NOT EXISTS CREATE_ONLY_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;

View File

@@ -0,0 +1 @@
CREATE TABLE CREATE_OVERWRITE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;

View File

@@ -1,2 +1,2 @@
CREATE TABLE CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/CUSTOM_DELIM_IMPORT' INTO TABLE CUSTOM_DELIM_IMPORT;

View File

@@ -1,2 +1,2 @@
CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;

View File

@@ -1,2 +1,2 @@
CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;

View File

@@ -1,2 +1,2 @@
CREATE TABLE NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NORMAL_HIVE_IMPORT' INTO TABLE NORMAL_HIVE_IMPORT;

View File

@@ -1,2 +1,2 @@
CREATE TABLE NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NUMERIC_HIVE_IMPORT' INTO TABLE NUMERIC_HIVE_IMPORT;