Mirror of https://github.com/apache/sqoop.git (synced 2025-05-03 14:11:49 +08:00)

commit a625fd478c (parent: de836d714a)
From: Thomas White <tomwhite@apache.org>

MAPREDUCE-1341. Sqoop should have an option to create hive tables and skip the table import step. Contributed by Leonid Furman.

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149856 13f79535-47bb-0310-9956-ffa450edef68
@@ -98,6 +98,13 @@ Import control options
 --hive-import::
 If set, then import the table into Hive
 
+--hive-create-only::
+Creates table in hive and skips the data import step
+
+--hive-overwrite::
+Overwrites existing table in hive.
+By default it does not overwrite existing table.
+
 --table (table-name)::
 The table to import
 
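For illustration, a minimal Java sketch of how these two flags flow through option parsing, using the parse() method and the accessors added by this commit. The driver class, the import path, and the no-arg SqoopOptions constructor are assumptions for the example, not part of the patch:

    import org.apache.hadoop.sqoop.SqoopOptions; // package path assumed

    public class HiveFlagsDemo {
      public static void main(String[] argv) throws Exception {
        SqoopOptions options = new SqoopOptions(); // no-arg constructor assumed
        options.parse(new String[] {
            "--hive-import",       // generate and run the Hive script
            "--hive-create-only",  // skip the data import / LOAD DATA step
            "--hive-overwrite"     // emit CREATE TABLE without IF NOT EXISTS
        });
        System.out.println(options.doCreateHiveTableOnly()); // true
        System.out.println(options.doOverwriteHiveTable());  // true
      }
    }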
doc/hive.txt (20 changed lines)
@@ -27,14 +27,18 @@ TABLE+ statement to define the data's layout in Hive. Importing data
 into Hive is as simple as adding the *+--hive-import+* option to your
 Sqoop command line.
 
-After your data is imported into HDFS, Sqoop will generate a Hive
-script containing a +CREATE TABLE+ operation defining your columns using
-Hive's types, and a +LOAD DATA INPATH+ statement to move the data files
-into Hive's warehouse directory. The script will be executed by
-calling the installed copy of hive on the machine where Sqoop is run.
-If you have multiple Hive installations, or +hive+ is not in your
-+$PATH+ use the *+--hive-home+* option to identify the Hive installation
-directory. Sqoop will use +$HIVE_HOME/bin/hive+ from here.
+By default the data is imported into HDFS, but you can skip this operation
+by using the *+--hive-create-only+* option. Optionally, you can specify the
+*+--hive-overwrite+* option to indicate that the existing table in Hive must
+be replaced. After your data is imported into HDFS, or this step is
+omitted, Sqoop will generate a Hive script containing a +CREATE TABLE+
+operation defining your columns using Hive's types, and a +LOAD DATA INPATH+
+statement to move the data files into Hive's warehouse directory if the
+*+--hive-create-only+* option is not given. The script will be executed by
+calling the installed copy of hive on the machine where Sqoop is run. If you
+have multiple Hive installations, or +hive+ is not in your +$PATH+, use the
+*+--hive-home+* option to identify the Hive installation directory.
+Sqoop will use +$HIVE_HOME/bin/hive+ from here.
 
 NOTE: This function is incompatible with +--as-sequencefile+.
 
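The documented behavior reduces to the sketch below: a hypothetical helper, not Sqoop code, that mirrors the HiveImport change later in this diff, where the LOAD DATA INPATH statement is dropped from the generated script when the create-only flag is set:

    // Hypothetical helper (not Sqoop code) mirroring the documented behavior.
    public final class HiveScriptSketch {
      // CREATE TABLE is always written; the LOAD DATA INPATH statement is
      // omitted when --hive-create-only is in effect.
      static String buildHiveScript(String createTableStmt, String loadDataStmt,
                                    boolean createHiveTableOnly) {
        StringBuilder script = new StringBuilder(createTableStmt);
        if (!createHiveTableOnly) {
          script.append(loadDataStmt);
        }
        return script.toString();
      }
    }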
@@ -118,9 +118,12 @@ private void importTable(String tableName) throws IOException, ImportException {
     jarFile = generateORM(tableName);
 
     if (options.getAction() == SqoopOptions.ControlAction.FullImport) {
-      // Proceed onward to do the import.
-      ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
-      manager.importTable(context);
+      // check if data import is to be performed
+      if (!options.doCreateHiveTableOnly()) {
+        // Proceed onward to do the import.
+        ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
+        manager.importTable(context);
+      }
 
     // If the user wants this table to be in Hive, perform that post-load.
     if (options.doHiveImport()) {
@@ -100,6 +100,8 @@ public enum FileLayout {
   private String tmpDir; // where temp data goes; usually /tmp
   private String hiveHome;
   private boolean hiveImport;
+  private boolean createHiveTableOnly;
+  private boolean overwriteHiveTable;
   private String hiveTableName;
   private String packageName; // package to prepend to auto-named classes.
   private String className; // package+class to apply to individual table import.
@@ -204,6 +206,8 @@ private void loadFromProperties() {
 
     this.direct = getBooleanProperty(props, "direct.import", this.direct);
     this.hiveImport = getBooleanProperty(props, "hive.import", this.hiveImport);
+    this.createHiveTableOnly = getBooleanProperty(props, "hive.create.table.only", this.createHiveTableOnly);
+    this.overwriteHiveTable = getBooleanProperty(props, "hive.overwrite.table", this.overwriteHiveTable);
     this.useCompression = getBooleanProperty(props, "compression", this.useCompression);
     this.directSplitSize = getLongProperty(props, "direct.split.size",
         this.directSplitSize);
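The keys above suggest the two options can also be preset through Sqoop's saved-properties mechanism; a sketch with the key names copied from the hunk (the file that feeds these properties, and getBooleanProperty's internals, are not shown in this diff):

    import java.util.Properties;

    public class HivePropsDemo {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("hive.create.table.only", "true");
        props.setProperty("hive.overwrite.table", "false");
        // Roughly what getBooleanProperty() presumably does internally.
        boolean createOnly = Boolean.parseBoolean(
            props.getProperty("hive.create.table.only", "false"));
        boolean overwrite = Boolean.parseBoolean(
            props.getProperty("hive.overwrite.table", "false"));
        System.out.println(createOnly + " " + overwrite); // prints: true false
      }
    }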
@@ -513,6 +517,10 @@ public void parse(String [] args) throws InvalidOptionsException {
           this.hiveHome = args[++i];
         } else if (args[i].equals("--hive-import")) {
           this.hiveImport = true;
+        } else if (args[i].equals("--hive-create-only")) {
+          this.createHiveTableOnly = true;
+        } else if (args[i].equals("--hive-overwrite")) {
+          this.overwriteHiveTable = true;
         } else if (args[i].equals("--hive-table")) {
           this.hiveTableName = args[++i];
         } else if (args[i].equals("--num-mappers") || args[i].equals("-m")) {
@@ -779,6 +787,20 @@ public boolean doHiveImport() {
     return hiveImport;
   }
 
+  /**
+   * @return the user-specified option to create tables in hive with no loading
+   */
+  public boolean doCreateHiveTableOnly() {
+    return createHiveTableOnly;
+  }
+
+  /**
+   * @return the user-specified option to overwrite existing table in hive
+   */
+  public boolean doOverwriteHiveTable() {
+    return overwriteHiveTable;
+  }
+
   /**
    * @return location where .java files go; guaranteed to end with '/'
    */
@@ -150,7 +150,9 @@ public void importTable(String inputTableName, String outputTableName)
       FileOutputStream fos = new FileOutputStream(tempFile);
       w = new BufferedWriter(new OutputStreamWriter(fos));
       w.write(createTableStr, 0, createTableStr.length());
-      w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
+      if (!options.doCreateHiveTableOnly()) {
+        w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
+      }
     } catch (IOException ioe) {
       LOG.error("Error writing Hive load-in script: " + ioe.toString());
       ioe.printStackTrace();
@@ -121,7 +121,11 @@ public String getCreateTableStmt() throws IOException {
 
     String [] colNames = getColumnNames();
     StringBuilder sb = new StringBuilder();
-    sb.append("CREATE TABLE " + outputTableName + " ( ");
+    if (options.doOverwriteHiveTable()) {
+      sb.append("CREATE TABLE " + outputTableName + " ( ");
+    } else {
+      sb.append("CREATE TABLE IF NOT EXISTS " + outputTableName + " ( ");
+    }
 
     boolean first = true;
     for (String col : colNames) {
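Isolated from TableDefWriter, the DDL choice is just the branch below (a hypothetical helper, not Sqoop code). IF NOT EXISTS is the new default, so a re-run against an existing table no longer fails at the CREATE TABLE step; note that in this patch --hive-overwrite only switches the DDL form, and dropping a pre-existing table is outside this diff:

    // Hypothetical helper isolating the branch above.
    public final class TableDdlSketch {
      static String createTableOpening(String outputTableName,
                                       boolean overwriteHiveTable) {
        if (overwriteHiveTable) {
          return "CREATE TABLE " + outputTableName + " ( ";
        }
        // Default: idempotent DDL; re-importing into an existing table
        // does not fail on CREATE TABLE.
        return "CREATE TABLE IF NOT EXISTS " + outputTableName + " ( ";
      }
    }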
@@ -109,6 +109,24 @@ public void testNormalHiveImport() throws IOException {
     runImportTest("NORMAL_HIVE_IMPORT", types, vals, "normalImport.q", null);
   }
 
+  /** Test that table is created in hive with no data import */
+  @Test
+  public void testCreateOnlyHiveImport() throws IOException {
+    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
+    String [] vals = { "'test'", "42", "'somestring'" };
+    String [] extraArgs = {"--hive-create-only"};
+    runImportTest("CREATE_ONLY_HIVE_IMPORT", types, vals, "createOnlyImport.q", extraArgs);
+  }
+
+  /** Test that table is created in hive and replaces the existing table if any */
+  @Test
+  public void testCreateOverwriteHiveImport() throws IOException {
+    String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
+    String [] vals = { "'test'", "42", "'somestring'" };
+    String [] extraArgs = {"--hive-create-only", "--hive-overwrite"};
+    runImportTest("CREATE_OVERWRITE_HIVE_IMPORT", types, vals, "createOverwriteImport.q", extraArgs);
+  }
+
   /** Test that dates are coerced properly to strings */
   @Test
   public void testDate() throws IOException {
@@ -73,7 +73,7 @@ public void testDifferentTableNames() throws Exception {
     LOG.debug("Load data stmt: " + loadData);
 
     // Assert that the statements generated have the form we expect.
-    assertTrue(createTable.indexOf("CREATE TABLE outputTable") != -1);
+    assertTrue(createTable.indexOf("CREATE TABLE IF NOT EXISTS outputTable") != -1);
     assertTrue(loadData.indexOf("INTO TABLE outputTable") != -1);
     assertTrue(loadData.indexOf("/inputTable'") != -1);
   }
testdata/hive/scripts/createOnlyImport.q (new file, 1 line)

@@ -0,0 +1 @@
+CREATE TABLE IF NOT EXISTS CREATE_ONLY_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
testdata/hive/scripts/createOverwriteImport.q (new file, 1 line)

@@ -0,0 +1 @@
+CREATE TABLE CREATE_OVERWRITE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
testdata/hive/scripts/customDelimImport.q (2 changed lines)

@@ -1,2 +1,2 @@
-CREATE TABLE CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
+CREATE TABLE IF NOT EXISTS CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
 LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/CUSTOM_DELIM_IMPORT' INTO TABLE CUSTOM_DELIM_IMPORT;
testdata/hive/scripts/dateImport.q (2 changed lines)

@@ -1,2 +1,2 @@
-CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
+CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
 LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;
testdata/hive/scripts/failingImport.q (2 changed lines)

@@ -1,2 +1,2 @@
-CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
+CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
 LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;
testdata/hive/scripts/normalImport.q (2 changed lines)

@@ -1,2 +1,2 @@
-CREATE TABLE NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
+CREATE TABLE IF NOT EXISTS NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
 LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NORMAL_HIVE_IMPORT' INTO TABLE NORMAL_HIVE_IMPORT;
testdata/hive/scripts/numericImport.q (2 changed lines)

@@ -1,2 +1,2 @@
-CREATE TABLE NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
+CREATE TABLE IF NOT EXISTS NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
 LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NUMERIC_HIVE_IMPORT' INTO TABLE NUMERIC_HIVE_IMPORT;