mirror of
https://github.com/apache/sqoop.git
synced 2025-05-03 03:40:34 +08:00
MAPREDUCE-1341. Sqoop should have an option to create hive tables and skip the table import step. Contributed by Leonid Furman.
From: Thomas White <tomwhite@apache.org> git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149856 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de836d714a
commit
a625fd478c
@ -98,6 +98,13 @@ Import control options
|
||||
--hive-import::
|
||||
If set, then import the table into Hive
|
||||
|
||||
--hive-create-only::
|
||||
Creates the table in Hive and skips the data import step
|
||||
|
||||
--hive-overwrite::
|
||||
Overwrites an existing table in Hive.
|
||||
By default, an existing table is not overwritten.
|
||||
|
||||
--table (table-name)::
|
||||
The table to import
|
||||
|
||||
|
20
doc/hive.txt
20
doc/hive.txt
@ -27,14 +27,18 @@ TABLE+ statement to define the data's layout in Hive. Importing data
|
||||
into Hive is as simple as adding the *+--hive-import+* option to your
|
||||
Sqoop command line.
|
||||
|
||||
After your data is imported into HDFS, Sqoop will generate a Hive
|
||||
script containing a +CREATE TABLE+ operation defining your columns using
|
||||
Hive's types, and a +LOAD DATA INPATH+ statement to move the data files
|
||||
into Hive's warehouse directory. The script will be executed by
|
||||
calling the installed copy of hive on the machine where Sqoop is run.
|
||||
If you have multiple Hive installations, or +hive+ is not in your
|
||||
+$PATH+ use the *+--hive-home+* option to identify the Hive installation
|
||||
directory. Sqoop will use +$HIVE_HOME/bin/hive+ from here.
|
||||
By default the data is imported into HDFS, but you can skip this operation
|
||||
by using the *+--hive-create-only+* option. Optionally, you can specify the
|
||||
*+--hive-overwrite+* option to indicate that an existing table in Hive must
|
||||
be replaced. After your data is imported into HDFS or this step is
|
||||
omitted, Sqoop will generate a Hive script containing a +CREATE TABLE+
|
||||
operation defining your columns using Hive's types, and a +LOAD DATA INPATH+
|
||||
statement to move the data files into Hive's warehouse directory if
|
||||
the *+--hive-create-only+* option is not specified. The script will be executed by calling
|
||||
the installed copy of hive on the machine where Sqoop is run. If you have
|
||||
multiple Hive installations, or +hive+ is not in your +$PATH+, use the
|
||||
*+--hive-home+* option to identify the Hive installation directory.
|
||||
Sqoop will use +$HIVE_HOME/bin/hive+ from here.
|
||||
|
||||
NOTE: This function is incompatible with +--as-sequencefile+.
|
||||
|
||||
|
@ -118,9 +118,12 @@ private void importTable(String tableName) throws IOException, ImportException {
|
||||
jarFile = generateORM(tableName);
|
||||
|
||||
if (options.getAction() == SqoopOptions.ControlAction.FullImport) {
|
||||
// check if data import is to be performed
|
||||
if (!options.doCreateHiveTableOnly()) {
|
||||
// Proceed onward to do the import.
|
||||
ImportJobContext context = new ImportJobContext(tableName, jarFile, options);
|
||||
manager.importTable(context);
|
||||
}
|
||||
|
||||
// If the user wants this table to be in Hive, perform that post-load.
|
||||
if (options.doHiveImport()) {
|
||||
|
@ -100,6 +100,8 @@ public enum FileLayout {
|
||||
private String tmpDir; // where temp data goes; usually /tmp
|
||||
private String hiveHome;
|
||||
private boolean hiveImport;
|
||||
private boolean createHiveTableOnly;
|
||||
private boolean overwriteHiveTable;
|
||||
private String hiveTableName;
|
||||
private String packageName; // package to prepend to auto-named classes.
|
||||
private String className; // package+class to apply to individual table import.
|
||||
@ -204,6 +206,8 @@ private void loadFromProperties() {
|
||||
|
||||
this.direct = getBooleanProperty(props, "direct.import", this.direct);
|
||||
this.hiveImport = getBooleanProperty(props, "hive.import", this.hiveImport);
|
||||
this.createHiveTableOnly = getBooleanProperty(props, "hive.create.table.only", this.createHiveTableOnly);
|
||||
this.overwriteHiveTable = getBooleanProperty(props, "hive.overwrite.table", this.overwriteHiveTable);
|
||||
this.useCompression = getBooleanProperty(props, "compression", this.useCompression);
|
||||
this.directSplitSize = getLongProperty(props, "direct.split.size",
|
||||
this.directSplitSize);
|
||||
@ -513,6 +517,10 @@ public void parse(String [] args) throws InvalidOptionsException {
|
||||
this.hiveHome = args[++i];
|
||||
} else if (args[i].equals("--hive-import")) {
|
||||
this.hiveImport = true;
|
||||
} else if (args[i].equals("--hive-create-only")) {
|
||||
this.createHiveTableOnly = true;
|
||||
} else if (args[i].equals("--hive-overwrite")) {
|
||||
this.overwriteHiveTable = true;
|
||||
} else if (args[i].equals("--hive-table")) {
|
||||
this.hiveTableName = args[++i];
|
||||
} else if (args[i].equals("--num-mappers") || args[i].equals("-m")) {
|
||||
@ -779,6 +787,20 @@ public boolean doHiveImport() {
|
||||
return hiveImport;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the user-specified option to create tables in hive with no loading
|
||||
*/
|
||||
public boolean doCreateHiveTableOnly() {
|
||||
return createHiveTableOnly;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the user-specified option to overwrite existing table in hive
|
||||
*/
|
||||
public boolean doOverwriteHiveTable() {
|
||||
return overwriteHiveTable;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return location where .java files go; guaranteed to end with '/'
|
||||
*/
|
||||
|
@ -150,7 +150,9 @@ public void importTable(String inputTableName, String outputTableName)
|
||||
FileOutputStream fos = new FileOutputStream(tempFile);
|
||||
w = new BufferedWriter(new OutputStreamWriter(fos));
|
||||
w.write(createTableStr, 0, createTableStr.length());
|
||||
if (!options.doCreateHiveTableOnly()) {
|
||||
w.write(loadDataStmtStr, 0, loadDataStmtStr.length());
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
LOG.error("Error writing Hive load-in script: " + ioe.toString());
|
||||
ioe.printStackTrace();
|
||||
|
@ -121,7 +121,11 @@ public String getCreateTableStmt() throws IOException {
|
||||
|
||||
String [] colNames = getColumnNames();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (options.doOverwriteHiveTable()) {
|
||||
sb.append("CREATE TABLE " + outputTableName + " ( ");
|
||||
} else {
|
||||
sb.append("CREATE TABLE IF NOT EXISTS " + outputTableName + " ( ");
|
||||
}
|
||||
|
||||
boolean first = true;
|
||||
for (String col : colNames) {
|
||||
|
@ -109,6 +109,24 @@ public void testNormalHiveImport() throws IOException {
|
||||
runImportTest("NORMAL_HIVE_IMPORT", types, vals, "normalImport.q", null);
|
||||
}
|
||||
|
||||
/** Test that table is created in hive with no data import */
|
||||
@Test
|
||||
public void testCreateOnlyHiveImport() throws IOException {
|
||||
String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
|
||||
String [] vals = { "'test'", "42", "'somestring'" };
|
||||
String [] extraArgs = {"--hive-create-only"};
|
||||
runImportTest("CREATE_ONLY_HIVE_IMPORT", types, vals, "createOnlyImport.q", extraArgs);
|
||||
}
|
||||
|
||||
/** Test that table is created in hive and replaces the existing table if any */
|
||||
@Test
|
||||
public void testCreateOverwriteHiveImport() throws IOException {
|
||||
String [] types = { "VARCHAR(32)", "INTEGER", "CHAR(64)" };
|
||||
String [] vals = { "'test'", "42", "'somestring'" };
|
||||
String [] extraArgs = {"--hive-create-only", "--hive-overwrite"};
|
||||
runImportTest("CREATE_OVERWRITE_HIVE_IMPORT", types, vals, "createOverwriteImport.q", extraArgs);
|
||||
}
|
||||
|
||||
/** Test that dates are coerced properly to strings */
|
||||
@Test
|
||||
public void testDate() throws IOException {
|
||||
|
@ -73,7 +73,7 @@ public void testDifferentTableNames() throws Exception {
|
||||
LOG.debug("Load data stmt: " + loadData);
|
||||
|
||||
// Assert that the statements generated have the form we expect.
|
||||
assertTrue(createTable.indexOf("CREATE TABLE outputTable") != -1);
|
||||
assertTrue(createTable.indexOf("CREATE TABLE IF NOT EXISTS outputTable") != -1);
|
||||
assertTrue(loadData.indexOf("INTO TABLE outputTable") != -1);
|
||||
assertTrue(loadData.indexOf("/inputTable'") != -1);
|
||||
}
|
||||
|
1
testdata/hive/scripts/createOnlyImport.q
vendored
Normal file
1
testdata/hive/scripts/createOnlyImport.q
vendored
Normal file
@ -0,0 +1 @@
|
||||
CREATE TABLE IF NOT EXISTS CREATE_ONLY_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
1
testdata/hive/scripts/createOverwriteImport.q
vendored
Normal file
1
testdata/hive/scripts/createOverwriteImport.q
vendored
Normal file
@ -0,0 +1 @@
|
||||
CREATE TABLE CREATE_OVERWRITE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
2
testdata/hive/scripts/customDelimImport.q
vendored
2
testdata/hive/scripts/customDelimImport.q
vendored
@ -1,2 +1,2 @@
|
||||
CREATE TABLE CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
|
||||
CREATE TABLE IF NOT EXISTS CUSTOM_DELIM_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' LINES TERMINATED BY '\174' STORED AS TEXTFILE;
|
||||
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/CUSTOM_DELIM_IMPORT' INTO TABLE CUSTOM_DELIM_IMPORT;
|
||||
|
2
testdata/hive/scripts/dateImport.q
vendored
2
testdata/hive/scripts/dateImport.q
vendored
@ -1,2 +1,2 @@
|
||||
CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;
|
||||
|
2
testdata/hive/scripts/failingImport.q
vendored
2
testdata/hive/scripts/failingImport.q
vendored
@ -1,2 +1,2 @@
|
||||
CREATE TABLE DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
CREATE TABLE IF NOT EXISTS DATE_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/DATE_HIVE_IMPORT' INTO TABLE DATE_HIVE_IMPORT;
|
||||
|
2
testdata/hive/scripts/normalImport.q
vendored
2
testdata/hive/scripts/normalImport.q
vendored
@ -1,2 +1,2 @@
|
||||
CREATE TABLE NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
CREATE TABLE IF NOT EXISTS NORMAL_HIVE_IMPORT ( DATA_COL0 STRING, DATA_COL1 INT, DATA_COL2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NORMAL_HIVE_IMPORT' INTO TABLE NORMAL_HIVE_IMPORT;
|
||||
|
2
testdata/hive/scripts/numericImport.q
vendored
2
testdata/hive/scripts/numericImport.q
vendored
@ -1,2 +1,2 @@
|
||||
CREATE TABLE NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
CREATE TABLE IF NOT EXISTS NUMERIC_HIVE_IMPORT ( DATA_COL0 DOUBLE, DATA_COL1 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LINES TERMINATED BY '\012' STORED AS TEXTFILE;
|
||||
LOAD DATA INPATH 'file:BASEPATH/sqoop/warehouse/NUMERIC_HIVE_IMPORT' INTO TABLE NUMERIC_HIVE_IMPORT;
|
||||
|
Loading…
Reference in New Issue
Block a user