From be260e3096a6a2710e661c7fe33f6b402ce66476 Mon Sep 17 00:00:00 2001 From: Denes Bodo Date: Wed, 5 Jun 2019 10:39:04 +0200 Subject: [PATCH] SQOOP-3438: Sqoop Import with create hcatalog table for ORC will not work with Hive3 as the table created would be a ACID table and transactional (Denes Bodo via Boglarka Egyed) --- src/docs/user/hcatalog.txt | 7 +++ src/java/org/apache/sqoop/SqoopOptions.java | 10 +++ .../mapreduce/hcat/SqoopHCatUtilities.java | 12 ++-- .../org/apache/sqoop/tool/BaseSqoopTool.java | 16 +++++ .../apache/sqoop/hcat/HCatalogImportTest.java | 63 +++++++++++++++++++ .../apache/sqoop/hcat/HCatalogTestUtils.java | 19 ++++++ 6 files changed, 123 insertions(+), 4 deletions(-) diff --git a/src/docs/user/hcatalog.txt b/src/docs/user/hcatalog.txt index 2ae1d54d..96a90f75 100644 --- a/src/docs/user/hcatalog.txt +++ b/src/docs/user/hcatalog.txt @@ -61,6 +61,13 @@ The presence of the +--hcatalog-table+ option signifies that the import or export job is done using HCatalog tables, and it is a required option for HCatalog jobs. ++--hcatalog-external-table+:: +Use this flag to create the HCatalog table as an external Hive table, e.g. to store +data in a non-transactional table. For example, combine it with: +--hcatalog-storage-stanza "stored as orc tblproperties (\"transactional\"=\"false\")" +This flag can only be used together with +--create-hcatalog-table or --drop-and-create-hcatalog-table. + +--hcatalog-home+:: The home directory for the HCatalog installation. 
The directory is expected to have a +lib+ subdirectory and a +share/hcatalog+ subdirectory diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java index a7c19daa..cec81034 100644 --- a/src/java/org/apache/sqoop/SqoopOptions.java +++ b/src/java/org/apache/sqoop/SqoopOptions.java @@ -240,6 +240,8 @@ public String toString() { @StoredAsProperty("hive.partition.value") private String hivePartitionValue; @StoredAsProperty("hcatalog.table.name") private String hCatTableName; + @StoredAsProperty("hcatalog.external.table") + private boolean isExternalHCatTable = false; @StoredAsProperty("hcatalog.database.name") private String hCatDatabaseName; @StoredAsProperty("hcatalog.create.table") @@ -1654,6 +1656,14 @@ public String getHCatTableName() { return this.hCatTableName; } + public void setExternalHCatTable(boolean value) { + this.isExternalHCatTable = value; + } + + public boolean isHCatTableExternal() { + return this.isExternalHCatTable; + } + public void setHCatDatabaseName(String hd) { this.hCatDatabaseName = hd; } diff --git a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java index 234b7a87..cd939337 100644 --- a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java +++ b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java @@ -377,11 +377,11 @@ public void configureHCat(final SqoopOptions opts, final Job job, if (options.doCreateHCatalogTable()) { LOG.info("Creating HCatalog table " + hCatQualifiedTableName + " for import"); - createHCatTable(false); + createHCatTable(false, options.isHCatTableExternal()); } else if (options.doDropAndCreateHCatalogTable()) { LOG.info("Dropping and Creating HCatalog table " + hCatQualifiedTableName + " for import"); - createHCatTable(true); + createHCatTable(true, options.isHCatTableExternal()); } // For serializing the schema to conf HCatInputFormat hif = 
HCatInputFormat.setInput(hCatJob, hCatDatabaseName, @@ -599,14 +599,18 @@ public static StringBuilder escHCatObj(String objectName) { return sb; } - private void createHCatTable(boolean dropIfExists) throws IOException { + private void createHCatTable(boolean dropIfExists, boolean isExternal) throws IOException { StringBuilder sb = new StringBuilder(); if (dropIfExists) { sb.append("drop table "). append(escHCatObj(hCatDatabaseName)).append('.'). append(escHCatObj(hCatTableName)).append(";\n"); } - sb.append("create table "). + sb.append("create "); + if(isExternal) { + sb.append("external "); + } + sb.append("table "). append(escHCatObj(hCatDatabaseName)).append('.'); sb.append(escHCatObj(hCatTableName)).append(" (\n\t"); boolean first = true; diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java index 96f06de0..caf0598d 100644 --- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -132,6 +132,7 @@ public abstract class BaseSqoopTool extends org.apache.sqoop.tool.SqoopTool { public static final String CREATE_HIVE_TABLE_ARG = "create-hive-table"; public static final String HCATALOG_TABLE_ARG = "hcatalog-table"; + public static final String HCATALOG_EXTERNAL_TABLE_ARG = "hcatalog-external-table"; public static final String HCATALOG_DATABASE_ARG = "hcatalog-database"; public static final String CREATE_HCATALOG_TABLE_ARG = "create-hcatalog-table"; @@ -661,6 +662,10 @@ protected RelatedOptions getHCatalogOptions() { .withDescription("HCatalog table name") .withLongOpt(HCATALOG_TABLE_ARG) .create()); + hCatOptions.addOption(OptionBuilder + .withDescription("Signing that HCatalog table shall be created as external") + .withLongOpt(HCATALOG_EXTERNAL_TABLE_ARG) + .create()); hCatOptions.addOption(OptionBuilder .hasArg() .withDescription("HCatalog database name") @@ -1302,6 +1307,10 @@ protected void applyHCatalogOptions(CommandLine in, SqoopOptions out) 
{ out.setHCatTableName(in.getOptionValue(HCATALOG_TABLE_ARG)); } + if (in.hasOption(HCATALOG_EXTERNAL_TABLE_ARG)) { + out.setExternalHCatTable(true); + } + if (in.hasOption(HCATALOG_DATABASE_ARG)) { out.setHCatDatabaseName(in.getOptionValue(HCATALOG_DATABASE_ARG)); } @@ -1588,6 +1597,13 @@ protected void validateHiveOptions(SqoopOptions options) + " option." + HELP_STR); } + if(options.isHCatTableExternal() && + !(options.doCreateHCatalogTable() || options.doDropAndCreateHCatalogTable())) { + throw new InvalidOptionsException(String.format( + "Using --%s only takes effect when --%s or --%s is present", + HCATALOG_EXTERNAL_TABLE_ARG, CREATE_HCATALOG_TABLE_ARG, DROP_AND_CREATE_HCATALOG_TABLE)); + } + if (options.doHiveImport() && options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) { throw new InvalidOptionsException("Hive import is not compatible with " diff --git a/src/test/org/apache/sqoop/hcat/HCatalogImportTest.java b/src/test/org/apache/sqoop/hcat/HCatalogImportTest.java index c7e1ea6f..5b5dea9a 100644 --- a/src/test/org/apache/sqoop/hcat/HCatalogImportTest.java +++ b/src/test/org/apache/sqoop/hcat/HCatalogImportTest.java @@ -62,6 +62,10 @@ import org.junit.Test; import org.junit.rules.ExpectedException; +import static org.apache.sqoop.tool.BaseSqoopTool.CREATE_HCATALOG_TABLE_ARG; +import static org.apache.sqoop.tool.BaseSqoopTool.DROP_AND_CREATE_HCATALOG_TABLE; +import static org.apache.sqoop.tool.BaseSqoopTool.HCATALOG_EXTERNAL_TABLE_ARG; +import static org.apache.sqoop.tool.BaseSqoopTool.HCATALOG_STORAGE_STANZA_ARG; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -72,6 +76,8 @@ public class HCatalogImportTest extends ImportJobTestCase { private static final Log LOG = LogFactory.getLog(HCatalogImportTest.class); + public static final String TABLEPROPERTIES_ORC_NONTRANSACTIONAL = + "\"stored as orc tblproperties (\"transactional\"=\"false\")\""; private final 
HCatalogTestUtils utils = HCatalogTestUtils.instance(); private List extraTestArgs = null; private List configParams = null; @@ -770,6 +776,63 @@ public void testTableCreation() throws Exception { null, true, false); } + @Test + public void testExternalTableCreation() throws Exception { + externalTableCreationCoreTest(true, + "--" + CREATE_HCATALOG_TABLE_ARG, + "--" + HCATALOG_EXTERNAL_TABLE_ARG, + "--" + HCATALOG_STORAGE_STANZA_ARG, + TABLEPROPERTIES_ORC_NONTRANSACTIONAL); + } + + @Test + public void testExternalTableDropAndCreationWithExistingTargetTable() throws Exception { + utils.createHCatExternalTable(getTableName().toUpperCase()); + externalTableCreationCoreTest(false, + "--" + DROP_AND_CREATE_HCATALOG_TABLE, + "--" + HCATALOG_EXTERNAL_TABLE_ARG, + "--" + HCATALOG_STORAGE_STANZA_ARG, + TABLEPROPERTIES_ORC_NONTRANSACTIONAL); + } + + @Test (expected = IOException.class) + public void testExternalTableCreationFailsDueToExistingTable() throws Exception { + utils.createHCatExternalTable(getTableName().toUpperCase()); + externalTableCreationCoreTest(false, + "--" + CREATE_HCATALOG_TABLE_ARG, + "--" + HCATALOG_EXTERNAL_TABLE_ARG, + "--" + HCATALOG_STORAGE_STANZA_ARG, + TABLEPROPERTIES_ORC_NONTRANSACTIONAL); + } + + @Test(expected = IOException.class) + public void testExternalTableCreationFailsIfNoCreateOrDropTablePresent() throws Exception { + externalTableCreationCoreTest(true, + "--" + HCATALOG_EXTERNAL_TABLE_ARG, + "--" + HCATALOG_STORAGE_STANZA_ARG, + TABLEPROPERTIES_ORC_NONTRANSACTIONAL); + } + + private void externalTableCreationCoreTest(boolean dropHCatTableIfExists, String... 
lArgs) throws Exception { + final int TOTAL_RECORDS = 1 * 10; + String table = getTableName().toUpperCase(); + ColumnGenerator[] cols = new ColumnGenerator[] { + HCatalogTestUtils.colGenerator(HCatalogTestUtils.forIdx(0), + "varchar(20)", Types.VARCHAR, HCatFieldSchema.Type.STRING, 0, 0, + new HiveVarchar("1", 20), "1", KeyType.STATIC_KEY), + HCatalogTestUtils.colGenerator(HCatalogTestUtils.forIdx(1), + "varchar(20)", Types.VARCHAR, HCatFieldSchema.Type.STRING, 0, 0, + new HiveVarchar("2", 20), "2", KeyType.DYNAMIC_KEY), + }; + List addlArgsArray = new ArrayList(Arrays.asList(lArgs)); + setExtraArgs(addlArgsArray); + if (dropHCatTableIfExists) { + utils.dropHCatTableIfExists(table, SqoopHCatUtilities.DEFHCATDB); + } + runHCatImport(addlArgsArray, TOTAL_RECORDS, table, cols, + null, true, false); + } + @Test public void testTableCreationWithPartition() throws Exception { final int TOTAL_RECORDS = 1 * 10; diff --git a/src/test/org/apache/sqoop/hcat/HCatalogTestUtils.java b/src/test/org/apache/sqoop/hcat/HCatalogTestUtils.java index 28b42dc3..df660f58 100644 --- a/src/test/org/apache/sqoop/hcat/HCatalogTestUtils.java +++ b/src/test/org/apache/sqoop/hcat/HCatalogTestUtils.java @@ -752,6 +752,25 @@ public HCatSchema createHCatTable(CreateMode mode, int count, return hCatFullSchema; } + HCatSchema createHCatExternalTable(String table, ColumnGenerator... 
extraCols) + throws Exception { + HCatSchema hCatTblSchema = generateHCatTableSchema(extraCols); + HCatSchema hCatPartSchema = generateHCatPartitionSchema(extraCols); + HCatSchema hCatFullSchema = new HCatSchema(hCatTblSchema.getFields()); + for (HCatFieldSchema hfs : hCatPartSchema.getFields()) { + hCatFullSchema.append(hfs); + } + String databaseName = SqoopHCatUtilities.DEFHCATDB; + String createCmd = getHCatCreateTableCmd(databaseName, table, + hCatTblSchema.getFields(), hCatPartSchema.getFields()) + .replaceFirst( + "create table", + "create external table"); + utils.launchHCatCli(createCmd); + LOG.info("Created HCatalog table " + databaseName + "." + table); + return hCatFullSchema; + } + private void loadHCatTable(HCatSchema hCatSchema, String table, int count, ColumnGenerator... extraCols) throws Exception {