From c329f360dd08ef3b9bd82897fcd611e7431d32c8 Mon Sep 17 00:00:00 2001
From: Szabolcs Vasas
Date: Mon, 15 Oct 2018 15:32:39 +0200
Subject: [PATCH] SQOOP-3384: Document import into external Hive table backed
 by S3 (Boglarka Egyed via Szabolcs Vasas)

---
 src/docs/user/s3.txt | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/src/docs/user/s3.txt b/src/docs/user/s3.txt
index 3724454d..c54b26bc 100644
--- a/src/docs/user/s3.txt
+++ b/src/docs/user/s3.txt
@@ -118,3 +118,82 @@ $ sqoop import \
 ----
 
 Data from RDBMS can be imported into S3 in incremental +lastmodified+ mode as Parquet file format too.
+
+Import Into External Hive Table Backed By S3
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To import data from RDBMS into an external Hive table backed by S3, the AWS credentials have to be set in the Hive
+configuration file (+hive-site.xml+) too; see the example configuration at the end of this section. To learn more
+about Hive on Amazon Web Services, please see the Hive documentation at
+https://cwiki.apache.org/confluence/display/Hive/HiveAws.
+
+The current implementation of Sqoop requires that both the +target-dir+ and +external-table-dir+ options are set,
+where +external-table-dir+ has to point to the Hive table location in the S3 bucket.
+
+An example import into an external Hive table backed by S3:
+
+----
+$ sqoop import \
+    -Dfs.s3a.access.key=$AWS_ACCESS_KEY \
+    -Dfs.s3a.secret.key=$AWS_SECRET_KEY \
+    --connect $CONN \
+    --username $USER \
+    --password $PWD \
+    --table $TABLE_NAME \
+    --hive-import \
+    --target-dir s3a://example-bucket/target-directory \
+    --external-table-dir s3a://example-bucket/external-directory
+----
+
+An example that also creates the external Hive table backed by S3 during the import:
+
+----
+$ sqoop import \
+    -Dfs.s3a.access.key=$AWS_ACCESS_KEY \
+    -Dfs.s3a.secret.key=$AWS_SECRET_KEY \
+    --connect $CONN \
+    --username $USER \
+    --password $PWD \
+    --table $TABLE_NAME \
+    --hive-import \
+    --create-hive-table \
+    --hive-table $HIVE_TABLE_NAME \
+    --target-dir s3a://example-bucket/target-directory \
+    --external-table-dir s3a://example-bucket/external-directory
+----
+
+Data from RDBMS can be imported into an external Hive table backed by S3 as Parquet file format too.
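+
+For example, a sketch of the same import in Parquet format adds the +as-parquetfile+ option to the command used
+above; the connection parameters and bucket paths are placeholders, as before:
+
+----
+$ sqoop import \
+    -Dfs.s3a.access.key=$AWS_ACCESS_KEY \
+    -Dfs.s3a.secret.key=$AWS_SECRET_KEY \
+    --connect $CONN \
+    --username $USER \
+    --password $PWD \
+    --table $TABLE_NAME \
+    --hive-import \
+    --as-parquetfile \
+    --target-dir s3a://example-bucket/target-directory \
+    --external-table-dir s3a://example-bucket/external-directory
+----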
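+
+As mentioned above, the AWS credentials also have to be present in +hive-site.xml+. The following is a minimal
+sketch of the relevant properties, placed inside the +configuration+ element; the property names mirror the
++-Dfs.s3a.access.key+ and +-Dfs.s3a.secret.key+ options used in the examples, and the plain text values are shown
+for illustration only:
+
+----
+<!-- S3A credentials used by Hive; these mirror the -D options passed to Sqoop -->
+<property>
+  <name>fs.s3a.access.key</name>
+  <value>your-aws-access-key</value>
+</property>
+<property>
+  <name>fs.s3a.secret.key</name>
+  <value>your-aws-secret-key</value>
+</property>
+----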