diff --git a/src/docs/man/import-args.txt b/src/docs/man/import-args.txt index d2da92ed..259c5cf5 100644 --- a/src/docs/man/import-args.txt +++ b/src/docs/man/import-args.txt @@ -2,6 +2,9 @@ Import control options ~~~~~~~~~~~~~~~~~~~~~~ +--append:: + Append data to an existing HDFS dataset + --as-sequencefile:: Imports data to SequenceFiles @@ -30,6 +33,9 @@ Import control options --table (table-name):: The table to read (required) +--target-dir (dir):: + Explicit HDFS target directory for the import. + --warehouse-dir (dir):: Tables are uploaded to the HDFS path +/warehouse/dir/(tablename)/+ diff --git a/src/docs/man/sqoop-import.txt b/src/docs/man/sqoop-import.txt index cf33b990..e6b9e1d5 100644 --- a/src/docs/man/sqoop-import.txt +++ b/src/docs/man/sqoop-import.txt @@ -27,6 +27,9 @@ include::common-args.txt[] Import control options ~~~~~~~~~~~~~~~~~~~~~~ +--append:: + Append data to an existing HDFS dataset + --as-sequencefile:: Imports data to SequenceFiles @@ -55,6 +58,9 @@ Import control options --table (table-name):: The table to read (required) +--target-dir (dir):: + Explicit HDFS target directory for the import. 
+ --warehouse-dir (dir):: Tables are uploaded to the HDFS path +/warehouse/dir/(tablename)/+ diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt index ed34ae3c..5cbce045 100644 --- a/src/docs/user/import.txt +++ b/src/docs/user/import.txt @@ -52,6 +52,8 @@ include::connecting.txt[] `-----------------------------`-------------------------------------- Argument Description --------------------------------------------------------------------- ++\--append+ Append data to an existing dataset\ + in HDFS +\--as-sequencefile+ Imports data to SequenceFiles +\--as-textfile+ Imports data as plain text (default) +\--columns + Columns to import from table @@ -63,6 +65,7 @@ Argument Description +\--split-by + Column of the table used to split work\ units +\--table + Table to read ++\--target-dir + HDFS destination dir +\--warehouse-dir + HDFS parent for table destination +\--where + WHERE clause to use during import +-z,\--compress+ Enable compression @@ -170,6 +173,16 @@ $ sqoop import --connnect --table foo --warehouse-dir /shared \ This command would write to a set of files in the +/shared/foo/+ directory. +You can also explicitly choose the target directory, like so: + +---- +$ sqoop import --connect --table foo --target-dir /dest \ + ... +---- + +This will import the files into the +/dest+ directory. +\--target-dir+ is +incompatible with +\--warehouse-dir+. + When using direct mode, you can specify additional arguments which should be passed to the underlying tool. If the argument +\--+ is given on the command-line, then subsequent arguments are sent @@ -181,6 +194,13 @@ $ sqoop import --connect jdbc:mysql://server.foo.com/db --table bar \ --direct -- --default-character-set=latin1 ---- +By default, imports go to a new target location. If the destination directory +already exists in HDFS, Sqoop will refuse to import rather than overwrite that +directory's contents. 
If you use the +\--append+ argument, Sqoop will import +data to a temporary directory and then rename the files into the normal +target directory in a manner that does not conflict with existing filenames +in that directory. + File Formats ^^^^^^^^^^^^ @@ -494,4 +514,12 @@ $ hadoop fs -cat EMPLOYEES/part-m-00000 | head -n 10 ... ---- +Performing an incremental import of new data, after having already +imported the first 100,000 rows of a table: + +---- +$ sqoop import --connect jdbc:mysql://db.foo.com/somedb --table sometable \ + --where "id > 100000" --target-dir /incremental_dataset --append +---- +