Append mode import and target-dir output
Signed-off-by: Aaron Kimball <aaron@cloudera.com>
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149913 13f79535-47bb-0310-9956-ffa450edef68

parent 7ecb3ee286
commit 568a827a1c
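What this buys you, in brief: an import can now land in an explicit directory given with --target-dir (instead of the implicit <warehouse-dir>/<table>), and --append lets a new import be merged into a directory that already holds data from earlier runs. The snippet below is an illustrative sketch only, not part of the patch: it exercises the SqoopOptions accessors added in this change (setTargetDir/getTargetDir, setAppendMode/isAppendMode) together with the pre-existing no-arg constructor and warehouse-dir accessors, and shows the destination-resolution order that ImportTool.getOutputPath() implements further down. The paths and table name are made up.

import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.SqoopOptions;

public class OutputPathSketch {

  // Mirrors ImportTool.getOutputPath() for the non-append case:
  // --target-dir wins, then <warehouse-dir>/<table>, then the bare table name.
  static Path resolve(SqoopOptions opts, String tableName) {
    if (opts.getTargetDir() != null) {
      return new Path(opts.getTargetDir());
    } else if (opts.getWarehouseDir() != null) {
      return new Path(opts.getWarehouseDir(), tableName);
    } else {
      return new Path(tableName);
    }
  }

  public static void main(String[] args) {
    // --warehouse-dir only: files go under <warehouse-dir>/<table>.
    SqoopOptions a = new SqoopOptions();
    a.setWarehouseDir("/user/demo/warehouse");
    System.out.println(resolve(a, "orders"));      // /user/demo/warehouse/orders

    // --target-dir: files go exactly where asked. (Giving both flags is now
    // rejected by validateImportOptions(), so use one or the other.)
    SqoopOptions b = new SqoopOptions();
    b.setTargetDir("/user/demo/orders_exact");
    b.setAppendMode(true);                         // --append: the job writes to a temp dir
    System.out.println(resolve(b, "orders"));      // first, then AppendUtils moves the files
                                                   // into /user/demo/orders_exact
  }
}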
@@ -86,6 +86,8 @@ public enum FileLayout {
   private String debugSqlCmd;
   private String driverClassName;
   private String warehouseDir;
+  private String targetDir;
+  private boolean append;
   private FileLayout layout;
   private boolean direct; // if true and conn is mysql, use mysqldump.
   private String tmpDir; // where temp data goes; usually /tmp

@@ -619,6 +621,22 @@ public void setWarehouseDir(String warehouse) {
     this.warehouseDir = warehouse;
   }
 
+  public String getTargetDir() {
+    return this.targetDir;
+  }
+
+  public void setTargetDir(String dir) {
+    this.targetDir = dir;
+  }
+
+  public void setAppendMode(boolean doAppend) {
+    this.append = doAppend;
+  }
+
+  public boolean isAppendMode() {
+    return this.append;
+  }
+
   /**
    * @return the destination file format
    */
@@ -65,7 +65,7 @@ public void importTable(ImportJobContext context)
 
     MySQLDumpImportJob importer = null;
     try {
-      importer = new MySQLDumpImportJob(options);
+      importer = new MySQLDumpImportJob(options, context);
     } catch (ClassNotFoundException cnfe) {
       throw new IOException("Could not load required classes", cnfe);
     }

@@ -372,7 +372,7 @@ public void importTable(ImportJobContext context)
 
     // This writer will be closed by AsyncSink.
     SplittableBufferedWriter w = DirectImportUtils.createHdfsSink(
-        options.getConf(), options, tableName);
+        options.getConf(), options, context);
 
     // Actually start the psql dump.
     p = Runtime.getRuntime().exec(args.toArray(new String[0]),
@@ -21,6 +21,7 @@
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat;
 import com.cloudera.sqoop.SqoopOptions;
+import org.apache.hadoop.fs.Path;
 
 /**
  * A set of parameters describing an import operation; this is passed to

@@ -32,13 +33,15 @@ public class ImportJobContext {
   private String jarFile;
   private SqoopOptions options;
   private Class<? extends InputFormat> inputFormatClass;
+  private Path destination;
 
   public ImportJobContext(final String table, final String jar,
-      final SqoopOptions opts) {
+      final SqoopOptions opts, final Path destination) {
     this.tableName = table;
     this.jarFile = jar;
     this.options = opts;
     this.inputFormatClass = DataDrivenDBInputFormat.class;
+    this.destination = destination;
   }
 
   /** @return the name of the table to import. */

@@ -67,5 +70,14 @@ public void setInputFormat(Class<? extends InputFormat> ifClass) {
   public Class<? extends InputFormat> getInputFormat() {
     return this.inputFormatClass;
   }
 
+  /**
+   * @return the destination path to where the output files will
+   * be first saved.
+   */
+  public Path getDestination() {
+    return this.destination;
+  }
+
 }
@@ -304,7 +304,7 @@ public void importTable(ImportJobContext context)
     SqoopOptions opts = context.getOptions();
 
     DataDrivenImportJob importer =
-        new DataDrivenImportJob(opts, context.getInputFormat());
+        new DataDrivenImportJob(opts, context.getInputFormat(), context);
 
     String splitCol = getSplitColumn(opts, tableName);
     if (null == splitCol && opts.getNumMappers() > 1) {
@@ -41,6 +41,7 @@
 import com.cloudera.sqoop.lib.LargeObjectLoader;
 import com.cloudera.sqoop.shims.HadoopShim;
 import com.cloudera.sqoop.shims.ShimLoader;
+import com.cloudera.sqoop.manager.ImportJobContext;
 
 /**
  * Actually runs a jdbc import job using the ORM files generated by the

@@ -53,12 +54,13 @@ public class DataDrivenImportJob extends ImportJobBase {
 
   @SuppressWarnings("unchecked")
   public DataDrivenImportJob(final SqoopOptions opts) {
-    super(opts, null, DataDrivenDBInputFormat.class, null);
+    super(opts, null, DataDrivenDBInputFormat.class, null, null);
   }
 
   public DataDrivenImportJob(final SqoopOptions opts,
-      final Class<? extends InputFormat> inputFormatClass) {
-    super(opts, null, inputFormatClass, null);
+      final Class<? extends InputFormat> inputFormatClass,
+      ImportJobContext context) {
+    super(opts, null, inputFormatClass, null, context);
   }
 
   @Override
@@ -24,7 +24,6 @@
 import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.GzipCodec;

@@ -40,6 +39,7 @@
 import com.cloudera.sqoop.shims.HadoopShim;
 import com.cloudera.sqoop.util.ImportException;
 import com.cloudera.sqoop.util.PerfCounters;
+import com.cloudera.sqoop.manager.ImportJobContext;
 
 /**
  * Base class for running an import MapReduce job.

@@ -47,6 +47,8 @@
  */
 public class ImportJobBase extends JobBase {
 
+  private ImportJobContext context;
+
   public static final Log LOG = LogFactory.getLog(
       ImportJobBase.class.getName());
 

@@ -55,14 +57,16 @@ public ImportJobBase() {
   }
 
   public ImportJobBase(final SqoopOptions opts) {
-    this(opts, null, null, null);
+    this(opts, null, null, null, null);
   }
 
   public ImportJobBase(final SqoopOptions opts,
       final Class<? extends Mapper> mapperClass,
       final Class<? extends InputFormat> inputFormatClass,
-      final Class<? extends OutputFormat> outputFormatClass) {
+      final Class<? extends OutputFormat> outputFormatClass,
+      final ImportJobContext context) {
     super(opts, mapperClass, inputFormatClass, outputFormatClass);
+    this.context = context;
   }
 
   /**

@@ -71,17 +75,7 @@ public ImportJobBase(final SqoopOptions opts,
   @Override
   protected void configureOutputFormat(Job job, String tableName,
       String tableClassName) throws ClassNotFoundException, IOException {
-    String hdfsWarehouseDir = options.getWarehouseDir();
-    Path outputPath;
-
-    if (null != hdfsWarehouseDir) {
-      Path hdfsWarehousePath = new Path(hdfsWarehouseDir);
-      hdfsWarehousePath.makeQualified(FileSystem.get(job.getConfiguration()));
-      outputPath = new Path(hdfsWarehousePath, tableName);
-    } else {
-      outputPath = new Path(tableName);
-    }
 
     job.setOutputFormatClass(getOutputFormatClass());
 
     if (options.getFileLayout() == SqoopOptions.FileLayout.SequenceFile) {

@@ -95,6 +89,7 @@ protected void configureOutputFormat(Job job, String tableName,
           CompressionType.BLOCK);
     }
 
+    Path outputPath = context.getDestination();
     FileOutputFormat.setOutputPath(job, outputPath);
   }
 
@@ -38,6 +38,7 @@
 import com.cloudera.sqoop.manager.ConnManager;
 import com.cloudera.sqoop.manager.MySQLUtils;
 import com.cloudera.sqoop.shims.ShimLoader;
+import com.cloudera.sqoop.manager.ImportJobContext;
 
 /**
  * Class that runs an import job using mysqldump in the mapper.

@@ -47,13 +48,13 @@ public class MySQLDumpImportJob extends ImportJobBase {
   public static final Log LOG =
       LogFactory.getLog(MySQLDumpImportJob.class.getName());
 
-  public MySQLDumpImportJob(final SqoopOptions opts)
+  public MySQLDumpImportJob(final SqoopOptions opts, ImportJobContext context)
       throws ClassNotFoundException {
     super(opts, MySQLDumpMapper.class,
         (Class<? extends InputFormat>) ShimLoader.getShimClass(
         "com.cloudera.sqoop.mapreduce.MySQLDumpInputFormat"),
         (Class<? extends OutputFormat>) ShimLoader.getShimClass(
-        "com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat"));
+        "com.cloudera.sqoop.mapreduce.RawKeyTextOutputFormat"), context);
   }
 
   /**
@@ -71,6 +71,9 @@ public abstract class BaseSqoopTool extends SqoopTool {
   public static final String HADOOP_HOME_ARG = "hadoop-home";
   public static final String HIVE_HOME_ARG = "hive-home";
   public static final String WAREHOUSE_DIR_ARG = "warehouse-dir";
+  public static final String TARGET_DIR_ARG = "target-dir";
+  public static final String APPEND_ARG = "append";
+
   public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
   public static final String FMT_TEXTFILE_ARG = "as-textfile";
   public static final String HIVE_IMPORT_ARG = "hive-import";
@@ -33,7 +33,9 @@
 import com.cloudera.sqoop.cli.ToolOptions;
 import com.cloudera.sqoop.hive.HiveImport;
 import com.cloudera.sqoop.manager.ImportJobContext;
+import com.cloudera.sqoop.util.AppendUtils;
 import com.cloudera.sqoop.util.ImportException;
+import org.apache.hadoop.fs.Path;
 
 /**
  * Tool that performs database imports to HDFS.

@@ -72,7 +74,7 @@ protected boolean init(SqoopOptions sqoopOpts) {
   public List<String> getGeneratedJarFiles() {
     return this.codeGenerator.getGeneratedJarFiles();
   }
 
   protected void importTable(SqoopOptions options, String tableName,
       HiveImport hiveImport) throws IOException, ImportException {
     String jarFile = null;

@@ -82,15 +84,48 @@ protected void importTable(SqoopOptions options, String tableName,
 
     // Do the actual import.
     ImportJobContext context = new ImportJobContext(tableName, jarFile,
-        options);
+        options, getOutputPath(options, tableName));
 
     manager.importTable(context);
 
+    if (options.isAppendMode()) {
+      AppendUtils app = new AppendUtils(context);
+      app.append();
+    }
+
     // If the user wants this table to be in Hive, perform that post-load.
     if (options.doHiveImport()) {
       hiveImport.importTable(tableName, options.getHiveTableName(), false);
     }
   }
 
+  /**
+   * @return the output path for the imported files;
+   * in append mode this will point to a temporary folder.
+   */
+  private Path getOutputPath(SqoopOptions options, String tableName) {
+    // Get output directory
+    String hdfsWarehouseDir = options.getWarehouseDir();
+    String hdfsTargetDir = options.getTargetDir();
+    Path outputPath = null;
+    if (options.isAppendMode()) {
+      // Use temporary path, later removed when appending
+      outputPath = AppendUtils.getTempAppendDir(tableName);
+      LOG.debug("Using temporary folder: " + outputPath.getName());
+    } else {
+      // Try in this order: target-dir or warehouse-dir
+      if (hdfsTargetDir != null) {
+        outputPath = new Path(hdfsTargetDir);
+      } else if (hdfsWarehouseDir != null) {
+        outputPath = new Path(hdfsWarehouseDir, tableName);
+      } else {
+        outputPath = new Path(tableName);
+      }
+    }
+
+    return outputPath;
+  }
+
   @Override
   /** {@inheritDoc} */
   public int run(SqoopOptions options) {

@@ -172,6 +207,14 @@ protected RelatedOptions getImportOptions() {
         .hasArg().withDescription("WHERE clause to use during import")
         .withLongOpt(WHERE_ARG)
         .create());
+    importOpts.addOption(OptionBuilder
+        .withDescription("Imports data in append mode")
+        .withLongOpt(APPEND_ARG)
+        .create());
+    importOpts.addOption(OptionBuilder.withArgName("dir")
+        .hasArg().withDescription("HDFS plain table destination")
+        .withLongOpt(TARGET_DIR_ARG)
+        .create());
     }
 
     importOpts.addOption(OptionBuilder.withArgName("dir")

@@ -278,12 +321,21 @@ public void applyOptions(CommandLine in, SqoopOptions out)
       if (in.hasOption(WHERE_ARG)) {
         out.setWhereClause(in.getOptionValue(WHERE_ARG));
       }
+
+      if (in.hasOption(TARGET_DIR_ARG)) {
+        out.setTargetDir(in.getOptionValue(TARGET_DIR_ARG));
+      }
+
+      if (in.hasOption(APPEND_ARG)) {
+        out.setAppendMode(true);
+      }
     }
 
     if (in.hasOption(WAREHOUSE_DIR_ARG)) {
       out.setWarehouseDir(in.getOptionValue(WAREHOUSE_DIR_ARG));
     }
 
 
     if (in.hasOption(FMT_SEQUENCEFILE_ARG)) {
       out.setFileLayout(SqoopOptions.FileLayout.SequenceFile);
     }

@@ -338,6 +390,11 @@ protected void validateImportOptions(SqoopOptions options)
         && options.getClassName() == null) {
       throw new InvalidOptionsException("Jar specified with --jar-file, but no "
           + "class specified with --class-name." + HELP_STR);
+    } else if (options.getTargetDir() != null
+        && options.getWarehouseDir() != null) {
+      throw new InvalidOptionsException(
+          "--target-dir with --warehouse-dir are incompatible options"
+          + HELP_STR);
     }
   }
 
new file (216 lines): src/java/com/cloudera/sqoop/util/AppendUtils.java
@@ -0,0 +1,216 @@

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.util;

import java.io.IOException;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.manager.ImportJobContext;
import com.cloudera.sqoop.SqoopOptions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Utilities used when appending imported files to an existing dir.
 */
public class AppendUtils {

  public static final Log LOG = LogFactory.getLog(AppendUtils.class.getName());

  private static final SimpleDateFormat DATE_FORM = new SimpleDateFormat(
      "ddHHmmssSSS");
  private static final String TEMP_IMPORT_ROOT = "_sqoop";

  private static final int PARTITION_DIGITS = 5;
  private static final String FILEPART_SEPARATOR = "-";
  private static final String FILEEXT_SEPARATOR = ".";

  private ImportJobContext context = null;

  public AppendUtils(ImportJobContext context) {
    this.context = context;
  }

  /**
   * Moves the imported files from temporary directory to specified target-dir,
   * renaming partition number if appending file exists.
   */
  public void append() throws IOException {

    SqoopOptions options = context.getOptions();
    FileSystem fs = FileSystem.get(options.getConf());
    Path tempDir = context.getDestination();

    // Try in this order: target-dir or warehouse-dir
    Path userDestDir = null;
    if (options.getTargetDir() != null) {
      userDestDir = new Path(options.getTargetDir());
    } else if (options.getWarehouseDir() != null) {
      userDestDir = new Path(options.getWarehouseDir(),
          context.getTableName());
    } else {
      userDestDir = new Path(context.getTableName());
    }

    int nextPartition = 0;

    // Create directory in case
    if (!fs.exists(userDestDir)) {
      LOG.info("Creating missing output directory - " + userDestDir.getName());
      fs.mkdirs(userDestDir);
      nextPartition = 0;
    } else {
      LOG.info("Appending to directory " + userDestDir.getName());
      // Get the right next partition for the imported files
      nextPartition = getNextPartition(fs, userDestDir);
    }

    // move files
    moveFiles(fs, tempDir, userDestDir, nextPartition);

    // delete temporary path
    LOG.debug("Deleting temporary folder "
        + context.getDestination().getName());
    fs.delete(context.getDestination(), true);
  }

  /**
   * Returns the greatest partition number available for appending, for data
   * files in targetDir.
   */
  private int getNextPartition(FileSystem fs, Path targetDir)
      throws IOException {

    int nextPartition = 0;
    FileStatus[] existingFiles = fs.listStatus(targetDir);
    if (existingFiles != null && existingFiles.length > 0) {
      Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
      for (FileStatus fileStat : existingFiles) {
        if (!fileStat.isDir()) {
          String filename = fileStat.getPath().getName();
          Matcher mat = patt.matcher(filename);
          if (mat.matches()) {
            int thisPart = Integer.parseInt(mat.group(1));
            if (thisPart >= nextPartition) {
              nextPartition = thisPart;
              nextPartition++;
            }
          }
        }
      }
    }

    if (nextPartition > 0) {
      LOG.info("Using found partition " + nextPartition);
    }

    return nextPartition;
  }

  /**
   * Move files from source to target using a specified starting partition.
   */
  private void moveFiles(FileSystem fs, Path sourceDir, Path targetDir,
      int partitionStart) throws IOException {

    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(PARTITION_DIGITS);
    numpart.setGroupingUsed(false);
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    FileStatus[] tempFiles = fs.listStatus(sourceDir);
    // Move and rename files & directories from temporary to target-dir thus
    // appending file's next partition
    for (FileStatus fileStat : tempFiles) {
      if (!fileStat.isDir()) {
        // Move imported data files
        String filename = fileStat.getPath().getName();
        Matcher mat = patt.matcher(filename);
        if (mat.matches()) {
          String name = getFilename(filename);
          String fileToMove = name.concat(numpart.format(partitionStart++));
          String extension = getFileExtension(filename);
          if (extension != null) {
            fileToMove = fileToMove.concat(extension);
          }
          LOG.debug("Filename: " + filename + " repartitioned to: "
              + fileToMove);
          fs.rename(fileStat.getPath(), new Path(targetDir, fileToMove));
        }
      } else {
        // Move directories (_logs & any other)
        String dirName = fileStat.getPath().getName();
        Path path = new Path(targetDir, dirName);
        int dirNumber = 0;
        while (fs.exists(path)) {
          path = new Path(targetDir, dirName.concat("-").concat(
              numpart.format(dirNumber++)));
        }
        LOG.debug("Directory: " + dirName + " renamed to: " + path.getName());
        fs.rename(fileStat.getPath(), path);
      }
    }
  }

  /** returns the name component of a file. */
  private String getFilename(String filename) {
    String result = null;
    int pos = filename.lastIndexOf(FILEPART_SEPARATOR);
    if (pos != -1) {
      result = filename.substring(0, pos + 1);
    } else {
      pos = filename.lastIndexOf(FILEEXT_SEPARATOR);
      if (pos != -1) {
        result = filename.substring(0, pos);
      } else {
        result = filename;
      }
    }
    return result;
  }

  /** returns the extension component of a filename. */
  private String getFileExtension(String filename) {
    int pos = filename.lastIndexOf(FILEEXT_SEPARATOR);
    if (pos != -1) {
      return filename.substring(pos, filename.length());
    } else {
      return null;
    }
  }

  /**
   * Creates a unique path object inside the sqoop temporary directory.
   *
   * @param tableName
   * @return a path pointing to the temporary directory
   */
  public static Path getTempAppendDir(String tableName) {
    String timeId = DATE_FORM.format(new Date(System.currentTimeMillis()));
    String tempDir = TEMP_IMPORT_ROOT + Path.SEPARATOR + timeId + tableName;
    return new Path(tempDir);
  }

}
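The renumbering scheme above is the subtle part of AppendUtils, so here is a minimal, self-contained sketch of it (JDK only, not part of the patch; the class name and file names are invented examples): find the highest existing part*-NNNNN partition in the destination, then rename the incoming files so their five-digit partition numbers continue from there, keeping any extension.

import java.text.NumberFormat;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AppendRenameSketch {
  private static final Pattern PART =
      Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");

  public static void main(String[] args) {
    List<String> existing = Arrays.asList("part-m-00000", "part-m-00001", "_logs");
    List<String> incoming = Arrays.asList("part-m-00000.gz", "part-m-00001.gz");

    // Highest existing partition + 1, as in AppendUtils.getNextPartition().
    int next = 0;
    for (String name : existing) {
      Matcher m = PART.matcher(name);
      if (m.matches()) {
        next = Math.max(next, Integer.parseInt(m.group(1)) + 1);
      }
    }

    // Zero-padded renumbering, as in AppendUtils.moveFiles().
    NumberFormat numpart = NumberFormat.getInstance();
    numpart.setMinimumIntegerDigits(5);
    numpart.setGroupingUsed(false);
    for (String name : incoming) {
      String base = name.substring(0, name.lastIndexOf('-') + 1);             // "part-m-"
      String ext = name.contains(".") ? name.substring(name.lastIndexOf('.')) : "";
      System.out.println(name + " -> " + base + numpart.format(next++) + ext);
    }
    // Prints:
    //   part-m-00000.gz -> part-m-00002.gz
    //   part-m-00001.gz -> part-m-00003.gz
  }
}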
@@ -33,6 +33,7 @@
 import com.cloudera.sqoop.io.SplittingOutputStream;
 import com.cloudera.sqoop.io.SplittableBufferedWriter;
 import org.apache.hadoop.util.Shell;
+import com.cloudera.sqoop.manager.ImportJobContext;
 
 /**
  * Utility methods that are common to various the direct import managers.

@@ -70,16 +71,10 @@ public static void setFilePermissions(File file, String modstr)
    * returned stream.
    */
   public static SplittableBufferedWriter createHdfsSink(Configuration conf,
-      SqoopOptions options, String tableName) throws IOException {
+      SqoopOptions options, ImportJobContext context) throws IOException {
 
     FileSystem fs = FileSystem.get(conf);
-    String warehouseDir = options.getWarehouseDir();
-    Path destDir = null;
-    if (null != warehouseDir) {
-      destDir = new Path(new Path(warehouseDir), tableName);
-    } else {
-      destDir = new Path(tableName);
-    }
+    Path destDir = context.getDestination();
 
     LOG.debug("Writing to filesystem: " + conf.get("fs.default.name"));
     LOG.debug("Creating destination directory " + destDir);
@@ -57,6 +57,8 @@ public static Test suite() {
     suite.addTestSuite(TestMultiMaps.class);
     suite.addTestSuite(TestSplitBy.class);
     suite.addTestSuite(TestWhere.class);
+    suite.addTestSuite(TestTargetDir.class);
+    suite.addTestSuite(TestAppendUtils.class);
     suite.addTestSuite(TestHiveImport.class);
     suite.addTestSuite(TestRecordParser.class);
     suite.addTestSuite(TestFieldFormatter.class);
new file (265 lines): src/test/com/cloudera/sqoop/TestAppendUtils.java
@@ -0,0 +1,265 @@

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.ImportTool;

/**
 * Test that --append works.
 */
public class TestAppendUtils extends ImportJobTestCase {

  private static final int PARTITION_DIGITS = 5;
  private static final String FILEPART_SEPARATOR = "-";

  public static final Log LOG = LogFactory.getLog(TestAppendUtils.class
      .getName());

  /**
   * Create the argv to pass to Sqoop.
   *
   * @return the argv as an array of strings.
   */
  protected ArrayList getOutputlessArgv(boolean includeHadoopFlags,
      String[] colNames, Configuration conf) {
    if (null == colNames) {
      colNames = getColNames();
    }

    String splitByCol = colNames[0];
    String columnsString = "";
    for (String col : colNames) {
      columnsString += col + ",";
    }

    ArrayList<String> args = new ArrayList<String>();

    if (includeHadoopFlags) {
      CommonArgs.addHadoopFlags(args);
    }

    args.add("--table");
    args.add(getTableName());
    args.add("--columns");
    args.add(columnsString);
    args.add("--split-by");
    args.add(splitByCol);
    args.add("--connect");
    args.add(getConnectString());
    args.add("--as-sequencefile");
    args.add("--num-mappers");
    args.add("1");

    args.addAll(getExtraArgs(conf));

    return args;
  }

  // this test just uses the two int table.
  protected String getTableName() {
    return HsqldbTestServer.getTableName();
  }

  /** the same than ImportJobTestCase but without removing tabledir. */
  protected void runUncleanImport(String[] argv) throws IOException {
    // run the tool through the normal entry-point.
    int ret;
    try {
      Configuration conf = getConf();
      SqoopOptions opts = getSqoopOptions(conf);
      Sqoop sqoop = new Sqoop(new ImportTool(), conf, opts);
      ret = Sqoop.runSqoop(sqoop, argv);
    } catch (Exception e) {
      LOG.error("Got exception running Sqoop: " + e.toString());
      e.printStackTrace();
      ret = 1;
    }

    // expect a successful return.
    if (0 != ret) {
      throw new IOException("Failure during job; return status " + ret);
    }
  }

  /** @return FileStatus for data files only. */
  private FileStatus[] listFiles(FileSystem fs, Path path) throws IOException {
    FileStatus[] fileStatuses = fs.listStatus(path);
    ArrayList files = new ArrayList();
    Pattern patt = Pattern.compile("part.*-([0-9][0-9][0-9][0-9][0-9]).*");
    for (FileStatus fstat : fileStatuses) {
      String fname = fstat.getPath().getName();
      if (!fstat.isDir()) {
        Matcher mat = patt.matcher(fname);
        if (mat.matches()) {
          files.add(fstat);
        }
      }
    }
    return (FileStatus[]) files.toArray(new FileStatus[files.size()]);
  }

  private class StatusPathComparator implements Comparator<FileStatus> {

    @Override
    public int compare(FileStatus fs1, FileStatus fs2) {
      return fs1.getPath().toString().compareTo(fs2.getPath().toString());
    }
  }

  /** @return a concat. string with file-creation dates excluding folders. */
  private String getFileCreationTimeImage(FileSystem fs, Path outputPath,
      int fileCount) throws IOException {
    // create string image with all file creation dates
    StringBuffer image = new StringBuffer();
    FileStatus[] fileStatuses = listFiles(fs, outputPath);
    // sort the file statuses by path so we have a stable order for
    // using 'fileCount'.
    Arrays.sort(fileStatuses, new StatusPathComparator());
    for (int i = 0; i < fileStatuses.length && i < fileCount; i++) {
      image.append(fileStatuses[i].getPath() + "="
          + fileStatuses[i].getModificationTime());
    }
    return image.toString();
  }

  /** @return the number part of a partition */
  private int getFilePartition(Path file) {
    String filename = file.getName();
    int pos = filename.lastIndexOf(FILEPART_SEPARATOR);
    if (pos != -1) {
      String part = filename.substring(pos + 1, pos + 1 + PARTITION_DIGITS);
      return Integer.parseInt(part);
    } else {
      return 0;
    }
  }

  /**
   * Test for ouput path file-count increase, current files untouched and new
   * correct partition number.
   *
   * @throws IOException
   */
  public void runAppendTest(ArrayList args, Path outputPath)
      throws IOException {

    try {

      // ensure non-existing output dir for insert phase
      FileSystem fs = FileSystem.get(getConf());
      if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
      }

      // run Sqoop in INSERT mode
      String[] argv = (String[]) args.toArray(new String[0]);
      runUncleanImport(argv);

      // get current file count
      FileStatus[] fileStatuses = listFiles(fs, outputPath);
      Arrays.sort(fileStatuses, new StatusPathComparator());
      int previousFileCount = fileStatuses.length;

      // get string image with all file creation dates
      String previousImage = getFileCreationTimeImage(fs, outputPath,
          previousFileCount);

      // get current last partition number
      Path lastFile = fileStatuses[fileStatuses.length - 1].getPath();
      int lastPartition = getFilePartition(lastFile);

      // run Sqoop in APPEND mode
      args.add("--append");
      argv = (String[]) args.toArray(new String[0]);
      runUncleanImport(argv);

      // check directory file increase
      fileStatuses = listFiles(fs, outputPath);
      Arrays.sort(fileStatuses, new StatusPathComparator());
      int currentFileCount = fileStatuses.length;
      assertTrue("Output directory didn't got increased in file count ",
          currentFileCount > previousFileCount);

      // check previous files weren't modified, also works for partition
      // overlapping
      String currentImage = getFileCreationTimeImage(fs, outputPath,
          previousFileCount);
      assertEquals("Previous files to appending operation were modified",
          currentImage, previousImage);

      // check that exists at least 1 new correlative partition
      // let's use a different way than the code being tested
      Path newFile = fileStatuses[previousFileCount].getPath(); // there is a
                                                                // new bound now
      int newPartition = getFilePartition(newFile);
      assertTrue("New partition file isn't correlative",
          lastPartition + 1 == newPartition);

    } catch (Exception e) {
      LOG.error("Got Exception: " + StringUtils.stringifyException(e));
      fail(e.toString());
    }
  }

  /** independent to target-dir. */
  public void testAppend() throws IOException {
    ArrayList args = getOutputlessArgv(false, HsqldbTestServer.getFieldNames(),
        getConf());
    args.add("--warehouse-dir");
    args.add(getWarehouseDir());

    Path output = new Path(getWarehouseDir(), HsqldbTestServer.getTableName());
    runAppendTest(args, output);
  }

  /** working with target-dir. */
  public void testAppendToTargetDir() throws IOException {
    ArrayList args = getOutputlessArgv(false, HsqldbTestServer.getFieldNames(),
        getConf());
    String targetDir = getWarehouseDir() + "/tempTargetDir";
    args.add("--target-dir");
    args.add(targetDir);

    // there's no need for a new param
    // in diff. w/--warehouse-dir there will no be $tablename dir
    Path output = new Path(targetDir);
    runAppendTest(args, output);
  }

}
new file (151 lines): src/test/com/cloudera/sqoop/TestTargetDir.java
@@ -0,0 +1,151 @@

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

import com.cloudera.sqoop.testutil.CommonArgs;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import com.cloudera.sqoop.testutil.ImportJobTestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Test that --target-dir works.
 */
public class TestTargetDir extends ImportJobTestCase {

  public static final Log LOG = LogFactory
      .getLog(TestTargetDir.class.getName());

  /**
   * Create the argv to pass to Sqoop.
   *
   * @return the argv as an array of strings.
   */
  protected ArrayList getOutputArgv(boolean includeHadoopFlags) {
    ArrayList<String> args = new ArrayList<String>();

    if (includeHadoopFlags) {
      CommonArgs.addHadoopFlags(args);
    }

    args.add("--table");
    args.add(HsqldbTestServer.getTableName());
    args.add("--connect");
    args.add(HsqldbTestServer.getUrl());
    args.add("--split-by");
    args.add("INTFIELD1");
    args.add("--as-sequencefile");

    return args;
  }

  // this test just uses the two int table.
  protected String getTableName() {
    return HsqldbTestServer.getTableName();
  }

  /** test invalid argument exception if several output options. */
  public void testSeveralOutputsIOException() throws IOException {

    try {
      ArrayList args = getOutputArgv(true);
      args.add("--warehouse-dir");
      args.add(getWarehouseDir());
      args.add("--target-dir");
      args.add(getWarehouseDir());

      String[] argv = (String[]) args.toArray(new String[0]);
      runImport(argv);

      fail("warehouse-dir & target-dir were set and run "
          + "without problem reported");

    } catch (IOException e) {
      // expected
    }
  }

  /** test target-dir contains imported files. */
  public void testTargetDir() throws IOException {

    try {
      String targetDir = getWarehouseDir() + "/tempTargetDir";

      ArrayList args = getOutputArgv(true);
      args.add("--target-dir");
      args.add(targetDir);

      // delete target-dir if exists and recreate it
      FileSystem fs = FileSystem.get(getConf());
      Path outputPath = new Path(targetDir);
      if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
      }

      String[] argv = (String[]) args.toArray(new String[0]);
      runImport(argv);

      ContentSummary summ = fs.getContentSummary(outputPath);

      assertTrue("There's no new imported files in target-dir",
          summ.getFileCount() > 0);

    } catch (Exception e) {
      LOG.error("Got Exception: " + StringUtils.stringifyException(e));
      fail(e.toString());
    }
  }

  /** test target-dir breaks if already existing
   * (only allowed in append mode). */
  public void testExistingTargetDir() throws IOException {

    try {
      String targetDir = getWarehouseDir() + "/tempTargetDir";

      ArrayList args = getOutputArgv(true);
      args.add("--target-dir");
      args.add(targetDir);

      // delete target-dir if exists and recreate it
      FileSystem fs = FileSystem.get(getConf());
      Path outputPath = new Path(targetDir);
      if (!fs.exists(outputPath)) {
        fs.mkdirs(outputPath);
      }

      String[] argv = (String[]) args.toArray(new String[0]);
      runImport(argv);

      fail("Existing target-dir run without problem report");

    } catch (IOException e) {
      // expected
    }
  }

}