Merge pull request #889 from Mr-KIDBK/dev

hdfs增加overwrite模式,rdbms增加单表切分参数
This commit is contained in:
Trafalgar 2020-12-11 17:44:47 +08:00 committed by GitHub
commit 42e50f626f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 3 deletions

View File

@ -81,10 +81,10 @@ public class HdfsWriter extends Writer {
//writeMode check
this.writeMode = this.writerSliceConfig.getNecessaryValue(Key.WRITE_MODE, HdfsWriterErrorCode.REQUIRED_VALUE);
writeMode = writeMode.toLowerCase().trim();
Set<String> supportedWriteModes = Sets.newHashSet("append", "nonconflict");
Set<String> supportedWriteModes = Sets.newHashSet("append", "nonconflict", "truncate");
if (!supportedWriteModes.contains(writeMode)) {
throw DataXException.asDataXException(HdfsWriterErrorCode.ILLEGAL_VALUE,
String.format("仅支持append, nonConflict种模式, 不支持您配置的 writeMode 模式 : [%s]",
String.format("仅支持append, nonConflict, truncate三种模式, 不支持您配置的 writeMode 模式 : [%s]",
writeMode));
}
this.writerSliceConfig.set(Key.WRITE_MODE, writeMode);
@ -179,6 +179,9 @@ public class HdfsWriter extends Writer {
LOG.error(String.format("冲突文件列表为: [%s]", StringUtils.join(allFiles, ",")));
throw DataXException.asDataXException(HdfsWriterErrorCode.ILLEGAL_VALUE,
String.format("由于您配置了writeMode nonConflict,但您配置的path: [%s] 目录不为空, 下面存在其他文件或文件夹.", path));
}else if ("truncate".equalsIgnoreCase(writeMode) && isExistFile) {
LOG.info(String.format("由于您配置了writeMode truncate, [%s] 下面的内容将被覆盖重写", path));
hdfsHelper.deleteFiles(existFilePaths);
}
}else{
throw DataXException.asDataXException(HdfsWriterErrorCode.ILLEGAL_VALUE,

View File

@ -25,4 +25,6 @@ public final class Constant {
public static String TABLE_NAME_PLACEHOLDER = "@table";
public static Integer SPLIT_FACTOR = 5;
}

View File

@ -46,5 +46,6 @@ public final class Key {
public final static String DRYRUN = "dryRun";
public static String SPLIT_FACTOR = "splitFactor";
}

View File

@ -68,7 +68,12 @@ public final class ReaderSplitUtil {
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾
//考虑其他比率数字?(splitPk is null, 忽略此长尾)
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
//为避免导入hive小文件 默认基数为5可以通过 splitFactor 配置基数
// 最终task数为(channel/tableNum)向上取整*splitFactor
Integer splitFactor = originalSliceConfig.getInt(Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * splitFactor;
}
// 尝试对每个表切分为eachTableShouldSplittedNumber
for (String table : tables) {