mirror of
https://github.com/alibaba/DataX.git
synced 2025-05-02 12:49:42 +08:00
support for "splitFactor" parameterization
This commit is contained in:
parent
809cffc5f6
commit
b4a3eeb130
@ -25,4 +25,6 @@ public final class Constant {
|
||||
|
||||
public static String TABLE_NAME_PLACEHOLDER = "@table";
|
||||
|
||||
public static Integer SPLIT_FACTOR = 5;
|
||||
|
||||
}
|
||||
|
@ -46,5 +46,6 @@ public final class Key {
|
||||
|
||||
public final static String DRYRUN = "dryRun";
|
||||
|
||||
public static String SPLIT_FACTOR = "splitFactor";
|
||||
|
||||
}
|
@ -68,7 +68,12 @@ public final class ReaderSplitUtil {
|
||||
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾
|
||||
|
||||
//考虑其他比率数字?(splitPk is null, 忽略此长尾)
|
||||
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
|
||||
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
|
||||
|
||||
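//To avoid producing many small files when importing into Hive, the split multiplier is configurable through the "splitFactor" setting (default 5); setting it to 1 keeps the per-table split count equal to the channel-derived value instead of multiplying it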
|
||||
// Final task count = ceil(channel / tableNum) * splitFactor
|
||||
Integer quota = originalSliceConfig.getInt(Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
|
||||
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * quota;
|
||||
}
|
||||
// 尝试对每个表,切分为eachTableShouldSplittedNumber 份
|
||||
for (String table : tables) {
|
||||
|
Loading…
Reference in New Issue
Block a user