support for "splitFactor" parameterization

This commit is contained in:
daizihao 2020-10-14 20:39:25 +08:00
parent 809cffc5f6
commit b4a3eeb130
3 changed files with 9 additions and 1 deletions

View File

@ -25,4 +25,6 @@ public final class Constant {
public static String TABLE_NAME_PLACEHOLDER = "@table";
public static Integer SPLIT_FACTOR = 5;
}

View File

@ -46,5 +46,6 @@ public final class Key {
public final static String DRYRUN = "dryRun";
public static String SPLIT_FACTOR = "splitFactor";
}

View File

@ -68,7 +68,12 @@ public final class ReaderSplitUtil {
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾
//考虑其他比率数字?(splitPk is null, 忽略此长尾)
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
//eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5;
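// To avoid producing many small files when importing into Hive, the split multiplier is configurable:
// it defaults to 5 (Constant.SPLIT_FACTOR) and can be overridden with the "splitFactor" key (Key.SPLIT_FACTOR).
// NOTE(review): the original note equated the default with "one task per configured channel";
// by the formula below that appears to hold only when the factor is 1 - confirm.
// Final task count is ceil(channel / tableNum) * splitFactor.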
Integer quota = originalSliceConfig.getInt(Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR);
eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * quota;
}
// 尝试对每个表切分为eachTableShouldSplittedNumber
for (String table : tables) {