diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Constant.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Constant.java index 729d71ac..f998357e 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Constant.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Constant.java @@ -25,4 +25,6 @@ public final class Constant { public static String TABLE_NAME_PLACEHOLDER = "@table"; + public static Integer SPLIT_FACTOR = 5; + } diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Key.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Key.java index 63f8dde0..0e10c742 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Key.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/Key.java @@ -46,5 +46,6 @@ public final class Key { public final static String DRYRUN = "dryRun"; + public static String SPLIT_FACTOR = "splitFactor"; } \ No newline at end of file diff --git a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ReaderSplitUtil.java b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ReaderSplitUtil.java index 64109e90..b1be9143 100755 --- a/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ReaderSplitUtil.java +++ b/plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/reader/util/ReaderSplitUtil.java @@ -68,7 +68,12 @@ public final class ReaderSplitUtil { //eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 2 + 1;// 不应该加1导致长尾 //考虑其他比率数字?(splitPk is null, 忽略此长尾) - eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5; + //eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * 5; + + //为避免导入hive小文件 默认基数为5(基数为1时,channel配置几个大致就是几个task),可以通过 splitFactor 配置基数 + // 最终task数为(channel/tableNum)向上取整*splitFactor + Integer quota = 
originalSliceConfig.getInt(Key.SPLIT_FACTOR, Constant.SPLIT_FACTOR); + eachTableShouldSplittedNumber = eachTableShouldSplittedNumber * quota; } // 尝试对每个表,切分为eachTableShouldSplittedNumber 份 for (String table : tables) {