5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-03 04:29:59 +08:00

minVal = 1.111

maxVal = 133.333
numSplits = 2
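If curUpper is used as the lower bound of the last split, the generated splits look like this (the last split starts past maxVal, so the range between the two splits is never imported):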
[float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 133.33300000000003 AND float_code <= 133.333]
So curLower must be used as the lower bound of the last split instead, which gives contiguous splits:
[float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 67.22200000000001 AND float_code <= 133.333]
This commit is contained in:
chenqixu 2019-04-22 16:04:43 +08:00
parent 2328971411
commit 5b503ea89c
2 changed files with 61 additions and 1 deletions

View File

@ -87,7 +87,8 @@ public List<InputSplit> split(Configuration conf, ResultSet results,
// Catch any overage and create the closed interval for the last split.
if (curLower <= maxVal || splits.size() == 1) {
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + Double.toString(curUpper),
// lowClausePrefix + Double.toString(curUpper),
lowClausePrefix + Double.toString(curLower),
colName + " <= " + Double.toString(maxVal)));
}

View File

@ -0,0 +1,59 @@
package org.apache.sqoop.mapreduce.db;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Regression test for the lower bound of the closing split produced when a
 * floating-point column is divided into ranges.
 *
 * <p>The splitting loop below is a stand-alone copy of the algorithm; it does
 * not call into the production splitter class (presumably FloatSplitter,
 * judging by this test's name - confirm before relying on it). The test
 * therefore pins the expected shape of the output for one concrete input.
 */
public class FloatSplitterTest {

  /**
   * Splits the range [1.111, 133.333] of column {@code float_code} into two
   * ranges and verifies that the closing split starts exactly where the
   * preceding half-open split ends, so no values fall between the two.
   */
  @Test
  public void split() {
    double MIN_INCREMENT = 10000 * Double.MIN_VALUE;

    System.out.println("Generating splits for a floating-point index column. Due to the");
    System.out.println("imprecise representation of floating-point values in Java, this");
    System.out.println("may result in an incomplete import.");
    System.out.println("You are strongly encouraged to choose an integral split column.");

    List<InputSplit> splits = new ArrayList<InputSplit>();
    String colName = "float_code";
    double minVal = 1.111;
    double maxVal = 133.333;

    // Use this as a hint. May need an extra task if the size doesn't
    // divide cleanly.
    int numSplits = 2;
    double splitSize = (maxVal - minVal) / (double) numSplits;

    if (splitSize < MIN_INCREMENT) {
      splitSize = MIN_INCREMENT;
    }

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    double curLower = minVal;
    double curUpper = curLower + splitSize;

    // Emit half-open intervals [curLower, curUpper) while the upper bound
    // stays strictly below maxVal.
    while (curUpper < maxVal) {
      splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          lowClausePrefix + Double.toString(curLower),
          highClausePrefix + Double.toString(curUpper)));
      curLower = curUpper;
      curUpper += splitSize;
    }

    // Catch any overage and create the closed interval for the last split.
    // The lower bound must be curLower: after the loop curUpper has already
    // moved past maxVal, so using it would produce an empty range and leave
    // everything between the last two splits unimported.
    if (curLower <= maxVal || splits.size() == 1) {
      splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          lowClausePrefix + Double.toString(curLower),
          colName + " <= " + Double.toString(maxVal)));
    }

    System.out.println(splits);

    // The boundary shared by both splits; computed with the same arithmetic
    // as the first loop iteration so the rendered digits match exactly.
    String boundary = Double.toString(minVal + splitSize);
    String expectedFirst = lowClausePrefix + Double.toString(minVal)
        + " AND " + highClausePrefix + boundary;
    String expectedLast = lowClausePrefix + boundary
        + " AND " + colName + " <= " + Double.toString(maxVal);

    check(splits.size() == 2,
        "expected 2 splits but got " + splits.size() + ": " + splits);
    // Each split is expected to render as "<lower clause> AND <upper clause>",
    // the same text the println above produces for the list elements.
    check(expectedFirst.equals(String.valueOf(splits.get(0))),
        "first split: expected <" + expectedFirst + "> but was <" + splits.get(0) + ">");
    check(expectedLast.equals(String.valueOf(splits.get(1))),
        "last split: expected <" + expectedLast + "> but was <" + splits.get(1) + ">");
  }

  /** Fails the running test with {@code message} when {@code condition} is false. */
  private static void check(boolean condition, String message) {
    if (!condition) {
      throw new AssertionError(message);
    }
  }
}