mirror of
https://github.com/apache/sqoop.git
synced 2025-05-02 17:22:25 +08:00
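Example: minVal = 1.111, maxVal = 133.333, numSplits = 2.
If curUpper is used as the lower bound of the last split, the generated splits look like this, leaving the range between 67.22200000000001 and 133.33300000000003 uncovered: [float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 133.33300000000003 AND float_code <= 133.333]. So curLower is needed instead, which produces contiguous splits: [float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 67.22200000000001 AND float_code <= 133.333]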
This commit is contained in:
parent
2328971411
commit
5b503ea89c
@ -87,7 +87,8 @@ public List<InputSplit> split(Configuration conf, ResultSet results,
|
||||
// Catch any overage and create the closed interval for the last split.
|
||||
if (curLower <= maxVal || splits.size() == 1) {
|
||||
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||
lowClausePrefix + Double.toString(curUpper),
|
||||
// lowClausePrefix + Double.toString(curUpper),
|
||||
lowClausePrefix + Double.toString(curLower),
|
||||
colName + " <= " + Double.toString(maxVal)));
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,59 @@
|
||||
package org.apache.sqoop.mapreduce.db;
|
||||
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class FloatSplitterTest {
|
||||
|
||||
@Test
|
||||
public void split() {
|
||||
double MIN_INCREMENT = 10000 * Double.MIN_VALUE;
|
||||
|
||||
System.out.println("Generating splits for a floating-point index column. Due to the");
|
||||
System.out.println("imprecise representation of floating-point values in Java, this");
|
||||
System.out.println("may result in an incomplete import.");
|
||||
System.out.println("You are strongly encouraged to choose an integral split column.");
|
||||
|
||||
List<InputSplit> splits = new ArrayList<InputSplit>();
|
||||
String colName = "float_code";
|
||||
double minVal = 1.111;
|
||||
double maxVal = 133.333;
|
||||
|
||||
// Use this as a hint. May need an extra task if the size doesn't
|
||||
// divide cleanly.
|
||||
int numSplits = 2;
|
||||
double splitSize = (maxVal - minVal) / (double) numSplits;
|
||||
|
||||
if (splitSize < MIN_INCREMENT) {
|
||||
splitSize = MIN_INCREMENT;
|
||||
}
|
||||
|
||||
String lowClausePrefix = colName + " >= ";
|
||||
String highClausePrefix = colName + " < ";
|
||||
|
||||
double curLower = minVal;
|
||||
double curUpper = curLower + splitSize;
|
||||
|
||||
while (curUpper < maxVal) {
|
||||
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||
lowClausePrefix + Double.toString(curLower),
|
||||
highClausePrefix + Double.toString(curUpper)));
|
||||
|
||||
curLower = curUpper;
|
||||
curUpper += splitSize;
|
||||
}
|
||||
|
||||
// Catch any overage and create the closed interval for the last split.
|
||||
if (curLower <= maxVal || splits.size() == 1) {
|
||||
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||
// lowClausePrefix + Double.toString(curUpper),
|
||||
lowClausePrefix + Double.toString(curLower),
|
||||
colName + " <= " + Double.toString(maxVal)));
|
||||
}
|
||||
|
||||
System.out.println(splits);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user