mirror of
https://github.com/apache/sqoop.git
synced 2025-05-03 04:11:44 +08:00
minVal = 1.111
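maxVal = 133.333, numSplits = 2. If the last split uses curUpper as its lower bound, the generated splits are: [float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 133.33300000000003 AND float_code <= 133.333]. The second split is empty and the range from 67.22200000000001 up to 133.333 is never imported. The last split must therefore start at curLower, which gives: [float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 67.22200000000001 AND float_code <= 133.333]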
This commit is contained in:
parent
2328971411
commit
5b503ea89c
@ -87,7 +87,8 @@ public List<InputSplit> split(Configuration conf, ResultSet results,
|
|||||||
// Catch any overage and create the closed interval for the last split.
|
// Catch any overage and create the closed interval for the last split.
|
||||||
if (curLower <= maxVal || splits.size() == 1) {
|
if (curLower <= maxVal || splits.size() == 1) {
|
||||||
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||||
lowClausePrefix + Double.toString(curUpper),
|
// lowClausePrefix + Double.toString(curUpper),
|
||||||
|
lowClausePrefix + Double.toString(curLower),
|
||||||
colName + " <= " + Double.toString(maxVal)));
|
colName + " <= " + Double.toString(maxVal)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,59 @@
|
|||||||
|
package org.apache.sqoop.mapreduce.db;
|
||||||
|
|
||||||
|
import org.apache.hadoop.mapreduce.InputSplit;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class FloatSplitterTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void split() {
|
||||||
|
double MIN_INCREMENT = 10000 * Double.MIN_VALUE;
|
||||||
|
|
||||||
|
System.out.println("Generating splits for a floating-point index column. Due to the");
|
||||||
|
System.out.println("imprecise representation of floating-point values in Java, this");
|
||||||
|
System.out.println("may result in an incomplete import.");
|
||||||
|
System.out.println("You are strongly encouraged to choose an integral split column.");
|
||||||
|
|
||||||
|
List<InputSplit> splits = new ArrayList<InputSplit>();
|
||||||
|
String colName = "float_code";
|
||||||
|
double minVal = 1.111;
|
||||||
|
double maxVal = 133.333;
|
||||||
|
|
||||||
|
// Use this as a hint. May need an extra task if the size doesn't
|
||||||
|
// divide cleanly.
|
||||||
|
int numSplits = 2;
|
||||||
|
double splitSize = (maxVal - minVal) / (double) numSplits;
|
||||||
|
|
||||||
|
if (splitSize < MIN_INCREMENT) {
|
||||||
|
splitSize = MIN_INCREMENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
String lowClausePrefix = colName + " >= ";
|
||||||
|
String highClausePrefix = colName + " < ";
|
||||||
|
|
||||||
|
double curLower = minVal;
|
||||||
|
double curUpper = curLower + splitSize;
|
||||||
|
|
||||||
|
while (curUpper < maxVal) {
|
||||||
|
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||||
|
lowClausePrefix + Double.toString(curLower),
|
||||||
|
highClausePrefix + Double.toString(curUpper)));
|
||||||
|
|
||||||
|
curLower = curUpper;
|
||||||
|
curUpper += splitSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Catch any overage and create the closed interval for the last split.
|
||||||
|
if (curLower <= maxVal || splits.size() == 1) {
|
||||||
|
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
|
||||||
|
// lowClausePrefix + Double.toString(curUpper),
|
||||||
|
lowClausePrefix + Double.toString(curLower),
|
||||||
|
colName + " <= " + Double.toString(maxVal)));
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(splits);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user