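Example: minVal = 1.111, maxVal = 133.333, numSplits = 2.

If the final split uses curUpper as its lower bound, the generated splits are:
[float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 133.33300000000003 AND float_code <= 133.333]
The last split's lower bound (133.33300000000003) is greater than maxVal (133.333), so it matches no rows, and every row with float_code >= 67.22200000000001 is skipped.

The final split must therefore start at curLower, which gives:
[float_code >= 1.111 AND float_code < 67.22200000000001, float_code >= 67.22200000000001 AND float_code <= 133.333]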
2025-05-03 04:29:59 +08:00 · 2019-04-22 16:04:43 +08:00 · 2019-04-22 16:04:43 +08:00 · 5b503ea89c
commit 5b503ea89c
parent 2328971411
2 changed files with 61 additions and 1 deletions
--- a/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java
@@ -87,7 +87,8 @@ public List<InputSplit> split(Configuration conf, ResultSet results,
    // Catch any overage and create the closed interval for the last split.
    if (curLower <= maxVal || splits.size() == 1) {
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          lowClausePrefix + Double.toString(curUpper),
+//              lowClausePrefix + Double.toString(curUpper),
+              lowClausePrefix + Double.toString(curLower),
          colName + " <= " + Double.toString(maxVal)));
    }

--- a/src/test/org/apache/sqoop/mapreduce/db/FloatSplitterTest.java
+++ b/src/test/org/apache/sqoop/mapreduce/db/FloatSplitterTest.java
@@ -0,0 +1,59 @@
+package org.apache.sqoop.mapreduce.db;
+
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class FloatSplitterTest {
+
+    @Test
+    public void split() {
+        double MIN_INCREMENT = 10000 * Double.MIN_VALUE;
+
+        System.out.println("Generating splits for a floating-point index column. Due to the");
+        System.out.println("imprecise representation of floating-point values in Java, this");
+        System.out.println("may result in an incomplete import.");
+        System.out.println("You are strongly encouraged to choose an integral split column.");
+
+        List<InputSplit> splits = new ArrayList<InputSplit>();
+        String colName = "float_code";
+        double minVal = 1.111;
+        double maxVal = 133.333;
+
+        // Use this as a hint. May need an extra task if the size doesn't
+        // divide cleanly.
+        int numSplits = 2;
+        double splitSize = (maxVal - minVal) / (double) numSplits;
+
+        if (splitSize < MIN_INCREMENT) {
+            splitSize = MIN_INCREMENT;
+        }
+
+        String lowClausePrefix = colName + " >= ";
+        String highClausePrefix = colName + " < ";
+
+        double curLower = minVal;
+        double curUpper = curLower + splitSize;
+
+        while (curUpper < maxVal) {
+            splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+                    lowClausePrefix + Double.toString(curLower),
+                    highClausePrefix + Double.toString(curUpper)));
+
+            curLower = curUpper;
+            curUpper += splitSize;
+        }
+
+        // Catch any overage and create the closed interval for the last split.
+        if (curLower <= maxVal || splits.size() == 1) {
+            splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+//                    lowClausePrefix + Double.toString(curUpper),
+                    lowClausePrefix + Double.toString(curLower),
+                    colName + " <= " + Double.toString(maxVal)));
+        }
+
+        System.out.println(splits);
+    }
+}