5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-08 13:52:25 +08:00

SQOOP-1154: Sqoop2: Text partitioner might miss or include edge values

(Jarek Jarcec Cecho via Venkat Ranganathan)
This commit is contained in:
Venkat Ranganathan 2013-07-30 22:47:44 -07:00
parent 81e71dc45c
commit 9c7adb4e11
2 changed files with 13 additions and 17 deletions

View File

@ -250,7 +250,8 @@ protected List<Partition> partitionTextColumn() {
// Having one single value means that we can create only one single split
if(minStringBD.equals(maxStringBD)) {
GenericJdbcImportPartition partition = new GenericJdbcImportPartition();
partition.setConditions(constructTextConditions(prefix, maxStringBD));
partition.setConditions(constructTextConditions(prefix, 0, 0,
partitionMinValue, partitionMaxValue, true, true));
partitions.add(partition);
return partitions;
}
@ -294,8 +295,8 @@ protected List<Partition> partitionTextColumn() {
BigDecimal end = splitPoints.get(i);
GenericJdbcImportPartition partition = new GenericJdbcImportPartition();
partition.setConditions(constructTextConditions(prefix, start,
end, i == splitPoints.size() - 1));
partition.setConditions(constructTextConditions(prefix, start, end,
partitionMinValue, partitionMaxValue, i == 1, i == splitPoints.size() - 1));
partitions.add(partition);
start = end;
@ -521,31 +522,21 @@ protected String constructDateConditions(SimpleDateFormat sdf,
return conditions.toString();
}
protected String constructTextConditions(String prefix,
Object lowerBound, Object upperBound, boolean lastOne) {
protected String constructTextConditions(String prefix, Object lowerBound, Object upperBound,
String lowerStringBound, String upperStringBound, boolean firstOne, boolean lastOne) {
StringBuilder conditions = new StringBuilder();
String lbString = prefix + bigDecimalToText((BigDecimal)lowerBound);
String ubString = prefix + bigDecimalToText((BigDecimal)upperBound);
conditions.append('\'').append(lbString).append('\'');
conditions.append('\'').append(firstOne ? lowerStringBound : lbString).append('\'');
conditions.append(" <= ");
conditions.append(partitionColumnName);
conditions.append(" AND ");
conditions.append(partitionColumnName);
conditions.append(lastOne ? " <= " : " < ");
conditions.append('\'').append(ubString).append('\'');
conditions.append('\'').append(lastOne ? upperStringBound : ubString).append('\'');
return conditions.toString();
}
protected String constructTextConditions(String prefix, Object value) {
return new StringBuilder()
.append(partitionColumnName)
.append(" = ").append('\'')
.append(prefix + bigDecimalToText((BigDecimal)value))
.append('\'').toString()
;
}
/**
* Converts a string to a BigDecimal representation in Base 2^21 format.
* The maximum Unicode code point value defined is 10FFFF. Although

View File

@ -409,6 +409,7 @@ public void testVarcharPartition() throws Exception {
"'Y' <= VCCOL AND VCCOL <= 'Z'",
});
}
public void testVarcharPartition2() throws Exception {
MutableContext context = new MutableMapContext();
context.setString(GenericJdbcConnectorConstants
@ -426,6 +427,10 @@ public void testVarcharPartition2() throws Exception {
PartitionerContext partitionerContext = new PartitionerContext(context, 5, null);
List<Partition> partitions = partitioner.getPartitions(partitionerContext, connConf, jobConf);
assertEquals(partitions.size(), 5);
// First partition needs to contain the entire lower bound
assertTrue(partitions.get(0).toString().contains("Breezy Badger"));
// Last partition needs to contain the entire upper bound
assertTrue(partitions.get(4).toString().contains("Warty Warthog"));
}
public void testVarcharPartitionWithCommonPrefix() throws Exception {