SQOOP-377 Migrate mapreduce.db package to new name space
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1190441 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 315fff82b9 · commit 37c9642e7d
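
Every file in this diff follows the same mechanical pattern: the com.cloudera.sqoop.mapreduce.db class loses its implementation and becomes a deprecated, mostly empty shim extending the identically named class in org.apache.sqoop.mapreduce.db, so existing callers keep compiling and running against the old names. A minimal sketch of that pattern, with a hypothetical class name Widget standing in for the real ones:

    package com.cloudera.sqoop.mapreduce.db;

    /**
     * @deprecated use org.apache.sqoop.mapreduce.db.Widget instead.
     * @see org.apache.sqoop.mapreduce.db.Widget
     */
    public class Widget extends org.apache.sqoop.mapreduce.db.Widget {
      // Intentionally empty: all behavior lives in the org.apache.sqoop
      // parent; this subclass only preserves the old fully-qualified name
      // for source and binary compatibility.
    }

Constants are aliased to the new namespace and constructors delegate with super(...), as the DBConfiguration and DBRecordReader diffs below show.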
com/cloudera/sqoop/mapreduce/db/BigDecimalSplitter.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,137 +15,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.math.BigDecimal;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * Implement DBSplitter over BigDecimal values.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.BigDecimalSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.BigDecimalSplitter
  */
-public class BigDecimalSplitter implements DBSplitter {
-  private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class);
+public class BigDecimalSplitter
+    extends org.apache.sqoop.mapreduce.db.BigDecimalSplitter {
 
-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
-
-    BigDecimal minVal = results.getBigDecimal(1);
-    BigDecimal maxVal = results.getBigDecimal(2);
-
-    String lowClausePrefix = colName + " >= ";
-    String highClausePrefix = colName + " < ";
-
-    BigDecimal numSplits = new BigDecimal(
-        ConfigurationHelper.getConfNumMaps(conf));
-
-    if (minVal == null && maxVal == null) {
-      // Range is null to null. Return a null split accordingly.
-      List<InputSplit> splits = new ArrayList<InputSplit>();
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    if (minVal == null || maxVal == null) {
-      // Don't know what is a reasonable min/max value for interpolation. Fail.
-      LOG.error("Cannot find a range for NUMERIC or DECIMAL "
-          + "fields with one end NULL.");
-      return null;
-    }
-
-    // Get all the split points together.
-    List<BigDecimal> splitPoints = split(numSplits, minVal, maxVal);
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    // Turn the split points into a set of intervals.
-    BigDecimal start = splitPoints.get(0);
-    for (int i = 1; i < splitPoints.size(); i++) {
-      BigDecimal end = splitPoints.get(i);
-
-      if (i == splitPoints.size() - 1) {
-        // This is the last one; use a closed interval.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + start.toString(),
-            colName + " <= " + end.toString()));
-      } else {
-        // Normal open-interval case.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + start.toString(),
-            highClausePrefix + end.toString()));
-      }
-
-      start = end;
-    }
-
-    return splits;
-  }
-
-  private static final BigDecimal MIN_INCREMENT =
-      new BigDecimal(10000 * Double.MIN_VALUE);
-
-  /**
-   * Divide numerator by denominator. If impossible in exact mode, use rounding.
-   */
-  protected BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) {
-    try {
-      return numerator.divide(denominator);
-    } catch (ArithmeticException ae) {
-      return numerator.divide(denominator, BigDecimal.ROUND_HALF_UP);
-    }
-  }
-
-  /**
-   * Returns a list of BigDecimals one element longer than the list of input
-   * splits. This represents the boundaries between input splits. All splits
-   * are open on the top end, except the last one.
-   *
-   * So the list [0, 5, 8, 12, 18] would represent splits capturing the
-   * intervals:
-   *
-   * [0, 5)
-   * [5, 8)
-   * [8, 12)
-   * [12, 18] note the closed interval for the last split.
-   */
-  List<BigDecimal> split(BigDecimal numSplits, BigDecimal minVal,
-      BigDecimal maxVal) throws SQLException {
-
-    List<BigDecimal> splits = new ArrayList<BigDecimal>();
-
-    // Use numSplits as a hint. May need an extra task if the size doesn't
-    // divide cleanly.
-    BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), (numSplits));
-    if (splitSize.compareTo(MIN_INCREMENT) < 0) {
-      splitSize = MIN_INCREMENT;
-      LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT");
-    }
-
-    BigDecimal curVal = minVal;
-
-    while (curVal.compareTo(maxVal) <= 0) {
-      splits.add(curVal);
-      curVal = curVal.add(splitSize);
-    }
-
-    if (splits.get(splits.size() - 1).compareTo(maxVal) != 0
-        || splits.size() == 1) {
-      // We didn't end on the maxVal. Add that to the end of the list.
-      splits.add(maxVal);
-    }
-
-    return splits;
-  }
 }
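
The removed split() javadoc above defines how n+1 boundary points become n intervals, half-open except the last. A runnable illustration of that rule using the javadoc's own example values (the class name IntervalDemo is ours, not the commit's):

    import java.util.Arrays;
    import java.util.List;

    public class IntervalDemo {
      public static void main(String[] args) {
        List<Integer> points = Arrays.asList(0, 5, 8, 12, 18);
        for (int i = 1; i < points.size(); i++) {
          boolean last = (i == points.size() - 1);
          // Every interval is open on the top end except the final one.
          System.out.println("[" + points.get(i - 1) + ", " + points.get(i)
              + (last ? "]" : ")"));
        }
        // Prints: [0, 5) [5, 8) [8, 12) [12, 18]
      }
    }

That logic now lives in org.apache.sqoop.mapreduce.db.BigDecimalSplitter.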
com/cloudera/sqoop/mapreduce/db/BooleanSplitter.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,53 +15,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
 /**
  * Implement DBSplitter over boolean values.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.BooleanSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.BooleanSplitter
  */
-public class BooleanSplitter implements DBSplitter {
-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
+public class BooleanSplitter
+    extends org.apache.sqoop.mapreduce.db.BooleanSplitter {
 
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    if (results.getString(1) == null && results.getString(2) == null) {
-      // Range is null to null. Return a null split accordingly.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    boolean minVal = results.getBoolean(1);
-    boolean maxVal = results.getBoolean(2);
-
-    // Use one or two splits.
-    if (!minVal) {
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " = FALSE", colName + " = FALSE"));
-    }
-
-    if (maxVal) {
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " = TRUE", colName + " = TRUE"));
-    }
-
-    if (results.getString(1) == null || results.getString(2) == null) {
-      // Include a null value.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-    }
-
-    return splits;
-  }
 }
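
The deleted BooleanSplitter body shows that a boolean column yields at most three splits: one for FALSE, one for TRUE, and one for NULL. A small runnable sketch mirroring those branches and printing the WHERE conditions the moved code generates (the column name "active" and the min/max/null flags are illustrative, not from the commit):

    public class BooleanSplitDemo {
      public static void main(String[] args) {
        String colName = "active";
        boolean minVal = false;   // results.getBoolean(1) in the real code
        boolean maxVal = true;    // results.getBoolean(2) in the real code
        boolean sawNull = true;   // true when either scanned bound was SQL NULL
        if (!minVal) {
          System.out.println(colName + " = FALSE");
        }
        if (maxVal) {
          System.out.println(colName + " = TRUE");
        }
        if (sawNull) {
          System.out.println(colName + " IS NULL");
        }
      }
    }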
com/cloudera/sqoop/mapreduce/db/DBConfiguration.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,17 +15,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
-
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-
-import com.cloudera.sqoop.mapreduce.db.DBInputFormat.NullDBWritable;
-
 /**
  * A container for configuration property names for jobs with DB input/output.
@@ -42,67 +32,76 @@
  * @see DBInputFormat#setInput(Job, Class, String, String)
  * @see DBInputFormat#setInput(Job, Class, String, String, String, String...)
  * @see DBOutputFormat#setOutput(Job, String, String...)
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBConfiguration instead.
+ * @see org.apache.sqoop.mapreduce.db.DBConfiguration
  */
-public class DBConfiguration {
+public class DBConfiguration
+    extends org.apache.sqoop.mapreduce.db.DBConfiguration {
 
   /** The JDBC Driver class name. */
   public static final String DRIVER_CLASS_PROPERTY =
-      "mapreduce.jdbc.driver.class";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.DRIVER_CLASS_PROPERTY;
 
   /** JDBC Database access URL. */
-  public static final String URL_PROPERTY = "mapreduce.jdbc.url";
+  public static final String URL_PROPERTY =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.URL_PROPERTY;
 
   /** User name to access the database. */
-  public static final String USERNAME_PROPERTY = "mapreduce.jdbc.username";
+  public static final String USERNAME_PROPERTY =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.USERNAME_PROPERTY;
 
   /** Password to access the database. */
-  public static final String PASSWORD_PROPERTY = "mapreduce.jdbc.password";
+  public static final String PASSWORD_PROPERTY =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.PASSWORD_PROPERTY;
 
   /** Fetch size. */
-  public static final String FETCH_SIZE = "mapreduce.jdbc.fetchsize";
+  public static final String FETCH_SIZE =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.FETCH_SIZE;
 
   /** Input table name. */
   public static final String INPUT_TABLE_NAME_PROPERTY =
-      "mapreduce.jdbc.input.table.name";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_TABLE_NAME_PROPERTY;
 
   /** Field names in the Input table. */
   public static final String INPUT_FIELD_NAMES_PROPERTY =
-      "mapreduce.jdbc.input.field.names";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_FIELD_NAMES_PROPERTY;
 
   /** WHERE clause in the input SELECT statement. */
   public static final String INPUT_CONDITIONS_PROPERTY =
-      "mapreduce.jdbc.input.conditions";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_CONDITIONS_PROPERTY;
 
   /** ORDER BY clause in the input SELECT statement. */
   public static final String INPUT_ORDER_BY_PROPERTY =
-      "mapreduce.jdbc.input.orderby";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_ORDER_BY_PROPERTY;
 
   /** Whole input query, exluding LIMIT...OFFSET. */
-  public static final String INPUT_QUERY = "mapreduce.jdbc.input.query";
+  public static final String INPUT_QUERY =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_QUERY;
 
   /** Input query to get the count of records. */
   public static final String INPUT_COUNT_QUERY =
-      "mapreduce.jdbc.input.count.query";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_COUNT_QUERY;
 
   /** Input query to get the max and min values of the jdbc.input.query. */
   public static final String INPUT_BOUNDING_QUERY =
-      "mapred.jdbc.input.bounding.query";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_BOUNDING_QUERY;
 
   /** Class name implementing DBWritable which will hold input tuples. */
   public static final String INPUT_CLASS_PROPERTY =
-      "mapreduce.jdbc.input.class";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_CLASS_PROPERTY;
 
   /** Output table name. */
   public static final String OUTPUT_TABLE_NAME_PROPERTY =
-      "mapreduce.jdbc.output.table.name";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY;
 
   /** Field names in the Output table. */
   public static final String OUTPUT_FIELD_NAMES_PROPERTY =
-      "mapreduce.jdbc.output.field.names";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY;
 
   /** Number of fields in the Output table. */
   public static final String OUTPUT_FIELD_COUNT_PROPERTY =
-      "mapreduce.jdbc.output.field.count";
+      org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY;
 
   /**
    * Sets the DB access related fields in the {@link Configuration}.
@@ -116,17 +115,8 @@ public class DBConfiguration {
   public static void configureDB(Configuration conf, String driverClass,
       String dbUrl, String userName, String passwd, Integer fetchSize) {
 
-    conf.set(DRIVER_CLASS_PROPERTY, driverClass);
-    conf.set(URL_PROPERTY, dbUrl);
-    if (userName != null) {
-      conf.set(USERNAME_PROPERTY, userName);
-    }
-    if (passwd != null) {
-      conf.set(PASSWORD_PROPERTY, passwd);
-    }
-    if (fetchSize != null) {
-      conf.setInt(FETCH_SIZE, fetchSize);
-    }
+    org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(
+        conf, driverClass, dbUrl, userName, passwd, fetchSize);
   }
 
   /**
@@ -138,7 +128,8 @@ public static void configureDB(Configuration conf, String driverClass,
    */
   public static void configureDB(Configuration job, String driverClass,
       String dbUrl, Integer fetchSize) {
-    configureDB(job, driverClass, dbUrl, null, null, fetchSize);
+    org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(job, driverClass,
+        dbUrl, fetchSize);
   }
 
   /**
@@ -151,7 +142,8 @@ public static void configureDB(Configuration job, String driverClass,
    */
   public static void configureDB(Configuration conf, String driverClass,
       String dbUrl, String userName, String passwd) {
-    configureDB(conf, driverClass, dbUrl, userName, passwd, null);
+    org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(conf, driverClass,
+        dbUrl, userName, passwd);
   }
 
   /**
@@ -162,151 +154,12 @@ public static void configureDB(Configuration conf, String driverClass,
    */
   public static void configureDB(Configuration job, String driverClass,
       String dbUrl) {
-    configureDB(job, driverClass, dbUrl, null);
+    org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(job, driverClass,
+        dbUrl);
   }
 
-  private Configuration conf;
-
   public DBConfiguration(Configuration job) {
-    this.conf = job;
+    super(job);
   }
 
-  /** Returns a connection object to the DB.
-   * @throws ClassNotFoundException
-   * @throws SQLException */
-  public Connection getConnection()
-      throws ClassNotFoundException, SQLException {
-
-    Class.forName(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
-
-    if(conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
-      return DriverManager.getConnection(
-          conf.get(DBConfiguration.URL_PROPERTY));
-    } else {
-      return DriverManager.getConnection(
-          conf.get(DBConfiguration.URL_PROPERTY),
-          conf.get(DBConfiguration.USERNAME_PROPERTY),
-          conf.get(DBConfiguration.PASSWORD_PROPERTY));
-    }
-  }
-
-  public Configuration getConf() {
-    return conf;
-  }
-
-  public Integer getFetchSize() {
-    if (conf.get(DBConfiguration.FETCH_SIZE) == null) {
-      return null;
-    }
-    return conf.getInt(DBConfiguration.FETCH_SIZE, 0);
-  }
-
-  public void setFetchSize(Integer fetchSize) {
-    if (fetchSize != null) {
-      conf.setInt(DBConfiguration.FETCH_SIZE, fetchSize);
-    } else {
-      conf.set(FETCH_SIZE, null);
-    }
-  }
-  public String getInputTableName() {
-    return conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY);
-  }
-
-  public void setInputTableName(String tableName) {
-    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
-  }
-
-  public String[] getInputFieldNames() {
-    return conf.getStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY);
-  }
-
-  public void setInputFieldNames(String... fieldNames) {
-    conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, fieldNames);
-  }
-
-  public String getInputConditions() {
-    return conf.get(DBConfiguration.INPUT_CONDITIONS_PROPERTY);
-  }
-
-  public void setInputConditions(String conditions) {
-    if (conditions != null && conditions.length() > 0) {
-      conf.set(DBConfiguration.INPUT_CONDITIONS_PROPERTY, conditions);
-    }
-  }
-
-  public String getInputOrderBy() {
-    return conf.get(DBConfiguration.INPUT_ORDER_BY_PROPERTY);
-  }
-
-  public void setInputOrderBy(String orderby) {
-    if(orderby != null && orderby.length() >0) {
-      conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, orderby);
-    }
-  }
-
-  public String getInputQuery() {
-    return conf.get(DBConfiguration.INPUT_QUERY);
-  }
-
-  public void setInputQuery(String query) {
-    if(query != null && query.length() >0) {
-      conf.set(DBConfiguration.INPUT_QUERY, query);
-    }
-  }
-
-  public String getInputCountQuery() {
-    return conf.get(DBConfiguration.INPUT_COUNT_QUERY);
-  }
-
-  public void setInputCountQuery(String query) {
-    if(query != null && query.length() > 0) {
-      conf.set(DBConfiguration.INPUT_COUNT_QUERY, query);
-    }
-  }
-
-  public void setInputBoundingQuery(String query) {
-    if (query != null && query.length() > 0) {
-      conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query);
-    }
-  }
-
-  public String getInputBoundingQuery() {
-    return conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
-  }
-
-  public Class<?> getInputClass() {
-    return conf.getClass(DBConfiguration.INPUT_CLASS_PROPERTY,
-        NullDBWritable.class);
-  }
-
-  public void setInputClass(Class<? extends DBWritable> inputClass) {
-    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, inputClass,
-        DBWritable.class);
-  }
-
-  public String getOutputTableName() {
-    return conf.get(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY);
-  }
-
-  public void setOutputTableName(String tableName) {
-    conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
-  }
-
-  public String[] getOutputFieldNames() {
-    return conf.getStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY);
-  }
-
-  public void setOutputFieldNames(String... fieldNames) {
-    conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, fieldNames);
-  }
-
-  public void setOutputFieldCount(int fieldCount) {
-    conf.setInt(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, fieldCount);
-  }
-
-  public int getOutputFieldCount() {
-    return conf.getInt(OUTPUT_FIELD_COUNT_PROPERTY, 0);
-  }
-
 }
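
Old call sites are unaffected by this file's rewrite, because the shim's static configureDB overloads forward to the new namespace and its constants alias the new keys. A hedged usage sketch (driver, URL, and credentials are placeholders, not values from the commit):

    import org.apache.hadoop.conf.Configuration;
    import com.cloudera.sqoop.mapreduce.db.DBConfiguration;

    public class ConfigureDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://localhost/testdb", "user", "password");
        // The same key is read whether callers use the old or the new
        // constant, since DRIVER_CLASS_PROPERTY above is now an alias.
        System.out.println(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));
      }
    }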
com/cloudera/sqoop/mapreduce/db/DBInputFormat.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,34 +15,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.DatabaseMetaData;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * A InputFormat that reads input data from an SQL table.
@@ -54,38 +27,38 @@
  *
  * The SQL query, and input class can be using one of the two
  * setInput methods.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat instead.
+ * @see org.apache.sqoop.mapreduce.db.DBInputFormat
  */
 public class DBInputFormat<T extends DBWritable>
-    extends InputFormat<LongWritable, T> implements Configurable {
+    extends org.apache.sqoop.mapreduce.db.DBInputFormat<T> {
 
-  private String dbProductName = "DEFAULT";
-
   /**
   * A Class that does nothing, implementing DBWritable.
+   * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable
+   * instead.
+   * @see org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable
   */
-  public static class NullDBWritable implements DBWritable, Writable {
-    @Override
-    public void readFields(DataInput in) throws IOException { }
-    @Override
-    public void readFields(ResultSet arg0) throws SQLException { }
-    @Override
-    public void write(DataOutput out) throws IOException { }
-    @Override
-    public void write(PreparedStatement arg0) throws SQLException { }
+  public static class NullDBWritable
+      extends org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable {
   }
 
   /**
   * A InputSplit that spans a set of rows.
+   *
+   * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit
+   * instead.
+   * @see org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit
   */
-  public static class DBInputSplit extends InputSplit implements Writable {
+  public static class DBInputSplit extends
+      org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit {
 
-    private long end = 0;
-    private long start = 0;
 
    /**
     * Default Constructor.
     */
    public DBInputSplit() {
+      super();
    }
 
    /**
@@ -94,266 +67,7 @@ public DBInputSplit() {
     * @param end the index of the last row to select
     */
    public DBInputSplit(long start, long end) {
-      this.start = start;
-      this.end = end;
+      super(start, end);
    }
-
-    @Override
-    /** {@inheritDoc} */
-    public String[] getLocations() throws IOException {
-      // TODO Add a layer to enable SQL "sharding" and support locality
-      return new String[] {};
-    }
-
-    /**
-     * @return The index of the first row to select
-     */
-    public long getStart() {
-      return start;
-    }
-
-    /**
-     * @return The index of the last row to select
-     */
-    public long getEnd() {
-      return end;
-    }
-
-    /**
-     * @return The total row count in this split
-     */
-    public long getLength() throws IOException {
-      return end - start;
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void readFields(DataInput input) throws IOException {
-      start = input.readLong();
-      end = input.readLong();
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void write(DataOutput output) throws IOException {
-      output.writeLong(start);
-      output.writeLong(end);
-    }
-  }
-
-  private String conditions;
-
-  private Connection connection;
-
-  private String tableName;
-
-  private String[] fieldNames;
-
-  private DBConfiguration dbConf;
-
-  @Override
-  /** {@inheritDoc} */
-  public void setConf(Configuration conf) {
-
-    dbConf = new DBConfiguration(conf);
-
-    try {
-      getConnection();
-
-      DatabaseMetaData dbMeta = connection.getMetaData();
-      this.dbProductName = dbMeta.getDatabaseProductName().toUpperCase();
-    } catch (Exception ex) {
-      throw new RuntimeException(ex);
-    }
-
-    tableName = dbConf.getInputTableName();
-    fieldNames = dbConf.getInputFieldNames();
-    conditions = dbConf.getInputConditions();
-  }
-
-  public Configuration getConf() {
-    return dbConf.getConf();
-  }
-
-  public DBConfiguration getDBConf() {
-    return dbConf;
-  }
-
-  public Connection getConnection() {
-    try {
-      if (null == this.connection) {
-        // The connection was closed; reinstantiate it.
-        this.connection = dbConf.getConnection();
-        this.connection.setAutoCommit(false);
-        this.connection.setTransactionIsolation(
-            Connection.TRANSACTION_READ_COMMITTED);
-      }
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-    return connection;
-  }
-
-  public String getDBProductName() {
-    return dbProductName;
-  }
-
-  protected RecordReader<LongWritable, T> createDBRecordReader(
-      DBInputSplit split, Configuration conf) throws IOException {
-
-    @SuppressWarnings("unchecked")
-    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
-    try {
-      // use database product name to determine appropriate record reader.
-      if (dbProductName.startsWith("ORACLE")) {
-        // use Oracle-specific db reader.
-        return new OracleDBRecordReader<T>(split, inputClass,
-            conf, getConnection(), getDBConf(), conditions, fieldNames,
-            tableName);
-      } else {
-        // Generic reader.
-        return new DBRecordReader<T>(split, inputClass,
-            conf, getConnection(), getDBConf(), conditions, fieldNames,
-            tableName);
-      }
-    } catch (SQLException ex) {
-      throw new IOException(ex);
-    }
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public RecordReader<LongWritable, T> createRecordReader(InputSplit split,
-      TaskAttemptContext context) throws IOException, InterruptedException {
-
-    return createDBRecordReader((DBInputSplit) split,
-        context.getConfiguration());
-  }
-
-  /** {@inheritDoc} */
-  @Override
-  public List<InputSplit> getSplits(JobContext job) throws IOException {
-
-    ResultSet results = null;
-    Statement statement = null;
-    try {
-      statement = connection.createStatement();
-
-      results = statement.executeQuery(getCountQuery());
-      results.next();
-
-      long count = results.getLong(1);
-      int chunks = ConfigurationHelper.getJobNumMaps(job);
-      long chunkSize = (count / chunks);
-
-      results.close();
-      statement.close();
-
-      List<InputSplit> splits = new ArrayList<InputSplit>();
-
-      // Split the rows into n-number of chunks and adjust the last chunk
-      // accordingly
-      for (int i = 0; i < chunks; i++) {
-        DBInputSplit split;
-
-        if ((i + 1) == chunks) {
-          split = new DBInputSplit(i * chunkSize, count);
-        } else {
-          split = new DBInputSplit(i * chunkSize, (i * chunkSize)
-              + chunkSize);
-        }
-
-        splits.add(split);
-      }
-
-      connection.commit();
-      return splits;
-    } catch (SQLException e) {
-      throw new IOException("Got SQLException", e);
-    } finally {
-      try {
-        if (results != null) { results.close(); }
-      } catch (SQLException e1) { /* ignored */ }
-      try {
-        if (statement != null) { statement.close(); }
-      } catch (SQLException e1) { /* ignored */ }
-
-      closeConnection();
-    }
-  }
-
-  /** Returns the query for getting the total number of rows,
-   * subclasses can override this for custom behaviour.*/
-  protected String getCountQuery() {
-
-    if(dbConf.getInputCountQuery() != null) {
-      return dbConf.getInputCountQuery();
-    }
-
-    StringBuilder query = new StringBuilder();
-    query.append("SELECT COUNT(*) FROM " + tableName);
-
-    if (conditions != null && conditions.length() > 0) {
-      query.append(" WHERE " + conditions);
-    }
-    return query.toString();
-  }
-
-  /**
-   * Initializes the map-part of the job with the appropriate input settings.
-   *
-   * @param job The map-reduce job
-   * @param inputClass the class object implementing DBWritable, which is the
-   * Java object holding tuple fields.
-   * @param tableName The table to read data from
-   * @param conditions The condition which to select data with,
-   * eg. '(updated > 20070101 AND length > 0)'
-   * @param orderBy the fieldNames in the orderBy clause.
-   * @param fieldNames The field names in the table
-   * @see #setInput(Job, Class, String, String)
-   */
-  public static void setInput(Job job,
-      Class<? extends DBWritable> inputClass,
-      String tableName, String conditions,
-      String orderBy, String... fieldNames) {
-    job.setInputFormatClass(DBInputFormat.class);
-    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
-    dbConf.setInputClass(inputClass);
-    dbConf.setInputTableName(tableName);
-    dbConf.setInputFieldNames(fieldNames);
-    dbConf.setInputConditions(conditions);
-    dbConf.setInputOrderBy(orderBy);
-  }
-
-  /**
-   * Initializes the map-part of the job with the appropriate input settings.
-   *
-   * @param job The map-reduce job
-   * @param inputClass the class object implementing DBWritable, which is the
-   * Java object holding tuple fields.
-   * @param inputQuery the input query to select fields. Example :
-   * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
-   * @param inputCountQuery the input query that returns
-   * the number of records in the table.
-   * Example : "SELECT COUNT(f1) FROM Mytable"
-   * @see #setInput(Job, Class, String, String, String, String...)
-   */
-  public static void setInput(Job job,
-      Class<? extends DBWritable> inputClass,
-      String inputQuery, String inputCountQuery) {
-    job.setInputFormatClass(DBInputFormat.class);
-    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
-    dbConf.setInputClass(inputClass);
-    dbConf.setInputQuery(inputQuery);
-    dbConf.setInputCountQuery(inputCountQuery);
-  }
-
-  protected void closeConnection() {
-    try {
-      if (null != this.connection) {
-        this.connection.close();
-        this.connection = null;
-      }
-    } catch (SQLException sqlE) { /* ignore exception on close. */ }
   }
 }
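
The two removed setInput overloads survive as inherited statics on the shim, so jobs configured against the old class name still compile. A sketch of the query-based overload, reusing the example queries from the deleted javadoc (Job.getInstance assumes a Hadoop 2.x classpath):

    import org.apache.hadoop.mapreduce.Job;
    import com.cloudera.sqoop.mapreduce.db.DBInputFormat;

    public class InputDemo {
      public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // Queries below are the javadoc's own examples.
        DBInputFormat.setInput(job, DBInputFormat.NullDBWritable.class,
            "SELECT f1, f2, f3 FROM Mytable ORDER BY f1",  // input query
            "SELECT COUNT(f1) FROM Mytable");              // count query
      }
    }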
com/cloudera/sqoop/mapreduce/db/DBOutputFormat.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,28 +15,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.io.IOException;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.StringUtils;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * A OutputFormat that sends the reduce output to a SQL table.
@@ -47,191 +31,29 @@
  * key has a type extending DBWritable. Returned {@link RecordWriter}
  * writes <b>only the key</b> to the database with a batch SQL query.
  *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBoutputFormat instead.
+ * @see org.apache.sqoop.mapreduce.db.DBOutputFormat
  */
 public class DBOutputFormat<K extends DBWritable, V>
-    extends OutputFormat<K, V> {
+    extends org.apache.sqoop.mapreduce.db.DBOutputFormat<K, V> {
 
-  private static final Log LOG = LogFactory.getLog(DBOutputFormat.class);
-  public void checkOutputSpecs(JobContext context)
-      throws IOException, InterruptedException {}
-
-  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
-      throws IOException, InterruptedException {
-    return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
-        context);
-  }
-
   /**
   * A RecordWriter that writes the reduce output to a SQL table.
+   *
+   * @deprecated use
+   * org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter instead.
+   * @see org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter
   */
-  public class DBRecordWriter
-      extends RecordWriter<K, V> {
+  public static class DBRecordWriter<K extends DBWritable, V> extends
+      org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter<K, V> {
 
-    private Connection connection;
-    private PreparedStatement statement;
-
    public DBRecordWriter() throws SQLException {
+      super();
    }
 
-    public DBRecordWriter(Connection connection
-        , PreparedStatement statement) throws SQLException {
-      this.connection = connection;
-      this.statement = statement;
-      this.connection.setAutoCommit(false);
+    public DBRecordWriter(Connection connection,
+        PreparedStatement statement) throws SQLException {
+      super(connection, statement);
    }
-
-    public Connection getConnection() {
-      return connection;
-    }
-
-    public PreparedStatement getStatement() {
-      return statement;
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void close(TaskAttemptContext context) throws IOException {
-      try {
-        statement.executeBatch();
-        connection.commit();
-      } catch (SQLException e) {
-        try {
-          connection.rollback();
-        } catch (SQLException ex) {
-          LOG.warn(StringUtils.stringifyException(ex));
-        }
-        throw new IOException(e);
-      } finally {
-        try {
-          statement.close();
-          connection.close();
-        } catch (SQLException ex) {
-          LOG.error("Unable to close connection", ex);
-        }
-      }
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void write(K key, V value) throws IOException {
-      try {
-        key.write(statement);
-        statement.addBatch();
-      } catch (SQLException e) {
-        LOG.error("Exception encountered", e);
-      }
-    }
-  }
-
-  /**
-   * Constructs the query used as the prepared statement to insert data.
-   *
-   * @param table
-   *          the table to insert into
-   * @param fieldNames
-   *          the fields to insert into. If field names are unknown, supply an
-   *          array of nulls.
-   */
-  public String constructQuery(String table, String[] fieldNames) {
-    if(fieldNames == null) {
-      throw new IllegalArgumentException("Field names may not be null");
-    }
-
-    StringBuilder query = new StringBuilder();
-    query.append("INSERT INTO ").append(table);
-
-    if (fieldNames.length > 0 && fieldNames[0] != null) {
-      query.append(" (");
-      for (int i = 0; i < fieldNames.length; i++) {
-        query.append(fieldNames[i]);
-        if (i != fieldNames.length - 1) {
-          query.append(",");
-        }
-      }
-      query.append(")");
-    }
-    query.append(" VALUES (");
-
-    for (int i = 0; i < fieldNames.length; i++) {
-      query.append("?");
-      if(i != fieldNames.length - 1) {
-        query.append(",");
-      }
-    }
-    query.append(");");
-
-    return query.toString();
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
-      throws IOException {
-    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
-    String tableName = dbConf.getOutputTableName();
-    String[] fieldNames = dbConf.getOutputFieldNames();
-
-    if(fieldNames == null) {
-      fieldNames = new String[dbConf.getOutputFieldCount()];
-    }
-
-    try {
-      Connection connection = dbConf.getConnection();
-      PreparedStatement statement = null;
-
-      statement = connection.prepareStatement(
-          constructQuery(tableName, fieldNames));
-      return new DBRecordWriter(connection, statement);
-    } catch (Exception ex) {
-      throw new IOException(ex);
-    }
-  }
-
-  /**
-   * Initializes the reduce-part of the job with
-   * the appropriate output settings.
-   *
-   * @param job The job
-   * @param tableName The table to insert data into
-   * @param fieldNames The field names in the table.
-   */
-  public static void setOutput(Job job, String tableName,
-      String... fieldNames) throws IOException {
-    if(fieldNames.length > 0 && fieldNames[0] != null) {
-      DBConfiguration dbConf = setOutput(job, tableName);
-      dbConf.setOutputFieldNames(fieldNames);
-    } else {
-      if (fieldNames.length > 0) {
-        setOutput(job, tableName, fieldNames.length);
-      } else {
-        throw new IllegalArgumentException(
-            "Field names must be greater than 0");
-      }
-    }
-  }
-
-  /**
-   * Initializes the reduce-part of the job
-   * with the appropriate output settings.
-   *
-   * @param job The job
-   * @param tableName The table to insert data into
-   * @param fieldCount the number of fields in the table.
-   */
-  public static void setOutput(Job job, String tableName,
-      int fieldCount) throws IOException {
-    DBConfiguration dbConf = setOutput(job, tableName);
-    dbConf.setOutputFieldCount(fieldCount);
-  }
-
-  private static DBConfiguration setOutput(Job job,
-      String tableName) throws IOException {
-    job.setOutputFormatClass(DBOutputFormat.class);
-    ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);
-
-    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
-
-    dbConf.setOutputTableName(tableName);
-    return dbConf;
-  }
  }
 }
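
The INSERT statement builder moved with the rest of the class; assuming constructQuery keeps its public signature in org.apache.sqoop.mapreduce.db.DBOutputFormat, the shim still exposes it by inheritance. A sketch of the SQL shape it produces (table and field names are illustrative, not from the commit):

    import com.cloudera.sqoop.mapreduce.db.DBInputFormat.NullDBWritable;
    import com.cloudera.sqoop.mapreduce.db.DBOutputFormat;

    public class QueryDemo {
      public static void main(String[] args) {
        DBOutputFormat<NullDBWritable, Object> fmt =
            new DBOutputFormat<NullDBWritable, Object>();
        System.out.println(fmt.constructQuery("employees",
            new String[] {"id", "name"}));
        // Expected output: INSERT INTO employees (id,name) VALUES (?,?);
      }
    }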
com/cloudera/sqoop/mapreduce/db/DBRecordReader.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,63 +15,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.io.IOException;
 import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
 import java.sql.SQLException;
-import java.util.Arrays;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.conf.Configuration;
-import com.cloudera.sqoop.util.LoggingUtils;
-
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
 /**
  * A RecordReader that reads records from a SQL table.
  * Emits LongWritables containing the record number as
  * key and DBWritables as value.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBRecordReader instead.
+ * @see org.apache.sqoop.mapreduce.db.DBRecordReader
  */
 public class DBRecordReader<T extends DBWritable> extends
-    RecordReader<LongWritable, T> {
+    org.apache.sqoop.mapreduce.db.DBRecordReader<T> {
 
-  private static final Log LOG = LogFactory.getLog(DBRecordReader.class);
-
-  private ResultSet results = null;
-
-  private Class<T> inputClass;
-
-  private Configuration conf;
-
-  private DBInputFormat.DBInputSplit split;
-
-  private long pos = 0;
-
-  private LongWritable key = null;
-
-  private T value = null;
-
-  private Connection connection;
-
-  protected PreparedStatement statement;
-
-  private DBConfiguration dbConf;
-
-  private String conditions;
-
-  private String [] fieldNames;
-
-  private String tableName;
-
   /**
   * @param split The InputSplit to read data for
@@ -85,222 +44,7 @@ public DBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields, String table)
      throws SQLException {
-    this.inputClass = inputClass;
-    this.split = split;
-    this.conf = conf;
-    this.connection = conn;
-    this.dbConf = dbConfig;
-    this.conditions = cond;
-    if (fields != null) {
-      this.fieldNames = Arrays.copyOf(fields, fields.length);
-    }
-    this.tableName = table;
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
  }
  // CHECKSTYLE:ON
-
-  protected ResultSet executeQuery(String query) throws SQLException {
-    this.statement = connection.prepareStatement(query,
-        ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-
-    Integer fetchSize = dbConf.getFetchSize();
-    if (fetchSize != null) {
-      LOG.debug("Using fetchSize for next query: " + fetchSize);
-      statement.setFetchSize(fetchSize);
-    }
-
-    LOG.debug("Executing query: " + query);
-    return statement.executeQuery();
-  }
-
-  /** Returns the query for selecting the records,
-   * subclasses can override this for custom behaviour.*/
-  protected String getSelectQuery() {
-    StringBuilder query = new StringBuilder();
-
-    // Default codepath for MySQL, HSQLDB, etc.
-    // Relies on LIMIT/OFFSET for splits.
-    if(dbConf.getInputQuery() == null) {
-      query.append("SELECT ");
-
-      for (int i = 0; i < fieldNames.length; i++) {
-        query.append(fieldNames[i]);
-        if (i != fieldNames.length -1) {
-          query.append(", ");
-        }
-      }
-
-      query.append(" FROM ").append(tableName);
-      query.append(" AS ").append(tableName); //in hsqldb this is necessary
-      if (conditions != null && conditions.length() > 0) {
-        query.append(" WHERE (").append(conditions).append(")");
-      }
-
-      String orderBy = dbConf.getInputOrderBy();
-      if (orderBy != null && orderBy.length() > 0) {
-        query.append(" ORDER BY ").append(orderBy);
-      }
-    } else {
-      //PREBUILT QUERY
-      query.append(dbConf.getInputQuery());
-    }
-
-    try {
-      query.append(" LIMIT ").append(split.getLength());
-      query.append(" OFFSET ").append(split.getStart());
-    } catch (IOException ex) {
-      // Ignore, will not throw.
-    }
-
-    return query.toString();
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public void close() throws IOException {
-    try {
-      if (null != results) {
-        results.close();
-      }
-      if (null != statement) {
-        statement.close();
-      }
-      if (null != connection) {
-        connection.commit();
-        connection.close();
-      }
-    } catch (SQLException e) {
-      throw new IOException(e);
-    }
-  }
-
-  public void initialize(InputSplit inputSplit, TaskAttemptContext context)
-      throws IOException, InterruptedException {
-    //do nothing
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public LongWritable getCurrentKey() {
-    return key;
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public T getCurrentValue() {
-    return value;
-  }
-
-  /**
-   * @deprecated
-   */
-  @Deprecated
-  public T createValue() {
-    return ReflectionUtils.newInstance(inputClass, conf);
-  }
-
-  /**
-   * @deprecated
-   */
-  @Deprecated
-  public long getPos() throws IOException {
-    return pos;
-  }
-
-  /**
-   * @deprecated Use {@link #nextKeyValue()}
-   */
-  @Deprecated
-  public boolean next(LongWritable k, T v) throws IOException {
-    this.key = k;
-    this.value = v;
-    return nextKeyValue();
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public float getProgress() throws IOException {
-    return pos / (float)split.getLength();
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public boolean nextKeyValue() throws IOException {
-    try {
-      if (key == null) {
-        key = new LongWritable();
-      }
-      if (value == null) {
-        value = createValue();
-      }
-      if (null == this.results) {
-        // First time into this method, run the query.
-        this.results = executeQuery(getSelectQuery());
-      }
-      if (!results.next()) {
-        return false;
-      }
-
-      // Set the key field value as the output key value
-      key.set(pos + split.getStart());
-
-      value.readFields(results);
-
-      pos++;
-    } catch (SQLException e) {
-      LoggingUtils.logAll(LOG, e);
-      throw new IOException("SQLException in nextKeyValue", e);
-    }
-    return true;
-  }
-
-  /**
-   * @return true if nextKeyValue() would return false.
-   */
-  protected boolean isDone() {
-    try {
-      return this.results != null
-          && (results.isLast() || results.isAfterLast());
-    } catch (SQLException sqlE) {
-      return true;
-    }
-  }
-
-  protected DBInputFormat.DBInputSplit getSplit() {
-    return split;
-  }
-
-  protected String [] getFieldNames() {
-    return fieldNames;
-  }
-
-  protected String getTableName() {
-    return tableName;
-  }
-
-  protected String getConditions() {
-    return conditions;
-  }
-
-  protected DBConfiguration getDBConf() {
-    return dbConf;
-  }
-
-  protected Connection getConnection() {
-    return connection;
-  }
-
-  protected PreparedStatement getStatement() {
-    return statement;
-  }
-
-  protected void setStatement(PreparedStatement stmt) {
-    this.statement = stmt;
-  }
-
-  /**
-   * @return the configuration. Allows subclasses to access the configuration
-   */
-  protected Configuration getConf(){
-    return conf;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
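With this hunk, com.cloudera.sqoop.mapreduce.db.DBRecordReader is reduced to a delegating constructor; the LIMIT/OFFSET reader logic it carried now lives in org.apache.sqoop.mapreduce.db.DBRecordReader. As a rough sketch of the query shape the removed getSelectQuery() produced — the table, fields, and split values below are hypothetical illustrations, not part of the commit:

```java
// Sketch only: rebuilds the query shape of the removed LIMIT/OFFSET path
// for a hypothetical split of 5000 rows starting at row 10000.
String[] fieldNames = {"id", "name"};   // hypothetical columns
String tableName = "employees";         // hypothetical table

StringBuilder query = new StringBuilder("SELECT ");
for (int i = 0; i < fieldNames.length; i++) {
  query.append(fieldNames[i]);
  if (i != fieldNames.length - 1) {
    query.append(", ");
  }
}
query.append(" FROM ").append(tableName);
query.append(" AS ").append(tableName);   // alias needed for hsqldb
query.append(" LIMIT ").append(5000L);    // split.getLength()
query.append(" OFFSET ").append(10000L);  // split.getStart()
// => SELECT id, name FROM employees AS employees LIMIT 5000 OFFSET 10000
```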
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,15 +15,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
 /**
  * DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat.
@@ -34,13 +25,10 @@
  * on the data-type of the column, this requires different behavior.
  * DBSplitter implementations should perform this for a data type or family
  * of data types.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.DBSplitter
  */
-public interface DBSplitter {
-  /**
-   * Given a ResultSet containing one record (and already advanced to that
-   * record) with two columns (a low value, and a high value, both of the same
-   * type), determine a set of splits that span the given values.
-   */
-  List<InputSplit> split(Configuration conf, ResultSet results, String colName)
-      throws SQLException;
+public interface DBSplitter extends org.apache.sqoop.mapreduce.db.DBSplitter {
 }
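Because the deprecated interface now extends org.apache.sqoop.mapreduce.db.DBSplitter rather than redeclaring split(), existing implementors keep compiling: they simply satisfy the inherited method. A minimal sketch, with a hypothetical implementor name:

```java
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/** Hypothetical splitter that never partitions: one catch-all split. */
public class WholeTableSplitter implements DBSplitter {
  @Override
  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {
    // "1=1" bounds mirror the single-mapper case in DataDrivenDBInputFormat.
    return Collections.<InputSplit>singletonList(
        new DataDrivenDBInputFormat.DataDrivenDBInputSplit("1=1", "1=1"));
  }
}
```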
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,67 +15,48 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.sql.Types;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;

 /**
  * A InputFormat that reads input data from an SQL table.
  * Operates like DBInputFormat, but instead of using LIMIT and OFFSET to
  * demarcate splits, it tries to generate WHERE clauses which separate the
  * data into roughly equivalent shards.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat instead
+ * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat
  */
 public class DataDrivenDBInputFormat<T extends DBWritable>
-    extends DBInputFormat<T> implements Configurable {
+    extends org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat<T> {

-  private static final Log LOG =
-      LogFactory.getLog(DataDrivenDBInputFormat.class);
-
   /**
    * If users are providing their own query, the following string is expected
    * to appear in the WHERE clause, which will be substituted with a pair of
    * conditions on the input to allow input splits to parallelise the import.
    */
-  public static final String SUBSTITUTE_TOKEN = "$CONDITIONS";
+  public static final String SUBSTITUTE_TOKEN =
+      org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.SUBSTITUTE_TOKEN;

   /**
    * A InputSplit that spans a set of rows.
+   *
+   * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.
+   * DataDrivenDBInputSplit instead.
+   * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.
+   * DataDrivenDBInputSplit
    */
-  public static class DataDrivenDBInputSplit
-      extends DBInputFormat.DBInputSplit {
+  public static class DataDrivenDBInputSplit extends
+      org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit {

-    private String lowerBoundClause;
-    private String upperBoundClause;
-
     /**
      * Default Constructor.
      */
     public DataDrivenDBInputSplit() {
+      super();
     }

     /**
@@ -88,189 +67,10 @@ public DataDrivenDBInputSplit() {
      * on the 'upper' end.
      */
     public DataDrivenDBInputSplit(final String lower, final String upper) {
-      this.lowerBoundClause = lower;
-      this.upperBoundClause = upper;
-    }
-
-
-    /**
-     * @return The total row count in this split.
-     */
-    public long getLength() throws IOException {
-      return 0; // unfortunately, we don't know this.
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void readFields(DataInput input) throws IOException {
-      this.lowerBoundClause = Text.readString(input);
-      this.upperBoundClause = Text.readString(input);
-    }
-
-    @Override
-    /** {@inheritDoc} */
-    public void write(DataOutput output) throws IOException {
-      Text.writeString(output, this.lowerBoundClause);
-      Text.writeString(output, this.upperBoundClause);
-    }
-
-    public String getLowerClause() {
-      return lowerBoundClause;
-    }
-
-    public String getUpperClause() {
-      return upperBoundClause;
+      super(lower, upper);
     }
   }

-  /**
-   * @return the DBSplitter implementation to use to divide the table/query
-   * into InputSplits.
-   */
-  protected DBSplitter getSplitter(int sqlDataType) {
-    switch (sqlDataType) {
-    case Types.NUMERIC:
-    case Types.DECIMAL:
-      return new BigDecimalSplitter();
-
-    case Types.BIT:
-    case Types.BOOLEAN:
-      return new BooleanSplitter();
-
-    case Types.INTEGER:
-    case Types.TINYINT:
-    case Types.SMALLINT:
-    case Types.BIGINT:
-      return new IntegerSplitter();
-
-    case Types.REAL:
-    case Types.FLOAT:
-    case Types.DOUBLE:
-      return new FloatSplitter();
-
-    case Types.CHAR:
-    case Types.VARCHAR:
-    case Types.LONGVARCHAR:
-      return new TextSplitter();
-
-    case Types.DATE:
-    case Types.TIME:
-    case Types.TIMESTAMP:
-      return new DateSplitter();
-
-    default:
-      // TODO: Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB,
-      // BLOB, ARRAY, STRUCT, REF, DATALINK, and JAVA_OBJECT.
-      return null;
-    }
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public List<InputSplit> getSplits(JobContext job) throws IOException {
-
-    int targetNumTasks = ConfigurationHelper.getJobNumMaps(job);
-    String boundaryQuery = getDBConf().getInputBoundingQuery();
-
-    // If user do not forced us to use his boundary query and we don't have to
-    // bacause there is only one mapper we will return single split that
-    // separates nothing. This can be considerably more optimal for a large
-    // table with no index.
-    if (1 == targetNumTasks
-        && (boundaryQuery == null || boundaryQuery.isEmpty())) {
-      List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
-      singletonSplit.add(new DataDrivenDBInputSplit("1=1", "1=1"));
-      return singletonSplit;
-    }
-
-    ResultSet results = null;
-    Statement statement = null;
-    Connection connection = getConnection();
-    try {
-      statement = connection.createStatement();
-
-      String query = getBoundingValsQuery();
-      LOG.info("BoundingValsQuery: " + query);
-
-      results = statement.executeQuery(query);
-      results.next();
-
-      // Based on the type of the results, use a different mechanism
-      // for interpolating split points (i.e., numeric splits, text splits,
-      // dates, etc.)
-      int sqlDataType = results.getMetaData().getColumnType(1);
-      boolean isSigned = results.getMetaData().isSigned(1);
-
-      // MySQL has an unsigned integer which we need to allocate space for
-      if (sqlDataType == Types.INTEGER && !isSigned){
-        sqlDataType = Types.BIGINT;
-      }
-
-      DBSplitter splitter = getSplitter(sqlDataType);
-      if (null == splitter) {
-        throw new IOException("Unknown SQL data type: " + sqlDataType);
-      }
-
-      return splitter.split(job.getConfiguration(), results,
-          getDBConf().getInputOrderBy());
-    } catch (SQLException e) {
-      throw new IOException(e);
-    } finally {
-      // More-or-less ignore SQL exceptions here, but log in case we need it.
-      try {
-        if (null != results) {
-          results.close();
-        }
-      } catch (SQLException se) {
-        LOG.debug("SQLException closing resultset: " + se.toString());
-      }
-
-      try {
-        if (null != statement) {
-          statement.close();
-        }
-      } catch (SQLException se) {
-        LOG.debug("SQLException closing statement: " + se.toString());
-      }
-
-      try {
-        connection.commit();
-        closeConnection();
-      } catch (SQLException se) {
-        LOG.debug("SQLException committing split transaction: "
-            + se.toString());
-      }
-    }
-  }
-
-  /**
-   * @return a query which returns the minimum and maximum values for
-   * the order-by column.
-   *
-   * The min value should be in the first column, and the
-   * max value should be in the second column of the results.
-   */
-  protected String getBoundingValsQuery() {
-    // If the user has provided a query, use that instead.
-    String userQuery = getDBConf().getInputBoundingQuery();
-    if (null != userQuery) {
-      return userQuery;
-    }
-
-    // Auto-generate one based on the table name we've been provided with.
-    StringBuilder query = new StringBuilder();
-
-    String splitCol = getDBConf().getInputOrderBy();
-    query.append("SELECT MIN(").append(splitCol).append("), ");
-    query.append("MAX(").append(splitCol).append(") FROM ");
-    query.append(getDBConf().getInputTableName());
-    String conditions = getDBConf().getInputConditions();
-    if (null != conditions) {
-      query.append(" WHERE ( " + conditions + " )");
-    }
-
-    return query.toString();
-  }
-
   /** Set the user-defined bounding query to use with a user-defined query.
       This *must* include the substring "$CONDITIONS"
@@ -282,35 +82,8 @@ protected String getBoundingValsQuery() {
       inside each split.
     */
   public static void setBoundingQuery(Configuration conf, String query) {
-    if (null != query) {
-      // If the user's settng a query, warn if they don't allow conditions.
-      if (query.indexOf(SUBSTITUTE_TOKEN) == -1) {
-        LOG.warn("Could not find " + SUBSTITUTE_TOKEN + " token in query: "
-            + query + "; splits may not partition data.");
-      }
-    }
-
-    conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query);
+    org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setBoundingQuery(
+        conf, query);
   }

-  protected RecordReader<LongWritable, T> createDBRecordReader(
-      DBInputSplit split, Configuration conf) throws IOException {
-
-    DBConfiguration dbConf = getDBConf();
-    @SuppressWarnings("unchecked")
-    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
-    String dbProductName = getDBProductName();
-
-    LOG.debug("Creating db record reader for db product: " + dbProductName);
-
-    try {
-      return new DataDrivenDBRecordReader<T>(split, inputClass,
-          conf, getConnection(), dbConf, dbConf.getInputConditions(),
-          dbConf.getInputFieldNames(), dbConf.getInputTableName(),
-          dbProductName);
-    } catch (SQLException ex) {
-      throw new IOException(ex);
-    }
-  }
-
   // Configuration methods override superclass to ensure that the proper
@@ -324,9 +97,8 @@ public static void setInput(Job job,
       Class<? extends DBWritable> inputClass,
       String tableName, String conditions,
       String splitBy, String... fieldNames) {
-    DBInputFormat.setInput(job, inputClass, tableName, conditions,
-        splitBy, fieldNames);
-    job.setInputFormatClass(DataDrivenDBInputFormat.class);
+    org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setInput(
+        job, inputClass, tableName, conditions, splitBy, fieldNames);
   }

   /** setInput() takes a custom query and a separate "bounding query" to use
@@ -335,9 +107,7 @@ public static void setInput(Job job,
   public static void setInput(Job job,
       Class<? extends DBWritable> inputClass,
       String inputQuery, String inputBoundingQuery) {
-    DBInputFormat.setInput(job, inputClass, inputQuery, "");
-    job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY,
-        inputBoundingQuery);
-    job.setInputFormatClass(DataDrivenDBInputFormat.class);
+    org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setInput(
+        job, inputClass, inputQuery, inputBoundingQuery);
   }
 }
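Both setInput() overloads now forward to the org.apache.sqoop implementation, so caller code is unaffected. A sketch of the free-form-query variant, assuming a hypothetical employees table and record class:

```java
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

public class SetInputExample {
  /** Hypothetical record type: one row of (id, name). */
  public static class EmployeeRecord implements DBWritable {
    long id;
    String name;
    public void readFields(ResultSet rs) throws SQLException {
      id = rs.getLong(1);
      name = rs.getString(2);
    }
    public void write(PreparedStatement ps) throws SQLException {
      ps.setLong(1, id);
      ps.setString(2, name);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration());
    // $CONDITIONS is replaced per split with the generated WHERE bounds;
    // the bounding query supplies MIN/MAX of the split column.
    DataDrivenDBInputFormat.setInput(job, EmployeeRecord.class,
        "SELECT id, name FROM employees WHERE $CONDITIONS",
        "SELECT MIN(id), MAX(id) FROM employees");
  }
}
```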
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,15 +15,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;

-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;

@@ -34,14 +28,13 @@
  * using data-driven WHERE clause splits.
  * Emits LongWritables containing the record number as
  * key and DBWritables as value.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader
+ * instead.
+ * @see org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader
  */
 public class DataDrivenDBRecordReader<T extends DBWritable>
-    extends DBRecordReader<T> {
+    extends org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader<T> {

-  private static final Log LOG =
-      LogFactory.getLog(DataDrivenDBRecordReader.class);
-
-  private String dbProductName; // database manufacturer string.
-
   // CHECKSTYLE:OFF
   // TODO(aaron): Refactor constructor to use fewer arguments.
@@ -53,77 +46,8 @@ public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
       Class<T> inputClass, Configuration conf, Connection conn,
       DBConfiguration dbConfig, String cond, String [] fields, String table,
       String dbProduct) throws SQLException {
-    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
-    this.dbProductName = dbProduct;
+    super(split, inputClass, conf, conn, dbConfig,
+        cond, fields, table, dbProduct);
   }
   // CHECKSTYLE:ON

-  @Override
-  /** {@inheritDoc} */
-  public float getProgress() throws IOException {
-    return isDone() ? 1.0f : 0.0f;
-  }
-
-  /** Returns the query for selecting the records,
-   * subclasses can override this for custom behaviour.*/
-  protected String getSelectQuery() {
-    StringBuilder query = new StringBuilder();
-    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
-        (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit();
-    DBConfiguration dbConf = getDBConf();
-    String [] fieldNames = getFieldNames();
-    String tableName = getTableName();
-    String conditions = getConditions();
-
-    // Build the WHERE clauses associated with the data split first.
-    // We need them in both branches of this function.
-    StringBuilder conditionClauses = new StringBuilder();
-    conditionClauses.append("( ").append(dataSplit.getLowerClause());
-    conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause());
-    conditionClauses.append(" )");
-
-    if(dbConf.getInputQuery() == null) {
-      // We need to generate the entire query.
-      query.append("SELECT ");
-
-      for (int i = 0; i < fieldNames.length; i++) {
-        query.append(fieldNames[i]);
-        if (i != fieldNames.length -1) {
-          query.append(", ");
-        }
-      }
-
-      query.append(" FROM ").append(tableName);
-      if (!dbProductName.startsWith("ORACLE")) {
-        // Seems to be necessary for hsqldb? Oracle explicitly does *not*
-        // use this clause.
-        query.append(" AS ").append(tableName);
-      }
-      query.append(" WHERE ");
-      if (conditions != null && conditions.length() > 0) {
-        // Put the user's conditions first.
-        query.append("( ").append(conditions).append(" ) AND ");
-      }
-
-      // Now append the conditions associated with our split.
-      query.append(conditionClauses.toString());
-
-    } else {
-      // User provided the query. We replace the special token with
-      // our WHERE clause.
-      String inputQuery = dbConf.getInputQuery();
-      if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
-        LOG.error("Could not find the clause substitution token "
-            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: ["
-            + inputQuery + "]. Parallel splits may not work correctly.");
-      }
-
-      query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
-          conditionClauses.toString()));
-    }
-
-    LOG.debug("Using query: " + query.toString());
-
-    return query.toString();
-  }
 }
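The getSelectQuery() logic that moves with this class splices the split's bound clauses into the user query at the $CONDITIONS token. A worked illustration with made-up bounds:

```java
// Hypothetical split bounds, as an IntegerSplitter would produce them:
String lowerClause = "id >= 0";
String upperClause = "id < 5000";
String conditionClauses =
    "( " + lowerClause + " ) AND ( " + upperClause + " )";

String inputQuery = "SELECT id, name FROM employees WHERE $CONDITIONS";
String perSplitQuery = inputQuery.replace("$CONDITIONS", conditionClauses);
// => SELECT id, name FROM employees WHERE ( id >= 0 ) AND ( id < 5000 )
```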
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,168 +15,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Types;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * Implement DBSplitter over date/time values.
  * Make use of logic from IntegerSplitter, since date/time are just longs
  * in Java.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DateSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.DateSplitter
  */
-public class DateSplitter extends IntegerSplitter {
+public class DateSplitter extends org.apache.sqoop.mapreduce.db.DateSplitter {

-  private static final Log LOG = LogFactory.getLog(DateSplitter.class);
-
-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
-
-    long minVal;
-    long maxVal;
-
-    int sqlDataType = results.getMetaData().getColumnType(1);
-    minVal = resultSetColToLong(results, 1, sqlDataType);
-    maxVal = resultSetColToLong(results, 2, sqlDataType);
-
-    String lowClausePrefix = colName + " >= ";
-    String highClausePrefix = colName + " < ";
-
-    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
-    if (numSplits < 1) {
-      numSplits = 1;
-    }
-
-    if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) {
-      // The range of acceptable dates is NULL to NULL. Just create a single
-      // split.
-      List<InputSplit> splits = new ArrayList<InputSplit>();
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    // Gather the split point integers
-    List<Long> splitPoints = split(numSplits, minVal, maxVal);
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    // Turn the split points into a set of intervals.
-    long start = splitPoints.get(0);
-    Date startDate = longToDate(start, sqlDataType);
-    if (sqlDataType == Types.TIMESTAMP) {
-      // The lower bound's nanos value needs to match the actual lower-bound
-      // nanos.
-      try {
-        ((java.sql.Timestamp) startDate).setNanos(
-            results.getTimestamp(1).getNanos());
-      } catch (NullPointerException npe) {
-        // If the lower bound was NULL, we'll get an NPE; just ignore it and
-        // don't set nanos.
-      }
-    }
-
-    for (int i = 1; i < splitPoints.size(); i++) {
-      long end = splitPoints.get(i);
-      Date endDate = longToDate(end, sqlDataType);
-
-      if (i == splitPoints.size() - 1) {
-        if (sqlDataType == Types.TIMESTAMP) {
-          // The upper bound's nanos value needs to match the actual
-          // upper-bound nanos.
-          try {
-            ((java.sql.Timestamp) endDate).setNanos(
-                results.getTimestamp(2).getNanos());
-          } catch (NullPointerException npe) {
-            // If the upper bound was NULL, we'll get an NPE; just ignore it
-            // and don't set nanos.
-          }
-        }
-        // This is the last one; use a closed interval.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + dateToString(startDate),
-            colName + " <= " + dateToString(endDate)));
-      } else {
-        // Normal open-interval case.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + dateToString(startDate),
-            highClausePrefix + dateToString(endDate)));
-      }
-
-      start = end;
-      startDate = endDate;
-    }
-
-    if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) {
-      // Add an extra split to handle the null case that we saw.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-    }
-
-    return splits;
-  }
-
-  /**
-      Retrieve the value from the column in a type-appropriate manner and
-      return its timestamp since the epoch. If the column is null, then return
-      Long.MIN_VALUE. This will cause a special split to be generated for the
-      NULL case, but may also cause poorly-balanced splits if most of the
-      actual dates are positive time since the epoch, etc.
-  */
-  private long resultSetColToLong(ResultSet rs, int colNum, int sqlDataType)
-      throws SQLException {
-    try {
-      switch (sqlDataType) {
-      case Types.DATE:
-        return rs.getDate(colNum).getTime();
-      case Types.TIME:
-        return rs.getTime(colNum).getTime();
-      case Types.TIMESTAMP:
-        return rs.getTimestamp(colNum).getTime();
-      default:
-        throw new SQLException("Not a date-type field");
-      }
-    } catch (NullPointerException npe) {
-      // null column. return minimum long value.
-      LOG.warn("Encountered a NULL date in the split column. "
-          + "Splits may be poorly balanced.");
-      return Long.MIN_VALUE;
-    }
-  }
-
-  /** Parse the long-valued timestamp into the appropriate SQL date type. */
-  private Date longToDate(long val, int sqlDataType) {
-    switch (sqlDataType) {
-    case Types.DATE:
-      return new java.sql.Date(val);
-    case Types.TIME:
-      return new java.sql.Time(val);
-    case Types.TIMESTAMP:
-      return new java.sql.Timestamp(val);
-    default: // Shouldn't ever hit this case.
-      return null;
-    }
-  }
-
-  /**
-   * Given a Date 'd', format it as a string for use in a SQL date
-   * comparison operation.
-   * @param d the date to format.
-   * @return the string representing this date in SQL with any appropriate
-   * quotation characters, etc.
-   */
-  protected String dateToString(Date d) {
-    return "'" + d.toString() + "'";
-  }
 }
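DateSplitter maps date/time boundaries to epoch milliseconds, reuses the integer boundary arithmetic, and quotes each boundary back into SQL via dateToString(). A small sketch of the clause formatting, with hypothetical boundary values:

```java
import java.sql.Timestamp;

public class DateClauseDemo {
  public static void main(String[] args) {
    // Hypothetical boundary values, as epoch milliseconds:
    long start = 1300000000000L;
    long end = 1300003600000L;
    // longToDate() picks java.sql.Date/Time/Timestamp by SQL type;
    // dateToString() wraps the value in single quotes for the WHERE clause.
    String lower = "ts >= '" + new Timestamp(start) + "'";
    String upper = "ts < '" + new Timestamp(end) + "'";
    System.out.println(lower + " AND " + upper);
  }
}
```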
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,88 +15,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * Implement DBSplitter over floating-point values.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.FloatSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.FloatSplitter
  */
-public class FloatSplitter implements DBSplitter {
+public class FloatSplitter
+    extends org.apache.sqoop.mapreduce.db.FloatSplitter {

-  private static final Log LOG = LogFactory.getLog(FloatSplitter.class);
-
-  private static final double MIN_INCREMENT = 10000 * Double.MIN_VALUE;
-
-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
-
-    LOG.warn("Generating splits for a floating-point index column. Due to the");
-    LOG.warn("imprecise representation of floating-point values in Java, this");
-    LOG.warn("may result in an incomplete import.");
-    LOG.warn("You are strongly encouraged to choose an integral split column.");
-
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    if (results.getString(1) == null && results.getString(2) == null) {
-      // Range is null to null. Return a null split accordingly.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    double minVal = results.getDouble(1);
-    double maxVal = results.getDouble(2);
-
-    // Use this as a hint. May need an extra task if the size doesn't
-    // divide cleanly.
-    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
-    double splitSize = (maxVal - minVal) / (double) numSplits;
-
-    if (splitSize < MIN_INCREMENT) {
-      splitSize = MIN_INCREMENT;
-    }
-
-    String lowClausePrefix = colName + " >= ";
-    String highClausePrefix = colName + " < ";
-
-    double curLower = minVal;
-    double curUpper = curLower + splitSize;
-
-    while (curUpper < maxVal) {
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          lowClausePrefix + Double.toString(curLower),
-          highClausePrefix + Double.toString(curUpper)));
-
-      curLower = curUpper;
-      curUpper += splitSize;
-    }
-
-    // Catch any overage and create the closed interval for the last split.
-    if (curLower <= maxVal || splits.size() == 1) {
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          lowClausePrefix + Double.toString(curUpper),
-          colName + " <= " + Double.toString(maxVal)));
-    }
-
-    if (results.getString(1) == null || results.getString(2) == null) {
-      // At least one extrema is null; add a null split.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-    }
-
-    return splits;
-  }
 }
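The removed FloatSplitter body steps a fixed-width window across [min, max] and closes the final interval. A compressed sketch under hypothetical bounds (the MIN_INCREMENT clamp and NULL handling are omitted):

```java
public class FloatSplitDemo {
  public static void main(String[] args) {
    double minVal = 0.0, maxVal = 10.0;  // hypothetical MIN/MAX of the column
    int numSplits = 4;                   // number of map tasks
    double splitSize = (maxVal - minVal) / numSplits;  // 2.5

    double curLower = minVal;
    double curUpper = curLower + splitSize;
    while (curUpper < maxVal) {
      System.out.println("col >= " + curLower + " AND col < " + curUpper);
      curLower = curUpper;
      curUpper += splitSize;
    }
    // Last interval is closed so the maximum value is not dropped.
    System.out.println("col >= " + curLower + " AND col <= " + maxVal);
  }
}
```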
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,133 +15,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;

 /**
  * Implement DBSplitter over integer values.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.IntegerSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.IntegerSplitter
  */
-public class IntegerSplitter implements DBSplitter {
+public class IntegerSplitter
+    extends org.apache.sqoop.mapreduce.db.IntegerSplitter {
   public static final Log LOG =
-      LogFactory.getLog(IntegerSplitter.class.getName());
+      org.apache.sqoop.mapreduce.db.IntegerSplitter.LOG;

-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
-
-    long minVal = results.getLong(1);
-    long maxVal = results.getLong(2);
-
-    String lowClausePrefix = colName + " >= ";
-    String highClausePrefix = colName + " < ";
-
-    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
-    if (numSplits < 1) {
-      numSplits = 1;
-    }
-
-    if (results.getString(1) == null && results.getString(2) == null) {
-      // Range is null to null. Return a null split accordingly.
-      List<InputSplit> splits = new ArrayList<InputSplit>();
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    // Get all the split points together.
-    List<Long> splitPoints = split(numSplits, minVal, maxVal);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug(String.format("Splits: [%,28d to %,28d] into %d parts",
-          minVal, maxVal, numSplits));
-      for (int i = 0; i < splitPoints.size(); i++) {
-        LOG.debug(String.format("%,28d", splitPoints.get(i)));
-      }
-    }
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    // Turn the split points into a set of intervals.
-    long start = splitPoints.get(0);
-    for (int i = 1; i < splitPoints.size(); i++) {
-      long end = splitPoints.get(i);
-
-      if (i == splitPoints.size() - 1) {
-        // This is the last one; use a closed interval.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + Long.toString(start),
-            colName + " <= " + Long.toString(end)));
-      } else {
-        // Normal open-interval case.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + Long.toString(start),
-            highClausePrefix + Long.toString(end)));
-      }
-
-      start = end;
-    }
-
-    if (results.getString(1) == null || results.getString(2) == null) {
-      // At least one extrema is null; add a null split.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-    }
-
-    return splits;
-  }
-
-  /**
-   * Returns a list of longs one element longer than the list of input splits.
-   * This represents the boundaries between input splits.
-   * All splits are open on the top end, except the last one.
-   *
-   * So the list [0, 5, 8, 12, 18] would represent splits capturing the
-   * intervals:
-   *
-   * [0, 5)
-   * [5, 8)
-   * [8, 12)
-   * [12, 18] note the closed interval for the last split.
-   */
-  List<Long> split(long numSplits, long minVal, long maxVal)
-      throws SQLException {
-
-    List<Long> splits = new ArrayList<Long>();
-
-    // We take the min-max interval and divide by the numSplits and also
-    // calculate a remainder. Because of integer division rules, numsplits *
-    // splitSize + minVal will always be <= maxVal. We then use the remainder
-    // and add 1 if the current split index is less than the remainder.
-    // This is guaranteed to add up to remainder and not surpass the value.
-    long splitSize = (maxVal - minVal) / numSplits;
-    long remainder = (maxVal - minVal) % numSplits;
-    long curVal = minVal;
-
-    // This will honor numSplits as long as split size > 0. If split size is
-    // 0, it will have remainder splits.
-    for (int i = 0; i <= numSplits; i++) {
-      splits.add(curVal);
-      if (curVal >= maxVal) {
-        break;
-      }
-      curVal += splitSize;
-      curVal += (i < remainder) ? 1 : 0;
-    }
-
-    if (splits.size() == 1) {
-      // make a valid singleton split
-      splits.add(maxVal);
-    }
-
-    return splits;
-  }
 }
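The boundary arithmetic removed here spreads the integer-division remainder over the first splits, so the last boundary lands exactly on maxVal. A worked run with hypothetical bounds, mirroring the deleted loop:

```java
public class IntegerSplitDemo {
  public static void main(String[] args) {
    long minVal = 0, maxVal = 10;
    long numSplits = 3;
    long splitSize = (maxVal - minVal) / numSplits;  // 3
    long remainder = (maxVal - minVal) % numSplits;  // 1

    long curVal = minVal;
    for (int i = 0; i <= numSplits; i++) {
      System.out.println(curVal);      // prints 0, 4, 7, 10
      if (curVal >= maxVal) {
        break;
      }
      curVal += splitSize;
      curVal += (i < remainder) ? 1 : 0;  // first split absorbs the remainder
    }
    // Boundaries [0, 4, 7, 10] give splits [0,4), [4,7), [7,10].
  }
}
```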
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,29 +15,25 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;
-import java.lang.reflect.Method;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;

 /**
  * A RecordReader that reads records from an Oracle SQL table.
+ * @deprecated use org.apache.sqoop.mapreduce.db.OracleDBRecordReader instead.
+ * @see org.apache.sqoop.mapreduce.db.OracleDBRecordReader
  */
-public class OracleDBRecordReader<T extends DBWritable>
-    extends DBRecordReader<T> {
+public class OracleDBRecordReader<T extends DBWritable> extends
+    org.apache.sqoop.mapreduce.db.OracleDBRecordReader<T> {

   /** Configuration key to set to a timezone string. */
-  public static final String SESSION_TIMEZONE_KEY = "oracle.sessionTimeZone";
-  private static final Log LOG = LogFactory.getLog(OracleDBRecordReader.class);
+  public static final String SESSION_TIMEZONE_KEY =
+      org.apache.sqoop.mapreduce.db.OracleDBRecordReader.SESSION_TIMEZONE_KEY;

   // CHECKSTYLE:OFF
   public OracleDBRecordReader(DBInputFormat.DBInputSplit split,
@@ -47,61 +41,9 @@ public OracleDBRecordReader(DBInputFormat.DBInputSplit split,
       DBConfiguration dbConfig, String cond, String [] fields,
       String table) throws SQLException {
     super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
-    setSessionTimeZone(conf, conn);
   }
   // CHECKSTYLE:ON

-  /** Returns the query for selecting the records from an Oracle DB. */
-  protected String getSelectQuery() {
-    StringBuilder query = new StringBuilder();
-    DBConfiguration dbConf = getDBConf();
-    String conditions = getConditions();
-    String tableName = getTableName();
-    String [] fieldNames = getFieldNames();
-
-    // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
-    if(dbConf.getInputQuery() == null) {
-      query.append("SELECT ");
-
-      for (int i = 0; i < fieldNames.length; i++) {
-        query.append(fieldNames[i]);
-        if (i != fieldNames.length -1) {
-          query.append(", ");
-        }
-      }
-
-      query.append(" FROM ").append(tableName);
-      if (conditions != null && conditions.length() > 0) {
-        query.append(" WHERE ").append(conditions);
-      }
-      String orderBy = dbConf.getInputOrderBy();
-      if (orderBy != null && orderBy.length() > 0) {
-        query.append(" ORDER BY ").append(orderBy);
-      }
-    } else {
-      //PREBUILT QUERY
-      query.append(dbConf.getInputQuery());
-    }
-
-    try {
-      DBInputFormat.DBInputSplit split = getSplit();
-      if (split.getLength() > 0 && split.getStart() > 0) {
-        String querystring = query.toString();
-
-        query = new StringBuilder();
-        query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
-        query.append(querystring);
-        query.append(" ) a WHERE rownum <= ").append(split.getStart());
-        query.append(" + ").append(split.getLength());
-        query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
-      }
-    } catch (IOException ex) {
-      // ignore, will not throw.
-    }
-
-    return query.toString();
-  }
-
   /**
    * Set session time zone.
    * @param conf The current configuration.
@@ -110,41 +52,7 @@ protected String getSelectQuery() {
    */
   public static void setSessionTimeZone(Configuration conf,
       Connection conn) throws SQLException {
-    // need to use reflection to call the method setSessionTimeZone on
-    // the OracleConnection class because oracle specific java libraries are
-    // not accessible in this context.
-    Method method;
-    try {
-      method = conn.getClass().getMethod(
-          "setSessionTimeZone", new Class [] {String.class});
-    } catch (Exception ex) {
-      LOG.error("Could not find method setSessionTimeZone in "
-          + conn.getClass().getName(), ex);
-      // rethrow SQLException
-      throw new SQLException(ex);
-    }
-
-    // Need to set the time zone in order for Java
-    // to correctly access the column "TIMESTAMP WITH LOCAL TIME ZONE".
-    // We can't easily get the correct Oracle-specific timezone string
-    // from Java; just let the user set the timezone in a property.
-    String clientTimeZone = conf.get(SESSION_TIMEZONE_KEY, "GMT");
-    try {
-      method.setAccessible(true);
-      method.invoke(conn, clientTimeZone);
-      LOG.info("Time zone has been set to " + clientTimeZone);
-    } catch (Exception ex) {
-      LOG.warn("Time zone " + clientTimeZone
-          + " could not be set on Oracle database.");
-      LOG.warn("Setting default time zone: GMT");
-      try {
-        // "GMT" timezone is guaranteed to exist.
-        method.invoke(conn, "GMT");
-      } catch (Exception ex2) {
-        LOG.error("Could not set time zone for oracle connection", ex2);
-        // rethrow SQLException
-        throw new SQLException(ex);
-      }
-    }
+    org.apache.sqoop.mapreduce.db.OracleDBRecordReader.setSessionTimeZone(
+        conf, conn);
   }
 }
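Two Oracle-specific behaviors move with this class: the session time zone is applied reflectively to the connection (keyed by oracle.sessionTimeZone, defaulting to GMT), and pagination wraps the base query in a ROWNUM subselect instead of LIMIT/OFFSET. A usage sketch with hypothetical split values:

```java
import org.apache.hadoop.conf.Configuration;

import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader;

public class OracleTimeZoneExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // SESSION_TIMEZONE_KEY is "oracle.sessionTimeZone"; "GMT" is the default.
    conf.set(OracleDBRecordReader.SESSION_TIMEZONE_KEY, "America/Los_Angeles");

    // For a split starting at row 10000 with 5000 rows, the reader wraps a
    // base query Q as (values hypothetical):
    //   SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( Q ) a
    //     WHERE rownum <= 10000 + 5000 ) WHERE dbif_rno >= 10000
  }
}
```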
src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java

@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@ -17,57 +15,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.io.IOException;
-import java.sql.SQLException;
-import java.sql.Types;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;

 /**
  * A InputFormat that reads input data from an SQL table in an Oracle db.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat
+ *   instead.
+ * @see org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat
  */
 public class OracleDataDrivenDBInputFormat<T extends DBWritable>
-    extends DataDrivenDBInputFormat<T> implements Configurable {
-
-  /**
-   * @return the DBSplitter implementation to use to divide the table/query
-   * into InputSplits.
-   */
-  @Override
-  protected DBSplitter getSplitter(int sqlDataType) {
-    switch (sqlDataType) {
-    case Types.DATE:
-    case Types.TIME:
-    case Types.TIMESTAMP:
-      return new OracleDateSplitter();
-
-    default:
-      return super.getSplitter(sqlDataType);
-    }
-  }
-
-  @Override
-  protected RecordReader<LongWritable, T> createDBRecordReader(
-      DBInputSplit split, Configuration conf) throws IOException {
-
-    DBConfiguration dbConf = getDBConf();
-    @SuppressWarnings("unchecked")
-    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
-
-    try {
-      // Use Oracle-specific db reader
-      return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
-          conf, getConnection(), dbConf, dbConf.getInputConditions(),
-          dbConf.getInputFieldNames(), dbConf.getInputTableName());
-    } catch (SQLException ex) {
-      throw new IOException(ex);
-    }
-  }
+    extends org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat<T> {
 }
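The deleted getSplitter override above is small but load-bearing: Oracle needs its own date formatting, so the three JDBC temporal types are routed to OracleDateSplitter while every other type falls through to the parent's choice. A standalone restatement of that dispatch (hypothetical names; the real method is a protected override on the InputFormat):

    import java.sql.Types;

    public class SplitterDispatchSketch {
      // Mirrors the switch removed above.
      static String splitterFor(int sqlDataType) {
        switch (sqlDataType) {
        case Types.DATE:
        case Types.TIME:
        case Types.TIMESTAMP:
          return "OracleDateSplitter";
        default:
          return "whatever DataDrivenDBInputFormat.getSplitter() returns";
        }
      }

      public static void main(String[] args) {
        System.out.println(splitterFor(Types.TIMESTAMP)); // OracleDateSplitter
        System.out.println(splitterFor(Types.INTEGER));   // falls through to super
      }
    }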
src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java

@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@ -17,7 +15,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

 import java.sql.Connection;
@ -29,9 +26,13 @@
 /**
  * A RecordReader that reads records from a Oracle table
  * via DataDrivenDBRecordReader.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader
+ *   instead.
+ * @see org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader
  */
 public class OracleDataDrivenDBRecordReader<T extends DBWritable>
-    extends DataDrivenDBRecordReader<T> {
+    extends org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader<T> {

   // CHECKSTYLE:OFF
   // TODO(aaron): Enable checkstyle after refactoring DBRecordReader c'tor.
@ -40,11 +41,7 @@ public OracleDataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
       DBConfiguration dbConfig, String cond, String [] fields,
       String table) throws SQLException {

-    super(split, inputClass, conf, conn, dbConfig, cond, fields, table,
-        "ORACLE");
-
-    // Must initialize the tz used by the connection for Oracle.
-    OracleDBRecordReader.setSessionTimeZone(conf, conn);
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
   }
   // CHECKSTYLE:ON
 }
src/java/com/cloudera/sqoop/mapreduce/db/OracleDateSplitter.java

@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@ -17,24 +15,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.util.Date;
-
 /**
  * Implement DBSplitter over date/time values returned by an Oracle db.
  * Make use of logic from DateSplitter, since this just needs to use
  * some Oracle-specific functions on the formatting end when generating
  * InputSplits.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.OracleDateSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.OracleDateSplitter
  */
-public class OracleDateSplitter extends DateSplitter {
-
-  @SuppressWarnings("unchecked")
-  @Override
-  protected String dateToString(Date d) {
-    // Oracle Data objects are always actually Timestamps
-    return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
-  }
+public class OracleDateSplitter
+    extends org.apache.sqoop.mapreduce.db.OracleDateSplitter {
 }
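The dateToString override deleted above (now inherited from the org.apache.sqoop copy) is what makes Oracle date splits parse server-side: each boundary is wrapped in TO_TIMESTAMP with an explicit format mask instead of being emitted as a bare literal. A quick check of the output it produces:

    import java.sql.Timestamp;

    public class OracleDateFormatSketch {
      // Same expression as the deleted method body above.
      static String dateToString(java.util.Date d) {
        return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
      }

      public static void main(String[] args) {
        Timestamp t = Timestamp.valueOf("2011-10-28 12:34:56.789");
        // Prints: TO_TIMESTAMP('2011-10-28 12:34:56.789', 'YYYY-MM-DD HH24:MI:SS.FF')
        System.out.println(dateToString(t));
      }
    }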
src/java/com/cloudera/sqoop/mapreduce/db/TextSplitter.java

@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@ -17,213 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.cloudera.sqoop.mapreduce.db;

-import java.math.BigDecimal;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
-
 /**
  * Implement DBSplitter over text strings.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.TextSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.TextSplitter
  */
-public class TextSplitter extends BigDecimalSplitter {
-
-  private static final Log LOG = LogFactory.getLog(TextSplitter.class);
-
-  /**
-   * This method needs to determine the splits between two user-provided
-   * strings. In the case where the user's strings are 'A' and 'Z', this is
-   * not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26
-   * splits for strings beginning with each letter, etc.
-   *
-   * If a user has provided us with the strings "Ham" and "Haze", however, we
-   * need to create splits that differ in the third letter.
-   *
-   * The algorithm used is as follows:
-   * Since there are 2**16 unicode characters, we interpret characters as
-   * digits in base 65536. Given a string 's' containing characters s_0, s_1
-   * .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in
-   * base 65536. Having mapped the low and high strings into floating-point
-   * values, we then use the BigDecimalSplitter to establish the even split
-   * points, then map the resulting floating point values back into strings.
-   */
-  public List<InputSplit> split(Configuration conf, ResultSet results,
-      String colName) throws SQLException {
-
-    LOG.warn("Generating splits for a textual index column.");
-    LOG.warn("If your database sorts in a case-insensitive order, "
-        + "this may result in a partial import or duplicate records.");
-    LOG.warn("You are strongly encouraged to choose an integral split column.");
-
-    String minString = results.getString(1);
-    String maxString = results.getString(2);
-
-    boolean minIsNull = false;
-
-    // If the min value is null, switch it to an empty string instead for
-    // purposes of interpolation. Then add [null, null] as a special case
-    // split.
-    if (null == minString) {
-      minString = "";
-      minIsNull = true;
-    }
-
-    if (null == maxString) {
-      // If the max string is null, then the min string has to be null too.
-      // Just return a special split for this case.
-      List<InputSplit> splits = new ArrayList<InputSplit>();
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-      return splits;
-    }
-
-    // Use this as a hint. May need an extra task if the size doesn't
-    // divide cleanly.
-    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
-
-    String lowClausePrefix = colName + " >= '";
-    String highClausePrefix = colName + " < '";
-
-    // If there is a common prefix between minString and maxString, establish
-    // it and pull it out of minString and maxString.
-    int maxPrefixLen = Math.min(minString.length(), maxString.length());
-    int sharedLen;
-    for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
-      char c1 = minString.charAt(sharedLen);
-      char c2 = maxString.charAt(sharedLen);
-      if (c1 != c2) {
-        break;
-      }
-    }
-
-    // The common prefix has length 'sharedLen'. Extract it from both.
-    String commonPrefix = minString.substring(0, sharedLen);
-    minString = minString.substring(sharedLen);
-    maxString = maxString.substring(sharedLen);
-
-    List<String> splitStrings = split(numSplits, minString, maxString,
-        commonPrefix);
-    List<InputSplit> splits = new ArrayList<InputSplit>();
-
-    // Convert the list of split point strings into an actual set of
-    // InputSplits.
-    String start = splitStrings.get(0);
-    for (int i = 1; i < splitStrings.size(); i++) {
-      String end = splitStrings.get(i);
-
-      if (i == splitStrings.size() - 1) {
-        // This is the last one; use a closed interval.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + start + "'", colName + " <= '" + end + "'"));
-      } else {
-        // Normal open-interval case.
-        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-            lowClausePrefix + start + "'", highClausePrefix + end + "'"));
-      }
-
-      start = end;
-    }
-
-    if (minIsNull) {
-      // Add the special null split at the end.
-      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
-          colName + " IS NULL", colName + " IS NULL"));
-    }
-
-    return splits;
-  }
-
-  List<String> split(int numSplits, String minString, String maxString,
-      String commonPrefix) throws SQLException {
-
-    BigDecimal minVal = stringToBigDecimal(minString);
-    BigDecimal maxVal = stringToBigDecimal(maxString);
-
-    List<BigDecimal> splitPoints = split(
-        new BigDecimal(numSplits), minVal, maxVal);
-    List<String> splitStrings = new ArrayList<String>();
-
-    // Convert the BigDecimal splitPoints into their string representations.
-    for (BigDecimal bd : splitPoints) {
-      splitStrings.add(commonPrefix + bigDecimalToString(bd));
-    }
-
-    // Make sure that our user-specified boundaries are the first and last
-    // entries in the array.
-    if (splitStrings.size() == 0
-        || !splitStrings.get(0).equals(commonPrefix + minString)) {
-      splitStrings.add(0, commonPrefix + minString);
-    }
-    if (splitStrings.size() == 1
-        || !splitStrings.get(splitStrings.size() - 1).equals(
-        commonPrefix + maxString)) {
-      splitStrings.add(commonPrefix + maxString);
-    }
-
-    return splitStrings;
-  }
-
-  private static final BigDecimal ONE_PLACE = new BigDecimal(65536);
-
-  // Maximum number of characters to convert. This is to prevent rounding
-  // errors or repeating fractions near the very bottom from getting out of
-  // control. Note that this still gives us a huge number of possible splits.
-  private static final int MAX_CHARS = 8;
-
-  /**
-   * Return a BigDecimal representation of string 'str' suitable for use in a
-   * numerically-sorting order.
-   */
-  BigDecimal stringToBigDecimal(String str) {
-    // Start with 1/65536 to compute the first digit.
-    BigDecimal curPlace = ONE_PLACE;
-    BigDecimal result = BigDecimal.ZERO;
-
-    int len = Math.min(str.length(), MAX_CHARS);
-
-    for (int i = 0; i < len; i++) {
-      int codePoint = str.codePointAt(i);
-      result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
-      // advance to the next less significant place. e.g., 1/(65536^2) for the
-      // second char.
-      curPlace = curPlace.multiply(ONE_PLACE);
-    }
-
-    return result;
-  }
-
-  /**
-   * Return the string encoded in a BigDecimal.
-   * Repeatedly multiply the input value by 65536; the integer portion after
-   * such a multiplication represents a single character in base 65536.
-   * Convert that back into a char and create a string out of these until we
-   * have no data left.
-   */
-  String bigDecimalToString(BigDecimal bd) {
-    BigDecimal cur = bd.stripTrailingZeros();
-    StringBuilder sb = new StringBuilder();
-
-    for (int numConverted = 0; numConverted < MAX_CHARS; numConverted++) {
-      cur = cur.multiply(ONE_PLACE);
-      int curCodePoint = cur.intValue();
-      if (0 == curCodePoint) {
-        break;
-      }
-
-      cur = cur.subtract(new BigDecimal(curCodePoint));
-      sb.append(Character.toChars(curCodePoint));
-    }
-
-    return sb.toString();
-  }
+public class TextSplitter extends org.apache.sqoop.mapreduce.db.TextSplitter {
 }
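The deleted javadoc above documents the key trick: a string is read as the fraction 0.s_0 s_1 .. s_n in base 65536, which preserves lexicographic order under numeric comparison, so the numeric BigDecimalSplitter can interpolate between two strings. A self-contained sketch of that forward mapping (exact division is safe here because 65536 is a power of two, so every quotient terminates in decimal):

    import java.math.BigDecimal;

    public class TextEncodingSketch {
      private static final BigDecimal ONE_PLACE = new BigDecimal(65536);
      private static final int MAX_CHARS = 8;

      // Same mapping as stringToBigDecimal() above: each char is one digit of
      // a base-65536 fraction in [0, 1).
      static BigDecimal stringToBigDecimal(String str) {
        BigDecimal curPlace = ONE_PLACE;
        BigDecimal result = BigDecimal.ZERO;
        int len = Math.min(str.length(), MAX_CHARS);
        for (int i = 0; i < len; i++) {
          result = result.add(
              new BigDecimal(str.codePointAt(i)).divide(curPlace));
          curPlace = curPlace.multiply(ONE_PLACE);
        }
        return result;
      }

      public static void main(String[] args) {
        BigDecimal ham = stringToBigDecimal("Ham");
        BigDecimal haze = stringToBigDecimal("Haze");
        // "Ham" < "Haze" lexicographically, and the encoding agrees numerically:
        System.out.println(ham.compareTo(haze) < 0); // true
      }
    }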
151 src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java Normal file

@ -0,0 +1,151 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.math.BigDecimal;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * Implement DBSplitter over BigDecimal values.
 */
public class BigDecimalSplitter implements DBSplitter {
  private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class);

  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    BigDecimal minVal = results.getBigDecimal(1);
    BigDecimal maxVal = results.getBigDecimal(2);

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    BigDecimal numSplits = new BigDecimal(
        ConfigurationHelper.getConfNumMaps(conf));

    if (minVal == null && maxVal == null) {
      // Range is null to null. Return a null split accordingly.
      List<InputSplit> splits = new ArrayList<InputSplit>();
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    if (minVal == null || maxVal == null) {
      // Don't know what is a reasonable min/max value for interpolation. Fail.
      LOG.error("Cannot find a range for NUMERIC or DECIMAL "
          + "fields with one end NULL.");
      return null;
    }

    // Get all the split points together.
    List<BigDecimal> splitPoints = split(numSplits, minVal, maxVal);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Turn the split points into a set of intervals.
    BigDecimal start = splitPoints.get(0);
    for (int i = 1; i < splitPoints.size(); i++) {
      BigDecimal end = splitPoints.get(i);

      if (i == splitPoints.size() - 1) {
        // This is the last one; use a closed interval.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + start.toString(),
            colName + " <= " + end.toString()));
      } else {
        // Normal open-interval case.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + start.toString(),
            highClausePrefix + end.toString()));
      }

      start = end;
    }

    return splits;
  }

  private static final BigDecimal MIN_INCREMENT =
      new BigDecimal(10000 * Double.MIN_VALUE);

  /**
   * Divide numerator by denominator. If impossible in exact mode, use rounding.
   */
  protected BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) {
    try {
      return numerator.divide(denominator);
    } catch (ArithmeticException ae) {
      return numerator.divide(denominator, BigDecimal.ROUND_HALF_UP);
    }
  }

  /**
   * Returns a list of BigDecimals one element longer than the list of input
   * splits. This represents the boundaries between input splits. All splits
   * are open on the top end, except the last one.
   *
   * So the list [0, 5, 8, 12, 18] would represent splits capturing the
   * intervals:
   *
   * [0, 5)
   * [5, 8)
   * [8, 12)
   * [12, 18] note the closed interval for the last split.
   */
  protected List<BigDecimal> split(BigDecimal numSplits, BigDecimal minVal,
      BigDecimal maxVal) throws SQLException {

    List<BigDecimal> splits = new ArrayList<BigDecimal>();

    // Use numSplits as a hint. May need an extra task if the size doesn't
    // divide cleanly.
    BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), (numSplits));
    if (splitSize.compareTo(MIN_INCREMENT) < 0) {
      splitSize = MIN_INCREMENT;
      LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT");
    }

    BigDecimal curVal = minVal;

    while (curVal.compareTo(maxVal) <= 0) {
      splits.add(curVal);
      curVal = curVal.add(splitSize);
    }

    if (splits.get(splits.size() - 1).compareTo(maxVal) != 0
        || splits.size() == 1) {
      // We didn't end on the maxVal. Add that to the end of the list.
      splits.add(maxVal);
    }

    return splits;
  }
}
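A worked run of the boundary loop in split() above: for numSplits = 4 over the range [0, 18], splitSize is 4.5, the while loop collects 0, 4.5, 9.0, 13.5, 18.0, and the trailing check adds nothing because the loop already landed exactly on maxVal. The caller then turns those five boundaries into four WHERE-clause ranges, the last one closed:

    import java.math.BigDecimal;

    public class BigDecimalSplitWorkedExample {
      public static void main(String[] args) {
        BigDecimal min = new BigDecimal(0);
        BigDecimal max = new BigDecimal(18);
        BigDecimal splitSize = max.subtract(min).divide(new BigDecimal(4)); // 4.5

        BigDecimal cur = min;
        while (cur.compareTo(max) <= 0) {
          System.out.println(cur); // 0, 4.5, 9.0, 13.5, 18.0
          cur = cur.add(splitSize);
        }
        // Resulting splits on column "col":
        //   col >= 0    AND col < 4.5
        //   col >= 4.5  AND col < 9.0
        //   col >= 9.0  AND col < 13.5
        //   col >= 13.5 AND col <= 18.0   (closed interval for the last split)
      }
    }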
69 src/java/org/apache/sqoop/mapreduce/db/BooleanSplitter.java Normal file

@ -0,0 +1,69 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * Implement DBSplitter over boolean values.
 */
public class BooleanSplitter implements DBSplitter {
  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    List<InputSplit> splits = new ArrayList<InputSplit>();

    if (results.getString(1) == null && results.getString(2) == null) {
      // Range is null to null. Return a null split accordingly.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    boolean minVal = results.getBoolean(1);
    boolean maxVal = results.getBoolean(2);

    // Use one or two splits.
    if (!minVal) {
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " = FALSE", colName + " = FALSE"));
    }

    if (maxVal) {
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " = TRUE", colName + " = TRUE"));
    }

    if (results.getString(1) == null || results.getString(2) == null) {
      // Include a null value.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
    }

    return splits;
  }
}
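Read concretely: a boolean split column can only ever produce degenerate ranges, so for a column holding FALSE, TRUE and NULL rows the method above emits three single-value splits (col = FALSE, col = TRUE, col IS NULL) and no interpolation is needed; a column that is uniformly TRUE with no NULLs collapses to the single split col = TRUE.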
310 src/java/org/apache/sqoop/mapreduce/db/DBConfiguration.java Normal file

@ -0,0 +1,310 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBInputFormat.NullDBWritable;
import com.cloudera.sqoop.mapreduce.db.DBOutputFormat;

/**
 * A container for configuration property names for jobs with DB input/output.
 *
 * The job can be configured using the static methods in this class,
 * {@link DBInputFormat}, and {@link DBOutputFormat}.
 * Alternatively, the properties can be set in the configuration with proper
 * values.
 *
 * @see DBConfiguration#configureDB(Configuration, String, String, String,
 * String)
 * @see DBInputFormat#setInput(Job, Class, String, String)
 * @see DBInputFormat#setInput(Job, Class, String, String, String, String...)
 * @see DBOutputFormat#setOutput(Job, String, String...)
 */
public class DBConfiguration {

  /** The JDBC Driver class name. */
  public static final String DRIVER_CLASS_PROPERTY =
      "mapreduce.jdbc.driver.class";

  /** JDBC Database access URL. */
  public static final String URL_PROPERTY = "mapreduce.jdbc.url";

  /** User name to access the database. */
  public static final String USERNAME_PROPERTY = "mapreduce.jdbc.username";

  /** Password to access the database. */
  public static final String PASSWORD_PROPERTY = "mapreduce.jdbc.password";

  /** Fetch size. */
  public static final String FETCH_SIZE = "mapreduce.jdbc.fetchsize";

  /** Input table name. */
  public static final String INPUT_TABLE_NAME_PROPERTY =
      "mapreduce.jdbc.input.table.name";

  /** Field names in the Input table. */
  public static final String INPUT_FIELD_NAMES_PROPERTY =
      "mapreduce.jdbc.input.field.names";

  /** WHERE clause in the input SELECT statement. */
  public static final String INPUT_CONDITIONS_PROPERTY =
      "mapreduce.jdbc.input.conditions";

  /** ORDER BY clause in the input SELECT statement. */
  public static final String INPUT_ORDER_BY_PROPERTY =
      "mapreduce.jdbc.input.orderby";

  /** Whole input query, exluding LIMIT...OFFSET. */
  public static final String INPUT_QUERY = "mapreduce.jdbc.input.query";

  /** Input query to get the count of records. */
  public static final String INPUT_COUNT_QUERY =
      "mapreduce.jdbc.input.count.query";

  /** Input query to get the max and min values of the jdbc.input.query. */
  public static final String INPUT_BOUNDING_QUERY =
      "mapred.jdbc.input.bounding.query";

  /** Class name implementing DBWritable which will hold input tuples. */
  public static final String INPUT_CLASS_PROPERTY =
      "mapreduce.jdbc.input.class";

  /** Output table name. */
  public static final String OUTPUT_TABLE_NAME_PROPERTY =
      "mapreduce.jdbc.output.table.name";

  /** Field names in the Output table. */
  public static final String OUTPUT_FIELD_NAMES_PROPERTY =
      "mapreduce.jdbc.output.field.names";

  /** Number of fields in the Output table. */
  public static final String OUTPUT_FIELD_COUNT_PROPERTY =
      "mapreduce.jdbc.output.field.count";

  /**
   * Sets the DB access related fields in the {@link Configuration}.
   * @param conf the configuration
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL
   * @param userName DB access username
   * @param passwd DB access passwd
   * @param fetchSize DB fetch size
   */
  public static void configureDB(Configuration conf, String driverClass,
      String dbUrl, String userName, String passwd, Integer fetchSize) {

    conf.set(DRIVER_CLASS_PROPERTY, driverClass);
    conf.set(URL_PROPERTY, dbUrl);
    if (userName != null) {
      conf.set(USERNAME_PROPERTY, userName);
    }
    if (passwd != null) {
      conf.set(PASSWORD_PROPERTY, passwd);
    }
    if (fetchSize != null) {
      conf.setInt(FETCH_SIZE, fetchSize);
    }
  }

  /**
   * Sets the DB access related fields in the JobConf.
   * @param job the job
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL
   * @param fetchSize DB fetch size
   */
  public static void configureDB(Configuration job, String driverClass,
      String dbUrl, Integer fetchSize) {
    configureDB(job, driverClass, dbUrl, null, null, fetchSize);
  }

  /**
   * Sets the DB access related fields in the {@link Configuration}.
   * @param conf the configuration
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL
   * @param userName DB access username
   * @param passwd DB access passwd
   */
  public static void configureDB(Configuration conf, String driverClass,
      String dbUrl, String userName, String passwd) {
    configureDB(conf, driverClass, dbUrl, userName, passwd, null);
  }

  /**
   * Sets the DB access related fields in the JobConf.
   * @param job the job
   * @param driverClass JDBC Driver class name
   * @param dbUrl JDBC DB access URL.
   */
  public static void configureDB(Configuration job, String driverClass,
      String dbUrl) {
    configureDB(job, driverClass, dbUrl, null);
  }

  private Configuration conf;

  public DBConfiguration(Configuration job) {
    this.conf = job;
  }

  /** Returns a connection object to the DB.
   * @throws ClassNotFoundException
   * @throws SQLException */
  public Connection getConnection()
      throws ClassNotFoundException, SQLException {

    Class.forName(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY));

    if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) {
      return DriverManager.getConnection(
          conf.get(DBConfiguration.URL_PROPERTY));
    } else {
      return DriverManager.getConnection(
          conf.get(DBConfiguration.URL_PROPERTY),
          conf.get(DBConfiguration.USERNAME_PROPERTY),
          conf.get(DBConfiguration.PASSWORD_PROPERTY));
    }
  }

  public Configuration getConf() {
    return conf;
  }

  public Integer getFetchSize() {
    if (conf.get(DBConfiguration.FETCH_SIZE) == null) {
      return null;
    }
    return conf.getInt(DBConfiguration.FETCH_SIZE, 0);
  }

  public void setFetchSize(Integer fetchSize) {
    if (fetchSize != null) {
      conf.setInt(DBConfiguration.FETCH_SIZE, fetchSize);
    } else {
      conf.set(FETCH_SIZE, null);
    }
  }

  public String getInputTableName() {
    return conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY);
  }

  public void setInputTableName(String tableName) {
    conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName);
  }

  public String[] getInputFieldNames() {
    return conf.getStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY);
  }

  public void setInputFieldNames(String... fieldNames) {
    conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, fieldNames);
  }

  public String getInputConditions() {
    return conf.get(DBConfiguration.INPUT_CONDITIONS_PROPERTY);
  }

  public void setInputConditions(String conditions) {
    if (conditions != null && conditions.length() > 0) {
      conf.set(DBConfiguration.INPUT_CONDITIONS_PROPERTY, conditions);
    }
  }

  public String getInputOrderBy() {
    return conf.get(DBConfiguration.INPUT_ORDER_BY_PROPERTY);
  }

  public void setInputOrderBy(String orderby) {
    if (orderby != null && orderby.length() > 0) {
      conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, orderby);
    }
  }

  public String getInputQuery() {
    return conf.get(DBConfiguration.INPUT_QUERY);
  }

  public void setInputQuery(String query) {
    if (query != null && query.length() > 0) {
      conf.set(DBConfiguration.INPUT_QUERY, query);
    }
  }

  public String getInputCountQuery() {
    return conf.get(DBConfiguration.INPUT_COUNT_QUERY);
  }

  public void setInputCountQuery(String query) {
    if (query != null && query.length() > 0) {
      conf.set(DBConfiguration.INPUT_COUNT_QUERY, query);
    }
  }

  public void setInputBoundingQuery(String query) {
    if (query != null && query.length() > 0) {
      conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query);
    }
  }

  public String getInputBoundingQuery() {
    return conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
  }

  public Class<?> getInputClass() {
    return conf.getClass(DBConfiguration.INPUT_CLASS_PROPERTY,
        NullDBWritable.class);
  }

  public void setInputClass(Class<? extends DBWritable> inputClass) {
    conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, inputClass,
        DBWritable.class);
  }

  public String getOutputTableName() {
    return conf.get(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY);
  }

  public void setOutputTableName(String tableName) {
    conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName);
  }

  public String[] getOutputFieldNames() {
    return conf.getStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY);
  }

  public void setOutputFieldNames(String... fieldNames) {
    conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, fieldNames);
  }

  public void setOutputFieldCount(int fieldCount) {
    conf.setInt(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, fieldCount);
  }

  public int getOutputFieldCount() {
    return conf.getInt(OUTPUT_FIELD_COUNT_PROPERTY, 0);
  }

}
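Taken together, the static configureDB overloads plus the instance getters and setters are the whole contract of this class. A usage sketch (the driver, URL and credentials are placeholder values, not anything mandated by the file above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.sqoop.mapreduce.db.DBConfiguration;

    public class DBConfigurationUsage {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Placeholder connection settings; fetch size of 1000 rows per round trip.
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://localhost/testdb", "sqoop", "secret", 1000);

        DBConfiguration dbConf = new DBConfiguration(conf);
        dbConf.setInputTableName("employees");
        dbConf.setInputFieldNames("id", "name");
        dbConf.setInputOrderBy("id");
        // dbConf.getConnection() would now load the driver and open a connection.
      }
    }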
363 src/java/org/apache/sqoop/mapreduce/db/DBInputFormat.java Normal file

@ -0,0 +1,363 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader;

/**
 * A InputFormat that reads input data from an SQL table.
 * <p>
 * DBInputFormat emits LongWritables containing the record number as
 * key and DBWritables as value.
 *
 * The SQL query, and input class can be using one of the two
 * setInput methods.
 */
public class DBInputFormat<T extends DBWritable>
    extends InputFormat<LongWritable, T> implements Configurable {

  private String dbProductName = "DEFAULT";

  /**
   * A Class that does nothing, implementing DBWritable.
   */
  public static class NullDBWritable implements DBWritable, Writable {
    @Override
    public void readFields(DataInput in) throws IOException { }
    @Override
    public void readFields(ResultSet arg0) throws SQLException { }
    @Override
    public void write(DataOutput out) throws IOException { }
    @Override
    public void write(PreparedStatement arg0) throws SQLException { }
  }

  /**
   * A InputSplit that spans a set of rows.
   */
  public static class DBInputSplit extends InputSplit implements Writable {

    private long end = 0;
    private long start = 0;

    /**
     * Default Constructor.
     */
    public DBInputSplit() {
    }

    /**
     * Convenience Constructor.
     * @param start the index of the first row to select
     * @param end the index of the last row to select
     */
    public DBInputSplit(long start, long end) {
      this.start = start;
      this.end = end;
    }

    @Override
    /** {@inheritDoc} */
    public String[] getLocations() throws IOException {
      // TODO Add a layer to enable SQL "sharding" and support locality
      return new String[] {};
    }

    /**
     * @return The index of the first row to select
     */
    public long getStart() {
      return start;
    }

    /**
     * @return The index of the last row to select
     */
    public long getEnd() {
      return end;
    }

    /**
     * @return The total row count in this split
     */
    public long getLength() throws IOException {
      return end - start;
    }

    @Override
    /** {@inheritDoc} */
    public void readFields(DataInput input) throws IOException {
      start = input.readLong();
      end = input.readLong();
    }

    @Override
    /** {@inheritDoc} */
    public void write(DataOutput output) throws IOException {
      output.writeLong(start);
      output.writeLong(end);
    }
  }

  private String conditions;

  private Connection connection;

  private String tableName;

  private String[] fieldNames;

  private DBConfiguration dbConf;

  @Override
  /** {@inheritDoc} */
  public void setConf(Configuration conf) {

    dbConf = new DBConfiguration(conf);

    try {
      getConnection();

      DatabaseMetaData dbMeta = connection.getMetaData();
      this.dbProductName = dbMeta.getDatabaseProductName().toUpperCase();
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }

    tableName = dbConf.getInputTableName();
    fieldNames = dbConf.getInputFieldNames();
    conditions = dbConf.getInputConditions();
  }

  public Configuration getConf() {
    return dbConf.getConf();
  }

  public DBConfiguration getDBConf() {
    return dbConf;
  }

  public Connection getConnection() {
    try {
      if (null == this.connection) {
        // The connection was closed; reinstantiate it.
        this.connection = dbConf.getConnection();
        this.connection.setAutoCommit(false);
        this.connection.setTransactionIsolation(
            Connection.TRANSACTION_READ_COMMITTED);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return connection;
  }

  public String getDBProductName() {
    return dbProductName;
  }

  protected RecordReader<LongWritable, T> createDBRecordReader(
      com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit split,
      Configuration conf) throws IOException {

    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
    try {
      // use database product name to determine appropriate record reader.
      if (dbProductName.startsWith("ORACLE")) {
        // use Oracle-specific db reader.
        return new OracleDBRecordReader<T>(split, inputClass,
            conf, getConnection(), getDBConf(), conditions, fieldNames,
            tableName);
      } else {
        // Generic reader.
        return new DBRecordReader<T>(split, inputClass,
            conf, getConnection(), getDBConf(), conditions, fieldNames,
            tableName);
      }
    } catch (SQLException ex) {
      throw new IOException(ex);
    }
  }

  @Override
  /** {@inheritDoc} */
  public RecordReader<LongWritable, T> createRecordReader(InputSplit split,
      TaskAttemptContext context) throws IOException, InterruptedException {

    return createDBRecordReader(
        (com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit) split,
        context.getConfiguration());
  }

  /** {@inheritDoc} */
  @Override
  public List<InputSplit> getSplits(JobContext job) throws IOException {

    ResultSet results = null;
    Statement statement = null;
    try {
      statement = connection.createStatement();

      results = statement.executeQuery(getCountQuery());
      results.next();

      long count = results.getLong(1);
      int chunks = ConfigurationHelper.getJobNumMaps(job);
      long chunkSize = (count / chunks);

      results.close();
      statement.close();

      List<InputSplit> splits = new ArrayList<InputSplit>();

      // Split the rows into n-number of chunks and adjust the last chunk
      // accordingly
      for (int i = 0; i < chunks; i++) {
        DBInputSplit split;

        if ((i + 1) == chunks) {
          split = new DBInputSplit(i * chunkSize, count);
        } else {
          split = new DBInputSplit(i * chunkSize, (i * chunkSize)
              + chunkSize);
        }

        splits.add(split);
      }

      connection.commit();
      return splits;
    } catch (SQLException e) {
      throw new IOException("Got SQLException", e);
    } finally {
      try {
        if (results != null) { results.close(); }
      } catch (SQLException e1) { /* ignored */ }
      try {
        if (statement != null) { statement.close(); }
      } catch (SQLException e1) { /* ignored */ }

      closeConnection();
    }
  }

  /** Returns the query for getting the total number of rows,
   * subclasses can override this for custom behaviour.*/
  protected String getCountQuery() {

    if (dbConf.getInputCountQuery() != null) {
      return dbConf.getInputCountQuery();
    }

    StringBuilder query = new StringBuilder();
    query.append("SELECT COUNT(*) FROM " + tableName);

    if (conditions != null && conditions.length() > 0) {
      query.append(" WHERE " + conditions);
    }
    return query.toString();
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The map-reduce job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param tableName The table to read data from
   * @param conditions The condition which to select data with,
   * eg. '(updated > 20070101 AND length > 0)'
   * @param orderBy the fieldNames in the orderBy clause.
   * @param fieldNames The field names in the table
   * @see #setInput(Job, Class, String, String)
   */
  public static void setInput(Job job,
      Class<? extends DBWritable> inputClass,
      String tableName, String conditions,
      String orderBy, String... fieldNames) {
    job.setInputFormatClass(DBInputFormat.class);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setInputClass(inputClass);
    dbConf.setInputTableName(tableName);
    dbConf.setInputFieldNames(fieldNames);
    dbConf.setInputConditions(conditions);
    dbConf.setInputOrderBy(orderBy);
  }

  /**
   * Initializes the map-part of the job with the appropriate input settings.
   *
   * @param job The map-reduce job
   * @param inputClass the class object implementing DBWritable, which is the
   * Java object holding tuple fields.
   * @param inputQuery the input query to select fields. Example :
   * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
   * @param inputCountQuery the input query that returns
   * the number of records in the table.
   * Example : "SELECT COUNT(f1) FROM Mytable"
   * @see #setInput(Job, Class, String, String, String, String...)
   */
  public static void setInput(Job job,
      Class<? extends DBWritable> inputClass,
      String inputQuery, String inputCountQuery) {
    job.setInputFormatClass(DBInputFormat.class);
    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
    dbConf.setInputClass(inputClass);
    dbConf.setInputQuery(inputQuery);
    dbConf.setInputCountQuery(inputCountQuery);
  }

  protected void closeConnection() {
    try {
      if (null != this.connection) {
        this.connection.close();
        this.connection = null;
      }
    } catch (SQLException sqlE) { /* ignore exception on close. */ }
  }

}
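The two setInput overloads above are the public entry points: table mode (table, conditions, order-by, fields) and query mode (select query plus count query). A table-mode sketch, reusing the NullDBWritable stub from the same file where a real job would pass its own DBWritable class; the driver, URL and column names are placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.sqoop.mapreduce.db.DBConfiguration;
    import org.apache.sqoop.mapreduce.db.DBInputFormat;

    public class DBInputFormatUsage {
      public static void main(String[] args) throws Exception {
        Job job = new Job(new Configuration());
        DBConfiguration.configureDB(job.getConfiguration(),
            "com.mysql.jdbc.Driver", "jdbc:mysql://localhost/testdb");
        // Reads employees ordered by id; null conditions means no WHERE clause.
        DBInputFormat.setInput(job, DBInputFormat.NullDBWritable.class,
            "employees", null, "id", "id", "name");
      }
    }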
238
src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java
Normal file
238
src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.sqoop.mapreduce.db;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.mapreduce.Job;
|
||||||
|
import org.apache.hadoop.mapreduce.JobContext;
|
||||||
|
import org.apache.hadoop.mapreduce.OutputCommitter;
|
||||||
|
import org.apache.hadoop.mapreduce.OutputFormat;
|
||||||
|
import org.apache.hadoop.mapreduce.RecordWriter;
|
||||||
|
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
|
||||||
|
import com.cloudera.sqoop.config.ConfigurationHelper;
|
||||||
|
import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
|
||||||
|
import com.cloudera.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A OutputFormat that sends the reduce output to a SQL table.
|
||||||
|
* <p>
|
||||||
|
* {@link DBOutputFormat} accepts <key,value> pairs, where
|
||||||
|
* key has a type extending DBWritable. Returned {@link RecordWriter}
|
||||||
|
* writes <b>only the key</b> to the database with a batch SQL query.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class DBOutputFormat<K extends DBWritable, V>
|
||||||
|
extends OutputFormat<K, V> {
|
||||||
|
|
||||||
|
|
||||||
|
private static final Log LOG = LogFactory.getLog(DBOutputFormat.class);
|
||||||
|
public void checkOutputSpecs(JobContext context)
|
||||||
|
throws IOException, InterruptedException {}
|
||||||
|
|
||||||
|
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
|
||||||
|
context);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the query used as the prepared statement to insert data.
|
||||||
|
*
|
||||||
|
* @param table
|
||||||
|
* the table to insert into
|
||||||
|
* @param fieldNames
|
||||||
|
* the fields to insert into. If field names are unknown, supply an
|
||||||
|
* array of nulls.
|
||||||
|
*/
|
||||||
|
public String constructQuery(String table, String[] fieldNames) {
|
||||||
|
if(fieldNames == null) {
|
||||||
|
throw new IllegalArgumentException("Field names may not be null");
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder query = new StringBuilder();
|
||||||
|
query.append("INSERT INTO ").append(table);
|
||||||
|
|
||||||
|
if (fieldNames.length > 0 && fieldNames[0] != null) {
|
||||||
|
query.append(" (");
|
||||||
|
for (int i = 0; i < fieldNames.length; i++) {
|
||||||
|
query.append(fieldNames[i]);
|
||||||
|
if (i != fieldNames.length - 1) {
|
||||||
|
query.append(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
query.append(")");
|
||||||
|
}
|
||||||
|
query.append(" VALUES (");
|
||||||
|
|
||||||
|
for (int i = 0; i < fieldNames.length; i++) {
|
||||||
|
query.append("?");
|
||||||
|
if(i != fieldNames.length - 1) {
|
||||||
|
query.append(",");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
query.append(");");
|
||||||
|
|
||||||
|
return query.toString();
|
||||||
|
}

  @Override
  /** {@inheritDoc} */
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if(fieldNames == null) {
      fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
      Connection connection = dbConf.getConnection();
      PreparedStatement statement = null;

      statement = connection.prepareStatement(
          constructQuery(tableName, fieldNames));
      return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
      throw new IOException(ex);
    }
  }

  /**
   * Initializes the reduce-part of the job with
   * the appropriate output settings.
   *
   * @param job The job
   * @param tableName The table to insert data into
   * @param fieldNames The field names in the table.
   */
  public static void setOutput(Job job, String tableName,
      String... fieldNames) throws IOException {
    if(fieldNames.length > 0 && fieldNames[0] != null) {
      DBConfiguration dbConf = setOutput(job, tableName);
      dbConf.setOutputFieldNames(fieldNames);
    } else {
      if (fieldNames.length > 0) {
        setOutput(job, tableName, fieldNames.length);
      } else {
        throw new IllegalArgumentException(
            "Field names must be greater than 0");
      }
    }
  }
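
  // Usage sketch (hypothetical job and names, added for illustration): wire a
  // reduce job so that its output keys are batch-inserted into the "users"
  // table with columns "id" and "name":
  //   DBOutputFormat.setOutput(job, "users", "id", "name");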

  /**
   * Initializes the reduce-part of the job
   * with the appropriate output settings.
   *
   * @param job The job
   * @param tableName The table to insert data into
   * @param fieldCount the number of fields in the table.
   */
  public static void setOutput(Job job, String tableName,
      int fieldCount) throws IOException {
    DBConfiguration dbConf = setOutput(job, tableName);
    dbConf.setOutputFieldCount(fieldCount);
  }

  private static DBConfiguration setOutput(Job job,
      String tableName) throws IOException {
    job.setOutputFormatClass(DBOutputFormat.class);
    ConfigurationHelper.setJobReduceSpeculativeExecution(job, false);

    DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());

    dbConf.setOutputTableName(tableName);
    return dbConf;
  }

  /**
   * A RecordWriter that writes the reduce output to a SQL table.
   */
  public static class DBRecordWriter<K extends DBWritable, V>
      extends RecordWriter<K, V> {

    private Connection connection;
    private PreparedStatement statement;

    public DBRecordWriter() throws SQLException {
    }

    public DBRecordWriter(Connection connection,
        PreparedStatement statement) throws SQLException {
      this.connection = connection;
      this.statement = statement;
      this.connection.setAutoCommit(false);
    }

    public Connection getConnection() {
      return connection;
    }

    public PreparedStatement getStatement() {
      return statement;
    }

    @Override
    /** {@inheritDoc} */
    public void close(TaskAttemptContext context) throws IOException {
      try {
        statement.executeBatch();
        connection.commit();
      } catch (SQLException e) {
        try {
          connection.rollback();
        } catch (SQLException ex) {
          LOG.warn(StringUtils.stringifyException(ex));
        }
        throw new IOException(e);
      } finally {
        try {
          statement.close();
          connection.close();
        } catch (SQLException ex) {
          LOG.error("Unable to close connection", ex);
        }
      }
    }

    @Override
    /** {@inheritDoc} */
    public void write(K key, V value) throws IOException {
      try {
        key.write(statement);
        statement.addBatch();
      } catch (SQLException e) {
        LOG.error("Exception encountered", e);
      }
    }
  }

}
src/java/org/apache/sqoop/mapreduce/db/DBRecordReader.java (Normal file, 305 lines)
@@ -0,0 +1,305 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.util.ReflectionUtils;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.util.LoggingUtils;

/**
 * A RecordReader that reads records from a SQL table.
 * Emits LongWritables containing the record number as
 * key and DBWritables as value.
 */
public class DBRecordReader<T extends DBWritable> extends
    RecordReader<LongWritable, T> {

  private static final Log LOG = LogFactory.getLog(DBRecordReader.class);

  private ResultSet results = null;

  private Class<T> inputClass;

  private Configuration conf;

  private DBInputFormat.DBInputSplit split;

  private long pos = 0;

  private LongWritable key = null;

  private T value = null;

  private Connection connection;

  protected PreparedStatement statement;

  private DBConfiguration dbConf;

  private String conditions;

  private String [] fieldNames;

  private String tableName;

  /**
   * @param split The InputSplit to read data for
   * @throws SQLException
   */
  // CHECKSTYLE:OFF
  // TODO (aaron): Refactor constructor to take fewer arguments
  public DBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields, String table)
      throws SQLException {
    this.inputClass = inputClass;
    this.split = split;
    this.conf = conf;
    this.connection = conn;
    this.dbConf = dbConfig;
    this.conditions = cond;
    if (fields != null) {
      this.fieldNames = Arrays.copyOf(fields, fields.length);
    }
    this.tableName = table;
  }
  // CHECKSTYLE:ON

  protected ResultSet executeQuery(String query) throws SQLException {
    this.statement = connection.prepareStatement(query,
        ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);

    Integer fetchSize = dbConf.getFetchSize();
    if (fetchSize != null) {
      LOG.debug("Using fetchSize for next query: " + fetchSize);
      statement.setFetchSize(fetchSize);
    }

    LOG.debug("Executing query: " + query);
    return statement.executeQuery();
  }

  /** Returns the query for selecting the records,
   * subclasses can override this for custom behaviour.*/
  protected String getSelectQuery() {
    StringBuilder query = new StringBuilder();

    // Default codepath for MySQL, HSQLDB, etc.
    // Relies on LIMIT/OFFSET for splits.
    if(dbConf.getInputQuery() == null) {
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length -1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);
      query.append(" AS ").append(tableName); //in hsqldb this is necessary
      if (conditions != null && conditions.length() > 0) {
        query.append(" WHERE (").append(conditions).append(")");
      }

      String orderBy = dbConf.getInputOrderBy();
      if (orderBy != null && orderBy.length() > 0) {
        query.append(" ORDER BY ").append(orderBy);
      }
    } else {
      //PREBUILT QUERY
      query.append(dbConf.getInputQuery());
    }

    try {
      query.append(" LIMIT ").append(split.getLength());
      query.append(" OFFSET ").append(split.getStart());
    } catch (IOException ex) {
      // Ignore, will not throw.
    }

    return query.toString();
  }
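
  // Example of a generated query (hypothetical names, added for
  // illustration): for tableName "users", fieldNames {"id", "name"}, no
  // conditions or ORDER BY, and a split of length 100 starting at row 200,
  // getSelectQuery yields:
  //   SELECT id, name FROM users AS users LIMIT 100 OFFSET 200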

  @Override
  /** {@inheritDoc} */
  public void close() throws IOException {
    try {
      if (null != results) {
        results.close();
      }
      if (null != statement) {
        statement.close();
      }
      if (null != connection) {
        connection.commit();
        connection.close();
      }
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  public void initialize(InputSplit inputSplit, TaskAttemptContext context)
      throws IOException, InterruptedException {
    //do nothing
  }

  @Override
  /** {@inheritDoc} */
  public LongWritable getCurrentKey() {
    return key;
  }

  @Override
  /** {@inheritDoc} */
  public T getCurrentValue() {
    return value;
  }

  /**
   * @deprecated
   */
  @Deprecated
  public T createValue() {
    return ReflectionUtils.newInstance(inputClass, conf);
  }

  /**
   * @deprecated
   */
  @Deprecated
  public long getPos() throws IOException {
    return pos;
  }

  /**
   * @deprecated Use {@link #nextKeyValue()}
   */
  @Deprecated
  public boolean next(LongWritable k, T v) throws IOException {
    this.key = k;
    this.value = v;
    return nextKeyValue();
  }

  @Override
  /** {@inheritDoc} */
  public float getProgress() throws IOException {
    return pos / (float)split.getLength();
  }

  @Override
  /** {@inheritDoc} */
  public boolean nextKeyValue() throws IOException {
    try {
      if (key == null) {
        key = new LongWritable();
      }
      if (value == null) {
        value = createValue();
      }
      if (null == this.results) {
        // First time into this method, run the query.
        this.results = executeQuery(getSelectQuery());
      }
      if (!results.next()) {
        return false;
      }

      // Set the key field value as the output key value
      key.set(pos + split.getStart());

      value.readFields(results);

      pos++;
    } catch (SQLException e) {
      LoggingUtils.logAll(LOG, e);
      throw new IOException("SQLException in nextKeyValue", e);
    }
    return true;
  }

  /**
   * @return true if nextKeyValue() would return false.
   */
  protected boolean isDone() {
    try {
      return this.results != null
          && (results.isLast() || results.isAfterLast());
    } catch (SQLException sqlE) {
      return true;
    }
  }

  protected DBInputFormat.DBInputSplit getSplit() {
    return split;
  }

  protected String [] getFieldNames() {
    return fieldNames;
  }

  protected String getTableName() {
    return tableName;
  }

  protected String getConditions() {
    return conditions;
  }

  protected DBConfiguration getDBConf() {
    return dbConf;
  }

  protected Connection getConnection() {
    return connection;
  }

  protected PreparedStatement getStatement() {
    return statement;
  }

  protected void setStatement(PreparedStatement stmt) {
    this.statement = stmt;
  }

  /**
   * @return the configuration. Allows subclasses to access the configuration
   */
  protected Configuration getConf(){
    return conf;
  }
}
src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java (Normal file, 44 lines)
@@ -0,0 +1,44 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat.
 * DataDrivenDBInputFormat needs to interpolate between two values that
 * represent the lowest and highest valued records to import. Depending
 * on the data-type of the column, this requires different behavior.
 * DBSplitter implementations should perform this for a data type or family
 * of data types.
 */
public interface DBSplitter {

  /**
   * Given a ResultSet containing one record (and already advanced to that
   * record) with two columns (a low value, and a high value, both of the same
   * type), determine a set of splits that span the given values.
   */
  List<InputSplit> split(Configuration conf, ResultSet results, String colName)
      throws SQLException;
}
src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
@@ -0,0 +1,354 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter;
import com.cloudera.sqoop.mapreduce.db.BooleanSplitter;
import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader;
import com.cloudera.sqoop.mapreduce.db.DateSplitter;
import com.cloudera.sqoop.mapreduce.db.FloatSplitter;
import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;
import com.cloudera.sqoop.mapreduce.db.TextSplitter;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit;

/**
 * An InputFormat that reads input data from an SQL table.
 * Operates like DBInputFormat, but instead of using LIMIT and OFFSET to
 * demarcate splits, it tries to generate WHERE clauses which separate the
 * data into roughly equivalent shards.
 */
public class DataDrivenDBInputFormat<T extends DBWritable>
    extends DBInputFormat<T> implements Configurable {

  private static final Log LOG =
      LogFactory.getLog(DataDrivenDBInputFormat.class);

  /**
   * If users are providing their own query, the following string is expected
   * to appear in the WHERE clause, which will be substituted with a pair of
   * conditions on the input to allow input splits to parallelise the import.
   */
  public static final String SUBSTITUTE_TOKEN = "$CONDITIONS";

  /**
   * @return the DBSplitter implementation to use to divide the table/query
   * into InputSplits.
   */
  protected DBSplitter getSplitter(int sqlDataType) {
    switch (sqlDataType) {
    case Types.NUMERIC:
    case Types.DECIMAL:
      return new BigDecimalSplitter();

    case Types.BIT:
    case Types.BOOLEAN:
      return new BooleanSplitter();

    case Types.INTEGER:
    case Types.TINYINT:
    case Types.SMALLINT:
    case Types.BIGINT:
      return new IntegerSplitter();

    case Types.REAL:
    case Types.FLOAT:
    case Types.DOUBLE:
      return new FloatSplitter();

    case Types.CHAR:
    case Types.VARCHAR:
    case Types.LONGVARCHAR:
      return new TextSplitter();

    case Types.DATE:
    case Types.TIME:
    case Types.TIMESTAMP:
      return new DateSplitter();

    default:
      // TODO: Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB,
      // BLOB, ARRAY, STRUCT, REF, DATALINK, and JAVA_OBJECT.
      return null;
    }
  }

  @Override
  /** {@inheritDoc} */
  public List<InputSplit> getSplits(JobContext job) throws IOException {

    int targetNumTasks = ConfigurationHelper.getJobNumMaps(job);
    String boundaryQuery = getDBConf().getInputBoundingQuery();

    // If the user has not forced us to use a boundary query, and we don't
    // need one because there is only one mapper, return a single split that
    // separates nothing. This can be considerably more optimal for a large
    // table with no index.
    if (1 == targetNumTasks
        && (boundaryQuery == null || boundaryQuery.isEmpty())) {
      List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
      singletonSplit.add(new com.cloudera.sqoop.mapreduce.db.
          DataDrivenDBInputFormat.DataDrivenDBInputSplit("1=1", "1=1"));
      return singletonSplit;
    }

    ResultSet results = null;
    Statement statement = null;
    Connection connection = getConnection();
    try {
      statement = connection.createStatement();

      String query = getBoundingValsQuery();
      LOG.info("BoundingValsQuery: " + query);

      results = statement.executeQuery(query);
      results.next();

      // Based on the type of the results, use a different mechanism
      // for interpolating split points (i.e., numeric splits, text splits,
      // dates, etc.)
      int sqlDataType = results.getMetaData().getColumnType(1);
      boolean isSigned = results.getMetaData().isSigned(1);

      // MySQL has an unsigned integer which we need to allocate space for
      if (sqlDataType == Types.INTEGER && !isSigned){
        sqlDataType = Types.BIGINT;
      }

      DBSplitter splitter = getSplitter(sqlDataType);
      if (null == splitter) {
        throw new IOException("Unknown SQL data type: " + sqlDataType);
      }

      return splitter.split(job.getConfiguration(), results,
          getDBConf().getInputOrderBy());
    } catch (SQLException e) {
      throw new IOException(e);
    } finally {
      // More-or-less ignore SQL exceptions here, but log in case we need it.
      try {
        if (null != results) {
          results.close();
        }
      } catch (SQLException se) {
        LOG.debug("SQLException closing resultset: " + se.toString());
      }

      try {
        if (null != statement) {
          statement.close();
        }
      } catch (SQLException se) {
        LOG.debug("SQLException closing statement: " + se.toString());
      }

      try {
        connection.commit();
        closeConnection();
      } catch (SQLException se) {
        LOG.debug("SQLException committing split transaction: "
            + se.toString());
      }
    }
  }

  /**
   * @return a query which returns the minimum and maximum values for
   * the order-by column.
   *
   * The min value should be in the first column, and the
   * max value should be in the second column of the results.
   */
  protected String getBoundingValsQuery() {
    // If the user has provided a query, use that instead.
    String userQuery = getDBConf().getInputBoundingQuery();
    if (null != userQuery) {
      return userQuery;
    }

    // Auto-generate one based on the table name we've been provided with.
    StringBuilder query = new StringBuilder();

    String splitCol = getDBConf().getInputOrderBy();
    query.append("SELECT MIN(").append(splitCol).append("), ");
    query.append("MAX(").append(splitCol).append(") FROM ");
    query.append(getDBConf().getInputTableName());
    String conditions = getDBConf().getInputConditions();
    if (null != conditions) {
      query.append(" WHERE ( " + conditions + " )");
    }

    return query.toString();
  }
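
  // Example of an auto-generated bounding query (hypothetical names, added
  // for illustration): with split column "id", table "users", and input
  // conditions "active = 1", getBoundingValsQuery produces:
  //   SELECT MIN(id), MAX(id) FROM users WHERE ( active = 1 )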

  protected RecordReader<LongWritable, T> createDBRecordReader(
      DBInputSplit split, Configuration conf) throws IOException {

    DBConfiguration dbConf = getDBConf();
    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
    String dbProductName = getDBProductName();

    LOG.debug("Creating db record reader for db product: " + dbProductName);

    try {
      return new DataDrivenDBRecordReader<T>(split, inputClass,
          conf, getConnection(), dbConf, dbConf.getInputConditions(),
          dbConf.getInputFieldNames(), dbConf.getInputTableName(),
          dbProductName);
    } catch (SQLException ex) {
      throw new IOException(ex);
    }
  }

  /*
   * Set the user-defined bounding query to use with a user-defined query.
   * This *must* include the substring "$CONDITIONS"
   * (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause,
   * so that DataDrivenDBInputFormat knows where to insert split clauses.
   * e.g., "SELECT foo FROM mytable WHERE $CONDITIONS"
   * This will be expanded to something like:
   *   SELECT foo FROM mytable WHERE (id > 100) AND (id < 250)
   * inside each split.
   */
  public static void setBoundingQuery(Configuration conf, String query) {
    if (null != query) {
      // If the user's setting a query, warn if they don't allow conditions.
      if (query.indexOf(SUBSTITUTE_TOKEN) == -1) {
        LOG.warn("Could not find " + SUBSTITUTE_TOKEN + " token in query: "
            + query + "; splits may not partition data.");
      }
    }

    conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query);
  }

  // Configuration methods override superclass to ensure that the proper
  // DataDrivenDBInputFormat gets used.

  /** Note that the "orderBy" column is called the "splitBy" in this version.
   * We reuse the same field, but it's not strictly ordering it
   * -- just partitioning the results.
   */
  public static void setInput(Job job,
      Class<? extends DBWritable> inputClass,
      String tableName, String conditions,
      String splitBy, String... fieldNames) {
    DBInputFormat.setInput(job, inputClass, tableName, conditions,
        splitBy, fieldNames);
    job.setInputFormatClass(DataDrivenDBInputFormat.class);
  }

  /** setInput() takes a custom query and a separate "bounding query" to use
      instead of the custom "count query" used by DBInputFormat.
    */
  public static void setInput(Job job,
      Class<? extends DBWritable> inputClass,
      String inputQuery, String inputBoundingQuery) {
    DBInputFormat.setInput(job, inputClass, inputQuery, "");
    job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY,
        inputBoundingQuery);
    job.setInputFormatClass(DataDrivenDBInputFormat.class);
  }

  /**
   * An InputSplit that spans a set of rows.
   */
  public static class DataDrivenDBInputSplit
      extends DBInputFormat.DBInputSplit {

    private String lowerBoundClause;
    private String upperBoundClause;

    /**
     * Default Constructor.
     */
    public DataDrivenDBInputSplit() {
    }

    /**
     * Convenience Constructor.
     * @param lower the string to be put in the WHERE clause to guard
     * on the 'lower' end.
     * @param upper the string to be put in the WHERE clause to guard
     * on the 'upper' end.
     */
    public DataDrivenDBInputSplit(final String lower, final String upper) {
      this.lowerBoundClause = lower;
      this.upperBoundClause = upper;
    }

    /**
     * @return The total row count in this split.
     */
    public long getLength() throws IOException {
      return 0; // unfortunately, we don't know this.
    }

    @Override
    /** {@inheritDoc} */
    public void readFields(DataInput input) throws IOException {
      this.lowerBoundClause = Text.readString(input);
      this.upperBoundClause = Text.readString(input);
    }

    @Override
    /** {@inheritDoc} */
    public void write(DataOutput output) throws IOException {
      Text.writeString(output, this.lowerBoundClause);
      Text.writeString(output, this.upperBoundClause);
    }

    public String getLowerClause() {
      return lowerBoundClause;
    }

    public String getUpperClause() {
      return upperBoundClause;
    }
  }

}
src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java
@@ -0,0 +1,132 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * A RecordReader that reads records from a SQL table,
 * using data-driven WHERE clause splits.
 * Emits LongWritables containing the record number as
 * key and DBWritables as value.
 */
public class DataDrivenDBRecordReader<T extends DBWritable>
    extends DBRecordReader<T> {

  private static final Log LOG =
      LogFactory.getLog(DataDrivenDBRecordReader.class);

  private String dbProductName; // database manufacturer string.

  // CHECKSTYLE:OFF
  // TODO(aaron): Refactor constructor to use fewer arguments.
  /**
   * @param split The InputSplit to read data for
   * @throws SQLException
   */
  public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields, String table,
      String dbProduct) throws SQLException {
    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
    this.dbProductName = dbProduct;
  }
  // CHECKSTYLE:ON

  @Override
  /** {@inheritDoc} */
  public float getProgress() throws IOException {
    return isDone() ? 1.0f : 0.0f;
  }

  /** Returns the query for selecting the records,
   * subclasses can override this for custom behaviour.*/
  protected String getSelectQuery() {
    StringBuilder query = new StringBuilder();
    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
        (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit();
    DBConfiguration dbConf = getDBConf();
    String [] fieldNames = getFieldNames();
    String tableName = getTableName();
    String conditions = getConditions();

    // Build the WHERE clauses associated with the data split first.
    // We need them in both branches of this function.
    StringBuilder conditionClauses = new StringBuilder();
    conditionClauses.append("( ").append(dataSplit.getLowerClause());
    conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause());
    conditionClauses.append(" )");

    if(dbConf.getInputQuery() == null) {
      // We need to generate the entire query.
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length -1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);
      if (!dbProductName.startsWith("ORACLE")) {
        // Seems to be necessary for hsqldb? Oracle explicitly does *not*
        // use this clause.
        query.append(" AS ").append(tableName);
      }
      query.append(" WHERE ");
      if (conditions != null && conditions.length() > 0) {
        // Put the user's conditions first.
        query.append("( ").append(conditions).append(" ) AND ");
      }

      // Now append the conditions associated with our split.
      query.append(conditionClauses.toString());

    } else {
      // User provided the query. We replace the special token with
      // our WHERE clause.
      String inputQuery = dbConf.getInputQuery();
      if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
        LOG.error("Could not find the clause substitution token "
            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: ["
            + inputQuery + "]. Parallel splits may not work correctly.");
      }

      query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
          conditionClauses.toString()));
    }

    LOG.debug("Using query: " + query.toString());

    return query.toString();
  }
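
  // Example of a generated query (hypothetical names, added for
  // illustration): for a non-Oracle product, table "users", fields
  // {"id", "name"}, no user conditions, and a split whose lower clause is
  // "id >= 1" and upper clause is "id < 50", this yields:
  //   SELECT id, name FROM users AS users WHERE ( id >= 1 ) AND ( id < 50 )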
}
src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java (Normal file, 183 lines)
@@ -0,0 +1,183 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;

/**
 * Implement DBSplitter over date/time values.
 * Make use of logic from IntegerSplitter, since date/time are just longs
 * in Java.
 */
public class DateSplitter extends IntegerSplitter {

  private static final Log LOG = LogFactory.getLog(DateSplitter.class);

  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    long minVal;
    long maxVal;

    int sqlDataType = results.getMetaData().getColumnType(1);
    minVal = resultSetColToLong(results, 1, sqlDataType);
    maxVal = resultSetColToLong(results, 2, sqlDataType);

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
    if (numSplits < 1) {
      numSplits = 1;
    }

    if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) {
      // The range of acceptable dates is NULL to NULL. Just create a single
      // split.
      List<InputSplit> splits = new ArrayList<InputSplit>();
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    // Gather the split point integers
    List<Long> splitPoints = split(numSplits, minVal, maxVal);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Turn the split points into a set of intervals.
    long start = splitPoints.get(0);
    Date startDate = longToDate(start, sqlDataType);
    if (sqlDataType == Types.TIMESTAMP) {
      // The lower bound's nanos value needs to match the actual lower-bound
      // nanos.
      try {
        ((java.sql.Timestamp) startDate).setNanos(
            results.getTimestamp(1).getNanos());
      } catch (NullPointerException npe) {
        // If the lower bound was NULL, we'll get an NPE; just ignore it and
        // don't set nanos.
      }
    }

    for (int i = 1; i < splitPoints.size(); i++) {
      long end = splitPoints.get(i);
      Date endDate = longToDate(end, sqlDataType);

      if (i == splitPoints.size() - 1) {
        if (sqlDataType == Types.TIMESTAMP) {
          // The upper bound's nanos value needs to match the actual
          // upper-bound nanos.
          try {
            ((java.sql.Timestamp) endDate).setNanos(
                results.getTimestamp(2).getNanos());
          } catch (NullPointerException npe) {
            // If the upper bound was NULL, we'll get an NPE; just ignore it
            // and don't set nanos.
          }
        }
        // This is the last one; use a closed interval.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + dateToString(startDate),
            colName + " <= " + dateToString(endDate)));
      } else {
        // Normal open-interval case.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + dateToString(startDate),
            highClausePrefix + dateToString(endDate)));
      }

      start = end;
      startDate = endDate;
    }

    if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) {
      // Add an extra split to handle the null case that we saw.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
    }

    return splits;
  }

  /**
      Retrieve the value from the column in a type-appropriate manner and
      return its timestamp since the epoch. If the column is null, then return
      Long.MIN_VALUE. This will cause a special split to be generated for the
      NULL case, but may also cause poorly-balanced splits if most of the
      actual dates are positive time since the epoch, etc.
   */
  private long resultSetColToLong(ResultSet rs, int colNum, int sqlDataType)
      throws SQLException {
    try {
      switch (sqlDataType) {
      case Types.DATE:
        return rs.getDate(colNum).getTime();
      case Types.TIME:
        return rs.getTime(colNum).getTime();
      case Types.TIMESTAMP:
        return rs.getTimestamp(colNum).getTime();
      default:
        throw new SQLException("Not a date-type field");
      }
    } catch (NullPointerException npe) {
      // null column. return minimum long value.
      LOG.warn("Encountered a NULL date in the split column. "
          + "Splits may be poorly balanced.");
      return Long.MIN_VALUE;
    }
  }

  /** Parse the long-valued timestamp into the appropriate SQL date type. */
  private Date longToDate(long val, int sqlDataType) {
    switch (sqlDataType) {
    case Types.DATE:
      return new java.sql.Date(val);
    case Types.TIME:
      return new java.sql.Time(val);
    case Types.TIMESTAMP:
      return new java.sql.Timestamp(val);
    default: // Shouldn't ever hit this case.
      return null;
    }
  }

  /**
   * Given a Date 'd', format it as a string for use in a SQL date
   * comparison operation.
   * @param d the date to format.
   * @return the string representing this date in SQL with any appropriate
   * quotation characters, etc.
   */
  protected String dateToString(Date d) {
    return "'" + d.toString() + "'";
  }
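
  // Example (added for illustration): for a java.sql.Date representing
  // 2011-10-28, Date.toString() yields "2011-10-28", so dateToString returns
  // the quoted SQL literal '2011-10-28'.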
}
src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java (Normal file, 99 lines)
@@ -0,0 +1,99 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

public class FloatSplitter implements DBSplitter {

  private static final Log LOG = LogFactory.getLog(FloatSplitter.class);

  private static final double MIN_INCREMENT = 10000 * Double.MIN_VALUE;

  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    LOG.warn("Generating splits for a floating-point index column. Due to the");
    LOG.warn("imprecise representation of floating-point values in Java, this");
    LOG.warn("may result in an incomplete import.");
    LOG.warn("You are strongly encouraged to choose an integral split column.");

    List<InputSplit> splits = new ArrayList<InputSplit>();

    if (results.getString(1) == null && results.getString(2) == null) {
      // Range is null to null. Return a null split accordingly.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    double minVal = results.getDouble(1);
    double maxVal = results.getDouble(2);

    // Use this as a hint. May need an extra task if the size doesn't
    // divide cleanly.
    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
    double splitSize = (maxVal - minVal) / (double) numSplits;

    if (splitSize < MIN_INCREMENT) {
      splitSize = MIN_INCREMENT;
    }

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    double curLower = minVal;
    double curUpper = curLower + splitSize;

    while (curUpper < maxVal) {
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          lowClausePrefix + Double.toString(curLower),
          highClausePrefix + Double.toString(curUpper)));

      curLower = curUpper;
      curUpper += splitSize;
    }

    // Catch any overage and create the closed interval for the last split.
    if (curLower <= maxVal || splits.size() == 1) {
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          lowClausePrefix + Double.toString(curUpper),
          colName + " <= " + Double.toString(maxVal)));
    }

    if (results.getString(1) == null || results.getString(2) == null) {
      // At least one extrema is null; add a null split.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
    }

    return splits;
  }
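
  // Worked trace (added for illustration, values followed through the loop
  // above): for minVal 0.0, maxVal 10.0 and four maps, splitSize is 2.5 and
  // the loop emits the open intervals [0.0, 2.5), [2.5, 5.0) and [5.0, 7.5);
  // the closing branch then emits the final split using curUpper (10.0) as
  // its lower bound, i.e. the closed interval [10.0, 10.0].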
}
src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java (Normal file, 148 lines)
@@ -0,0 +1,148 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * Implement DBSplitter over integer values.
 */
public class IntegerSplitter implements DBSplitter {
  public static final Log LOG =
      LogFactory.getLog(IntegerSplitter.class.getName());

  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    long minVal = results.getLong(1);
    long maxVal = results.getLong(2);

    String lowClausePrefix = colName + " >= ";
    String highClausePrefix = colName + " < ";

    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
    if (numSplits < 1) {
      numSplits = 1;
    }

    if (results.getString(1) == null && results.getString(2) == null) {
      // Range is null to null. Return a null split accordingly.
      List<InputSplit> splits = new ArrayList<InputSplit>();
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    // Get all the split points together.
    List<Long> splitPoints = split(numSplits, minVal, maxVal);
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("Splits: [%,28d to %,28d] into %d parts",
          minVal, maxVal, numSplits));
      for (int i = 0; i < splitPoints.size(); i++) {
        LOG.debug(String.format("%,28d", splitPoints.get(i)));
      }
    }
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Turn the split points into a set of intervals.
    long start = splitPoints.get(0);
    for (int i = 1; i < splitPoints.size(); i++) {
      long end = splitPoints.get(i);

      if (i == splitPoints.size() - 1) {
        // This is the last one; use a closed interval.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + Long.toString(start),
            colName + " <= " + Long.toString(end)));
      } else {
        // Normal open-interval case.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + Long.toString(start),
            highClausePrefix + Long.toString(end)));
      }

      start = end;
    }

    if (results.getString(1) == null || results.getString(2) == null) {
      // At least one extrema is null; add a null split.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
    }

    return splits;
  }

  /**
   * Returns a list of longs one element longer than the list of input splits.
   * This represents the boundaries between input splits.
   * All splits are open on the top end, except the last one.
   *
   * So the list [0, 5, 8, 12, 18] would represent splits capturing the
   * intervals:
   *
   * [0, 5)
   * [5, 8)
   * [8, 12)
   * [12, 18] note the closed interval for the last split.
   */
  protected List<Long> split(long numSplits, long minVal, long maxVal)
      throws SQLException {

    List<Long> splits = new ArrayList<Long>();

    // We take the min-max interval and divide by the numSplits and also
    // calculate a remainder. Because of integer division rules, numsplits *
    // splitSize + minVal will always be <= maxVal. We then use the remainder
    // and add 1 if the current split index is less than the remainder.
    // This is guaranteed to add up to remainder and not surpass the value.
    long splitSize = (maxVal - minVal) / numSplits;
    long remainder = (maxVal - minVal) % numSplits;
    long curVal = minVal;

    // This will honor numSplits as long as split size > 0. If split size is
    // 0, it will have remainder splits.
    for (int i = 0; i <= numSplits; i++) {
      splits.add(curVal);
      if (curVal >= maxVal) {
        break;
      }
      curVal += splitSize;
      curVal += (i < remainder) ? 1 : 0;
    }

    if (splits.size() == 1) {
      // make a valid singleton split
      splits.add(maxVal);
    }

    return splits;
  }
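
  // Worked example (added for illustration, values traced from the loop
  // above): split(4, 0, 18) gives splitSize = 4 and remainder = 2, producing
  // the points [0, 5, 10, 14, 18], i.e. the intervals
  // [0, 5), [5, 10), [10, 14), [14, 18].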
}
src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java (Normal file, 152 lines)
@@ -0,0 +1,152 @@
|
|||||||
|
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.IOException;
import java.lang.reflect.Method;
import java.sql.Connection;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DBRecordReader;

/**
 * A RecordReader that reads records from an Oracle SQL table.
 */
public class OracleDBRecordReader<T extends DBWritable>
    extends DBRecordReader<T> {

  /** Configuration key to set to a timezone string. */
  public static final String SESSION_TIMEZONE_KEY = "oracle.sessionTimeZone";

  private static final Log LOG = LogFactory.getLog(OracleDBRecordReader.class);

  // CHECKSTYLE:OFF
  public OracleDBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields,
      String table) throws SQLException {
    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
    setSessionTimeZone(conf, conn);
  }
  // CHECKSTYLE:ON
  /** Returns the query for selecting the records from an Oracle DB. */
  protected String getSelectQuery() {
    StringBuilder query = new StringBuilder();
    DBConfiguration dbConf = getDBConf();
    String conditions = getConditions();
    String tableName = getTableName();
    String [] fieldNames = getFieldNames();

    // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
    if (dbConf.getInputQuery() == null) {
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length - 1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);
      if (conditions != null && conditions.length() > 0) {
        query.append(" WHERE ").append(conditions);
      }
      String orderBy = dbConf.getInputOrderBy();
      if (orderBy != null && orderBy.length() > 0) {
        query.append(" ORDER BY ").append(orderBy);
      }
    } else {
      // PREBUILT QUERY
      query.append(dbConf.getInputQuery());
    }

    try {
      DBInputFormat.DBInputSplit split = getSplit();
      if (split.getLength() > 0 && split.getStart() > 0) {
        String querystring = query.toString();

        query = new StringBuilder();
        query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
        query.append(querystring);
        query.append(" ) a WHERE rownum <= ").append(split.getStart());
        query.append(" + ").append(split.getLength());
        query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
      }
    } catch (IOException ex) {
      // ignore, will not throw.
    }

    return query.toString();
  }
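To make the ROWNUM wrapping concrete, assume a hypothetical inner query and a split with start 20 and length 10 (all three are illustrative values, not taken from the patch); the builder above nests the original statement twice so that only rows 20 through 30 survive. A minimal sketch:

public class RownumWrapDemo {
  public static void main(String[] args) {
    // Hypothetical values standing in for query.toString(),
    // split.getStart() and split.getLength().
    String innerQuery = "SELECT id, name FROM employees";
    long start = 20;
    long length = 10;

    StringBuilder query = new StringBuilder();
    query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
    query.append(innerQuery);
    query.append(" ) a WHERE rownum <= ").append(start);
    query.append(" + ").append(length);
    query.append(" ) WHERE dbif_rno >= ").append(start);

    // SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( SELECT id, name
    // FROM employees ) a WHERE rownum <= 20 + 10 ) WHERE dbif_rno >= 20
    System.out.println(query);
  }
}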
  /**
   * Set session time zone.
   * @param conf The current configuration;
   * the 'oracle.sessionTimeZone' property is read from here.
   * @param conn The connection to alter the timezone properties of.
   */
  public static void setSessionTimeZone(Configuration conf,
      Connection conn) throws SQLException {
    // Need to use reflection to call the method setSessionTimeZone on
    // the OracleConnection class because Oracle-specific Java libraries
    // are not accessible in this context.
    Method method;
    try {
      method = conn.getClass().getMethod(
          "setSessionTimeZone", new Class [] {String.class});
    } catch (Exception ex) {
      LOG.error("Could not find method setSessionTimeZone in "
          + conn.getClass().getName(), ex);
      // rethrow SQLException
      throw new SQLException(ex);
    }

    // Need to set the time zone in order for Java
    // to correctly access the column "TIMESTAMP WITH LOCAL TIME ZONE".
    // We can't easily get the correct Oracle-specific timezone string
    // from Java; just let the user set the timezone in a property.
    String clientTimeZone = conf.get(SESSION_TIMEZONE_KEY, "GMT");
    try {
      method.setAccessible(true);
      method.invoke(conn, clientTimeZone);
      LOG.info("Time zone has been set to " + clientTimeZone);
    } catch (Exception ex) {
      LOG.warn("Time zone " + clientTimeZone
          + " could not be set on Oracle database.");
      LOG.warn("Setting default time zone: GMT");
      try {
        // "GMT" timezone is guaranteed to exist.
        method.invoke(conn, "GMT");
      } catch (Exception ex2) {
        LOG.error("Could not set time zone for oracle connection", ex2);
        // rethrow SQLException
        throw new SQLException(ex);
      }
    }
  }
}
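The one user-facing knob here is the SESSION_TIMEZONE_KEY property, which setSessionTimeZone() reads with a "GMT" default. A minimal usage sketch; the zone string is an arbitrary example:

import org.apache.hadoop.conf.Configuration;

import org.apache.sqoop.mapreduce.db.OracleDBRecordReader;

public class SessionTimeZoneDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Consulted by setSessionTimeZone() above; falls back to "GMT" if unset.
    conf.set(OracleDBRecordReader.SESSION_TIMEZONE_KEY, "America/Los_Angeles");
  }
}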
src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java (new file, 77 lines)
@ -0,0 +1,77 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.io.IOException;
import java.sql.SQLException;
import java.sql.Types;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import com.cloudera.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader;
import com.cloudera.sqoop.mapreduce.db.OracleDateSplitter;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit;

/**
 * An InputFormat that reads input data from an SQL table in an Oracle db.
 */
public class OracleDataDrivenDBInputFormat<T extends DBWritable>
    extends DataDrivenDBInputFormat<T> implements Configurable {

  /**
   * @return the DBSplitter implementation to use to divide the table/query
   * into InputSplits.
   */
  @Override
  protected DBSplitter getSplitter(int sqlDataType) {
    switch (sqlDataType) {
    case Types.DATE:
    case Types.TIME:
    case Types.TIMESTAMP:
      return new OracleDateSplitter();

    default:
      return super.getSplitter(sqlDataType);
    }
  }

  @Override
  protected RecordReader<LongWritable, T> createDBRecordReader(
      DBInputSplit split, Configuration conf) throws IOException {

    DBConfiguration dbConf = getDBConf();
    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

    try {
      // Use Oracle-specific db reader
      return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
          conf, getConnection(), dbConf, dbConf.getInputConditions(),
          dbConf.getInputFieldNames(), dbConf.getInputTableName());
    } catch (SQLException ex) {
      throw new IOException(ex);
    }
  }
}
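The type-based routing above can be probed with a small sketch; the subclass below is hypothetical and exists only because getSplitter() is protected:

package org.apache.sqoop.mapreduce.db;

import java.sql.Types;

import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBSplitter;

public class SplitterRoutingDemo {
  // Hypothetical probe subclass to reach the protected getSplitter().
  static class Probe<T extends DBWritable>
      extends OracleDataDrivenDBInputFormat<T> {
    DBSplitter probe(int sqlDataType) {
      return getSplitter(sqlDataType);
    }
  }

  public static void main(String[] args) {
    Probe<DBWritable> probe = new Probe<DBWritable>();
    // Date-like columns route to the Oracle-specific splitter;
    // everything else falls through to the superclass's default choice.
    System.out.println(probe.probe(Types.TIMESTAMP).getClass().getSimpleName());
    // prints: OracleDateSplitter
  }
}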
src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java (new file, 53 lines)
@ -0,0 +1,53 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.Connection;
import java.sql.SQLException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader;
import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader;

/**
 * A RecordReader that reads records from an Oracle table
 * via DataDrivenDBRecordReader.
 */
public class OracleDataDrivenDBRecordReader<T extends DBWritable>
    extends DataDrivenDBRecordReader<T> {

  // CHECKSTYLE:OFF
  // TODO(aaron): Enable checkstyle after refactoring DBRecordReader c'tor.
  public OracleDataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields,
      String table) throws SQLException {

    super(split, inputClass, conf, conn, dbConfig, cond, fields, table,
        "ORACLE");

    // Must initialize the tz used by the connection for Oracle.
    OracleDBRecordReader.setSessionTimeZone(conf, conn);
  }
  // CHECKSTYLE:ON
}
src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java (new file, 38 lines)
@ -0,0 +1,38 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.util.Date;

import com.cloudera.sqoop.mapreduce.db.DateSplitter;

/**
 * Implement DBSplitter over date/time values returned by an Oracle db.
 * Make use of logic from DateSplitter, since this just needs to use
 * some Oracle-specific functions on the formatting end when generating
 * InputSplits.
 */
public class OracleDateSplitter extends DateSplitter {

  @SuppressWarnings("unchecked")
  @Override
  protected String dateToString(Date d) {
    // Oracle Date objects are always actually Timestamps
    return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
  }
}
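For a concrete literal: java.sql.Timestamp.toString() emits exactly the shape the 'YYYY-MM-DD HH24:MI:SS.FF' mask expects. A minimal sketch with an assumed timestamp value, mirroring dateToString() above:

import java.sql.Timestamp;

public class OracleDateLiteralDemo {
  public static void main(String[] args) {
    // Assumed example value; any java.sql.Timestamp works the same way.
    Timestamp ts = Timestamp.valueOf("2011-10-28 12:34:56.789");
    String literal =
        "TO_TIMESTAMP('" + ts.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
    // TO_TIMESTAMP('2011-10-28 12:34:56.789', 'YYYY-MM-DD HH24:MI:SS.FF')
    System.out.println(literal);
  }
}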
src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java (new file, 228 lines)
@ -0,0 +1,228 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.math.BigDecimal;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;

/**
 * Implement DBSplitter over text strings.
 */
public class TextSplitter extends BigDecimalSplitter {

  private static final Log LOG = LogFactory.getLog(TextSplitter.class);

  /**
   * This method needs to determine the splits between two user-provided
   * strings. In the case where the user's strings are 'A' and 'Z', this is
   * not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26
   * splits for strings beginning with each letter, etc.
   *
   * If a user has provided us with the strings "Ham" and "Haze", however, we
   * need to create splits that differ in the third letter.
   *
   * The algorithm used is as follows:
   * Since there are 2**16 unicode characters, we interpret characters as
   * digits in base 65536. Given a string 's' containing characters s_0, s_1
   * .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in
   * base 65536. Having mapped the low and high strings into floating-point
   * values, we then use the BigDecimalSplitter to establish the even split
   * points, then map the resulting floating point values back into strings.
   */
  public List<InputSplit> split(Configuration conf, ResultSet results,
      String colName) throws SQLException {

    LOG.warn("Generating splits for a textual index column.");
    LOG.warn("If your database sorts in a case-insensitive order, "
        + "this may result in a partial import or duplicate records.");
    LOG.warn("You are strongly encouraged to choose an integral split column.");

    String minString = results.getString(1);
    String maxString = results.getString(2);

    boolean minIsNull = false;

    // If the min value is null, switch it to an empty string instead for
    // purposes of interpolation. Then add [null, null] as a special case
    // split.
    if (null == minString) {
      minString = "";
      minIsNull = true;
    }

    if (null == maxString) {
      // If the max string is null, then the min string has to be null too.
      // Just return a special split for this case.
      List<InputSplit> splits = new ArrayList<InputSplit>();
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
      return splits;
    }

    // Use this as a hint. May need an extra task if the size doesn't
    // divide cleanly.
    int numSplits = ConfigurationHelper.getConfNumMaps(conf);

    String lowClausePrefix = colName + " >= '";
    String highClausePrefix = colName + " < '";

    // If there is a common prefix between minString and maxString, establish
    // it and pull it out of minString and maxString.
    int maxPrefixLen = Math.min(minString.length(), maxString.length());
    int sharedLen;
    for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
      char c1 = minString.charAt(sharedLen);
      char c2 = maxString.charAt(sharedLen);
      if (c1 != c2) {
        break;
      }
    }

    // The common prefix has length 'sharedLen'. Extract it from both.
    String commonPrefix = minString.substring(0, sharedLen);
    minString = minString.substring(sharedLen);
    maxString = maxString.substring(sharedLen);

    List<String> splitStrings = split(numSplits, minString, maxString,
        commonPrefix);
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // Convert the list of split point strings into an actual set of
    // InputSplits.
    String start = splitStrings.get(0);
    for (int i = 1; i < splitStrings.size(); i++) {
      String end = splitStrings.get(i);

      if (i == splitStrings.size() - 1) {
        // This is the last one; use a closed interval.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + start + "'", colName + " <= '" + end + "'"));
      } else {
        // Normal open-interval case.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            lowClausePrefix + start + "'", highClausePrefix + end + "'"));
      }

      start = end;
    }

    if (minIsNull) {
      // Add the special null split at the end.
      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
          colName + " IS NULL", colName + " IS NULL"));
    }

    return splits;
  }
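The end-to-end behaviour is easiest to see on pure ASCII. A minimal sketch, placed in the same package so it can call the protected split(); it mirrors the testAlphabetSplit case further down:

package org.apache.sqoop.mapreduce.db;

import java.sql.SQLException;
import java.util.List;

public class TextSplitDemo {
  public static void main(String[] args) throws SQLException {
    TextSplitter splitter = new TextSplitter();
    // 25 requested splits over 'A'..'Z' with no common prefix.
    List<String> boundaries = splitter.split(25, "A", "Z", "");
    // Prints 26 boundary strings: [A, B, C, ..., Y, Z]
    System.out.println(boundaries);
  }
}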
  protected List<String> split(int numSplits, String minString,
      String maxString, String commonPrefix) throws SQLException {

    BigDecimal minVal = stringToBigDecimal(minString);
    BigDecimal maxVal = stringToBigDecimal(maxString);

    List<BigDecimal> splitPoints = split(
        new BigDecimal(numSplits), minVal, maxVal);
    List<String> splitStrings = new ArrayList<String>();

    // Convert the BigDecimal splitPoints into their string representations.
    for (BigDecimal bd : splitPoints) {
      splitStrings.add(commonPrefix + bigDecimalToString(bd));
    }

    // Make sure that our user-specified boundaries are the first and last
    // entries in the array.
    if (splitStrings.size() == 0
        || !splitStrings.get(0).equals(commonPrefix + minString)) {
      splitStrings.add(0, commonPrefix + minString);
    }
    if (splitStrings.size() == 1
        || !splitStrings.get(splitStrings.size() - 1).equals(
            commonPrefix + maxString)) {
      splitStrings.add(commonPrefix + maxString);
    }

    return splitStrings;
  }
  private static final BigDecimal ONE_PLACE = new BigDecimal(65536);

  // Maximum number of characters to convert. This is to prevent rounding
  // errors or repeating fractions near the very bottom from getting out of
  // control. Note that this still gives us a huge number of possible splits.
  private static final int MAX_CHARS = 8;

  /**
   * Return a BigDecimal representation of string 'str' suitable for use in a
   * numerically-sorting order.
   */
  protected BigDecimal stringToBigDecimal(String str) {
    // Start with 1/65536 to compute the first digit.
    BigDecimal curPlace = ONE_PLACE;
    BigDecimal result = BigDecimal.ZERO;

    int len = Math.min(str.length(), MAX_CHARS);

    for (int i = 0; i < len; i++) {
      int codePoint = str.codePointAt(i);
      result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
      // advance to the next less significant place. e.g., 1/(65536^2) for
      // the second char.
      curPlace = curPlace.multiply(ONE_PLACE);
    }

    return result;
  }
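A worked round trip for the single character 'A' (code point 65): the encoding is 65/65536 = 0.0009918212890625, and one multiplication by 65536 recovers the code point, which is what testConvertA further down exercises. A minimal sketch of the arithmetic:

import java.math.BigDecimal;

public class Base65536Demo {
  public static void main(String[] args) {
    BigDecimal onePlace = new BigDecimal(65536);

    // Forward: "A" -> 65 / 65536 (terminates, since 65536 is a power of 2).
    BigDecimal encoded = new BigDecimal((int) 'A').divide(onePlace);

    // Backward: multiply by 65536; the integer part is the code point.
    int codePoint = encoded.multiply(onePlace).intValue();
    System.out.println((char) codePoint);  // prints: A
  }
}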
  /**
   * Return the string encoded in a BigDecimal.
   * Repeatedly multiply the input value by 65536; the integer portion after
   * such a multiplication represents a single character in base 65536.
   * Convert that back into a char and create a string out of these until we
   * have no data left.
   */
  protected String bigDecimalToString(BigDecimal bd) {
    BigDecimal cur = bd.stripTrailingZeros();
    StringBuilder sb = new StringBuilder();

    for (int numConverted = 0; numConverted < MAX_CHARS; numConverted++) {
      cur = cur.multiply(ONE_PLACE);
      int curCodePoint = cur.intValue();
      if (0 == curCodePoint) {
        break;
      }

      cur = cur.subtract(new BigDecimal(curCodePoint));
      sb.append(Character.toChars(curCodePoint));
    }

    return sb.toString();
  }
}
@ -1,6 +1,4 @@
/**
/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * distributed with this work for additional information
@ -17,106 +15,20 @@
 * See the License for the specific language governing permissions and
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * limitations under the License.
 */
 */

package com.cloudera.sqoop.mapreduce.db;
package com.cloudera.sqoop.mapreduce.db;

import java.sql.SQLException;
import java.util.List;

import junit.framework.TestCase;

/**
/**
 * Test that the IntegerSplitter generates sane splits.
 * Test that the IntegerSplitter generates sane splits.
 *
 * @deprecated use org.apache.sqoop.mapreduce.db.TestIntegerSplitter instead.
 * @see org.apache.sqoop.mapreduce.db.TestIntegerSplitter
 */
 */
public class TestIntegerSplitter extends TestCase {
public class TestIntegerSplitter
  private long [] toLongArray(List<Long> in) {
    extends org.apache.sqoop.mapreduce.db.TestIntegerSplitter {
    long [] out = new long[in.size()];
    for (int i = 0; i < in.size(); i++) {
      out[i] = in.get(i).longValue();
    }

    return out;
  public void testDummy() {
  }
    // Nothing to do

  public String formatLongArray(long [] ar) {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    boolean first = true;
    for (long val : ar) {
      if (!first) {
        sb.append(", ");
      }

      sb.append(Long.toString(val));
      first = false;
    }

    sb.append("]");
    return sb.toString();
  }

  public void assertLongArrayEquals(long [] expected, long [] actual) {
    for (int i = 0; i < expected.length; i++) {
      try {
        assertEquals("Failure at position " + i + "; got " + actual[i]
            + " instead of " + expected[i]
            + "; actual array is " + formatLongArray(actual),
            expected[i], actual[i]);
      } catch (ArrayIndexOutOfBoundsException oob) {
        fail("Expected array with " + expected.length
            + " elements; got " + actual.length
            + ". Actual array is " + formatLongArray(actual));
      }
    }

    if (actual.length > expected.length) {
      fail("Actual array has " + actual.length
          + " elements; expected " + expected.length
          + ". Actual array is " + formatLongArray(actual));
    }
  }

  public void testEvenSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(10, 0, 100);
    long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testOddSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(10, 0, 95);
    long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testSingletonSplit() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(1, 5, 5);
    long [] expected = { 5, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testSingletonSplit2() throws SQLException {
    // Same test, but overly-high numSplits
    List<Long> splits = new IntegerSplitter().split(5, 5, 5);
    long [] expected = { 5, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testTooManySplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(5, 3, 5);
    long [] expected = { 3, 4, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  /**
   * This test verifies that overflows do not happen due to the splitting
   * algorithm.
   *
   * @throws SQLException
   */
  public void testBigIntSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(4, 14,
        7863696997872966707L);
    assertEquals(splits.size(), 5);
  }
  }
}
}
@ -1,6 +1,4 @@
/**
/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * distributed with this work for additional information
@ -17,123 +15,20 @@
 * See the License for the specific language governing permissions and
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * limitations under the License.
 */
 */

package com.cloudera.sqoop.mapreduce.db;
package com.cloudera.sqoop.mapreduce.db;

import java.math.BigDecimal;
import java.sql.SQLException;
import java.util.List;

import junit.framework.TestCase;

/**
/**
 * Test that the TextSplitter implementation creates a sane set of splits.
 * Test that the TextSplitter implementation creates a sane set of splits.
 * @deprecated use org.apache.sqoop.mapreduce.db.TestTextSplitter instead.
 * @see org.apache.sqoop.mapreduce.db.TestTextSplitter
 */
 */
public class TestTextSplitter extends TestCase {
public class TestTextSplitter extends
    org.apache.sqoop.mapreduce.db.TestTextSplitter {

  public String formatArray(Object [] ar) {
  public void testDummy() {
    StringBuilder sb = new StringBuilder();
    // Nothing to do
    sb.append("[");
    boolean first = true;
    for (Object val : ar) {
      if (!first) {
        sb.append(", ");
      }

      sb.append(val.toString());
      first = false;
    }

    sb.append("]");
    return sb.toString();
  }
}

  public void assertArrayEquals(Object [] expected, Object [] actual) {
    for (int i = 0; i < expected.length; i++) {
      try {
        assertEquals("Failure at position " + i + "; got " + actual[i]
            + " instead of " + expected[i]
            + "; actual array is " + formatArray(actual),
            expected[i], actual[i]);
      } catch (ArrayIndexOutOfBoundsException oob) {
        fail("Expected array with " + expected.length
            + " elements; got " + actual.length
            + ". Actual array is " + formatArray(actual));
      }
    }

    if (actual.length > expected.length) {
      fail("Actual array has " + actual.length
          + " elements; expected " + expected.length
          + ". Actual array is " + formatArray(actual));
    }
  }

  public void testStringConvertEmpty() {
    TextSplitter splitter = new TextSplitter();
    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
    assertEquals(BigDecimal.ZERO, emptyBigDec);
  }

  public void testBigDecConvertEmpty() {
    TextSplitter splitter = new TextSplitter();
    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
    assertEquals("", emptyStr);
  }

  public void testConvertA() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
    assertEquals("A", out);
  }

  public void testConvertZ() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
    assertEquals("Z", out);
  }

  public void testConvertThreeChars() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("abc"));
    assertEquals("abc", out);
  }

  public void testConvertStr() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("big str"));
    assertEquals("big str", out);
  }

  public void testConvertChomped() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
    assertEquals("AVeryLon", out);
  }

  public void testAlphabetSplit() throws SQLException {
    // This should give us 25 splits, one per letter.
    TextSplitter splitter = new TextSplitter();
    List<String> splits = splitter.split(25, "A", "Z", "");
    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
        "V", "W", "X", "Y", "Z", };
    assertArrayEquals(expected, splits.toArray(new String [0]));
  }

  public void testCommonPrefix() throws SQLException {
    // Splits between 'Hand' and 'Hardy'
    TextSplitter splitter = new TextSplitter();
    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
    // Don't check for exact values in the middle, because the splitter
    // generates some ugly Unicode-isms. But do check that we get multiple
    // splits and that it starts and ends on the correct points.
    assertEquals("Hand", splits.get(0));
    assertEquals("Hardy", splits.get(splits.size() - 1));
    assertEquals(6, splits.size());
  }
}
}
src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java (new file, 120 lines)
@ -0,0 +1,120 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.sql.SQLException;
import java.util.List;

import junit.framework.TestCase;

import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;

/**
 * Test that the IntegerSplitter generates sane splits.
 */
public class TestIntegerSplitter extends TestCase {
  private long [] toLongArray(List<Long> in) {
    long [] out = new long[in.size()];
    for (int i = 0; i < in.size(); i++) {
      out[i] = in.get(i).longValue();
    }

    return out;
  }

  public String formatLongArray(long [] ar) {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    boolean first = true;
    for (long val : ar) {
      if (!first) {
        sb.append(", ");
      }

      sb.append(Long.toString(val));
      first = false;
    }

    sb.append("]");
    return sb.toString();
  }

  public void assertLongArrayEquals(long [] expected, long [] actual) {
    for (int i = 0; i < expected.length; i++) {
      try {
        assertEquals("Failure at position " + i + "; got " + actual[i]
            + " instead of " + expected[i]
            + "; actual array is " + formatLongArray(actual),
            expected[i], actual[i]);
      } catch (ArrayIndexOutOfBoundsException oob) {
        fail("Expected array with " + expected.length
            + " elements; got " + actual.length
            + ". Actual array is " + formatLongArray(actual));
      }
    }

    if (actual.length > expected.length) {
      fail("Actual array has " + actual.length
          + " elements; expected " + expected.length
          + ". Actual array is " + formatLongArray(actual));
    }
  }

  public void testEvenSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(10, 0, 100);
    long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testOddSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(10, 0, 95);
    long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testSingletonSplit() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(1, 5, 5);
    long [] expected = { 5, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testSingletonSplit2() throws SQLException {
    // Same test, but overly-high numSplits
    List<Long> splits = new IntegerSplitter().split(5, 5, 5);
    long [] expected = { 5, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  public void testTooManySplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(5, 3, 5);
    long [] expected = { 3, 4, 5 };
    assertLongArrayEquals(expected, toLongArray(splits));
  }

  /**
   * This test verifies that overflows do not happen due to the splitting
   * algorithm.
   *
   * @throws SQLException
   */
  public void testBigIntSplits() throws SQLException {
    List<Long> splits = new IntegerSplitter().split(4, 14,
        7863696997872966707L);
    assertEquals(splits.size(), 5);
  }
}
src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java (new file, 134 lines)
@ -0,0 +1,134 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.db;

import java.math.BigDecimal;
import java.sql.SQLException;
import java.util.List;

import com.cloudera.sqoop.mapreduce.db.TextSplitter;

import junit.framework.TestCase;

public class TestTextSplitter extends TestCase {

  public String formatArray(Object [] ar) {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    boolean first = true;
    for (Object val : ar) {
      if (!first) {
        sb.append(", ");
      }

      sb.append(val.toString());
      first = false;
    }

    sb.append("]");
    return sb.toString();
  }

  public void assertArrayEquals(Object [] expected, Object [] actual) {
    for (int i = 0; i < expected.length; i++) {
      try {
        assertEquals("Failure at position " + i + "; got " + actual[i]
            + " instead of " + expected[i]
            + "; actual array is " + formatArray(actual),
            expected[i], actual[i]);
      } catch (ArrayIndexOutOfBoundsException oob) {
        fail("Expected array with " + expected.length
            + " elements; got " + actual.length
            + ". Actual array is " + formatArray(actual));
      }
    }

    if (actual.length > expected.length) {
      fail("Actual array has " + actual.length
          + " elements; expected " + expected.length
          + ". Actual array is " + formatArray(actual));
    }
  }

  public void testStringConvertEmpty() {
    TextSplitter splitter = new TextSplitter();
    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
    assertEquals(BigDecimal.ZERO, emptyBigDec);
  }

  public void testBigDecConvertEmpty() {
    TextSplitter splitter = new TextSplitter();
    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
    assertEquals("", emptyStr);
  }

  public void testConvertA() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
    assertEquals("A", out);
  }

  public void testConvertZ() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
    assertEquals("Z", out);
  }

  public void testConvertThreeChars() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("abc"));
    assertEquals("abc", out);
  }

  public void testConvertStr() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("big str"));
    assertEquals("big str", out);
  }

  public void testConvertChomped() {
    TextSplitter splitter = new TextSplitter();
    String out = splitter.bigDecimalToString(
        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
    assertEquals("AVeryLon", out);
  }

  public void testAlphabetSplit() throws SQLException {
    // This should give us 25 splits, one per letter.
    TextSplitter splitter = new TextSplitter();
    List<String> splits = splitter.split(25, "A", "Z", "");
    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
        "V", "W", "X", "Y", "Z", };
    assertArrayEquals(expected, splits.toArray(new String [0]));
  }

  public void testCommonPrefix() throws SQLException {
    // Splits between 'Hand' and 'Hardy'
    TextSplitter splitter = new TextSplitter();
    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
    // Don't check for exact values in the middle, because the splitter
    // generates some ugly Unicode-isms. But do check that we get multiple
    // splits and that it starts and ends on the correct points.
    assertEquals("Hand", splits.get(0));
    assertEquals("Hardy", splits.get(splits.size() - 1));
    assertEquals(6, splits.size());
  }
}