From 37c9642e7d2e46b3775d2f927dc21f60fa229f37 Mon Sep 17 00:00:00 2001 From: Bilung Lee Date: Fri, 28 Oct 2011 16:50:39 +0000 Subject: [PATCH] SQOOP-377 Migrate mapreduce.db package to new name space git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1190441 13f79535-47bb-0310-9956-ffa450edef68 --- .../mapreduce/db/BigDecimalSplitter.java | 133 +------ .../sqoop/mapreduce/db/BooleanSplitter.java | 49 +-- .../sqoop/mapreduce/db/DBConfiguration.java | 217 ++--------- .../sqoop/mapreduce/db/DBInputFormat.java | 320 +-------------- .../sqoop/mapreduce/db/DBOutputFormat.java | 204 +--------- .../sqoop/mapreduce/db/DBRecordReader.java | 268 +------------ .../sqoop/mapreduce/db/DBSplitter.java | 22 +- .../mapreduce/db/DataDrivenDBInputFormat.java | 276 ++----------- .../db/DataDrivenDBRecordReader.java | 90 +---- .../sqoop/mapreduce/db/DateSplitter.java | 161 +------- .../sqoop/mapreduce/db/FloatSplitter.java | 84 +--- .../sqoop/mapreduce/db/IntegerSplitter.java | 129 +------ .../mapreduce/db/OracleDBRecordReader.java | 108 +----- .../db/OracleDataDrivenDBInputFormat.java | 52 +-- .../db/OracleDataDrivenDBRecordReader.java | 15 +- .../mapreduce/db/OracleDateSplitter.java | 18 +- .../sqoop/mapreduce/db/TextSplitter.java | 208 +--------- .../mapreduce/db/BigDecimalSplitter.java | 151 ++++++++ .../sqoop/mapreduce/db/BooleanSplitter.java | 69 ++++ .../sqoop/mapreduce/db/DBConfiguration.java | 310 +++++++++++++++ .../sqoop/mapreduce/db/DBInputFormat.java | 363 ++++++++++++++++++ .../sqoop/mapreduce/db/DBOutputFormat.java | 238 ++++++++++++ .../sqoop/mapreduce/db/DBRecordReader.java | 305 +++++++++++++++ .../apache/sqoop/mapreduce/db/DBSplitter.java | 44 +++ .../mapreduce/db/DataDrivenDBInputFormat.java | 354 +++++++++++++++++ .../db/DataDrivenDBRecordReader.java | 132 +++++++ .../sqoop/mapreduce/db/DateSplitter.java | 183 +++++++++ .../sqoop/mapreduce/db/FloatSplitter.java | 99 +++++ .../sqoop/mapreduce/db/IntegerSplitter.java | 148 +++++++ .../mapreduce/db/OracleDBRecordReader.java | 152 ++++++++ .../db/OracleDataDrivenDBInputFormat.java | 77 ++++ .../db/OracleDataDrivenDBRecordReader.java | 53 +++ .../mapreduce/db/OracleDateSplitter.java | 38 ++ .../sqoop/mapreduce/db/TextSplitter.java | 228 +++++++++++ .../mapreduce/db/TestIntegerSplitter.java | 102 +---- .../sqoop/mapreduce/db/TestTextSplitter.java | 117 +----- .../mapreduce/db/TestIntegerSplitter.java | 120 ++++++ .../sqoop/mapreduce/db/TestTextSplitter.java | 134 +++++++ 38 files changed, 3370 insertions(+), 2401 deletions(-) create mode 100644 src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/BooleanSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DBConfiguration.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DBInputFormat.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DBRecordReader.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java create mode 100644 
src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java create mode 100644 src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java create mode 100644 src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java create mode 100644 src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/BigDecimalSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/BigDecimalSplitter.java index e60c81cf..8d3505ac 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/BigDecimalSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/BigDecimalSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,137 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.math.BigDecimal; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * Implement DBSplitter over BigDecimal values. + * + * @deprecated use org.apache.sqoop.mapreduce.db.BigDecimalSplitter instead. + * @see org.apache.sqoop.mapreduce.db.BigDecimalSplitter */ -public class BigDecimalSplitter implements DBSplitter { - private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class); +public class BigDecimalSplitter + extends org.apache.sqoop.mapreduce.db.BigDecimalSplitter { - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { - - BigDecimal minVal = results.getBigDecimal(1); - BigDecimal maxVal = results.getBigDecimal(2); - - String lowClausePrefix = colName + " >= "; - String highClausePrefix = colName + " < "; - - BigDecimal numSplits = new BigDecimal( - ConfigurationHelper.getConfNumMaps(conf)); - - if (minVal == null && maxVal == null) { - // Range is null to null. Return a null split accordingly. - List splits = new ArrayList(); - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - if (minVal == null || maxVal == null) { - // Don't know what is a reasonable min/max value for interpolation. Fail. - LOG.error("Cannot find a range for NUMERIC or DECIMAL " - + "fields with one end NULL."); - return null; - } - - // Get all the split points together. - List splitPoints = split(numSplits, minVal, maxVal); - List splits = new ArrayList(); - - // Turn the split points into a set of intervals. - BigDecimal start = splitPoints.get(0); - for (int i = 1; i < splitPoints.size(); i++) { - BigDecimal end = splitPoints.get(i); - - if (i == splitPoints.size() - 1) { - // This is the last one; use a closed interval. 
- splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + start.toString(), - colName + " <= " + end.toString())); - } else { - // Normal open-interval case. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + start.toString(), - highClausePrefix + end.toString())); - } - - start = end; - } - - return splits; - } - - private static final BigDecimal MIN_INCREMENT = - new BigDecimal(10000 * Double.MIN_VALUE); - - /** - * Divide numerator by denominator. If impossible in exact mode, use rounding. - */ - protected BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) { - try { - return numerator.divide(denominator); - } catch (ArithmeticException ae) { - return numerator.divide(denominator, BigDecimal.ROUND_HALF_UP); - } - } - - /** - * Returns a list of BigDecimals one element longer than the list of input - * splits. This represents the boundaries between input splits. All splits - * are open on the top end, except the last one. - * - * So the list [0, 5, 8, 12, 18] would represent splits capturing the - * intervals: - * - * [0, 5) - * [5, 8) - * [8, 12) - * [12, 18] note the closed interval for the last split. - */ - List split(BigDecimal numSplits, BigDecimal minVal, - BigDecimal maxVal) throws SQLException { - - List splits = new ArrayList(); - - // Use numSplits as a hint. May need an extra task if the size doesn't - // divide cleanly. - - BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), (numSplits)); - if (splitSize.compareTo(MIN_INCREMENT) < 0) { - splitSize = MIN_INCREMENT; - LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT"); - } - - BigDecimal curVal = minVal; - - while (curVal.compareTo(maxVal) <= 0) { - splits.add(curVal); - curVal = curVal.add(splitSize); - } - - if (splits.get(splits.size() - 1).compareTo(maxVal) != 0 - || splits.size() == 1) { - // We didn't end on the maxVal. Add that to the end of the list. - splits.add(maxVal); - } - - return splits; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/BooleanSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/BooleanSplitter.java index b84f82d7..da5ef85e 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/BooleanSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/BooleanSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,53 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; /** * Implement DBSplitter over boolean values. + * + * @deprecated use org.apache.sqoop.mapreduce.db.BooleanSplitter instead. + * @see org.apache.sqoop.mapreduce.db.BooleanSplitter */ -public class BooleanSplitter implements DBSplitter { - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { +public class BooleanSplitter + extends org.apache.sqoop.mapreduce.db.BooleanSplitter { - List splits = new ArrayList(); - - if (results.getString(1) == null && results.getString(2) == null) { - // Range is null to null. Return a null split accordingly. 
- splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - boolean minVal = results.getBoolean(1); - boolean maxVal = results.getBoolean(2); - - // Use one or two splits. - if (!minVal) { - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " = FALSE", colName + " = FALSE")); - } - - if (maxVal) { - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " = TRUE", colName + " = TRUE")); - } - - if (results.getString(1) == null || results.getString(2) == null) { - // Include a null value. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - } - - return splits; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DBConfiguration.java b/src/java/com/cloudera/sqoop/mapreduce/db/DBConfiguration.java index f82aa1c9..89f2b4ff 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DBConfiguration.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DBConfiguration.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,17 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; - import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.lib.db.DBWritable; - -import com.cloudera.sqoop.mapreduce.db.DBInputFormat.NullDBWritable; /** * A container for configuration property names for jobs with DB input/output. @@ -42,67 +32,76 @@ * @see DBInputFormat#setInput(Job, Class, String, String) * @see DBInputFormat#setInput(Job, Class, String, String, String, String...) * @see DBOutputFormat#setOutput(Job, String, String...) + * + * @deprecated use org.apache.sqoop.mapreduce.db.DBConfiguration instead. + * @see org.apache.sqoop.mapreduce.db.DBConfiguration */ -public class DBConfiguration { +public class DBConfiguration + extends org.apache.sqoop.mapreduce.db.DBConfiguration { /** The JDBC Driver class name. */ public static final String DRIVER_CLASS_PROPERTY = - "mapreduce.jdbc.driver.class"; + org.apache.sqoop.mapreduce.db.DBConfiguration.DRIVER_CLASS_PROPERTY; /** JDBC Database access URL. */ - public static final String URL_PROPERTY = "mapreduce.jdbc.url"; + public static final String URL_PROPERTY = + org.apache.sqoop.mapreduce.db.DBConfiguration.URL_PROPERTY; /** User name to access the database. */ - public static final String USERNAME_PROPERTY = "mapreduce.jdbc.username"; + public static final String USERNAME_PROPERTY = + org.apache.sqoop.mapreduce.db.DBConfiguration.USERNAME_PROPERTY; /** Password to access the database. */ - public static final String PASSWORD_PROPERTY = "mapreduce.jdbc.password"; + public static final String PASSWORD_PROPERTY = + org.apache.sqoop.mapreduce.db.DBConfiguration.PASSWORD_PROPERTY; /** Fetch size. */ - public static final String FETCH_SIZE = "mapreduce.jdbc.fetchsize"; + public static final String FETCH_SIZE = + org.apache.sqoop.mapreduce.db.DBConfiguration.FETCH_SIZE; /** Input table name. 
 */
 public static final String INPUT_TABLE_NAME_PROPERTY =
-    "mapreduce.jdbc.input.table.name";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_TABLE_NAME_PROPERTY;

 /** Field names in the Input table. */
 public static final String INPUT_FIELD_NAMES_PROPERTY =
-    "mapreduce.jdbc.input.field.names";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_FIELD_NAMES_PROPERTY;

 /** WHERE clause in the input SELECT statement. */
 public static final String INPUT_CONDITIONS_PROPERTY =
-    "mapreduce.jdbc.input.conditions";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_CONDITIONS_PROPERTY;

 /** ORDER BY clause in the input SELECT statement. */
 public static final String INPUT_ORDER_BY_PROPERTY =
-    "mapreduce.jdbc.input.orderby";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_ORDER_BY_PROPERTY;

 /** Whole input query, excluding LIMIT...OFFSET. */
-  public static final String INPUT_QUERY = "mapreduce.jdbc.input.query";
+  public static final String INPUT_QUERY =
+      org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_QUERY;

 /** Input query to get the count of records. */
 public static final String INPUT_COUNT_QUERY =
-    "mapreduce.jdbc.input.count.query";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_COUNT_QUERY;

 /** Input query to get the max and min values of the jdbc.input.query. */
 public static final String INPUT_BOUNDING_QUERY =
-    "mapred.jdbc.input.bounding.query";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_BOUNDING_QUERY;

 /** Class name implementing DBWritable which will hold input tuples. */
 public static final String INPUT_CLASS_PROPERTY =
-    "mapreduce.jdbc.input.class";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.INPUT_CLASS_PROPERTY;

 /** Output table name. */
 public static final String OUTPUT_TABLE_NAME_PROPERTY =
-    "mapreduce.jdbc.output.table.name";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY;

 /** Field names in the Output table. */
 public static final String OUTPUT_FIELD_NAMES_PROPERTY =
-    "mapreduce.jdbc.output.field.names";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY;

 /** Number of fields in the Output table. */
 public static final String OUTPUT_FIELD_COUNT_PROPERTY =
-    "mapreduce.jdbc.output.field.count";
+    org.apache.sqoop.mapreduce.db.DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY;

 /**
  * Sets the DB access related fields in the {@link Configuration}.
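
The aliasing above keeps the old com.cloudera constants and the new org.apache constants pointing at the same Hadoop property keys, so values written through either name are read through both. A minimal illustrative sketch, not part of the patch (the connect string is made up):

  import org.apache.hadoop.conf.Configuration;

  // A value set through the deprecated constant is visible through the
  // new one, because both constants are the same String at runtime.
  Configuration conf = new Configuration();
  conf.set(com.cloudera.sqoop.mapreduce.db.DBConfiguration.URL_PROPERTY,
      "jdbc:hsqldb:mem:example");
  String url = conf.get(
      org.apache.sqoop.mapreduce.db.DBConfiguration.URL_PROPERTY);
  // url is "jdbc:hsqldb:mem:example"
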
@@ -116,17 +115,8 @@ public class DBConfiguration { public static void configureDB(Configuration conf, String driverClass, String dbUrl, String userName, String passwd, Integer fetchSize) { - conf.set(DRIVER_CLASS_PROPERTY, driverClass); - conf.set(URL_PROPERTY, dbUrl); - if (userName != null) { - conf.set(USERNAME_PROPERTY, userName); - } - if (passwd != null) { - conf.set(PASSWORD_PROPERTY, passwd); - } - if (fetchSize != null) { - conf.setInt(FETCH_SIZE, fetchSize); - } + org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB( + conf, driverClass, dbUrl, userName, passwd, fetchSize); } /** @@ -138,7 +128,8 @@ public static void configureDB(Configuration conf, String driverClass, */ public static void configureDB(Configuration job, String driverClass, String dbUrl, Integer fetchSize) { - configureDB(job, driverClass, dbUrl, null, null, fetchSize); + org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(job, driverClass, + dbUrl, fetchSize); } /** @@ -151,7 +142,8 @@ public static void configureDB(Configuration job, String driverClass, */ public static void configureDB(Configuration conf, String driverClass, String dbUrl, String userName, String passwd) { - configureDB(conf, driverClass, dbUrl, userName, passwd, null); + org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(conf, driverClass, + dbUrl, userName, passwd); } /** @@ -162,151 +154,12 @@ public static void configureDB(Configuration conf, String driverClass, */ public static void configureDB(Configuration job, String driverClass, String dbUrl) { - configureDB(job, driverClass, dbUrl, null); + org.apache.sqoop.mapreduce.db.DBConfiguration.configureDB(job, driverClass, + dbUrl); } - private Configuration conf; - public DBConfiguration(Configuration job) { - this.conf = job; + super(job); } - - /** Returns a connection object to the DB. - * @throws ClassNotFoundException - * @throws SQLException */ - public Connection getConnection() - throws ClassNotFoundException, SQLException { - - Class.forName(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY)); - - if(conf.get(DBConfiguration.USERNAME_PROPERTY) == null) { - return DriverManager.getConnection( - conf.get(DBConfiguration.URL_PROPERTY)); - } else { - return DriverManager.getConnection( - conf.get(DBConfiguration.URL_PROPERTY), - conf.get(DBConfiguration.USERNAME_PROPERTY), - conf.get(DBConfiguration.PASSWORD_PROPERTY)); - } - } - - public Configuration getConf() { - return conf; - } - - public Integer getFetchSize() { - if (conf.get(DBConfiguration.FETCH_SIZE) == null) { - return null; - } - return conf.getInt(DBConfiguration.FETCH_SIZE, 0); - } - - public void setFetchSize(Integer fetchSize) { - if (fetchSize != null) { - conf.setInt(DBConfiguration.FETCH_SIZE, fetchSize); - } else { - conf.set(FETCH_SIZE, null); - } - } - public String getInputTableName() { - return conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY); - } - - public void setInputTableName(String tableName) { - conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName); - } - - public String[] getInputFieldNames() { - return conf.getStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY); - } - - public void setInputFieldNames(String... 
fieldNames) { - conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, fieldNames); - } - - public String getInputConditions() { - return conf.get(DBConfiguration.INPUT_CONDITIONS_PROPERTY); - } - - public void setInputConditions(String conditions) { - if (conditions != null && conditions.length() > 0) { - conf.set(DBConfiguration.INPUT_CONDITIONS_PROPERTY, conditions); - } - } - - public String getInputOrderBy() { - return conf.get(DBConfiguration.INPUT_ORDER_BY_PROPERTY); - } - - public void setInputOrderBy(String orderby) { - if(orderby != null && orderby.length() >0) { - conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, orderby); - } - } - - public String getInputQuery() { - return conf.get(DBConfiguration.INPUT_QUERY); - } - - public void setInputQuery(String query) { - if(query != null && query.length() >0) { - conf.set(DBConfiguration.INPUT_QUERY, query); - } - } - - public String getInputCountQuery() { - return conf.get(DBConfiguration.INPUT_COUNT_QUERY); - } - - public void setInputCountQuery(String query) { - if(query != null && query.length() > 0) { - conf.set(DBConfiguration.INPUT_COUNT_QUERY, query); - } - } - - public void setInputBoundingQuery(String query) { - if (query != null && query.length() > 0) { - conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query); - } - } - - public String getInputBoundingQuery() { - return conf.get(DBConfiguration.INPUT_BOUNDING_QUERY); - } - - public Class getInputClass() { - return conf.getClass(DBConfiguration.INPUT_CLASS_PROPERTY, - NullDBWritable.class); - } - - public void setInputClass(Class inputClass) { - conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, inputClass, - DBWritable.class); - } - - public String getOutputTableName() { - return conf.get(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY); - } - - public void setOutputTableName(String tableName) { - conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName); - } - - public String[] getOutputFieldNames() { - return conf.getStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY); - } - - public void setOutputFieldNames(String... fieldNames) { - conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, fieldNames); - } - - public void setOutputFieldCount(int fieldCount) { - conf.setInt(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, fieldCount); - } - - public int getOutputFieldCount() { - return conf.getInt(OUTPUT_FIELD_COUNT_PROPERTY, 0); - } - } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DBInputFormat.java b/src/java/com/cloudera/sqoop/mapreduce/db/DBInputFormat.java index b2ab6732..f93e20b1 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DBInputFormat.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DBInputFormat.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,34 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
 */
-
 package com.cloudera.sqoop.mapreduce.db;

-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.DatabaseMetaData;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;

 /**
  * An InputFormat that reads input data from an SQL table.
@@ -54,38 +27,38 @@
  *
  * The SQL query and input class can be set using one of the two
  * setInput methods.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat instead.
+ * @see org.apache.sqoop.mapreduce.db.DBInputFormat
  */
 public class DBInputFormat<T extends DBWritable>
-    extends InputFormat<LongWritable, T> implements Configurable {
-
-  private String dbProductName = "DEFAULT";
+    extends org.apache.sqoop.mapreduce.db.DBInputFormat<T> {

   /**
    * A Class that does nothing, implementing DBWritable.
+   * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable
+   * instead.
+   * @see org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable
    */
-  public static class NullDBWritable implements DBWritable, Writable {
-    @Override
-    public void readFields(DataInput in) throws IOException { }
-    @Override
-    public void readFields(ResultSet arg0) throws SQLException { }
-    @Override
-    public void write(DataOutput out) throws IOException { }
-    @Override
-    public void write(PreparedStatement arg0) throws SQLException { }
+  public static class NullDBWritable
+      extends org.apache.sqoop.mapreduce.db.DBInputFormat.NullDBWritable {
   }

   /**
    * An InputSplit that spans a set of rows.
+   *
+   * @deprecated use org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit
+   * instead.
+   * @see org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit
    */
-  public static class DBInputSplit extends InputSplit implements Writable {
-
-    private long end = 0;
-    private long start = 0;
+  public static class DBInputSplit extends
+      org.apache.sqoop.mapreduce.db.DBInputFormat.DBInputSplit {

     /**
      * Default Constructor.
*/ public DBInputSplit() { + super(); } /** @@ -94,266 +67,7 @@ public DBInputSplit() { * @param end the index of the last row to select */ public DBInputSplit(long start, long end) { - this.start = start; - this.end = end; + super(start, end); } - - @Override - /** {@inheritDoc} */ - public String[] getLocations() throws IOException { - // TODO Add a layer to enable SQL "sharding" and support locality - return new String[] {}; - } - - /** - * @return The index of the first row to select - */ - public long getStart() { - return start; - } - - /** - * @return The index of the last row to select - */ - public long getEnd() { - return end; - } - - /** - * @return The total row count in this split - */ - public long getLength() throws IOException { - return end - start; - } - - @Override - /** {@inheritDoc} */ - public void readFields(DataInput input) throws IOException { - start = input.readLong(); - end = input.readLong(); - } - - @Override - /** {@inheritDoc} */ - public void write(DataOutput output) throws IOException { - output.writeLong(start); - output.writeLong(end); - } - } - - private String conditions; - - private Connection connection; - - private String tableName; - - private String[] fieldNames; - - private DBConfiguration dbConf; - - @Override - /** {@inheritDoc} */ - public void setConf(Configuration conf) { - - dbConf = new DBConfiguration(conf); - - try { - getConnection(); - - DatabaseMetaData dbMeta = connection.getMetaData(); - this.dbProductName = dbMeta.getDatabaseProductName().toUpperCase(); - } catch (Exception ex) { - throw new RuntimeException(ex); - } - - tableName = dbConf.getInputTableName(); - fieldNames = dbConf.getInputFieldNames(); - conditions = dbConf.getInputConditions(); - } - - public Configuration getConf() { - return dbConf.getConf(); - } - - public DBConfiguration getDBConf() { - return dbConf; - } - - public Connection getConnection() { - try { - if (null == this.connection) { - // The connection was closed; reinstantiate it. - this.connection = dbConf.getConnection(); - this.connection.setAutoCommit(false); - this.connection.setTransactionIsolation( - Connection.TRANSACTION_READ_COMMITTED); - } - } catch (Exception e) { - throw new RuntimeException(e); - } - return connection; - } - - public String getDBProductName() { - return dbProductName; - } - - protected RecordReader createDBRecordReader( - DBInputSplit split, Configuration conf) throws IOException { - - @SuppressWarnings("unchecked") - Class inputClass = (Class) (dbConf.getInputClass()); - try { - // use database product name to determine appropriate record reader. - if (dbProductName.startsWith("ORACLE")) { - // use Oracle-specific db reader. - return new OracleDBRecordReader(split, inputClass, - conf, getConnection(), getDBConf(), conditions, fieldNames, - tableName); - } else { - // Generic reader. 
- return new DBRecordReader(split, inputClass, - conf, getConnection(), getDBConf(), conditions, fieldNames, - tableName); - } - } catch (SQLException ex) { - throw new IOException(ex); - } - } - - @Override - /** {@inheritDoc} */ - public RecordReader createRecordReader(InputSplit split, - TaskAttemptContext context) throws IOException, InterruptedException { - - return createDBRecordReader((DBInputSplit) split, - context.getConfiguration()); - } - - /** {@inheritDoc} */ - @Override - public List getSplits(JobContext job) throws IOException { - - ResultSet results = null; - Statement statement = null; - try { - statement = connection.createStatement(); - - results = statement.executeQuery(getCountQuery()); - results.next(); - - long count = results.getLong(1); - int chunks = ConfigurationHelper.getJobNumMaps(job); - long chunkSize = (count / chunks); - - results.close(); - statement.close(); - - List splits = new ArrayList(); - - // Split the rows into n-number of chunks and adjust the last chunk - // accordingly - for (int i = 0; i < chunks; i++) { - DBInputSplit split; - - if ((i + 1) == chunks) { - split = new DBInputSplit(i * chunkSize, count); - } else { - split = new DBInputSplit(i * chunkSize, (i * chunkSize) - + chunkSize); - } - - splits.add(split); - } - - connection.commit(); - return splits; - } catch (SQLException e) { - throw new IOException("Got SQLException", e); - } finally { - try { - if (results != null) { results.close(); } - } catch (SQLException e1) { /* ignored */ } - try { - if (statement != null) { statement.close(); } - } catch (SQLException e1) { /* ignored */ } - - closeConnection(); - } - } - - /** Returns the query for getting the total number of rows, - * subclasses can override this for custom behaviour.*/ - protected String getCountQuery() { - - if(dbConf.getInputCountQuery() != null) { - return dbConf.getInputCountQuery(); - } - - StringBuilder query = new StringBuilder(); - query.append("SELECT COUNT(*) FROM " + tableName); - - if (conditions != null && conditions.length() > 0) { - query.append(" WHERE " + conditions); - } - return query.toString(); - } - - /** - * Initializes the map-part of the job with the appropriate input settings. - * - * @param job The map-reduce job - * @param inputClass the class object implementing DBWritable, which is the - * Java object holding tuple fields. - * @param tableName The table to read data from - * @param conditions The condition which to select data with, - * eg. '(updated > 20070101 AND length > 0)' - * @param orderBy the fieldNames in the orderBy clause. - * @param fieldNames The field names in the table - * @see #setInput(Job, Class, String, String) - */ - public static void setInput(Job job, - Class inputClass, - String tableName, String conditions, - String orderBy, String... fieldNames) { - job.setInputFormatClass(DBInputFormat.class); - DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); - dbConf.setInputClass(inputClass); - dbConf.setInputTableName(tableName); - dbConf.setInputFieldNames(fieldNames); - dbConf.setInputConditions(conditions); - dbConf.setInputOrderBy(orderBy); - } - - /** - * Initializes the map-part of the job with the appropriate input settings. - * - * @param job The map-reduce job - * @param inputClass the class object implementing DBWritable, which is the - * Java object holding tuple fields. - * @param inputQuery the input query to select fields. 
Example : - * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" - * @param inputCountQuery the input query that returns - * the number of records in the table. - * Example : "SELECT COUNT(f1) FROM Mytable" - * @see #setInput(Job, Class, String, String, String, String...) - */ - public static void setInput(Job job, - Class inputClass, - String inputQuery, String inputCountQuery) { - job.setInputFormatClass(DBInputFormat.class); - DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); - dbConf.setInputClass(inputClass); - dbConf.setInputQuery(inputQuery); - dbConf.setInputCountQuery(inputCountQuery); - } - - protected void closeConnection() { - try { - if (null != this.connection) { - this.connection.close(); - this.connection = null; - } - } catch (SQLException sqlE) { /* ignore exception on close. */ } } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DBOutputFormat.java b/src/java/com/cloudera/sqoop/mapreduce/db/DBOutputFormat.java index 56fc7ab1..6780f2f7 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DBOutputFormat.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DBOutputFormat.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,28 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.db.DBWritable; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.StringUtils; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * A OutputFormat that sends the reduce output to a SQL table. @@ -47,191 +31,29 @@ * key has a type extending DBWritable. Returned {@link RecordWriter} * writes only the key to the database with a batch SQL query. * + * @deprecated use org.apache.sqoop.mapreduce.db.DBoutputFormat instead. + * @see org.apache.sqoop.mapreduce.db.DBOutputFormat */ public class DBOutputFormat - extends OutputFormat { - - private static final Log LOG = LogFactory.getLog(DBOutputFormat.class); - public void checkOutputSpecs(JobContext context) - throws IOException, InterruptedException {} - - public OutputCommitter getOutputCommitter(TaskAttemptContext context) - throws IOException, InterruptedException { - return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), - context); - } + extends org.apache.sqoop.mapreduce.db.DBOutputFormat { /** * A RecordWriter that writes the reduce output to a SQL table. + * + * @deprecated use + * org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter instead. 
+ * @see org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter */ - public class DBRecordWriter - extends RecordWriter { - - private Connection connection; - private PreparedStatement statement; + public static class DBRecordWriter extends + org.apache.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter { public DBRecordWriter() throws SQLException { + super(); } - public DBRecordWriter(Connection connection - , PreparedStatement statement) throws SQLException { - this.connection = connection; - this.statement = statement; - this.connection.setAutoCommit(false); + public DBRecordWriter(Connection connection, + PreparedStatement statement) throws SQLException { + super(connection, statement); } - - public Connection getConnection() { - return connection; - } - - public PreparedStatement getStatement() { - return statement; - } - - @Override - /** {@inheritDoc} */ - public void close(TaskAttemptContext context) throws IOException { - try { - statement.executeBatch(); - connection.commit(); - } catch (SQLException e) { - try { - connection.rollback(); - } catch (SQLException ex) { - LOG.warn(StringUtils.stringifyException(ex)); - } - throw new IOException(e); - } finally { - try { - statement.close(); - connection.close(); - } catch (SQLException ex) { - LOG.error("Unable to close connection", ex); - } - } - } - - @Override - /** {@inheritDoc} */ - public void write(K key, V value) throws IOException { - try { - key.write(statement); - statement.addBatch(); - } catch (SQLException e) { - LOG.error("Exception encountered", e); - } - } - } - - /** - * Constructs the query used as the prepared statement to insert data. - * - * @param table - * the table to insert into - * @param fieldNames - * the fields to insert into. If field names are unknown, supply an - * array of nulls. - */ - public String constructQuery(String table, String[] fieldNames) { - if(fieldNames == null) { - throw new IllegalArgumentException("Field names may not be null"); - } - - StringBuilder query = new StringBuilder(); - query.append("INSERT INTO ").append(table); - - if (fieldNames.length > 0 && fieldNames[0] != null) { - query.append(" ("); - for (int i = 0; i < fieldNames.length; i++) { - query.append(fieldNames[i]); - if (i != fieldNames.length - 1) { - query.append(","); - } - } - query.append(")"); - } - query.append(" VALUES ("); - - for (int i = 0; i < fieldNames.length; i++) { - query.append("?"); - if(i != fieldNames.length - 1) { - query.append(","); - } - } - query.append(");"); - - return query.toString(); - } - - @Override - /** {@inheritDoc} */ - public RecordWriter getRecordWriter(TaskAttemptContext context) - throws IOException { - DBConfiguration dbConf = new DBConfiguration(context.getConfiguration()); - String tableName = dbConf.getOutputTableName(); - String[] fieldNames = dbConf.getOutputFieldNames(); - - if(fieldNames == null) { - fieldNames = new String[dbConf.getOutputFieldCount()]; - } - - try { - Connection connection = dbConf.getConnection(); - PreparedStatement statement = null; - - statement = connection.prepareStatement( - constructQuery(tableName, fieldNames)); - return new DBRecordWriter(connection, statement); - } catch (Exception ex) { - throw new IOException(ex); - } - } - - /** - * Initializes the reduce-part of the job with - * the appropriate output settings. - * - * @param job The job - * @param tableName The table to insert data into - * @param fieldNames The field names in the table. - */ - public static void setOutput(Job job, String tableName, - String... 
fieldNames) throws IOException { - if(fieldNames.length > 0 && fieldNames[0] != null) { - DBConfiguration dbConf = setOutput(job, tableName); - dbConf.setOutputFieldNames(fieldNames); - } else { - if (fieldNames.length > 0) { - setOutput(job, tableName, fieldNames.length); - } else { - throw new IllegalArgumentException( - "Field names must be greater than 0"); - } - } - } - - /** - * Initializes the reduce-part of the job - * with the appropriate output settings. - * - * @param job The job - * @param tableName The table to insert data into - * @param fieldCount the number of fields in the table. - */ - public static void setOutput(Job job, String tableName, - int fieldCount) throws IOException { - DBConfiguration dbConf = setOutput(job, tableName); - dbConf.setOutputFieldCount(fieldCount); - } - - private static DBConfiguration setOutput(Job job, - String tableName) throws IOException { - job.setOutputFormatClass(DBOutputFormat.class); - ConfigurationHelper.setJobReduceSpeculativeExecution(job, false); - - DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); - - dbConf.setOutputTableName(tableName); - return dbConf; } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DBRecordReader.java b/src/java/com/cloudera/sqoop/mapreduce/db/DBRecordReader.java index b572341a..8af9b7bd 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DBRecordReader.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DBRecordReader.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,63 +15,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.io.IOException; import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; import java.sql.SQLException; -import java.util.Arrays; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.db.DBWritable; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.conf.Configuration; - -import com.cloudera.sqoop.util.LoggingUtils; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; /** * A RecordReader that reads records from a SQL table. * Emits LongWritables containing the record number as * key and DBWritables as value. + * + * @deprecated use org.apache.sqoop.mapreduce.db.DBRecordReader instead. 
+ * @see org.apache.sqoop.mapreduce.db.DBRecordReader */ public class DBRecordReader extends - RecordReader { - - private static final Log LOG = LogFactory.getLog(DBRecordReader.class); - - private ResultSet results = null; - - private Class inputClass; - - private Configuration conf; - - private DBInputFormat.DBInputSplit split; - - private long pos = 0; - - private LongWritable key = null; - - private T value = null; - - private Connection connection; - - protected PreparedStatement statement; - - private DBConfiguration dbConf; - - private String conditions; - - private String [] fieldNames; - - private String tableName; + org.apache.sqoop.mapreduce.db.DBRecordReader { /** * @param split The InputSplit to read data for @@ -85,222 +44,7 @@ public DBRecordReader(DBInputFormat.DBInputSplit split, Class inputClass, Configuration conf, Connection conn, DBConfiguration dbConfig, String cond, String [] fields, String table) throws SQLException { - this.inputClass = inputClass; - this.split = split; - this.conf = conf; - this.connection = conn; - this.dbConf = dbConfig; - this.conditions = cond; - if (fields != null) { - this.fieldNames = Arrays.copyOf(fields, fields.length); - } - this.tableName = table; + super(split, inputClass, conf, conn, dbConfig, cond, fields, table); } // CHECKSTYLE:ON - - protected ResultSet executeQuery(String query) throws SQLException { - this.statement = connection.prepareStatement(query, - ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - - Integer fetchSize = dbConf.getFetchSize(); - if (fetchSize != null) { - LOG.debug("Using fetchSize for next query: " + fetchSize); - statement.setFetchSize(fetchSize); - } - - LOG.debug("Executing query: " + query); - return statement.executeQuery(); - } - - /** Returns the query for selecting the records, - * subclasses can override this for custom behaviour.*/ - protected String getSelectQuery() { - StringBuilder query = new StringBuilder(); - - // Default codepath for MySQL, HSQLDB, etc. - // Relies on LIMIT/OFFSET for splits. - if(dbConf.getInputQuery() == null) { - query.append("SELECT "); - - for (int i = 0; i < fieldNames.length; i++) { - query.append(fieldNames[i]); - if (i != fieldNames.length -1) { - query.append(", "); - } - } - - query.append(" FROM ").append(tableName); - query.append(" AS ").append(tableName); //in hsqldb this is necessary - if (conditions != null && conditions.length() > 0) { - query.append(" WHERE (").append(conditions).append(")"); - } - - String orderBy = dbConf.getInputOrderBy(); - if (orderBy != null && orderBy.length() > 0) { - query.append(" ORDER BY ").append(orderBy); - } - } else { - //PREBUILT QUERY - query.append(dbConf.getInputQuery()); - } - - try { - query.append(" LIMIT ").append(split.getLength()); - query.append(" OFFSET ").append(split.getStart()); - } catch (IOException ex) { - // Ignore, will not throw. 
- } - - return query.toString(); - } - - @Override - /** {@inheritDoc} */ - public void close() throws IOException { - try { - if (null != results) { - results.close(); - } - if (null != statement) { - statement.close(); - } - if (null != connection) { - connection.commit(); - connection.close(); - } - } catch (SQLException e) { - throw new IOException(e); - } - } - - public void initialize(InputSplit inputSplit, TaskAttemptContext context) - throws IOException, InterruptedException { - //do nothing - } - - @Override - /** {@inheritDoc} */ - public LongWritable getCurrentKey() { - return key; - } - - @Override - /** {@inheritDoc} */ - public T getCurrentValue() { - return value; - } - - /** - * @deprecated - */ - @Deprecated - public T createValue() { - return ReflectionUtils.newInstance(inputClass, conf); - } - - /** - * @deprecated - */ - @Deprecated - public long getPos() throws IOException { - return pos; - } - - /** - * @deprecated Use {@link #nextKeyValue()} - */ - @Deprecated - public boolean next(LongWritable k, T v) throws IOException { - this.key = k; - this.value = v; - return nextKeyValue(); - } - - @Override - /** {@inheritDoc} */ - public float getProgress() throws IOException { - return pos / (float)split.getLength(); - } - - @Override - /** {@inheritDoc} */ - public boolean nextKeyValue() throws IOException { - try { - if (key == null) { - key = new LongWritable(); - } - if (value == null) { - value = createValue(); - } - if (null == this.results) { - // First time into this method, run the query. - this.results = executeQuery(getSelectQuery()); - } - if (!results.next()) { - return false; - } - - // Set the key field value as the output key value - key.set(pos + split.getStart()); - - value.readFields(results); - - pos++; - } catch (SQLException e) { - LoggingUtils.logAll(LOG, e); - throw new IOException("SQLException in nextKeyValue", e); - } - return true; - } - - /** - * @return true if nextKeyValue() would return false. - */ - protected boolean isDone() { - try { - return this.results != null - && (results.isLast() || results.isAfterLast()); - } catch (SQLException sqlE) { - return true; - } - } - - protected DBInputFormat.DBInputSplit getSplit() { - return split; - } - - protected String [] getFieldNames() { - return fieldNames; - } - - protected String getTableName() { - return tableName; - } - - protected String getConditions() { - return conditions; - } - - protected DBConfiguration getDBConf() { - return dbConf; - } - - protected Connection getConnection() { - return connection; - } - - protected PreparedStatement getStatement() { - return statement; - } - - protected void setStatement(PreparedStatement stmt) { - this.statement = stmt; - } - - /** - * @return the configuration. Allows subclasses to access the configuration - */ - protected Configuration getConf(){ - return conf; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DBSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/DBSplitter.java index 735238c6..6abeeba6 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DBSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DBSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,15 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
 */
-
 package com.cloudera.sqoop.mapreduce.db;

-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;

 /**
  * DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat.
@@ -34,13 +25,10 @@
  * on the data-type of the column, this requires different behavior.
  * DBSplitter implementations should perform this for a data type or family
  * of data types.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.DBSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.DBSplitter
  */
-public interface DBSplitter {
-  /**
-   * Given a ResultSet containing one record (and already advanced to that
-   * record) with two columns (a low value, and a high value, both of the same
-   * type), determine a set of splits that span the given values.
-   */
-  List<InputSplit> split(Configuration conf, ResultSet results, String colName)
-      throws SQLException;
+public interface DBSplitter extends org.apache.sqoop.mapreduce.db.DBSplitter {
+
 }
diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
index b6c7f421..a639e307 100644
--- a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
+++ b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -17,67 +15,48 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package com.cloudera.sqoop.mapreduce.db;

-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.sql.Types;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.lib.db.DBWritable;
-import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-
-import com.cloudera.sqoop.config.ConfigurationHelper;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;

 /**
  * An InputFormat that reads input data from an SQL table.
  * Operates like DBInputFormat, but instead of using LIMIT and OFFSET to
  * demarcate splits, it tries to generate WHERE clauses which separate the
  * data into roughly equivalent shards.
+ * + * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat instead + * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat */ public class DataDrivenDBInputFormat - extends DBInputFormat implements Configurable { - - private static final Log LOG = - LogFactory.getLog(DataDrivenDBInputFormat.class); + extends org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat { /** * If users are providing their own query, the following string is expected * to appear in the WHERE clause, which will be substituted with a pair of * conditions on the input to allow input splits to parallelise the import. */ - public static final String SUBSTITUTE_TOKEN = "$CONDITIONS"; + public static final String SUBSTITUTE_TOKEN = + org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.SUBSTITUTE_TOKEN; /** * A InputSplit that spans a set of rows. + * + * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat. + * DataDrivenDBInputSplit instead. + * @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat. + * DataDrivenDBInputSplit */ - public static class DataDrivenDBInputSplit - extends DBInputFormat.DBInputSplit { - - private String lowerBoundClause; - private String upperBoundClause; + public static class DataDrivenDBInputSplit extends + org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit { /** * Default Constructor. */ public DataDrivenDBInputSplit() { + super(); } /** @@ -88,189 +67,10 @@ public DataDrivenDBInputSplit() { * on the 'upper' end. */ public DataDrivenDBInputSplit(final String lower, final String upper) { - this.lowerBoundClause = lower; - this.upperBoundClause = upper; - } - - - /** - * @return The total row count in this split. - */ - public long getLength() throws IOException { - return 0; // unfortunately, we don't know this. - } - - @Override - /** {@inheritDoc} */ - public void readFields(DataInput input) throws IOException { - this.lowerBoundClause = Text.readString(input); - this.upperBoundClause = Text.readString(input); - } - - @Override - /** {@inheritDoc} */ - public void write(DataOutput output) throws IOException { - Text.writeString(output, this.lowerBoundClause); - Text.writeString(output, this.upperBoundClause); - } - - public String getLowerClause() { - return lowerBoundClause; - } - - public String getUpperClause() { - return upperBoundClause; + super(lower, upper); } } - /** - * @return the DBSplitter implementation to use to divide the table/query - * into InputSplits. - */ - protected DBSplitter getSplitter(int sqlDataType) { - switch (sqlDataType) { - case Types.NUMERIC: - case Types.DECIMAL: - return new BigDecimalSplitter(); - - case Types.BIT: - case Types.BOOLEAN: - return new BooleanSplitter(); - - case Types.INTEGER: - case Types.TINYINT: - case Types.SMALLINT: - case Types.BIGINT: - return new IntegerSplitter(); - - case Types.REAL: - case Types.FLOAT: - case Types.DOUBLE: - return new FloatSplitter(); - - case Types.CHAR: - case Types.VARCHAR: - case Types.LONGVARCHAR: - return new TextSplitter(); - - case Types.DATE: - case Types.TIME: - case Types.TIMESTAMP: - return new DateSplitter(); - - default: - // TODO: Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB, - // BLOB, ARRAY, STRUCT, REF, DATALINK, and JAVA_OBJECT. 
-      return null;
-    }
-  }
-
-  @Override
-  /** {@inheritDoc} */
-  public List<InputSplit> getSplits(JobContext job) throws IOException {
-
-    int targetNumTasks = ConfigurationHelper.getJobNumMaps(job);
-    String boundaryQuery = getDBConf().getInputBoundingQuery();
-
-    // If the user did not force us to use a boundary query, and we don't need
-    // one because there is only one mapper, we will return a single split
-    // that separates nothing. This can be considerably more optimal for a
-    // large table with no index.
-    if (1 == targetNumTasks
-        && (boundaryQuery == null || boundaryQuery.isEmpty())) {
-      List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
-      singletonSplit.add(new DataDrivenDBInputSplit("1=1", "1=1"));
-      return singletonSplit;
-    }
-
-    ResultSet results = null;
-    Statement statement = null;
-    Connection connection = getConnection();
-    try {
-      statement = connection.createStatement();
-
-      String query = getBoundingValsQuery();
-      LOG.info("BoundingValsQuery: " + query);
-
-      results = statement.executeQuery(query);
-      results.next();
-
-      // Based on the type of the results, use a different mechanism
-      // for interpolating split points (i.e., numeric splits, text splits,
-      // dates, etc.)
-      int sqlDataType = results.getMetaData().getColumnType(1);
-      boolean isSigned = results.getMetaData().isSigned(1);
-
-      // MySQL has an unsigned integer which we need to allocate space for
-      if (sqlDataType == Types.INTEGER && !isSigned){
-        sqlDataType = Types.BIGINT;
-      }
-
-      DBSplitter splitter = getSplitter(sqlDataType);
-      if (null == splitter) {
-        throw new IOException("Unknown SQL data type: " + sqlDataType);
-      }
-
-      return splitter.split(job.getConfiguration(), results,
-          getDBConf().getInputOrderBy());
-    } catch (SQLException e) {
-      throw new IOException(e);
-    } finally {
-      // More-or-less ignore SQL exceptions here, but log in case we need it.
-      try {
-        if (null != results) {
-          results.close();
-        }
-      } catch (SQLException se) {
-        LOG.debug("SQLException closing resultset: " + se.toString());
-      }
-
-      try {
-        if (null != statement) {
-          statement.close();
-        }
-      } catch (SQLException se) {
-        LOG.debug("SQLException closing statement: " + se.toString());
-      }
-
-      try {
-        connection.commit();
-        closeConnection();
-      } catch (SQLException se) {
-        LOG.debug("SQLException committing split transaction: "
-            + se.toString());
-      }
-    }
-  }
-
-  /**
-   * @return a query which returns the minimum and maximum values for
-   * the order-by column.
-   *
-   * The min value should be in the first column, and the
-   * max value should be in the second column of the results.
-   */
-  protected String getBoundingValsQuery() {
-    // If the user has provided a query, use that instead.
-    String userQuery = getDBConf().getInputBoundingQuery();
-    if (null != userQuery) {
-      return userQuery;
-    }
-
-    // Auto-generate one based on the table name we've been provided with.
-    StringBuilder query = new StringBuilder();
-
-    String splitCol = getDBConf().getInputOrderBy();
-    query.append("SELECT MIN(").append(splitCol).append("), ");
-    query.append("MAX(").append(splitCol).append(") FROM ");
-    query.append(getDBConf().getInputTableName());
-    String conditions = getDBConf().getInputConditions();
-    if (null != conditions) {
-      query.append(" WHERE ( " + conditions + " )");
-    }
-
-    return query.toString();
-  }

 /** Set the user-defined bounding query to use with a user-defined query.
     This *must* include the substring "$CONDITIONS"
@@ -282,35 +82,8 @@ protected String getBoundingValsQuery() {
     inside each split.
*/ public static void setBoundingQuery(Configuration conf, String query) { - if (null != query) { - // If the user is setting a query, warn if they don't allow conditions. - if (query.indexOf(SUBSTITUTE_TOKEN) == -1) { - LOG.warn("Could not find " + SUBSTITUTE_TOKEN + " token in query: " - + query + "; splits may not partition data."); - } - } - - conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query); - } - - protected RecordReader createDBRecordReader( - DBInputSplit split, Configuration conf) throws IOException { - - DBConfiguration dbConf = getDBConf(); - @SuppressWarnings("unchecked") - Class inputClass = (Class) (dbConf.getInputClass()); - String dbProductName = getDBProductName(); - - LOG.debug("Creating db record reader for db product: " + dbProductName); - - try { - return new DataDrivenDBRecordReader(split, inputClass, - conf, getConnection(), dbConf, dbConf.getInputConditions(), - dbConf.getInputFieldNames(), dbConf.getInputTableName(), - dbProductName); - } catch (SQLException ex) { - throw new IOException(ex); - } + org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setBoundingQuery( + conf, query); } // Configuration methods override superclass to ensure that the proper @@ -324,9 +97,8 @@ public static void setInput(Job job, Class inputClass, String tableName, String conditions, String splitBy, String... fieldNames) { - DBInputFormat.setInput(job, inputClass, tableName, conditions, - splitBy, fieldNames); - job.setInputFormatClass(DataDrivenDBInputFormat.class); + org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setInput( + job, inputClass, tableName, conditions, splitBy, fieldNames); } /** setInput() takes a custom query and a separate "bounding query" to use @@ -335,9 +107,7 @@ public static void setInput(Job job, public static void setInput(Job job, Class inputClass, String inputQuery, String inputBoundingQuery) { - DBInputFormat.setInput(job, inputClass, inputQuery, ""); - job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, - inputBoundingQuery); - job.setInputFormatClass(DataDrivenDBInputFormat.class); + org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.setInput( + job, inputClass, inputQuery, inputBoundingQuery); } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBRecordReader.java b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBRecordReader.java index b48d5288..c5e6cd81 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBRecordReader.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DataDrivenDBRecordReader.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,15 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.lib.db.DBWritable; @@ -34,14 +28,13 @@ * using data-driven WHERE clause splits. * Emits LongWritables containing the record number as * key and DBWritables as value. + * + * @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader + * instead.
+ * @see org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader */ public class DataDrivenDBRecordReader - extends DBRecordReader { - - private static final Log LOG = - LogFactory.getLog(DataDrivenDBRecordReader.class); - - private String dbProductName; // database manufacturer string. + extends org.apache.sqoop.mapreduce.db.DataDrivenDBRecordReader { // CHECKSTYLE:OFF // TODO(aaron): Refactor constructor to use fewer arguments. @@ -53,77 +46,8 @@ public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split, Class inputClass, Configuration conf, Connection conn, DBConfiguration dbConfig, String cond, String [] fields, String table, String dbProduct) throws SQLException { - super(split, inputClass, conf, conn, dbConfig, cond, fields, table); - this.dbProductName = dbProduct; + super(split, inputClass, conf, conn, dbConfig, + cond, fields, table, dbProduct); } // CHECKSTYLE:ON - - @Override - /** {@inheritDoc} */ - public float getProgress() throws IOException { - return isDone() ? 1.0f : 0.0f; - } - - /** Returns the query for selecting the records, - * subclasses can override this for custom behaviour.*/ - protected String getSelectQuery() { - StringBuilder query = new StringBuilder(); - DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit = - (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit(); - DBConfiguration dbConf = getDBConf(); - String [] fieldNames = getFieldNames(); - String tableName = getTableName(); - String conditions = getConditions(); - - // Build the WHERE clauses associated with the data split first. - // We need them in both branches of this function. - StringBuilder conditionClauses = new StringBuilder(); - conditionClauses.append("( ").append(dataSplit.getLowerClause()); - conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause()); - conditionClauses.append(" )"); - - if(dbConf.getInputQuery() == null) { - // We need to generate the entire query. - query.append("SELECT "); - - for (int i = 0; i < fieldNames.length; i++) { - query.append(fieldNames[i]); - if (i != fieldNames.length -1) { - query.append(", "); - } - } - - query.append(" FROM ").append(tableName); - if (!dbProductName.startsWith("ORACLE")) { - // Seems to be necessary for hsqldb? Oracle explicitly does *not* - // use this clause. - query.append(" AS ").append(tableName); - } - query.append(" WHERE "); - if (conditions != null && conditions.length() > 0) { - // Put the user's conditions first. - query.append("( ").append(conditions).append(" ) AND "); - } - - // Now append the conditions associated with our split. - query.append(conditionClauses.toString()); - - } else { - // User provided the query. We replace the special token with - // our WHERE clause. - String inputQuery = dbConf.getInputQuery(); - if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) { - LOG.error("Could not find the clause substitution token " - + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: [" - + inputQuery + "]. 
Parallel splits may not work correctly."); - } - - query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN, - conditionClauses.toString())); - } - - LOG.debug("Using query: " + query.toString()); - - return query.toString(); - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/DateSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/DateSplitter.java index 0c547c68..2e4a2ba3 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/DateSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/DateSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,168 +15,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Types; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * Implement DBSplitter over date/time values. * Make use of logic from IntegerSplitter, since date/time are just longs * in Java. + * + * @deprecated use org.apache.sqoop.mapreduce.db.DateSplitter instead. + * @see org.apache.sqoop.mapreduce.db.DateSplitter */ -public class DateSplitter extends IntegerSplitter { +public class DateSplitter extends org.apache.sqoop.mapreduce.db.DateSplitter { - private static final Log LOG = LogFactory.getLog(DateSplitter.class); - - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { - - long minVal; - long maxVal; - - int sqlDataType = results.getMetaData().getColumnType(1); - minVal = resultSetColToLong(results, 1, sqlDataType); - maxVal = resultSetColToLong(results, 2, sqlDataType); - - String lowClausePrefix = colName + " >= "; - String highClausePrefix = colName + " < "; - - int numSplits = ConfigurationHelper.getConfNumMaps(conf); - if (numSplits < 1) { - numSplits = 1; - } - - if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) { - // The range of acceptable dates is NULL to NULL. Just create a single - // split. - List splits = new ArrayList(); - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - // Gather the split point integers - List splitPoints = split(numSplits, minVal, maxVal); - List splits = new ArrayList(); - - // Turn the split points into a set of intervals. - long start = splitPoints.get(0); - Date startDate = longToDate(start, sqlDataType); - if (sqlDataType == Types.TIMESTAMP) { - // The lower bound's nanos value needs to match the actual lower-bound - // nanos. - try { - ((java.sql.Timestamp) startDate).setNanos( - results.getTimestamp(1).getNanos()); - } catch (NullPointerException npe) { - // If the lower bound was NULL, we'll get an NPE; just ignore it and - // don't set nanos. 
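// (For illustration: a non-final interval pairs the clauses
//   colName >= '2011-01-01 00:00:00.0' and colName < '2011-04-02 08:00:00.0'
// while the final interval is closed with colName <= ...; the quoted
// literals come from dateToString() below, and the values shown here are
// hypothetical.)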
- } - } - - for (int i = 1; i < splitPoints.size(); i++) { - long end = splitPoints.get(i); - Date endDate = longToDate(end, sqlDataType); - - if (i == splitPoints.size() - 1) { - if (sqlDataType == Types.TIMESTAMP) { - // The upper bound's nanos value needs to match the actual - // upper-bound nanos. - try { - ((java.sql.Timestamp) endDate).setNanos( - results.getTimestamp(2).getNanos()); - } catch (NullPointerException npe) { - // If the upper bound was NULL, we'll get an NPE; just ignore it - // and don't set nanos. - } - } - // This is the last one; use a closed interval. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + dateToString(startDate), - colName + " <= " + dateToString(endDate))); - } else { - // Normal open-interval case. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + dateToString(startDate), - highClausePrefix + dateToString(endDate))); - } - - start = end; - startDate = endDate; - } - - if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) { - // Add an extra split to handle the null case that we saw. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - } - - return splits; - } - - /** - Retrieve the value from the column in a type-appropriate manner and - return its timestamp since the epoch. If the column is null, then return - Long.MIN_VALUE. This will cause a special split to be generated for the - NULL case, but may also cause poorly-balanced splits if most of the - actual dates are positive time since the epoch, etc. - */ - private long resultSetColToLong(ResultSet rs, int colNum, int sqlDataType) - throws SQLException { - try { - switch (sqlDataType) { - case Types.DATE: - return rs.getDate(colNum).getTime(); - case Types.TIME: - return rs.getTime(colNum).getTime(); - case Types.TIMESTAMP: - return rs.getTimestamp(colNum).getTime(); - default: - throw new SQLException("Not a date-type field"); - } - } catch (NullPointerException npe) { - // null column. return minimum long value. - LOG.warn("Encountered a NULL date in the split column. " - + "Splits may be poorly balanced."); - return Long.MIN_VALUE; - } - } - - /** Parse the long-valued timestamp into the appropriate SQL date type. */ - private Date longToDate(long val, int sqlDataType) { - switch (sqlDataType) { - case Types.DATE: - return new java.sql.Date(val); - case Types.TIME: - return new java.sql.Time(val); - case Types.TIMESTAMP: - return new java.sql.Timestamp(val); - default: // Shouldn't ever hit this case. - return null; - } - } - - /** - * Given a Date 'd', format it as a string for use in a SQL date - * comparison operation. - * @param d the date to format. - * @return the string representing this date in SQL with any appropriate - * quotation characters, etc. - */ - protected String dateToString(Date d) { - return "'" + d.toString() + "'"; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/FloatSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/FloatSplitter.java index 4f53b499..0c7bc8ce 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/FloatSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/FloatSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,88 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * Implement DBSplitter over floating-point values. + * + * @deprecated use org.apache.sqoop.mapreduce.db.FloatSplitter instead. + * @see org.apache.sqoop.mapreduce.db.FloatSplitter */ -public class FloatSplitter implements DBSplitter { +public class FloatSplitter + extends org.apache.sqoop.mapreduce.db.FloatSplitter { - private static final Log LOG = LogFactory.getLog(FloatSplitter.class); - - private static final double MIN_INCREMENT = 10000 * Double.MIN_VALUE; - - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { - - LOG.warn("Generating splits for a floating-point index column. Due to the"); - LOG.warn("imprecise representation of floating-point values in Java, this"); - LOG.warn("may result in an incomplete import."); - LOG.warn("You are strongly encouraged to choose an integral split column."); - - List splits = new ArrayList(); - - if (results.getString(1) == null && results.getString(2) == null) { - // Range is null to null. Return a null split accordingly. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - double minVal = results.getDouble(1); - double maxVal = results.getDouble(2); - - // Use this as a hint. May need an extra task if the size doesn't - // divide cleanly. - int numSplits = ConfigurationHelper.getConfNumMaps(conf); - double splitSize = (maxVal - minVal) / (double) numSplits; - - if (splitSize < MIN_INCREMENT) { - splitSize = MIN_INCREMENT; - } - - String lowClausePrefix = colName + " >= "; - String highClausePrefix = colName + " < "; - - double curLower = minVal; - double curUpper = curLower + splitSize; - - while (curUpper < maxVal) { - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + Double.toString(curLower), - highClausePrefix + Double.toString(curUpper))); - - curLower = curUpper; - curUpper += splitSize; - } - - // Catch any overage and create the closed interval for the last split. - if (curLower <= maxVal || splits.size() == 1) { - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + Double.toString(curUpper), - colName + " <= " + Double.toString(maxVal))); - } - - if (results.getString(1) == null || results.getString(2) == null) { - // At least one extrema is null; add a null split. 
- splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - } - - return splits; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/IntegerSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/IntegerSplitter.java index f519425b..f0428333 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/IntegerSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/IntegerSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,133 +15,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * Implement DBSplitter over integer values. + * + * @deprecated use org.apache.sqoop.mapreduce.db.IntegerSplitter instead. + * @see org.apache.sqoop.mapreduce.db.IntegerSplitter */ -public class IntegerSplitter implements DBSplitter { +public class IntegerSplitter + extends org.apache.sqoop.mapreduce.db.IntegerSplitter { public static final Log LOG = - LogFactory.getLog(IntegerSplitter.class.getName()); - - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { - - long minVal = results.getLong(1); - long maxVal = results.getLong(2); - - String lowClausePrefix = colName + " >= "; - String highClausePrefix = colName + " < "; - - int numSplits = ConfigurationHelper.getConfNumMaps(conf); - if (numSplits < 1) { - numSplits = 1; - } - - if (results.getString(1) == null && results.getString(2) == null) { - // Range is null to null. Return a null split accordingly. - List splits = new ArrayList(); - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - // Get all the split points together. - List splitPoints = split(numSplits, minVal, maxVal); - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("Splits: [%,28d to %,28d] into %d parts", - minVal, maxVal, numSplits)); - for (int i = 0; i < splitPoints.size(); i++) { - LOG.debug(String.format("%,28d", splitPoints.get(i))); - } - } - List splits = new ArrayList(); - - // Turn the split points into a set of intervals. - long start = splitPoints.get(0); - for (int i = 1; i < splitPoints.size(); i++) { - long end = splitPoints.get(i); - - if (i == splitPoints.size() - 1) { - // This is the last one; use a closed interval. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + Long.toString(start), - colName + " <= " + Long.toString(end))); - } else { - // Normal open-interval case. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + Long.toString(start), - highClausePrefix + Long.toString(end))); - } - - start = end; - } - - if (results.getString(1) == null || results.getString(2) == null) { - // At least one extrema is null; add a null split. 
- splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - } - - return splits; - } - - /** - * Returns a list of longs one element longer than the list of input splits. - * This represents the boundaries between input splits. - * All splits are open on the top end, except the last one. - * - * So the list [0, 5, 8, 12, 18] would represent splits capturing the - * intervals: - * - * [0, 5) - * [5, 8) - * [8, 12) - * [12, 18] note the closed interval for the last split. - */ - List split(long numSplits, long minVal, long maxVal) - throws SQLException { - - List splits = new ArrayList(); - - // We take the min-max interval and divide by the numSplits and also - // calculate a remainder. Because of integer division rules, numsplits * - // splitSize + minVal will always be <= maxVal. We then use the remainder - // and add 1 if the current split index is less than the < the remainder. - // This is guaranteed to add up to remainder and not surpass the value. - long splitSize = (maxVal - minVal) / numSplits; - long remainder = (maxVal - minVal) % numSplits; - long curVal = minVal; - - // This will honor numSplits as long as split size > 0. If split size is - // 0, it will have remainder splits. - for (int i = 0; i <= numSplits; i++) { - splits.add(curVal); - if (curVal >= maxVal) { - break; - } - curVal += splitSize; - curVal += (i < remainder) ? 1 : 0; - } - - if (splits.size() == 1) { - // make a valid singleton split - splits.add(maxVal); - } - - return splits; - } + org.apache.sqoop.mapreduce.db.IntegerSplitter.LOG; } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDBRecordReader.java b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDBRecordReader.java index 112d1a7e..281d9eea 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDBRecordReader.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDBRecordReader.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,29 +15,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; -import java.lang.reflect.Method; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.lib.db.DBWritable; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; /** * A RecordReader that reads records from an Oracle SQL table. + * @deprecated use org.apache.sqoop.mapreduce.db.OracleDBRecordReader instead. + * @see org.apache.sqoop.mapreduce.db.OracleDBRecordReader */ -public class OracleDBRecordReader - extends DBRecordReader { +public class OracleDBRecordReader extends + org.apache.sqoop.mapreduce.db.OracleDBRecordReader { /** Configuration key to set to a timezone string. 
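* For example (the zone value is illustrative):
* conf.set(OracleDBRecordReader.SESSION_TIMEZONE_KEY, "GMT");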
*/ - public static final String SESSION_TIMEZONE_KEY = "oracle.sessionTimeZone"; - - private static final Log LOG = LogFactory.getLog(OracleDBRecordReader.class); + public static final String SESSION_TIMEZONE_KEY = + org.apache.sqoop.mapreduce.db.OracleDBRecordReader.SESSION_TIMEZONE_KEY; // CHECKSTYLE:OFF public OracleDBRecordReader(DBInputFormat.DBInputSplit split, @@ -47,61 +41,9 @@ public OracleDBRecordReader(DBInputFormat.DBInputSplit split, DBConfiguration dbConfig, String cond, String [] fields, String table) throws SQLException { super(split, inputClass, conf, conn, dbConfig, cond, fields, table); - setSessionTimeZone(conf, conn); } // CHECKSTYLE:ON - /** Returns the query for selecting the records from an Oracle DB. */ - protected String getSelectQuery() { - StringBuilder query = new StringBuilder(); - DBConfiguration dbConf = getDBConf(); - String conditions = getConditions(); - String tableName = getTableName(); - String [] fieldNames = getFieldNames(); - - // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET. - if(dbConf.getInputQuery() == null) { - query.append("SELECT "); - - for (int i = 0; i < fieldNames.length; i++) { - query.append(fieldNames[i]); - if (i != fieldNames.length -1) { - query.append(", "); - } - } - - query.append(" FROM ").append(tableName); - if (conditions != null && conditions.length() > 0) { - query.append(" WHERE ").append(conditions); - } - String orderBy = dbConf.getInputOrderBy(); - if (orderBy != null && orderBy.length() > 0) { - query.append(" ORDER BY ").append(orderBy); - } - } else { - //PREBUILT QUERY - query.append(dbConf.getInputQuery()); - } - - try { - DBInputFormat.DBInputSplit split = getSplit(); - if (split.getLength() > 0 && split.getStart() > 0) { - String querystring = query.toString(); - - query = new StringBuilder(); - query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( "); - query.append(querystring); - query.append(" ) a WHERE rownum <= ").append(split.getStart()); - query.append(" + ").append(split.getLength()); - query.append(" ) WHERE dbif_rno >= ").append(split.getStart()); - } - } catch (IOException ex) { - // ignore, will not throw. - } - - return query.toString(); - } - /** * Set session time zone. * @param conf The current configuration. @@ -110,41 +52,7 @@ protected String getSelectQuery() { */ public static void setSessionTimeZone(Configuration conf, Connection conn) throws SQLException { - // need to use reflection to call the method setSessionTimeZone on - // the OracleConnection class because oracle specific java libraries are - // not accessible in this context. - Method method; - try { - method = conn.getClass().getMethod( - "setSessionTimeZone", new Class [] {String.class}); - } catch (Exception ex) { - LOG.error("Could not find method setSessionTimeZone in " - + conn.getClass().getName(), ex); - // rethrow SQLException - throw new SQLException(ex); - } - - // Need to set the time zone in order for Java - // to correctly access the column "TIMESTAMP WITH LOCAL TIME ZONE". - // We can't easily get the correct Oracle-specific timezone string - // from Java; just let the user set the timezone in a property. 
- String clientTimeZone = conf.get(SESSION_TIMEZONE_KEY, "GMT"); - try { - method.setAccessible(true); - method.invoke(conn, clientTimeZone); - LOG.info("Time zone has been set to " + clientTimeZone); - } catch (Exception ex) { - LOG.warn("Time zone " + clientTimeZone - + " could not be set on Oracle database."); - LOG.warn("Setting default time zone: GMT"); - try { - // "GMT" timezone is guaranteed to exist. - method.invoke(conn, "GMT"); - } catch (Exception ex2) { - LOG.error("Could not set time zone for oracle connection", ex2); - // rethrow SQLException - throw new SQLException(ex); - } - } + org.apache.sqoop.mapreduce.db.OracleDBRecordReader.setSessionTimeZone( + conf, conn); } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java index 2e9bc382..05fb87a8 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,57 +15,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.io.IOException; -import java.sql.SQLException; -import java.sql.Types; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.lib.db.DBWritable; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; /** * A InputFormat that reads input data from an SQL table in an Oracle db. + * + * @deprecated use org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat + * instead. + * @see org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat */ public class OracleDataDrivenDBInputFormat - extends DataDrivenDBInputFormat implements Configurable { - - /** - * @return the DBSplitter implementation to use to divide the table/query - * into InputSplits. 
- */ - @Override - protected DBSplitter getSplitter(int sqlDataType) { - switch (sqlDataType) { - case Types.DATE: - case Types.TIME: - case Types.TIMESTAMP: - return new OracleDateSplitter(); - - default: - return super.getSplitter(sqlDataType); - } - } - - @Override - protected RecordReader createDBRecordReader( - DBInputSplit split, Configuration conf) throws IOException { - - DBConfiguration dbConf = getDBConf(); - @SuppressWarnings("unchecked") - Class inputClass = (Class) (dbConf.getInputClass()); - - try { - // Use Oracle-specific db reader - return new OracleDataDrivenDBRecordReader(split, inputClass, - conf, getConnection(), dbConf, dbConf.getInputConditions(), - dbConf.getInputFieldNames(), dbConf.getInputTableName()); - } catch (SQLException ex) { - throw new IOException(ex); - } - } + extends org.apache.sqoop.mapreduce.db.OracleDataDrivenDBInputFormat { } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java index 10cb3831..3bc92e7f 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; import java.sql.Connection; @@ -29,9 +26,13 @@ /** * A RecordReader that reads records from a Oracle table * via DataDrivenDBRecordReader. + * + * @deprecated use org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader + * instead. + * @see org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader */ public class OracleDataDrivenDBRecordReader - extends DataDrivenDBRecordReader { + extends org.apache.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader { // CHECKSTYLE:OFF // TODO(aaron): Enable checkstyle after refactoring DBRecordReader c'tor. @@ -40,11 +41,7 @@ public OracleDataDrivenDBRecordReader(DBInputFormat.DBInputSplit split, DBConfiguration dbConfig, String cond, String [] fields, String table) throws SQLException { - super(split, inputClass, conf, conn, dbConfig, cond, fields, table, - "ORACLE"); - - // Must initialize the tz used by the connection for Oracle. - OracleDBRecordReader.setSessionTimeZone(conf, conn); + super(split, inputClass, conf, conn, dbConfig, cond, fields, table); } // CHECKSTYLE:ON } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDateSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDateSplitter.java index 542a33d0..3e1af336 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/OracleDateSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/OracleDateSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,24 +15,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.util.Date; - - /** * Implement DBSplitter over date/time values returned by an Oracle db. 
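* (A bound rendered by this splitter takes the form
* TO_TIMESTAMP('2011-10-28 16:50:39.0', 'YYYY-MM-DD HH24:MI:SS.FF'),
* where the timestamp shown is illustrative.)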
* Make use of logic from DateSplitter, since this just needs to use * some Oracle-specific functions on the formatting end when generating * InputSplits. + * + * @deprecated use org.apache.sqoop.mapreduce.db.OracleDateSplitter instead. + * @see org.apache.sqoop.mapreduce.db.OracleDateSplitter */ -public class OracleDateSplitter extends DateSplitter { +public class OracleDateSplitter + extends org.apache.sqoop.mapreduce.db.OracleDateSplitter { - @SuppressWarnings("unchecked") - @Override - protected String dateToString(Date d) { - // Oracle Date objects are always actually Timestamps - return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')"; - } } diff --git a/src/java/com/cloudera/sqoop/mapreduce/db/TextSplitter.java b/src/java/com/cloudera/sqoop/mapreduce/db/TextSplitter.java index bcbca959..f1bd9d12 100644 --- a/src/java/com/cloudera/sqoop/mapreduce/db/TextSplitter.java +++ b/src/java/com/cloudera/sqoop/mapreduce/db/TextSplitter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,213 +15,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.mapreduce.db; -import java.math.BigDecimal; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; - -import com.cloudera.sqoop.config.ConfigurationHelper; /** * Implement DBSplitter over text strings. + * + * @deprecated use org.apache.sqoop.mapreduce.db.TextSplitter instead. + * @see org.apache.sqoop.mapreduce.db.TextSplitter */ -public class TextSplitter extends BigDecimalSplitter { +public class TextSplitter extends org.apache.sqoop.mapreduce.db.TextSplitter { - private static final Log LOG = LogFactory.getLog(TextSplitter.class); - - /** - * This method needs to determine the splits between two user-provided - * strings. In the case where the user's strings are 'A' and 'Z', this is - * not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26 - * splits for strings beginning with each letter, etc. - * - * If a user has provided us with the strings "Ham" and "Haze", however, we - * need to create splits that differ in the third letter. - * - * The algorithm used is as follows: - * Since there are 2**16 unicode characters, we interpret characters as - * digits in base 65536. Given a string 's' containing characters s_0, s_1 - * .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in - * base 65536. Having mapped the low and high strings into floating-point - * values, we then use the BigDecimalSplitter to establish the even split - * points, then map the resulting floating point values back into strings.
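*
* As a worked illustration: 'A' is code point 65, so the string "A"
* encodes to 65/65536 (roughly 0.000992) and "AA" encodes to
* 65/65536 + 65/65536^2. Splitting between "Ham" and "Haze" thereby
* reduces to splitting the numeric interval between their encodings and
* decoding the chosen points back into strings.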
- */ - public List split(Configuration conf, ResultSet results, - String colName) throws SQLException { - - LOG.warn("Generating splits for a textual index column."); - LOG.warn("If your database sorts in a case-insensitive order, " - + "this may result in a partial import or duplicate records."); - LOG.warn("You are strongly encouraged to choose an integral split column."); - - String minString = results.getString(1); - String maxString = results.getString(2); - - boolean minIsNull = false; - - // If the min value is null, switch it to an empty string instead for - // purposes of interpolation. Then add [null, null] as a special case - // split. - if (null == minString) { - minString = ""; - minIsNull = true; - } - - if (null == maxString) { - // If the max string is null, then the min string has to be null too. - // Just return a special split for this case. - List splits = new ArrayList(); - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - return splits; - } - - // Use this as a hint. May need an extra task if the size doesn't - // divide cleanly. - int numSplits = ConfigurationHelper.getConfNumMaps(conf); - - String lowClausePrefix = colName + " >= '"; - String highClausePrefix = colName + " < '"; - - // If there is a common prefix between minString and maxString, establish - // it and pull it out of minString and maxString. - int maxPrefixLen = Math.min(minString.length(), maxString.length()); - int sharedLen; - for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) { - char c1 = minString.charAt(sharedLen); - char c2 = maxString.charAt(sharedLen); - if (c1 != c2) { - break; - } - } - - // The common prefix has length 'sharedLen'. Extract it from both. - String commonPrefix = minString.substring(0, sharedLen); - minString = minString.substring(sharedLen); - maxString = maxString.substring(sharedLen); - - List splitStrings = split(numSplits, minString, maxString, - commonPrefix); - List splits = new ArrayList(); - - // Convert the list of split point strings into an actual set of - // InputSplits. - String start = splitStrings.get(0); - for (int i = 1; i < splitStrings.size(); i++) { - String end = splitStrings.get(i); - - if (i == splitStrings.size() - 1) { - // This is the last one; use a closed interval. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + start + "'", colName + " <= '" + end + "'")); - } else { - // Normal open-interval case. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - lowClausePrefix + start + "'", highClausePrefix + end + "'")); - } - - start = end; - } - - if (minIsNull) { - // Add the special null split at the end. - splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( - colName + " IS NULL", colName + " IS NULL")); - } - - return splits; - } - - List split(int numSplits, String minString, String maxString, - String commonPrefix) throws SQLException { - - BigDecimal minVal = stringToBigDecimal(minString); - BigDecimal maxVal = stringToBigDecimal(maxString); - - List splitPoints = split( - new BigDecimal(numSplits), minVal, maxVal); - List splitStrings = new ArrayList(); - - // Convert the BigDecimal splitPoints into their string representations. - for (BigDecimal bd : splitPoints) { - splitStrings.add(commonPrefix + bigDecimalToString(bd)); - } - - // Make sure that our user-specified boundaries are the first and last - // entries in the array. 
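// (E.g. with the min "Ham" and max "Haze" from the example above, this
// step guarantees "Ham" is the first entry and "Haze" the last, even if
// rounding while decoding nudged the computed endpoints.)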
- if (splitStrings.size() == 0 - || !splitStrings.get(0).equals(commonPrefix + minString)) { - splitStrings.add(0, commonPrefix + minString); - } - if (splitStrings.size() == 1 - || !splitStrings.get(splitStrings.size() - 1).equals( - commonPrefix + maxString)) { - splitStrings.add(commonPrefix + maxString); - } - - return splitStrings; - } - - private static final BigDecimal ONE_PLACE = new BigDecimal(65536); - - // Maximum number of characters to convert. This is to prevent rounding - // errors or repeating fractions near the very bottom from getting out of - // control. Note that this still gives us a huge number of possible splits. - private static final int MAX_CHARS = 8; - - /** - * Return a BigDecimal representation of string 'str' suitable for use in a - * numerically-sorting order. - */ - BigDecimal stringToBigDecimal(String str) { - // Start with 1/65536 to compute the first digit. - BigDecimal curPlace = ONE_PLACE; - BigDecimal result = BigDecimal.ZERO; - - int len = Math.min(str.length(), MAX_CHARS); - - for (int i = 0; i < len; i++) { - int codePoint = str.codePointAt(i); - result = result.add(tryDivide(new BigDecimal(codePoint), curPlace)); - // advance to the next less significant place. e.g., 1/(65536^2) for the - // second char. - curPlace = curPlace.multiply(ONE_PLACE); - } - - return result; - } - - /** - * Return the string encoded in a BigDecimal. - * Repeatedly multiply the input value by 65536; the integer portion after - * such a multiplication represents a single character in base 65536. - * Convert that back into a char and create a string out of these until we - * have no data left. - */ - String bigDecimalToString(BigDecimal bd) { - BigDecimal cur = bd.stripTrailingZeros(); - StringBuilder sb = new StringBuilder(); - - for (int numConverted = 0; numConverted < MAX_CHARS; numConverted++) { - cur = cur.multiply(ONE_PLACE); - int curCodePoint = cur.intValue(); - if (0 == curCodePoint) { - break; - } - - cur = cur.subtract(new BigDecimal(curCodePoint)); - sb.append(Character.toChars(curCodePoint)); - } - - return sb.toString(); - } } diff --git a/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java new file mode 100644 index 00000000..7e3048a8 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/BigDecimalSplitter.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.sqoop.mapreduce.db; + +import java.math.BigDecimal; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DBSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; + +/** + * Implement DBSplitter over BigDecimal values. + */ +public class BigDecimalSplitter implements DBSplitter { + private static final Log LOG = LogFactory.getLog(BigDecimalSplitter.class); + + public List split(Configuration conf, ResultSet results, + String colName) throws SQLException { + + BigDecimal minVal = results.getBigDecimal(1); + BigDecimal maxVal = results.getBigDecimal(2); + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + BigDecimal numSplits = new BigDecimal( + ConfigurationHelper.getConfNumMaps(conf)); + + if (minVal == null && maxVal == null) { + // Range is null to null. Return a null split accordingly. + List splits = new ArrayList(); + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return splits; + } + + if (minVal == null || maxVal == null) { + // Don't know what is a reasonable min/max value for interpolation. Fail. + LOG.error("Cannot find a range for NUMERIC or DECIMAL " + + "fields with one end NULL."); + return null; + } + + // Get all the split points together. + List splitPoints = split(numSplits, minVal, maxVal); + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + BigDecimal start = splitPoints.get(0); + for (int i = 1; i < splitPoints.size(); i++) { + BigDecimal end = splitPoints.get(i); + + if (i == splitPoints.size() - 1) { + // This is the last one; use a closed interval. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start.toString(), + colName + " <= " + end.toString())); + } else { + // Normal open-interval case. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + start.toString(), + highClausePrefix + end.toString())); + } + + start = end; + } + + return splits; + } + + private static final BigDecimal MIN_INCREMENT = + new BigDecimal(10000 * Double.MIN_VALUE); + + /** + * Divide numerator by denominator. If impossible in exact mode, use rounding. + */ + protected BigDecimal tryDivide(BigDecimal numerator, BigDecimal denominator) { + try { + return numerator.divide(denominator); + } catch (ArithmeticException ae) { + return numerator.divide(denominator, BigDecimal.ROUND_HALF_UP); + } + } + + /** + * Returns a list of BigDecimals one element longer than the list of input + * splits. This represents the boundaries between input splits. All splits + * are open on the top end, except the last one. + * + * So the list [0, 5, 8, 12, 18] would represent splits capturing the + * intervals: + * + * [0, 5) + * [5, 8) + * [8, 12) + * [12, 18] note the closed interval for the last split. + */ + protected List split(BigDecimal numSplits, BigDecimal minVal, + BigDecimal maxVal) throws SQLException { + + List splits = new ArrayList(); + + // Use numSplits as a hint. May need an extra task if the size doesn't + // divide cleanly. 
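+ // For example (illustrative values): min=0, max=10, numSplits=4 gives
+ // splitSize 2.5 and boundaries [0, 2.5, 5, 7.5, 10]; the interval
+ // construction described above then yields four splits, the last one
+ // closed.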
+ + BigDecimal splitSize = tryDivide(maxVal.subtract(minVal), (numSplits)); + if (splitSize.compareTo(MIN_INCREMENT) < 0) { + splitSize = MIN_INCREMENT; + LOG.warn("Set BigDecimal splitSize to MIN_INCREMENT"); + } + + BigDecimal curVal = minVal; + + while (curVal.compareTo(maxVal) <= 0) { + splits.add(curVal); + curVal = curVal.add(splitSize); + } + + if (splits.get(splits.size() - 1).compareTo(maxVal) != 0 + || splits.size() == 1) { + // We didn't end on the maxVal. Add that to the end of the list. + splits.add(maxVal); + } + + return splits; + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/BooleanSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/BooleanSplitter.java new file mode 100644 index 00000000..122bb8fc --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/BooleanSplitter.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +import com.cloudera.sqoop.mapreduce.db.DBSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; + +/** + * Implement DBSplitter over boolean values. + */ +public class BooleanSplitter implements DBSplitter { + public List split(Configuration conf, ResultSet results, + String colName) throws SQLException { + + List splits = new ArrayList(); + + if (results.getString(1) == null && results.getString(2) == null) { + // Range is null to null. Return a null split accordingly. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return splits; + } + + boolean minVal = results.getBoolean(1); + boolean maxVal = results.getBoolean(2); + + // Use one or two splits. + if (!minVal) { + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " = FALSE", colName + " = FALSE")); + } + + if (maxVal) { + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " = TRUE", colName + " = TRUE")); + } + + if (results.getString(1) == null || results.getString(2) == null) { + // Include a null value. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + + return splits; + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBConfiguration.java b/src/java/org/apache/sqoop/mapreduce/db/DBConfiguration.java new file mode 100644 index 00000000..7a57d8bc --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DBConfiguration.java @@ -0,0 +1,310 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; + +import com.cloudera.sqoop.mapreduce.db.DBInputFormat.NullDBWritable; +import com.cloudera.sqoop.mapreduce.db.DBOutputFormat; + +/** + * A container for configuration property names for jobs with DB input/output. + * + * The job can be configured using the static methods in this class, + * {@link DBInputFormat}, and {@link DBOutputFormat}. + * Alternatively, the properties can be set in the configuration with proper + * values. + * + * @see DBConfiguration#configureDB(Configuration, String, String, String, + * String) + * @see DBInputFormat#setInput(Job, Class, String, String) + * @see DBInputFormat#setInput(Job, Class, String, String, String, String...) + * @see DBOutputFormat#setOutput(Job, String, String...) + */ +public class DBConfiguration { + + /** The JDBC Driver class name. */ + public static final String DRIVER_CLASS_PROPERTY = + "mapreduce.jdbc.driver.class"; + + /** JDBC Database access URL. */ + public static final String URL_PROPERTY = "mapreduce.jdbc.url"; + + /** User name to access the database. */ + public static final String USERNAME_PROPERTY = "mapreduce.jdbc.username"; + + /** Password to access the database. */ + public static final String PASSWORD_PROPERTY = "mapreduce.jdbc.password"; + + /** Fetch size. */ + public static final String FETCH_SIZE = "mapreduce.jdbc.fetchsize"; + + /** Input table name. */ + public static final String INPUT_TABLE_NAME_PROPERTY = + "mapreduce.jdbc.input.table.name"; + + /** Field names in the Input table. */ + public static final String INPUT_FIELD_NAMES_PROPERTY = + "mapreduce.jdbc.input.field.names"; + + /** WHERE clause in the input SELECT statement. */ + public static final String INPUT_CONDITIONS_PROPERTY = + "mapreduce.jdbc.input.conditions"; + + /** ORDER BY clause in the input SELECT statement. */ + public static final String INPUT_ORDER_BY_PROPERTY = + "mapreduce.jdbc.input.orderby"; + + /** Whole input query, excluding LIMIT...OFFSET. */ + public static final String INPUT_QUERY = "mapreduce.jdbc.input.query"; + + /** Input query to get the count of records. */ + public static final String INPUT_COUNT_QUERY = + "mapreduce.jdbc.input.count.query"; + + /** Input query to get the max and min values of the jdbc.input.query. */ + public static final String INPUT_BOUNDING_QUERY = + "mapred.jdbc.input.bounding.query"; + + /** Class name implementing DBWritable which will hold input tuples. */ + public static final String INPUT_CLASS_PROPERTY = + "mapreduce.jdbc.input.class"; + + /** Output table name.
*/ + public static final String OUTPUT_TABLE_NAME_PROPERTY = + "mapreduce.jdbc.output.table.name"; + + /** Field names in the Output table. */ + public static final String OUTPUT_FIELD_NAMES_PROPERTY = + "mapreduce.jdbc.output.field.names"; + + /** Number of fields in the Output table. */ + public static final String OUTPUT_FIELD_COUNT_PROPERTY = + "mapreduce.jdbc.output.field.count"; + + /** + * Sets the DB access related fields in the {@link Configuration}. + * @param conf the configuration + * @param driverClass JDBC Driver class name + * @param dbUrl JDBC DB access URL + * @param userName DB access username + * @param passwd DB access passwd + * @param fetchSize DB fetch size + */ + public static void configureDB(Configuration conf, String driverClass, + String dbUrl, String userName, String passwd, Integer fetchSize) { + + conf.set(DRIVER_CLASS_PROPERTY, driverClass); + conf.set(URL_PROPERTY, dbUrl); + if (userName != null) { + conf.set(USERNAME_PROPERTY, userName); + } + if (passwd != null) { + conf.set(PASSWORD_PROPERTY, passwd); + } + if (fetchSize != null) { + conf.setInt(FETCH_SIZE, fetchSize); + } + } + + /** + * Sets the DB access related fields in the JobConf. + * @param job the job + * @param driverClass JDBC Driver class name + * @param dbUrl JDBC DB access URL + * @param fetchSize DB fetch size + */ + public static void configureDB(Configuration job, String driverClass, + String dbUrl, Integer fetchSize) { + configureDB(job, driverClass, dbUrl, null, null, fetchSize); + } + + /** + * Sets the DB access related fields in the {@link Configuration}. + * @param conf the configuration + * @param driverClass JDBC Driver class name + * @param dbUrl JDBC DB access URL + * @param userName DB access username + * @param passwd DB access passwd + */ + public static void configureDB(Configuration conf, String driverClass, + String dbUrl, String userName, String passwd) { + configureDB(conf, driverClass, dbUrl, userName, passwd, null); + } + + /** + * Sets the DB access related fields in the JobConf. + * @param job the job + * @param driverClass JDBC Driver class name + * @param dbUrl JDBC DB access URL. + */ + public static void configureDB(Configuration job, String driverClass, + String dbUrl) { + configureDB(job, driverClass, dbUrl, null); + } + + + private Configuration conf; + + public DBConfiguration(Configuration job) { + this.conf = job; + } + + /** Returns a connection object to the DB. 
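+ * The driver class named by DRIVER_CLASS_PROPERTY is loaded with
+ * Class.forName(), and the connection is obtained from DriverManager,
+ * passing credentials when USERNAME_PROPERTY is set. A minimal usage
+ * sketch:
+ *
+ *   DBConfiguration dbConf = new DBConfiguration(conf);
+ *   Connection conn = dbConf.getConnection();
+ *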
+ * @throws ClassNotFoundException + * @throws SQLException */ + public Connection getConnection() + throws ClassNotFoundException, SQLException { + + Class.forName(conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY)); + + if(conf.get(DBConfiguration.USERNAME_PROPERTY) == null) { + return DriverManager.getConnection( + conf.get(DBConfiguration.URL_PROPERTY)); + } else { + return DriverManager.getConnection( + conf.get(DBConfiguration.URL_PROPERTY), + conf.get(DBConfiguration.USERNAME_PROPERTY), + conf.get(DBConfiguration.PASSWORD_PROPERTY)); + } + } + + public Configuration getConf() { + return conf; + } + + public Integer getFetchSize() { + if (conf.get(DBConfiguration.FETCH_SIZE) == null) { + return null; + } + return conf.getInt(DBConfiguration.FETCH_SIZE, 0); + } + + public void setFetchSize(Integer fetchSize) { + if (fetchSize != null) { + conf.setInt(DBConfiguration.FETCH_SIZE, fetchSize); + } else { + conf.set(FETCH_SIZE, null); + } + } + public String getInputTableName() { + return conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY); + } + + public void setInputTableName(String tableName) { + conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, tableName); + } + + public String[] getInputFieldNames() { + return conf.getStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY); + } + + public void setInputFieldNames(String... fieldNames) { + conf.setStrings(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, fieldNames); + } + + public String getInputConditions() { + return conf.get(DBConfiguration.INPUT_CONDITIONS_PROPERTY); + } + + public void setInputConditions(String conditions) { + if (conditions != null && conditions.length() > 0) { + conf.set(DBConfiguration.INPUT_CONDITIONS_PROPERTY, conditions); + } + } + + public String getInputOrderBy() { + return conf.get(DBConfiguration.INPUT_ORDER_BY_PROPERTY); + } + + public void setInputOrderBy(String orderby) { + if(orderby != null && orderby.length() >0) { + conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, orderby); + } + } + + public String getInputQuery() { + return conf.get(DBConfiguration.INPUT_QUERY); + } + + public void setInputQuery(String query) { + if(query != null && query.length() >0) { + conf.set(DBConfiguration.INPUT_QUERY, query); + } + } + + public String getInputCountQuery() { + return conf.get(DBConfiguration.INPUT_COUNT_QUERY); + } + + public void setInputCountQuery(String query) { + if(query != null && query.length() > 0) { + conf.set(DBConfiguration.INPUT_COUNT_QUERY, query); + } + } + + public void setInputBoundingQuery(String query) { + if (query != null && query.length() > 0) { + conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query); + } + } + + public String getInputBoundingQuery() { + return conf.get(DBConfiguration.INPUT_BOUNDING_QUERY); + } + + public Class getInputClass() { + return conf.getClass(DBConfiguration.INPUT_CLASS_PROPERTY, + NullDBWritable.class); + } + + public void setInputClass(Class inputClass) { + conf.setClass(DBConfiguration.INPUT_CLASS_PROPERTY, inputClass, + DBWritable.class); + } + + public String getOutputTableName() { + return conf.get(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY); + } + + public void setOutputTableName(String tableName) { + conf.set(DBConfiguration.OUTPUT_TABLE_NAME_PROPERTY, tableName); + } + + public String[] getOutputFieldNames() { + return conf.getStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY); + } + + public void setOutputFieldNames(String... 
fieldNames) { + conf.setStrings(DBConfiguration.OUTPUT_FIELD_NAMES_PROPERTY, fieldNames); + } + + public void setOutputFieldCount(int fieldCount) { + conf.setInt(DBConfiguration.OUTPUT_FIELD_COUNT_PROPERTY, fieldCount); + } + + public int getOutputFieldCount() { + return conf.getInt(OUTPUT_FIELD_COUNT_PROPERTY, 0); + } + +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DBInputFormat.java new file mode 100644 index 00000000..e35bdee0 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DBInputFormat.java @@ -0,0 +1,363 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DBConfiguration; +import com.cloudera.sqoop.mapreduce.db.DBRecordReader; +import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader; + +/** + * An InputFormat that reads input data from an SQL table. + *
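Before moving into DBInputFormat proper, here is a minimal sketch of how a client might wire up the DBConfiguration above. It is an illustration only: the driver class, URL, credentials, table, and field names are hypothetical placeholders, not values taken from this patch.

    import java.sql.Connection;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.sqoop.mapreduce.db.DBConfiguration;

    public class DBConfigurationSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Stores driver class, URL, credentials, and a 1000-row fetch size
        // under the properties defined above.
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
            "jdbc:mysql://db.example.com/sales", "sqoop_user", "sqoop_pw", 1000);

        DBConfiguration dbConf = new DBConfiguration(conf);
        dbConf.setInputTableName("orders");
        dbConf.setInputFieldNames("id", "customer", "total");

        // getConnection() loads the driver via Class.forName() and opens a
        // JDBC connection with the stored URL and credentials.
        Connection conn = dbConf.getConnection();
        conn.close();
      }
    }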

+ * DBInputFormat emits LongWritables containing the record number as + * key and DBWritables as value. + * + * The SQL query and input class can be specified using one of the two + * setInput methods. + */ +public class DBInputFormat +extends InputFormat implements Configurable { + + + private String dbProductName = "DEFAULT"; + + /** + * A Class that does nothing, implementing DBWritable. + */ + public static class NullDBWritable implements DBWritable, Writable { + @Override + public void readFields(DataInput in) throws IOException { } + @Override + public void readFields(ResultSet arg0) throws SQLException { } + @Override + public void write(DataOutput out) throws IOException { } + @Override + public void write(PreparedStatement arg0) throws SQLException { } + } + + /** + * An InputSplit that spans a set of rows. + */ + public static class DBInputSplit extends InputSplit implements Writable { + + private long end = 0; + private long start = 0; + + /** + * Default Constructor. + */ + public DBInputSplit() { + } + + /** + * Convenience Constructor. + * @param start the index of the first row to select + * @param end the index of the last row to select + */ + public DBInputSplit(long start, long end) { + this.start = start; + this.end = end; + } + + @Override + /** {@inheritDoc} */ + public String[] getLocations() throws IOException { + // TODO Add a layer to enable SQL "sharding" and support locality + return new String[] {}; + } + + /** + * @return The index of the first row to select + */ + public long getStart() { + return start; + } + + /** + * @return The index of the last row to select + */ + public long getEnd() { + return end; + } + + /** + * @return The total row count in this split + */ + public long getLength() throws IOException { + return end - start; + } + + @Override + /** {@inheritDoc} */ + public void readFields(DataInput input) throws IOException { + start = input.readLong(); + end = input.readLong(); + } + + @Override + /** {@inheritDoc} */ + public void write(DataOutput output) throws IOException { + output.writeLong(start); + output.writeLong(end); + } + } + + private String conditions; + + private Connection connection; + + private String tableName; + + private String[] fieldNames; + + private DBConfiguration dbConf; + + @Override + /** {@inheritDoc} */ + public void setConf(Configuration conf) { + + dbConf = new DBConfiguration(conf); + + try { + getConnection(); + + DatabaseMetaData dbMeta = connection.getMetaData(); + this.dbProductName = dbMeta.getDatabaseProductName().toUpperCase(); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + tableName = dbConf.getInputTableName(); + fieldNames = dbConf.getInputFieldNames(); + conditions = dbConf.getInputConditions(); + } + + public Configuration getConf() { + return dbConf.getConf(); + } + + public DBConfiguration getDBConf() { + return dbConf; + } + + public Connection getConnection() { + try { + if (null == this.connection) { + // The connection was closed; reinstantiate it.
+ this.connection = dbConf.getConnection(); + this.connection.setAutoCommit(false); + this.connection.setTransactionIsolation( + Connection.TRANSACTION_READ_COMMITTED); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + return connection; + } + + public String getDBProductName() { + return dbProductName; + } + + protected RecordReader createDBRecordReader( + com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit split, + Configuration conf) throws IOException { + + @SuppressWarnings("unchecked") + Class inputClass = (Class) (dbConf.getInputClass()); + try { + // use database product name to determine appropriate record reader. + if (dbProductName.startsWith("ORACLE")) { + // use Oracle-specific db reader. + return new OracleDBRecordReader(split, inputClass, + conf, getConnection(), getDBConf(), conditions, fieldNames, + tableName); + } else { + // Generic reader. + return new DBRecordReader(split, inputClass, + conf, getConnection(), getDBConf(), conditions, fieldNames, + tableName); + } + } catch (SQLException ex) { + throw new IOException(ex); + } + } + + @Override + /** {@inheritDoc} */ + public RecordReader createRecordReader(InputSplit split, + TaskAttemptContext context) throws IOException, InterruptedException { + + return createDBRecordReader( + (com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit) split, + context.getConfiguration()); + } + + /** {@inheritDoc} */ + @Override + public List getSplits(JobContext job) throws IOException { + + ResultSet results = null; + Statement statement = null; + try { + statement = connection.createStatement(); + + results = statement.executeQuery(getCountQuery()); + results.next(); + + long count = results.getLong(1); + int chunks = ConfigurationHelper.getJobNumMaps(job); + long chunkSize = (count / chunks); + + results.close(); + statement.close(); + + List splits = new ArrayList(); + + // Split the rows into n-number of chunks and adjust the last chunk + // accordingly + for (int i = 0; i < chunks; i++) { + DBInputSplit split; + + if ((i + 1) == chunks) { + split = new DBInputSplit(i * chunkSize, count); + } else { + split = new DBInputSplit(i * chunkSize, (i * chunkSize) + + chunkSize); + } + + splits.add(split); + } + + connection.commit(); + return splits; + } catch (SQLException e) { + throw new IOException("Got SQLException", e); + } finally { + try { + if (results != null) { results.close(); } + } catch (SQLException e1) { /* ignored */ } + try { + if (statement != null) { statement.close(); } + } catch (SQLException e1) { /* ignored */ } + + closeConnection(); + } + } + + /** Returns the query for getting the total number of rows; + * subclasses can override this for custom behaviour.*/ + protected String getCountQuery() { + + if(dbConf.getInputCountQuery() != null) { + return dbConf.getInputCountQuery(); + } + + StringBuilder query = new StringBuilder(); + query.append("SELECT COUNT(*) FROM " + tableName); + + if (conditions != null && conditions.length() > 0) { + query.append(" WHERE " + conditions); + } + return query.toString(); + } + + /** + * Initializes the map-part of the job with the appropriate input settings. + * + * @param job The map-reduce job + * @param inputClass the class object implementing DBWritable, which is the + * Java object holding tuple fields. + * @param tableName The table to read data from + * @param conditions The conditions with which to select data, + * e.g. '(updated > 20070101 AND length > 0)' + * @param orderBy the field names for the ORDER BY clause.
+ * @param fieldNames The field names in the table + * @see #setInput(Job, Class, String, String) + */ + public static void setInput(Job job, + Class inputClass, + String tableName, String conditions, + String orderBy, String... fieldNames) { + job.setInputFormatClass(DBInputFormat.class); + DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); + dbConf.setInputClass(inputClass); + dbConf.setInputTableName(tableName); + dbConf.setInputFieldNames(fieldNames); + dbConf.setInputConditions(conditions); + dbConf.setInputOrderBy(orderBy); + } + + /** + * Initializes the map-part of the job with the appropriate input settings. + * + * @param job The map-reduce job + * @param inputClass the class object implementing DBWritable, which is the + * Java object holding tuple fields. + * @param inputQuery the input query to select fields. Example : + * "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" + * @param inputCountQuery the input query that returns + * the number of records in the table. + * Example : "SELECT COUNT(f1) FROM Mytable" + * @see #setInput(Job, Class, String, String, String, String...) + */ + public static void setInput(Job job, + Class inputClass, + String inputQuery, String inputCountQuery) { + job.setInputFormatClass(DBInputFormat.class); + DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); + dbConf.setInputClass(inputClass); + dbConf.setInputQuery(inputQuery); + dbConf.setInputCountQuery(inputCountQuery); + } + + protected void closeConnection() { + try { + if (null != this.connection) { + this.connection.close(); + this.connection = null; + } + } catch (SQLException sqlE) { /* ignore exception on close. */ } + } + +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java new file mode 100644 index 00000000..47a80df9 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DBOutputFormat.java @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
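To make the chunking arithmetic in getSplits() concrete, here is a small self-contained sketch; the row count and map-task count are made-up numbers, not values taken from the patch.

    public class DBInputSplitArithmetic {
      public static void main(String[] args) {
        long count = 100; // hypothetical COUNT(*) result
        int chunks = 3;   // hypothetical number of map tasks
        long chunkSize = count / chunks; // 33
        for (int i = 0; i < chunks; i++) {
          long start = i * chunkSize;
          // The last chunk is stretched to the full row count.
          long end = (i + 1 == chunks) ? count : start + chunkSize;
          System.out.println("split [" + start + ", " + end + ")");
        }
        // Prints [0, 33), [33, 66), [66, 100): remainder rows land in the
        // final split, mirroring the loop in getSplits() above.
      }
    }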
+ */ +package org.apache.sqoop.mapreduce.db; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.StringUtils; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DBConfiguration; +import com.cloudera.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter; + +/** + * An OutputFormat that sends the reduce output to a SQL table. + *

+ * {@link DBOutputFormat} accepts <key,value> pairs, where + * key has a type extending DBWritable. Returned {@link RecordWriter} + * writes only the key to the database with a batch SQL query. + * + */ +public class DBOutputFormat + extends OutputFormat { + + + private static final Log LOG = LogFactory.getLog(DBOutputFormat.class); + public void checkOutputSpecs(JobContext context) + throws IOException, InterruptedException {} + + public OutputCommitter getOutputCommitter(TaskAttemptContext context) + throws IOException, InterruptedException { + return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), + context); + } + + /** + * Constructs the query used as the prepared statement to insert data. + * + * @param table + * the table to insert into + * @param fieldNames + * the fields to insert into. If field names are unknown, supply an + * array of nulls. + */ + public String constructQuery(String table, String[] fieldNames) { + if(fieldNames == null) { + throw new IllegalArgumentException("Field names may not be null"); + } + + StringBuilder query = new StringBuilder(); + query.append("INSERT INTO ").append(table); + + if (fieldNames.length > 0 && fieldNames[0] != null) { + query.append(" ("); + for (int i = 0; i < fieldNames.length; i++) { + query.append(fieldNames[i]); + if (i != fieldNames.length - 1) { + query.append(","); + } + } + query.append(")"); + } + query.append(" VALUES ("); + + for (int i = 0; i < fieldNames.length; i++) { + query.append("?"); + if(i != fieldNames.length - 1) { + query.append(","); + } + } + query.append(");"); + + return query.toString(); + } + + @Override + /** {@inheritDoc} */ + public RecordWriter getRecordWriter(TaskAttemptContext context) + throws IOException { + DBConfiguration dbConf = new DBConfiguration(context.getConfiguration()); + String tableName = dbConf.getOutputTableName(); + String[] fieldNames = dbConf.getOutputFieldNames(); + + if(fieldNames == null) { + fieldNames = new String[dbConf.getOutputFieldCount()]; + } + + try { + Connection connection = dbConf.getConnection(); + PreparedStatement statement = null; + + statement = connection.prepareStatement( + constructQuery(tableName, fieldNames)); + return new DBRecordWriter(connection, statement); + } catch (Exception ex) { + throw new IOException(ex); + } + } + + /** + * Initializes the reduce-part of the job with + * the appropriate output settings. + * + * @param job The job + * @param tableName The table to insert data into + * @param fieldNames The field names in the table. + */ + public static void setOutput(Job job, String tableName, + String... fieldNames) throws IOException { + if(fieldNames.length > 0 && fieldNames[0] != null) { + DBConfiguration dbConf = setOutput(job, tableName); + dbConf.setOutputFieldNames(fieldNames); + } else { + if (fieldNames.length > 0) { + setOutput(job, tableName, fieldNames.length); + } else { + throw new IllegalArgumentException( + "Field names must be greater than 0"); + } + } + } + + /** + * Initializes the reduce-part of the job + * with the appropriate output settings. + * + * @param job The job + * @param tableName The table to insert data into + * @param fieldCount the number of fields in the table. 
+ */ + public static void setOutput(Job job, String tableName, + int fieldCount) throws IOException { + DBConfiguration dbConf = setOutput(job, tableName); + dbConf.setOutputFieldCount(fieldCount); + } + + private static DBConfiguration setOutput(Job job, + String tableName) throws IOException { + job.setOutputFormatClass(DBOutputFormat.class); + ConfigurationHelper.setJobReduceSpeculativeExecution(job, false); + + DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); + + dbConf.setOutputTableName(tableName); + return dbConf; + } + + /** + * A RecordWriter that writes the reduce output to a SQL table. + */ + public static class DBRecordWriter + extends RecordWriter { + + private Connection connection; + private PreparedStatement statement; + + public DBRecordWriter() throws SQLException { + } + + public DBRecordWriter(Connection connection + , PreparedStatement statement) throws SQLException { + this.connection = connection; + this.statement = statement; + this.connection.setAutoCommit(false); + } + + public Connection getConnection() { + return connection; + } + + public PreparedStatement getStatement() { + return statement; + } + + @Override + /** {@inheritDoc} */ + public void close(TaskAttemptContext context) throws IOException { + try { + statement.executeBatch(); + connection.commit(); + } catch (SQLException e) { + try { + connection.rollback(); + } catch (SQLException ex) { + LOG.warn(StringUtils.stringifyException(ex)); + } + throw new IOException(e); + } finally { + try { + statement.close(); + connection.close(); + } catch (SQLException ex) { + LOG.error("Unable to close connection", ex); + } + } + } + + @Override + /** {@inheritDoc} */ + public void write(K key, V value) throws IOException { + try { + key.write(statement); + statement.addBatch(); + } catch (SQLException e) { + LOG.error("Exception encountered", e); + } + } + } + +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBRecordReader.java b/src/java/org/apache/sqoop/mapreduce/db/DBRecordReader.java new file mode 100644 index 00000000..85082b40 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DBRecordReader.java @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
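As a sketch of the reduce-side wiring and of the SQL that constructQuery() produces (table and column names are invented for illustration, and job is assumed to be an existing org.apache.hadoop.mapreduce.Job):

    // Hypothetical setup: write to table "orders" with three named columns.
    DBOutputFormat.setOutput(job, "orders", "id", "customer", "total");
    // constructQuery("orders", new String[] {"id", "customer", "total"})
    // builds:   INSERT INTO orders (id,customer,total) VALUES (?,?,?);
    // With only a field count -- setOutput(job, "orders", 3) -- the column
    // list is omitted:   INSERT INTO orders VALUES (?,?,?);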
+ */ +package org.apache.sqoop.mapreduce.db; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; +import org.apache.hadoop.util.ReflectionUtils; + +import com.cloudera.sqoop.mapreduce.db.DBConfiguration; +import com.cloudera.sqoop.mapreduce.db.DBInputFormat; +import com.cloudera.sqoop.util.LoggingUtils; + +/** + * A RecordReader that reads records from a SQL table. + * Emits LongWritables containing the record number as + * key and DBWritables as value. + */ +public class DBRecordReader extends + RecordReader { + + private static final Log LOG = LogFactory.getLog(DBRecordReader.class); + + private ResultSet results = null; + + private Class inputClass; + + private Configuration conf; + + private DBInputFormat.DBInputSplit split; + + private long pos = 0; + + private LongWritable key = null; + + private T value = null; + + private Connection connection; + + protected PreparedStatement statement; + + private DBConfiguration dbConf; + + private String conditions; + + private String [] fieldNames; + + private String tableName; + + /** + * @param split The InputSplit to read data for + * @throws SQLException + */ + // CHECKSTYLE:OFF + // TODO (aaron): Refactor constructor to take fewer arguments + public DBRecordReader(DBInputFormat.DBInputSplit split, + Class inputClass, Configuration conf, Connection conn, + DBConfiguration dbConfig, String cond, String [] fields, String table) + throws SQLException { + this.inputClass = inputClass; + this.split = split; + this.conf = conf; + this.connection = conn; + this.dbConf = dbConfig; + this.conditions = cond; + if (fields != null) { + this.fieldNames = Arrays.copyOf(fields, fields.length); + } + this.tableName = table; + } + // CHECKSTYLE:ON + + protected ResultSet executeQuery(String query) throws SQLException { + this.statement = connection.prepareStatement(query, + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + + Integer fetchSize = dbConf.getFetchSize(); + if (fetchSize != null) { + LOG.debug("Using fetchSize for next query: " + fetchSize); + statement.setFetchSize(fetchSize); + } + + LOG.debug("Executing query: " + query); + return statement.executeQuery(); + } + + /** Returns the query for selecting the records, + * subclasses can override this for custom behaviour.*/ + protected String getSelectQuery() { + StringBuilder query = new StringBuilder(); + + // Default codepath for MySQL, HSQLDB, etc. + // Relies on LIMIT/OFFSET for splits. 
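// As an illustration (hypothetical table and split, not from this patch):
// for a split covering rows [50, 75) of a table "orders" with fields id and
// customer, the branch below generates:
//   SELECT id, customer FROM orders AS orders LIMIT 25 OFFSET 50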
+ if(dbConf.getInputQuery() == null) { + query.append("SELECT "); + + for (int i = 0; i < fieldNames.length; i++) { + query.append(fieldNames[i]); + if (i != fieldNames.length -1) { + query.append(", "); + } + } + + query.append(" FROM ").append(tableName); + query.append(" AS ").append(tableName); //in hsqldb this is necessary + if (conditions != null && conditions.length() > 0) { + query.append(" WHERE (").append(conditions).append(")"); + } + + String orderBy = dbConf.getInputOrderBy(); + if (orderBy != null && orderBy.length() > 0) { + query.append(" ORDER BY ").append(orderBy); + } + } else { + //PREBUILT QUERY + query.append(dbConf.getInputQuery()); + } + + try { + query.append(" LIMIT ").append(split.getLength()); + query.append(" OFFSET ").append(split.getStart()); + } catch (IOException ex) { + // Ignore, will not throw. + } + + return query.toString(); + } + + @Override + /** {@inheritDoc} */ + public void close() throws IOException { + try { + if (null != results) { + results.close(); + } + if (null != statement) { + statement.close(); + } + if (null != connection) { + connection.commit(); + connection.close(); + } + } catch (SQLException e) { + throw new IOException(e); + } + } + + public void initialize(InputSplit inputSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + //do nothing + } + + @Override + /** {@inheritDoc} */ + public LongWritable getCurrentKey() { + return key; + } + + @Override + /** {@inheritDoc} */ + public T getCurrentValue() { + return value; + } + + /** + * @deprecated + */ + @Deprecated + public T createValue() { + return ReflectionUtils.newInstance(inputClass, conf); + } + + /** + * @deprecated + */ + @Deprecated + public long getPos() throws IOException { + return pos; + } + + /** + * @deprecated Use {@link #nextKeyValue()} + */ + @Deprecated + public boolean next(LongWritable k, T v) throws IOException { + this.key = k; + this.value = v; + return nextKeyValue(); + } + + @Override + /** {@inheritDoc} */ + public float getProgress() throws IOException { + return pos / (float)split.getLength(); + } + + @Override + /** {@inheritDoc} */ + public boolean nextKeyValue() throws IOException { + try { + if (key == null) { + key = new LongWritable(); + } + if (value == null) { + value = createValue(); + } + if (null == this.results) { + // First time into this method, run the query. + this.results = executeQuery(getSelectQuery()); + } + if (!results.next()) { + return false; + } + + // Set the key field value as the output key value + key.set(pos + split.getStart()); + + value.readFields(results); + + pos++; + } catch (SQLException e) { + LoggingUtils.logAll(LOG, e); + throw new IOException("SQLException in nextKeyValue", e); + } + return true; + } + + /** + * @return true if nextKeyValue() would return false. 
+ */ + protected boolean isDone() { + try { + return this.results != null + && (results.isLast() || results.isAfterLast()); + } catch (SQLException sqlE) { + return true; + } + } + + protected DBInputFormat.DBInputSplit getSplit() { + return split; + } + + protected String [] getFieldNames() { + return fieldNames; + } + + protected String getTableName() { + return tableName; + } + + protected String getConditions() { + return conditions; + } + + protected DBConfiguration getDBConf() { + return dbConf; + } + + protected Connection getConnection() { + return connection; + } + + protected PreparedStatement getStatement() { + return statement; + } + + protected void setStatement(PreparedStatement stmt) { + this.statement = stmt; + } + + /** + * @return the configuration. Allows subclasses to access the configuration + */ + protected Configuration getConf(){ + return conf; + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java new file mode 100644 index 00000000..b121d4be --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DBSplitter.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +/** + * DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat. + * DataDrivenDBInputFormat needs to interpolate between two values that + * represent the lowest and highest valued records to import. Depending + * on the data-type of the column, this requires different behavior. + * DBSplitter implementations should perform this for a data type or family + * of data types. + */ +public interface DBSplitter { + + /** + * Given a ResultSet containing one record (and already advanced to that + * record) with two columns (a low value, and a high value, both of the same + * type), determine a set of splits that span the given values. + */ + List split(Configuration conf, ResultSet results, String colName) + throws SQLException; +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java new file mode 100644 index 00000000..e5f1f4b2 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java @@ -0,0 +1,354 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
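The DBSplitter contract is small enough that a complete implementation fits in a few lines. Below is a sketch of a trivial custom splitter, assumed to live in the same package; the class is hypothetical and not part of this patch. It ignores the min/max values and emits one catch-all split whose bounds are always true.

    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputSplit;

    /** A trivial DBSplitter: one split that matches every row. */
    public class WholeTableSplitter implements DBSplitter {
      public List<InputSplit> split(Configuration conf, ResultSet results,
          String colName) throws SQLException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        // Lower and upper "bound" clauses that are always true.
        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
            "1=1", "1=1"));
        return splits;
      }
    }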
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter; +import com.cloudera.sqoop.mapreduce.db.BooleanSplitter; +import com.cloudera.sqoop.mapreduce.db.DBConfiguration; +import com.cloudera.sqoop.mapreduce.db.DBInputFormat; +import com.cloudera.sqoop.mapreduce.db.DBSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader; +import com.cloudera.sqoop.mapreduce.db.DateSplitter; +import com.cloudera.sqoop.mapreduce.db.FloatSplitter; +import com.cloudera.sqoop.mapreduce.db.IntegerSplitter; +import com.cloudera.sqoop.mapreduce.db.TextSplitter; +import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit; + +/** + * An InputFormat that reads input data from an SQL table. + * Operates like DBInputFormat, but instead of using LIMIT and OFFSET to + * demarcate splits, it tries to generate WHERE clauses which separate the + * data into roughly equivalent shards. + */ +public class DataDrivenDBInputFormat + extends DBInputFormat implements Configurable { + + private static final Log LOG = + LogFactory.getLog(DataDrivenDBInputFormat.class); + + /** + * If users are providing their own query, the following string is expected + * to appear in the WHERE clause, which will be substituted with a pair of + * conditions on the input to allow input splits to parallelise the import. + */ + public static final String SUBSTITUTE_TOKEN = "$CONDITIONS"; + + /** + * @return the DBSplitter implementation to use to divide the table/query + * into InputSplits.
+ */ + protected DBSplitter getSplitter(int sqlDataType) { + switch (sqlDataType) { + case Types.NUMERIC: + case Types.DECIMAL: + return new BigDecimalSplitter(); + + case Types.BIT: + case Types.BOOLEAN: + return new BooleanSplitter(); + + case Types.INTEGER: + case Types.TINYINT: + case Types.SMALLINT: + case Types.BIGINT: + return new IntegerSplitter(); + + case Types.REAL: + case Types.FLOAT: + case Types.DOUBLE: + return new FloatSplitter(); + + case Types.CHAR: + case Types.VARCHAR: + case Types.LONGVARCHAR: + return new TextSplitter(); + + case Types.DATE: + case Types.TIME: + case Types.TIMESTAMP: + return new DateSplitter(); + + default: + // TODO: Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB, + // BLOB, ARRAY, STRUCT, REF, DATALINK, and JAVA_OBJECT. + return null; + } + } + + @Override + /** {@inheritDoc} */ + public List getSplits(JobContext job) throws IOException { + + int targetNumTasks = ConfigurationHelper.getJobNumMaps(job); + String boundaryQuery = getDBConf().getInputBoundingQuery(); + + // If the user has not forced us to use a boundary query, and we don't + // need one because there is only one mapper, we return a single split + // that filters nothing. This can be considerably more efficient for a + // large table with no index. + if (1 == targetNumTasks + && (boundaryQuery == null || boundaryQuery.isEmpty())) { + List singletonSplit = new ArrayList(); + singletonSplit.add(new com.cloudera.sqoop.mapreduce.db. + DataDrivenDBInputFormat.DataDrivenDBInputSplit("1=1", "1=1")); + return singletonSplit; + } + + ResultSet results = null; + Statement statement = null; + Connection connection = getConnection(); + try { + statement = connection.createStatement(); + + String query = getBoundingValsQuery(); + LOG.info("BoundingValsQuery: " + query); + + results = statement.executeQuery(query); + results.next(); + + // Based on the type of the results, use a different mechanism + // for interpolating split points (i.e., numeric splits, text splits, + // dates, etc.) + int sqlDataType = results.getMetaData().getColumnType(1); + boolean isSigned = results.getMetaData().isSigned(1); + + // MySQL has an unsigned integer which we need to allocate space for + if (sqlDataType == Types.INTEGER && !isSigned){ + sqlDataType = Types.BIGINT; + } + + DBSplitter splitter = getSplitter(sqlDataType); + if (null == splitter) { + throw new IOException("Unknown SQL data type: " + sqlDataType); + } + + return splitter.split(job.getConfiguration(), results, + getDBConf().getInputOrderBy()); + } catch (SQLException e) { + throw new IOException(e); + } finally { + // More-or-less ignore SQL exceptions here, but log in case we need it. + try { + if (null != results) { + results.close(); + } + } catch (SQLException se) { + LOG.debug("SQLException closing resultset: " + se.toString()); + } + + try { + if (null != statement) { + statement.close(); + } + } catch (SQLException se) { + LOG.debug("SQLException closing statement: " + se.toString()); + } + + try { + connection.commit(); + closeConnection(); + } catch (SQLException se) { + LOG.debug("SQLException committing split transaction: " + + se.toString()); + } + } + } + + /** + * @return a query which returns the minimum and maximum values for + * the order-by column. + * + * The min value should be in the first column, and the + * max value should be in the second column of the results. + */ + protected String getBoundingValsQuery() { + // If the user has provided a query, use that instead.
String userQuery = getDBConf().getInputBoundingQuery(); + if (null != userQuery) { + return userQuery; + } + + // Auto-generate one based on the table name we've been provided with. + StringBuilder query = new StringBuilder(); + + String splitCol = getDBConf().getInputOrderBy(); + query.append("SELECT MIN(").append(splitCol).append("), "); + query.append("MAX(").append(splitCol).append(") FROM "); + query.append(getDBConf().getInputTableName()); + String conditions = getDBConf().getInputConditions(); + if (null != conditions) { + query.append(" WHERE ( " + conditions + " )"); + } + + return query.toString(); + } + + protected RecordReader createDBRecordReader( + DBInputSplit split, Configuration conf) throws IOException { + + DBConfiguration dbConf = getDBConf(); + @SuppressWarnings("unchecked") + Class inputClass = (Class) (dbConf.getInputClass()); + String dbProductName = getDBProductName(); + + LOG.debug("Creating db record reader for db product: " + dbProductName); + + try { + return new DataDrivenDBRecordReader(split, inputClass, + conf, getConnection(), dbConf, dbConf.getInputConditions(), + dbConf.getInputFieldNames(), dbConf.getInputTableName(), + dbProductName); + } catch (SQLException ex) { + throw new IOException(ex); + } + } + + + /* + * Set the user-defined bounding query to use with a user-defined query. + * This *must* include the substring "$CONDITIONS" + * (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause, + * so that DataDrivenDBInputFormat knows where to insert split clauses. + * e.g., "SELECT foo FROM mytable WHERE $CONDITIONS" + * This will be expanded to something like: + * SELECT foo FROM mytable WHERE (id > 100) AND (id < 250) + * inside each split. + */ + public static void setBoundingQuery(Configuration conf, String query) { + if (null != query) { + // If the user's setting a query, warn if they don't allow conditions. + if (query.indexOf(SUBSTITUTE_TOKEN) == -1) { + LOG.warn("Could not find " + SUBSTITUTE_TOKEN + " token in query: " + + query + "; splits may not partition data."); + } + } + + conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, query); + } + + // Configuration methods override superclass to ensure that the proper + // DataDrivenDBInputFormat gets used. + + /** Note that the "orderBy" column is called the "splitBy" in this version. + * We reuse the same field, but it's not strictly ordering it + * -- just partitioning the results. + */ + public static void setInput(Job job, + Class inputClass, + String tableName, String conditions, + String splitBy, String... fieldNames) { + DBInputFormat.setInput(job, inputClass, tableName, conditions, + splitBy, fieldNames); + job.setInputFormatClass(DataDrivenDBInputFormat.class); + } + + /** setInput() takes a custom query and a separate "bounding query" to use + instead of the custom "count query" used by DBInputFormat. + */ + public static void setInput(Job job, + Class inputClass, + String inputQuery, String inputBoundingQuery) { + DBInputFormat.setInput(job, inputClass, inputQuery, ""); + job.getConfiguration().set(DBConfiguration.INPUT_BOUNDING_QUERY, + inputBoundingQuery); + job.setInputFormatClass(DataDrivenDBInputFormat.class); + } + + + /** + * An InputSplit that spans a set of rows. + */ + public static class DataDrivenDBInputSplit + extends DBInputFormat.DBInputSplit { + + private String lowerBoundClause; + private String upperBoundClause; + + /** + * Default Constructor. + */ + public DataDrivenDBInputSplit() { + } + + /** + * Convenience Constructor.
+ * @param lower the string to be put in the WHERE clause to guard + * on the 'lower' end. + * @param upper the string to be put in the WHERE clause to guard + * on the 'upper' end. + */ + public DataDrivenDBInputSplit(final String lower, final String upper) { + this.lowerBoundClause = lower; + this.upperBoundClause = upper; + } + + /** + * @return The total row count in this split. + */ + public long getLength() throws IOException { + return 0; // unfortunately, we don't know this. + } + + @Override + /** {@inheritDoc} */ + public void readFields(DataInput input) throws IOException { + this.lowerBoundClause = Text.readString(input); + this.upperBoundClause = Text.readString(input); + } + + @Override + /** {@inheritDoc} */ + public void write(DataOutput output) throws IOException { + Text.writeString(output, this.lowerBoundClause); + Text.writeString(output, this.upperBoundClause); + } + + public String getLowerClause() { + return lowerBoundClause; + } + + public String getUpperClause() { + return upperBoundClause; + } + } + +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java new file mode 100644 index 00000000..eaef2d83 --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBRecordReader.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + + +import java.io.IOException; +import java.sql.Connection; +import java.sql.SQLException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.lib.db.DBWritable; + +import com.cloudera.sqoop.mapreduce.db.DBConfiguration; +import com.cloudera.sqoop.mapreduce.db.DBInputFormat; +import com.cloudera.sqoop.mapreduce.db.DBRecordReader; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; + +/** + * A RecordReader that reads records from a SQL table, + * using data-driven WHERE clause splits. + * Emits LongWritables containing the record number as + * key and DBWritables as value. + */ +public class DataDrivenDBRecordReader + extends DBRecordReader { + + private static final Log LOG = + LogFactory.getLog(DataDrivenDBRecordReader.class); + + private String dbProductName; // database manufacturer string. + + // CHECKSTYLE:OFF + // TODO(aaron): Refactor constructor to use fewer arguments. 
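Here is a sketch of the client-side call for the free-form-query path; the query, record class, and split column are hypothetical (OrderRecord stands in for some DBWritable implementation). Each map task then sees $CONDITIONS replaced by its own bounds.

    // OrderRecord is a hypothetical DBWritable implementation.
    DataDrivenDBInputFormat.setInput(job, OrderRecord.class,
        "SELECT id, total FROM orders WHERE $CONDITIONS",
        "SELECT MIN(id), MAX(id) FROM orders");
    // A split whose bounds are [0, 2500) would then execute:
    //   SELECT id, total FROM orders WHERE ( id >= 0 ) AND ( id < 2500 )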
+ /** + * @param split The InputSplit to read data for + * @throws SQLException + */ + public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split, + Class inputClass, Configuration conf, Connection conn, + DBConfiguration dbConfig, String cond, String [] fields, String table, + String dbProduct) throws SQLException { + super(split, inputClass, conf, conn, dbConfig, cond, fields, table); + this.dbProductName = dbProduct; + } + // CHECKSTYLE:ON + + @Override + /** {@inheritDoc} */ + public float getProgress() throws IOException { + return isDone() ? 1.0f : 0.0f; + } + + /** Returns the query for selecting the records, + * subclasses can override this for custom behaviour.*/ + protected String getSelectQuery() { + StringBuilder query = new StringBuilder(); + DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit = + (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit(); + DBConfiguration dbConf = getDBConf(); + String [] fieldNames = getFieldNames(); + String tableName = getTableName(); + String conditions = getConditions(); + + // Build the WHERE clauses associated with the data split first. + // We need them in both branches of this function. + StringBuilder conditionClauses = new StringBuilder(); + conditionClauses.append("( ").append(dataSplit.getLowerClause()); + conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause()); + conditionClauses.append(" )"); + + if(dbConf.getInputQuery() == null) { + // We need to generate the entire query. + query.append("SELECT "); + + for (int i = 0; i < fieldNames.length; i++) { + query.append(fieldNames[i]); + if (i != fieldNames.length -1) { + query.append(", "); + } + } + + query.append(" FROM ").append(tableName); + if (!dbProductName.startsWith("ORACLE")) { + // Seems to be necessary for hsqldb? Oracle explicitly does *not* + // use this clause. + query.append(" AS ").append(tableName); + } + query.append(" WHERE "); + if (conditions != null && conditions.length() > 0) { + // Put the user's conditions first. + query.append("( ").append(conditions).append(" ) AND "); + } + + // Now append the conditions associated with our split. + query.append(conditionClauses.toString()); + + } else { + // User provided the query. We replace the special token with + // our WHERE clause. + String inputQuery = dbConf.getInputQuery(); + if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) { + LOG.error("Could not find the clause substitution token " + + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: [" + + inputQuery + "]. Parallel splits may not work correctly."); + } + + query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN, + conditionClauses.toString())); + } + + LOG.debug("Using query: " + query.toString()); + + return query.toString(); + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java new file mode 100644 index 00000000..31e9351a --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/DateSplitter.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
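To see the clause assembly end to end: for a hypothetical split with lower clause id >= 0 and upper clause id < 2500, user conditions total > 0, table orders, and fields id and total, the table-based branch of getSelectQuery() above would emit, on a non-Oracle database:

    SELECT id, total FROM orders AS orders WHERE ( total > 0 ) AND ( id >= 0 ) AND ( id < 2500 )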
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; +import com.cloudera.sqoop.mapreduce.db.IntegerSplitter; + +/** + * Implement DBSplitter over date/time values. + * Make use of logic from IntegerSplitter, since date/time are just longs + * in Java. + */ +public class DateSplitter extends IntegerSplitter { + + private static final Log LOG = LogFactory.getLog(DateSplitter.class); + + public List split(Configuration conf, ResultSet results, + String colName) throws SQLException { + + long minVal; + long maxVal; + + int sqlDataType = results.getMetaData().getColumnType(1); + minVal = resultSetColToLong(results, 1, sqlDataType); + maxVal = resultSetColToLong(results, 2, sqlDataType); + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + int numSplits = ConfigurationHelper.getConfNumMaps(conf); + if (numSplits < 1) { + numSplits = 1; + } + + if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) { + // The range of acceptable dates is NULL to NULL. Just create a single + // split. + List splits = new ArrayList(); + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return splits; + } + + // Gather the split point integers + List splitPoints = split(numSplits, minVal, maxVal); + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + long start = splitPoints.get(0); + Date startDate = longToDate(start, sqlDataType); + if (sqlDataType == Types.TIMESTAMP) { + // The lower bound's nanos value needs to match the actual lower-bound + // nanos. + try { + ((java.sql.Timestamp) startDate).setNanos( + results.getTimestamp(1).getNanos()); + } catch (NullPointerException npe) { + // If the lower bound was NULL, we'll get an NPE; just ignore it and + // don't set nanos. + } + } + + for (int i = 1; i < splitPoints.size(); i++) { + long end = splitPoints.get(i); + Date endDate = longToDate(end, sqlDataType); + + if (i == splitPoints.size() - 1) { + if (sqlDataType == Types.TIMESTAMP) { + // The upper bound's nanos value needs to match the actual + // upper-bound nanos. + try { + ((java.sql.Timestamp) endDate).setNanos( + results.getTimestamp(2).getNanos()); + } catch (NullPointerException npe) { + // If the upper bound was NULL, we'll get an NPE; just ignore it + // and don't set nanos. + } + } + // This is the last one; use a closed interval. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + dateToString(startDate), + colName + " <= " + dateToString(endDate))); + } else { + // Normal open-interval case. 
+ splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + dateToString(startDate), + highClausePrefix + dateToString(endDate))); + } + + start = end; + startDate = endDate; + } + + if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) { + // Add an extra split to handle the null case that we saw. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + + return splits; + } + + /** + Retrieve the value from the column in a type-appropriate manner and + return its timestamp since the epoch. If the column is null, then return + Long.MIN_VALUE. This will cause a special split to be generated for the + NULL case, but may also cause poorly-balanced splits if most of the + actual dates are positive time since the epoch, etc. + */ + private long resultSetColToLong(ResultSet rs, int colNum, int sqlDataType) + throws SQLException { + try { + switch (sqlDataType) { + case Types.DATE: + return rs.getDate(colNum).getTime(); + case Types.TIME: + return rs.getTime(colNum).getTime(); + case Types.TIMESTAMP: + return rs.getTimestamp(colNum).getTime(); + default: + throw new SQLException("Not a date-type field"); + } + } catch (NullPointerException npe) { + // null column. return minimum long value. + LOG.warn("Encountered a NULL date in the split column. " + + "Splits may be poorly balanced."); + return Long.MIN_VALUE; + } + } + + /** Parse the long-valued timestamp into the appropriate SQL date type. */ + private Date longToDate(long val, int sqlDataType) { + switch (sqlDataType) { + case Types.DATE: + return new java.sql.Date(val); + case Types.TIME: + return new java.sql.Time(val); + case Types.TIMESTAMP: + return new java.sql.Timestamp(val); + default: // Shouldn't ever hit this case. + return null; + } + } + + /** + * Given a Date 'd', format it as a string for use in a SQL date + * comparison operation. + * @param d the date to format. + * @return the string representing this date in SQL with any appropriate + * quotation characters, etc. + */ + protected String dateToString(Date d) { + return "'" + d.toString() + "'"; + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java new file mode 100644 index 00000000..1756df6b --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/FloatSplitter.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
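As a rough illustration of the date path (hypothetical bounds, with boundary values approximated): splitting a TIMESTAMP column created across calendar year 2011 with two map tasks converts both bounds to milliseconds since the epoch, splits them with the integer logic, converts the boundaries back to timestamps, and emits clauses along the lines of:

    created >= '2011-01-01 00:00:00.0' AND created < '2011-07-02 12:00:00.0'
    created >= '2011-07-02 12:00:00.0' AND created <= '2011-12-31 23:59:59.0'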
+ */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DBSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; + +/** + * Implement DBSplitter over floating-point values. + */ +public class FloatSplitter implements DBSplitter { + + private static final Log LOG = LogFactory.getLog(FloatSplitter.class); + + private static final double MIN_INCREMENT = 10000 * Double.MIN_VALUE; + + public List split(Configuration conf, ResultSet results, + String colName) throws SQLException { + + LOG.warn("Generating splits for a floating-point index column. Due to the"); + LOG.warn("imprecise representation of floating-point values in Java, this"); + LOG.warn("may result in an incomplete import."); + LOG.warn("You are strongly encouraged to choose an integral split column."); + + List splits = new ArrayList(); + + if (results.getString(1) == null && results.getString(2) == null) { + // Range is null to null. Return a null split accordingly. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return splits; + } + + double minVal = results.getDouble(1); + double maxVal = results.getDouble(2); + + // Use this as a hint. May need an extra task if the size doesn't + // divide cleanly. + int numSplits = ConfigurationHelper.getConfNumMaps(conf); + double splitSize = (maxVal - minVal) / (double) numSplits; + + if (splitSize < MIN_INCREMENT) { + splitSize = MIN_INCREMENT; + } + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + double curLower = minVal; + double curUpper = curLower + splitSize; + + while (curUpper < maxVal) { + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Double.toString(curLower), + highClausePrefix + Double.toString(curUpper))); + + curLower = curUpper; + curUpper += splitSize; + } + + // Catch any overage and create the closed interval for the last split. + // This interval must start at curLower (the upper bound of the last + // open interval); starting it at curUpper would skip rows in + // [curLower, maxVal). + if (curLower <= maxVal || splits.size() == 1) { + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Double.toString(curLower), + colName + " <= " + Double.toString(maxVal))); + } + + if (results.getString(1) == null || results.getString(2) == null) { + // At least one extrema is null; add a null split. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + + return splits; + } +} diff --git a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java new file mode 100644 index 00000000..12011e7e --- /dev/null +++ b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
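A quick worked example of the floating-point arithmetic above (made-up bounds, not from the patch):

    double minVal = 0.0, maxVal = 10.0; // hypothetical MIN/MAX of the column
    int numSplits = 4;
    double splitSize = (maxVal - minVal) / numSplits; // 2.5
    // The loop emits the open intervals [0.0, 2.5), [2.5, 5.0), [5.0, 7.5),
    // and the overage check then adds the closed tail interval [7.5, 10.0].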
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.sqoop.mapreduce.db; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.InputSplit; + +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.mapreduce.db.DBSplitter; +import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat; + +/** + * Implement DBSplitter over integer values. + */ +public class IntegerSplitter implements DBSplitter { + public static final Log LOG = + LogFactory.getLog(IntegerSplitter.class.getName()); + + public List split(Configuration conf, ResultSet results, + String colName) throws SQLException { + + long minVal = results.getLong(1); + long maxVal = results.getLong(2); + + String lowClausePrefix = colName + " >= "; + String highClausePrefix = colName + " < "; + + int numSplits = ConfigurationHelper.getConfNumMaps(conf); + if (numSplits < 1) { + numSplits = 1; + } + + if (results.getString(1) == null && results.getString(2) == null) { + // Range is null to null. Return a null split accordingly. + List splits = new ArrayList(); + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + return splits; + } + + // Get all the split points together. + List splitPoints = split(numSplits, minVal, maxVal); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Splits: [%,28d to %,28d] into %d parts", + minVal, maxVal, numSplits)); + for (int i = 0; i < splitPoints.size(); i++) { + LOG.debug(String.format("%,28d", splitPoints.get(i))); + } + } + List splits = new ArrayList(); + + // Turn the split points into a set of intervals. + long start = splitPoints.get(0); + for (int i = 1; i < splitPoints.size(); i++) { + long end = splitPoints.get(i); + + if (i == splitPoints.size() - 1) { + // This is the last one; use a closed interval. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Long.toString(start), + colName + " <= " + Long.toString(end))); + } else { + // Normal open-interval case. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + lowClausePrefix + Long.toString(start), + highClausePrefix + Long.toString(end))); + } + + start = end; + } + + if (results.getString(1) == null || results.getString(2) == null) { + // At least one extrema is null; add a null split. + splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit( + colName + " IS NULL", colName + " IS NULL")); + } + + return splits; + } + + /** + * Returns a list of longs one element longer than the list of input splits. + * This represents the boundaries between input splits. + * All splits are open on the top end, except the last one. + * + * So the list [0, 5, 8, 12, 18] would represent splits capturing the + * intervals: + * + * [0, 5) + * [5, 8) + * [8, 12) + * [12, 18] note the closed interval for the last split. 
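 * For instance, split(4, 0, 18) returns the boundaries
 * [0, 5, 10, 14, 18]: splitSize = 18 / 4 = 4 with remainder 2, so the
 * first two boundary steps each absorb one extra value.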
+   */
+  protected List<Long> split(long numSplits, long minVal, long maxVal)
+      throws SQLException {
+
+    List<Long> splits = new ArrayList<Long>();
+
+    // We take the min-max interval and divide by the numSplits and also
+    // calculate a remainder. Because of integer division rules, numsplits *
+    // splitSize + minVal will always be <= maxVal. We then use the remainder
+    // and add 1 if the current split index is less than the remainder.
+    // This is guaranteed to add up to remainder and not surpass the value.
+    long splitSize = (maxVal - minVal) / numSplits;
+    long remainder = (maxVal - minVal) % numSplits;
+    long curVal = minVal;
+
+    // This will honor numSplits as long as split size > 0. If split size is
+    // 0, it will have remainder splits.
+    for (int i = 0; i <= numSplits; i++) {
+      splits.add(curVal);
+      if (curVal >= maxVal) {
+        break;
+      }
+      curVal += splitSize;
+      curVal += (i < remainder) ? 1 : 0;
+    }
+
+    if (splits.size() == 1) {
+      // make a valid singleton split
+      splits.add(maxVal);
+    }
+
+    return splits;
+  }
+}
diff --git a/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java b/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java
new file mode 100644
index 00000000..49b07877
--- /dev/null
+++ b/src/java/org/apache/sqoop/mapreduce/db/OracleDBRecordReader.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.DBRecordReader;
+
+/**
+ * A RecordReader that reads records from an Oracle SQL table.
+ */
+public class OracleDBRecordReader<T extends DBWritable>
+    extends DBRecordReader<T> {
+
+  /** Configuration key to set to a timezone string. */
+  public static final String SESSION_TIMEZONE_KEY = "oracle.sessionTimeZone";
+
+  private static final Log LOG = LogFactory.getLog(OracleDBRecordReader.class);
+
+  // CHECKSTYLE:OFF
+  public OracleDBRecordReader(DBInputFormat.DBInputSplit split,
+      Class<T> inputClass, Configuration conf, Connection conn,
+      DBConfiguration dbConfig, String cond, String [] fields,
+      String table) throws SQLException {
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table);
+    setSessionTimeZone(conf, conn);
+  }
+  // CHECKSTYLE:ON
+
+
+  /** Returns the query for selecting the records from an Oracle DB.
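+   *
+   * Assuming a split with start 50 and length 25 (and no prebuilt input
+   * query), the base SELECT is wrapped in Oracle's ROWNUM pagination
+   * idiom, roughly:
+   *
+   *   SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ...base query... ) a
+   *   WHERE rownum <= 50 + 25 ) WHERE dbif_rno >= 50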
+   */
+  protected String getSelectQuery() {
+    StringBuilder query = new StringBuilder();
+    DBConfiguration dbConf = getDBConf();
+    String conditions = getConditions();
+    String tableName = getTableName();
+    String [] fieldNames = getFieldNames();
+
+    // Oracle-specific codepath to use rownum instead of LIMIT/OFFSET.
+    if (dbConf.getInputQuery() == null) {
+      query.append("SELECT ");
+
+      for (int i = 0; i < fieldNames.length; i++) {
+        query.append(fieldNames[i]);
+        if (i != fieldNames.length - 1) {
+          query.append(", ");
+        }
+      }
+
+      query.append(" FROM ").append(tableName);
+      if (conditions != null && conditions.length() > 0) {
+        query.append(" WHERE ").append(conditions);
+      }
+      String orderBy = dbConf.getInputOrderBy();
+      if (orderBy != null && orderBy.length() > 0) {
+        query.append(" ORDER BY ").append(orderBy);
+      }
+    } else {
+      //PREBUILT QUERY
+      query.append(dbConf.getInputQuery());
+    }
+
+    try {
+      DBInputFormat.DBInputSplit split = getSplit();
+      if (split.getLength() > 0 && split.getStart() > 0) {
+        String querystring = query.toString();
+
+        query = new StringBuilder();
+        query.append("SELECT * FROM (SELECT a.*,ROWNUM dbif_rno FROM ( ");
+        query.append(querystring);
+        query.append(" ) a WHERE rownum <= ").append(split.getStart());
+        query.append(" + ").append(split.getLength());
+        query.append(" ) WHERE dbif_rno >= ").append(split.getStart());
+      }
+    } catch (IOException ex) {
+      // ignore, will not throw.
+    }
+
+    return query.toString();
+  }
+
+  /**
+   * Set session time zone.
+   * @param conf The current configuration.
+   * We read the 'oracle.sessionTimeZone' property from here.
+   * @param conn The connection to alter the timezone properties of.
+   */
+  public static void setSessionTimeZone(Configuration conf,
+      Connection conn) throws SQLException {
+    // need to use reflection to call the method setSessionTimeZone on
+    // the OracleConnection class because oracle specific java libraries are
+    // not accessible in this context.
+    Method method;
+    try {
+      method = conn.getClass().getMethod(
+          "setSessionTimeZone", new Class [] {String.class});
+    } catch (Exception ex) {
+      LOG.error("Could not find method setSessionTimeZone in "
+          + conn.getClass().getName(), ex);
+      // rethrow SQLException
+      throw new SQLException(ex);
+    }
+
+    // Need to set the time zone in order for Java
+    // to correctly access the column "TIMESTAMP WITH LOCAL TIME ZONE".
+    // We can't easily get the correct Oracle-specific timezone string
+    // from Java; just let the user set the timezone in a property.
+    String clientTimeZone = conf.get(SESSION_TIMEZONE_KEY, "GMT");
+    try {
+      method.setAccessible(true);
+      method.invoke(conn, clientTimeZone);
+      LOG.info("Time zone has been set to " + clientTimeZone);
+    } catch (Exception ex) {
+      LOG.warn("Time zone " + clientTimeZone
+          + " could not be set on Oracle database.");
+      LOG.warn("Setting default time zone: GMT");
+      try {
+        // "GMT" timezone is guaranteed to exist.
+        method.invoke(conn, "GMT");
+      } catch (Exception ex2) {
+        LOG.error("Could not set time zone for oracle connection", ex2);
+        // rethrow SQLException
+        throw new SQLException(ex);
+      }
+    }
+  }
+}
diff --git a/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java
new file mode 100644
index 00000000..cec456a5
--- /dev/null
+++ b/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBInputFormat.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.io.IOException;
+import java.sql.SQLException;
+import java.sql.Types;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.OracleDataDrivenDBRecordReader;
+import com.cloudera.sqoop.mapreduce.db.OracleDateSplitter;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat.DBInputSplit;
+
+/**
+ * An InputFormat that reads input data from an SQL table in an Oracle db.
+ */
+public class OracleDataDrivenDBInputFormat<T extends DBWritable>
+    extends DataDrivenDBInputFormat<T> implements Configurable {
+
+  /**
+   * @return the DBSplitter implementation to use to divide the table/query
+   * into InputSplits.
+   */
+  @Override
+  protected DBSplitter getSplitter(int sqlDataType) {
+    switch (sqlDataType) {
+    case Types.DATE:
+    case Types.TIME:
+    case Types.TIMESTAMP:
+      return new OracleDateSplitter();
+
+    default:
+      return super.getSplitter(sqlDataType);
+    }
+  }
+
+  @Override
+  protected RecordReader<LongWritable, T> createDBRecordReader(
+      DBInputSplit split, Configuration conf) throws IOException {
+
+    DBConfiguration dbConf = getDBConf();
+    @SuppressWarnings("unchecked")
+    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());
+
+    try {
+      // Use Oracle-specific db reader
+      return new OracleDataDrivenDBRecordReader<T>(split, inputClass,
+          conf, getConnection(), dbConf, dbConf.getInputConditions(),
+          dbConf.getInputFieldNames(), dbConf.getInputTableName());
+    } catch (SQLException ex) {
+      throw new IOException(ex);
+    }
+  }
+}
diff --git a/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java b/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java
new file mode 100644
index 00000000..d8e18859
--- /dev/null
+++ b/src/java/org/apache/sqoop/mapreduce/db/OracleDataDrivenDBRecordReader.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.Connection;
+import java.sql.SQLException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.lib.db.DBWritable;
+
+import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
+import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader;
+import com.cloudera.sqoop.mapreduce.db.OracleDBRecordReader;
+
+/**
+ * A RecordReader that reads records from an Oracle table
+ * via DataDrivenDBRecordReader.
+ */
+public class OracleDataDrivenDBRecordReader<T extends DBWritable>
+    extends DataDrivenDBRecordReader<T> {
+
+
+  // CHECKSTYLE:OFF
+  // TODO(aaron): Enable checkstyle after refactoring DBRecordReader c'tor.
+  public OracleDataDrivenDBRecordReader(DBInputFormat.DBInputSplit split,
+      Class<T> inputClass, Configuration conf, Connection conn,
+      DBConfiguration dbConfig, String cond, String [] fields,
+      String table) throws SQLException {
+
+    super(split, inputClass, conf, conn, dbConfig, cond, fields, table,
+        "ORACLE");
+
+    // Must initialize the tz used by the connection for Oracle.
+    OracleDBRecordReader.setSessionTimeZone(conf, conn);
+  }
+  // CHECKSTYLE:ON
+}
diff --git a/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java
new file mode 100644
index 00000000..5d2e9d30
--- /dev/null
+++ b/src/java/org/apache/sqoop/mapreduce/db/OracleDateSplitter.java
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.util.Date;
+
+import com.cloudera.sqoop.mapreduce.db.DateSplitter;
+
+/**
+ * Implement DBSplitter over date/time values returned by an Oracle db.
+ * Make use of logic from DateSplitter, since this just needs to use
+ * some Oracle-specific functions on the formatting end when generating
+ * InputSplits.
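+ *
+ * For example, a java.sql.Timestamp of 2011-10-28 16:50:39.0 is rendered
+ * below as TO_TIMESTAMP('2011-10-28 16:50:39.0', 'YYYY-MM-DD HH24:MI:SS.FF'),
+ * so split boundaries appear as precise timestamp literals in the
+ * generated WHERE clauses.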
+ */
+public class OracleDateSplitter extends DateSplitter {
+
+  @SuppressWarnings("unchecked")
+  @Override
+  protected String dateToString(Date d) {
+    // Oracle Date objects are always actually Timestamps
+    return "TO_TIMESTAMP('" + d.toString() + "', 'YYYY-MM-DD HH24:MI:SS.FF')";
+  }
+}
diff --git a/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java
new file mode 100644
index 00000000..f1d73d4d
--- /dev/null
+++ b/src/java/org/apache/sqoop/mapreduce/db/TextSplitter.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.math.BigDecimal;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+
+import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter;
+import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
+
+/**
+ * Implement DBSplitter over text strings.
+ */
+public class TextSplitter extends BigDecimalSplitter {
+
+  private static final Log LOG = LogFactory.getLog(TextSplitter.class);
+
+  /**
+   * This method needs to determine the splits between two user-provided
+   * strings. In the case where the user's strings are 'A' and 'Z', this is
+   * not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26
+   * splits for strings beginning with each letter, etc.
+   *
+   * If a user has provided us with the strings "Ham" and "Haze", however, we
+   * need to create splits that differ in the third letter.
+   *
+   * The algorithm used is as follows:
+   * Since there are 2**16 unicode characters, we interpret characters as
+   * digits in base 65536. Given a string 's' containing characters s_0, s_1
+   * .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in
+   * base 65536. Having mapped the low and high strings into floating-point
+   * values, we then use the BigDecimalSplitter to establish the even split
+   * points, then map the resulting floating point values back into strings.
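+   *
+   * For example, "A" (code point 65) maps to 65/65536, and "AB" maps to
+   * 65/65536 + 66/65536^2; interpolating between two such values and
+   * converting back yields the intermediate split strings.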
+   */
+  public List<InputSplit> split(Configuration conf, ResultSet results,
+      String colName) throws SQLException {
+
+    LOG.warn("Generating splits for a textual index column.");
+    LOG.warn("If your database sorts in a case-insensitive order, "
+        + "this may result in a partial import or duplicate records.");
+    LOG.warn("You are strongly encouraged to choose an integral split column.");
+
+    String minString = results.getString(1);
+    String maxString = results.getString(2);
+
+    boolean minIsNull = false;
+
+    // If the min value is null, switch it to an empty string instead for
+    // purposes of interpolation. Then add [null, null] as a special case
+    // split.
+    if (null == minString) {
+      minString = "";
+      minIsNull = true;
+    }
+
+    if (null == maxString) {
+      // If the max string is null, then the min string has to be null too.
+      // Just return a special split for this case.
+      List<InputSplit> splits = new ArrayList<InputSplit>();
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+      return splits;
+    }
+
+    // Use this as a hint. May need an extra task if the size doesn't
+    // divide cleanly.
+    int numSplits = ConfigurationHelper.getConfNumMaps(conf);
+
+    String lowClausePrefix = colName + " >= '";
+    String highClausePrefix = colName + " < '";
+
+    // If there is a common prefix between minString and maxString, establish
+    // it and pull it out of minString and maxString.
+    int maxPrefixLen = Math.min(minString.length(), maxString.length());
+    int sharedLen;
+    for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
+      char c1 = minString.charAt(sharedLen);
+      char c2 = maxString.charAt(sharedLen);
+      if (c1 != c2) {
+        break;
+      }
+    }
+
+    // The common prefix has length 'sharedLen'. Extract it from both.
+    String commonPrefix = minString.substring(0, sharedLen);
+    minString = minString.substring(sharedLen);
+    maxString = maxString.substring(sharedLen);
+
+    List<String> splitStrings = split(numSplits, minString, maxString,
+        commonPrefix);
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+
+    // Convert the list of split point strings into an actual set of
+    // InputSplits.
+    String start = splitStrings.get(0);
+    for (int i = 1; i < splitStrings.size(); i++) {
+      String end = splitStrings.get(i);
+
+      if (i == splitStrings.size() - 1) {
+        // This is the last one; use a closed interval.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + start + "'", colName + " <= '" + end + "'"));
+      } else {
+        // Normal open-interval case.
+        splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+            lowClausePrefix + start + "'", highClausePrefix + end + "'"));
+      }
+
+      start = end;
+    }
+
+    if (minIsNull) {
+      // Add the special null split at the end.
+      splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
+          colName + " IS NULL", colName + " IS NULL"));
+    }
+
+    return splits;
+  }
+
+  protected List<String> split(int numSplits, String minString,
+      String maxString, String commonPrefix) throws SQLException {
+
+    BigDecimal minVal = stringToBigDecimal(minString);
+    BigDecimal maxVal = stringToBigDecimal(maxString);
+
+    List<BigDecimal> splitPoints = split(
+        new BigDecimal(numSplits), minVal, maxVal);
+    List<String> splitStrings = new ArrayList<String>();
+
+    // Convert the BigDecimal splitPoints into their string representations.
+    for (BigDecimal bd : splitPoints) {
+      splitStrings.add(commonPrefix + bigDecimalToString(bd));
+    }
+
+    // Make sure that our user-specified boundaries are the first and last
+    // entries in the array.
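+    // (bigDecimalToString() truncates to MAX_CHARS characters, so the
+    // round trip may not reproduce the user-provided endpoints exactly.)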
+    if (splitStrings.size() == 0
+        || !splitStrings.get(0).equals(commonPrefix + minString)) {
+      splitStrings.add(0, commonPrefix + minString);
+    }
+    if (splitStrings.size() == 1
+        || !splitStrings.get(splitStrings.size() - 1).equals(
+        commonPrefix + maxString)) {
+      splitStrings.add(commonPrefix + maxString);
+    }
+
+    return splitStrings;
+  }
+
+  private static final BigDecimal ONE_PLACE = new BigDecimal(65536);
+
+  // Maximum number of characters to convert. This is to prevent rounding
+  // errors or repeating fractions near the very bottom from getting out of
+  // control. Note that this still gives us a huge number of possible splits.
+  private static final int MAX_CHARS = 8;
+
+  /**
+   * Return a BigDecimal representation of string 'str' suitable for use in a
+   * numerically-sorting order.
+   */
+  protected BigDecimal stringToBigDecimal(String str) {
+    // Start with 1/65536 to compute the first digit.
+    BigDecimal curPlace = ONE_PLACE;
+    BigDecimal result = BigDecimal.ZERO;
+
+    int len = Math.min(str.length(), MAX_CHARS);
+
+    for (int i = 0; i < len; i++) {
+      int codePoint = str.codePointAt(i);
+      result = result.add(tryDivide(new BigDecimal(codePoint), curPlace));
+      // advance to the next less significant place. e.g., 1/(65536^2) for the
+      // second char.
+      curPlace = curPlace.multiply(ONE_PLACE);
+    }
+
+    return result;
+  }
+
+  /**
+   * Return the string encoded in a BigDecimal.
+   * Repeatedly multiply the input value by 65536; the integer portion after
+   * such a multiplication represents a single character in base 65536.
+   * Convert that back into a char and create a string out of these until we
+   * have no data left.
+   */
+  protected String bigDecimalToString(BigDecimal bd) {
+    BigDecimal cur = bd.stripTrailingZeros();
+    StringBuilder sb = new StringBuilder();
+
+    for (int numConverted = 0; numConverted < MAX_CHARS; numConverted++) {
+      cur = cur.multiply(ONE_PLACE);
+      int curCodePoint = cur.intValue();
+      if (0 == curCodePoint) {
+        break;
+      }
+
+      cur = cur.subtract(new BigDecimal(curCodePoint));
+      sb.append(Character.toChars(curCodePoint));
+    }
+
+    return sb.toString();
+  }
+}
diff --git a/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java
index efdb9f47..c072fa02 100644
--- a/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java
+++ b/src/test/com/cloudera/sqoop/mapreduce/db/TestIntegerSplitter.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,106 +15,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.sql.SQLException;
-import java.util.List;
-
-import junit.framework.TestCase;
 
 /**
  * Test that the IntegerSplitter generates sane splits.
+ *
+ * @deprecated use org.apache.sqoop.mapreduce.db.TestIntegerSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.TestIntegerSplitter
  */
-public class TestIntegerSplitter extends TestCase {
-  private long [] toLongArray(List<Long> in) {
-    long [] out = new long[in.size()];
-    for (int i = 0; i < in.size(); i++) {
-      out[i] = in.get(i).longValue();
-    }
+public class TestIntegerSplitter
+    extends org.apache.sqoop.mapreduce.db.TestIntegerSplitter {
 
-    return out;
-  }
-
-  public String formatLongArray(long [] ar) {
-    StringBuilder sb = new StringBuilder();
-    sb.append("[");
-    boolean first = true;
-    for (long val : ar) {
-      if (!first) {
-        sb.append(", ");
-      }
-
-      sb.append(Long.toString(val));
-      first = false;
-    }
-
-    sb.append("]");
-    return sb.toString();
-  }
-
-  public void assertLongArrayEquals(long [] expected, long [] actual) {
-    for (int i = 0; i < expected.length; i++) {
-      try {
-        assertEquals("Failure at position " + i + "; got " + actual[i]
-            + " instead of " + expected[i]
-            + "; actual array is " + formatLongArray(actual),
-            expected[i], actual[i]);
-      } catch (ArrayIndexOutOfBoundsException oob) {
-        fail("Expected array with " + expected.length
-            + " elements; got " + actual.length
-            + ". Actual array is " + formatLongArray(actual));
-      }
-    }
-
-    if (actual.length > expected.length) {
-      fail("Actual array has " + actual.length
-          + " elements; expected " + expected.length
-          + ". Actual array is " + formatLongArray(actual));
-    }
-  }
-
-  public void testEvenSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(10, 0, 100);
-    long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testOddSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(10, 0, 95);
-    long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testSingletonSplit() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(1, 5, 5);
-    long [] expected = { 5, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testSingletonSplit2() throws SQLException {
-    // Same test, but overly-high numSplits
-    List<Long> splits = new IntegerSplitter().split(5, 5, 5);
-    long [] expected = { 5, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  public void testTooManySplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(5, 3, 5);
-    long [] expected = { 3, 4, 5 };
-    assertLongArrayEquals(expected, toLongArray(splits));
-  }
-
-  /**
-   * This tests verifies that overflows do not happen due to the splitting
-   * algorithm.
-   *
-   * @throws SQLException
-   */
-  public void testBigIntSplits() throws SQLException {
-    List<Long> splits = new IntegerSplitter().split(4, 14,
-        7863696997872966707L);
-    assertEquals(splits.size(), 5);
+  public void testDummy() {
+    // Nothing to do
   }
 }
diff --git a/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java b/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java
index a96a264c..4927c743 100644
--- a/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java
+++ b/src/test/com/cloudera/sqoop/mapreduce/db/TestTextSplitter.java
@@ -1,6 +1,4 @@
 /**
- * Copyright 2011 The Apache Software Foundation
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -17,123 +15,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package com.cloudera.sqoop.mapreduce.db;
 
-import java.math.BigDecimal;
-import java.sql.SQLException;
-import java.util.List;
-
-import junit.framework.TestCase;
 
 /**
  * Test that the TextSplitter implementation creates a sane set of splits.
+ * @deprecated use org.apache.sqoop.mapreduce.db.TestTextSplitter instead.
+ * @see org.apache.sqoop.mapreduce.db.TestTextSplitter
  */
-public class TestTextSplitter extends TestCase {
+public class TestTextSplitter extends
+    org.apache.sqoop.mapreduce.db.TestTextSplitter {
 
-  public String formatArray(Object [] ar) {
-    StringBuilder sb = new StringBuilder();
-    sb.append("[");
-    boolean first = true;
-    for (Object val : ar) {
-      if (!first) {
-        sb.append(", ");
-      }
-
-      sb.append(val.toString());
-      first = false;
-    }
-
-    sb.append("]");
-    return sb.toString();
+  public void testDummy() {
+    // Nothing to do
   }
 
-  public void assertArrayEquals(Object [] expected, Object [] actual) {
-    for (int i = 0; i < expected.length; i++) {
-      try {
-        assertEquals("Failure at position " + i + "; got " + actual[i]
-            + " instead of " + expected[i]
-            + "; actual array is " + formatArray(actual),
-            expected[i], actual[i]);
-      } catch (ArrayIndexOutOfBoundsException oob) {
-        fail("Expected array with " + expected.length
-            + " elements; got " + actual.length
-            + ". Actual array is " + formatArray(actual));
-      }
-    }
-
-    if (actual.length > expected.length) {
-      fail("Actual array has " + actual.length
-          + " elements; expected " + expected.length
-          + ". Actual array is " + formatArray(actual));
-    }
-  }
-
-  public void testStringConvertEmpty() {
-    TextSplitter splitter = new TextSplitter();
-    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
-    assertEquals(BigDecimal.ZERO, emptyBigDec);
-  }
-
-  public void testBigDecConvertEmpty() {
-    TextSplitter splitter = new TextSplitter();
-    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
-    assertEquals("", emptyStr);
-  }
-
-  public void testConvertA() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
-    assertEquals("A", out);
-  }
-
-  public void testConvertZ() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
-    assertEquals("Z", out);
-  }
-
-  public void testConvertThreeChars() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("abc"));
-    assertEquals("abc", out);
-  }
-
-  public void testConvertStr() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("big str"));
-    assertEquals("big str", out);
-  }
-
-  public void testConvertChomped() {
-    TextSplitter splitter = new TextSplitter();
-    String out = splitter.bigDecimalToString(
-        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
-    assertEquals("AVeryLon", out);
-  }
-
-  public void testAlphabetSplit() throws SQLException {
-    // This should give us 25 splits, one per letter.
-    TextSplitter splitter = new TextSplitter();
-    List<String> splits = splitter.split(25, "A", "Z", "");
-    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
-        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
-        "V", "W", "X", "Y", "Z", };
-    assertArrayEquals(expected, splits.toArray(new String [0]));
-  }
-
-  public void testCommonPrefix() throws SQLException {
-    // Splits between 'Hand' and 'Hardy'
-    TextSplitter splitter = new TextSplitter();
-    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
-    // Don't check for exact values in the middle, because the splitter
-    // generates some ugly Unicode-isms. But do check that we get multiple
-    // splits and that it starts and ends on the correct points.
-    assertEquals("Hand", splits.get(0));
-    assertEquals("Hardy", splits.get(splits.size() -1));
-    assertEquals(6, splits.size());
-  }
 }
diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
new file mode 100644
index 00000000..22d5140b
--- /dev/null
+++ b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;
+
+/**
+ * Test that the IntegerSplitter generates sane splits.
+ */
+public class TestIntegerSplitter extends TestCase {
+  private long [] toLongArray(List<Long> in) {
+    long [] out = new long[in.size()];
+    for (int i = 0; i < in.size(); i++) {
+      out[i] = in.get(i).longValue();
+    }
+
+    return out;
+  }
+
+  public String formatLongArray(long [] ar) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("[");
+    boolean first = true;
+    for (long val : ar) {
+      if (!first) {
+        sb.append(", ");
+      }
+
+      sb.append(Long.toString(val));
+      first = false;
+    }
+
+    sb.append("]");
+    return sb.toString();
+  }
+
+  public void assertLongArrayEquals(long [] expected, long [] actual) {
+    for (int i = 0; i < expected.length; i++) {
+      try {
+        assertEquals("Failure at position " + i + "; got " + actual[i]
+            + " instead of " + expected[i]
+            + "; actual array is " + formatLongArray(actual),
+            expected[i], actual[i]);
+      } catch (ArrayIndexOutOfBoundsException oob) {
+        fail("Expected array with " + expected.length
+            + " elements; got " + actual.length
+            + ". Actual array is " + formatLongArray(actual));
+      }
+    }
+
+    if (actual.length > expected.length) {
+      fail("Actual array has " + actual.length
+          + " elements; expected " + expected.length
+          + ". Actual array is " + formatLongArray(actual));
Actual array is " + formatLongArray(actual)); + } + } + + public void testEvenSplits() throws SQLException { + List splits = new IntegerSplitter().split(10, 0, 100); + long [] expected = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, }; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + public void testOddSplits() throws SQLException { + List splits = new IntegerSplitter().split(10, 0, 95); + long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95, }; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + public void testSingletonSplit() throws SQLException { + List splits = new IntegerSplitter().split(1, 5, 5); + long [] expected = { 5, 5 }; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + public void testSingletonSplit2() throws SQLException { + // Same test, but overly-high numSplits + List splits = new IntegerSplitter().split(5, 5, 5); + long [] expected = { 5, 5 }; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + public void testTooManySplits() throws SQLException { + List splits = new IntegerSplitter().split(5, 3, 5); + long [] expected = { 3, 4, 5 }; + assertLongArrayEquals(expected, toLongArray(splits)); + } + + /** + * This tests verifies that overflows do not happen due to the splitting + * algorithm. + * + * @throws SQLException + */ + public void testBigIntSplits() throws SQLException { + List splits = new IntegerSplitter().split(4, 14, + 7863696997872966707L); + assertEquals(splits.size(), 5); + } +} diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java new file mode 100644 index 00000000..3dfb2484 --- /dev/null +++ b/src/test/org/apache/sqoop/mapreduce/db/TestTextSplitter.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.sqoop.mapreduce.db;
+
+import java.math.BigDecimal;
+import java.sql.SQLException;
+import java.util.List;
+
+import com.cloudera.sqoop.mapreduce.db.TextSplitter;
+
+import junit.framework.TestCase;
+
+public class TestTextSplitter extends TestCase {
+
+  public String formatArray(Object [] ar) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("[");
+    boolean first = true;
+    for (Object val : ar) {
+      if (!first) {
+        sb.append(", ");
+      }
+
+      sb.append(val.toString());
+      first = false;
+    }
+
+    sb.append("]");
+    return sb.toString();
+  }
+
+  public void assertArrayEquals(Object [] expected, Object [] actual) {
+    for (int i = 0; i < expected.length; i++) {
+      try {
+        assertEquals("Failure at position " + i + "; got " + actual[i]
+            + " instead of " + expected[i]
+            + "; actual array is " + formatArray(actual),
+            expected[i], actual[i]);
+      } catch (ArrayIndexOutOfBoundsException oob) {
+        fail("Expected array with " + expected.length
+            + " elements; got " + actual.length
+            + ". Actual array is " + formatArray(actual));
+      }
+    }
+
+    if (actual.length > expected.length) {
+      fail("Actual array has " + actual.length
+          + " elements; expected " + expected.length
+          + ". Actual array is " + formatArray(actual));
+    }
+  }
+
+  public void testStringConvertEmpty() {
+    TextSplitter splitter = new TextSplitter();
+    BigDecimal emptyBigDec = splitter.stringToBigDecimal("");
+    assertEquals(BigDecimal.ZERO, emptyBigDec);
+  }
+
+  public void testBigDecConvertEmpty() {
+    TextSplitter splitter = new TextSplitter();
+    String emptyStr = splitter.bigDecimalToString(BigDecimal.ZERO);
+    assertEquals("", emptyStr);
+  }
+
+  public void testConvertA() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("A"));
+    assertEquals("A", out);
+  }
+
+  public void testConvertZ() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(splitter.stringToBigDecimal("Z"));
+    assertEquals("Z", out);
+  }
+
+  public void testConvertThreeChars() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("abc"));
+    assertEquals("abc", out);
+  }
+
+  public void testConvertStr() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("big str"));
+    assertEquals("big str", out);
+  }
+
+  public void testConvertChomped() {
+    TextSplitter splitter = new TextSplitter();
+    String out = splitter.bigDecimalToString(
+        splitter.stringToBigDecimal("AVeryLongStringIndeed"));
+    assertEquals("AVeryLon", out);
+  }
+
+  public void testAlphabetSplit() throws SQLException {
+    // This should give us 25 splits, one per letter.
+    TextSplitter splitter = new TextSplitter();
+    List<String> splits = splitter.split(25, "A", "Z", "");
+    String [] expected = { "A", "B", "C", "D", "E", "F", "G", "H", "I",
+        "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U",
+        "V", "W", "X", "Y", "Z", };
+    assertArrayEquals(expected, splits.toArray(new String [0]));
+  }
+
+  public void testCommonPrefix() throws SQLException {
+    // Splits between 'Hand' and 'Hardy'
+    TextSplitter splitter = new TextSplitter();
+    List<String> splits = splitter.split(5, "nd", "rdy", "Ha");
+    // Don't check for exact values in the middle, because the splitter
+    // generates some ugly Unicode-isms. But do check that we get multiple
+    // splits and that it starts and ends on the correct points.
+ assertEquals("Hand", splits.get(0)); + assertEquals("Hardy", splits.get(splits.size() -1)); + assertEquals(6, splits.size()); + } +}