SQOOP-724 Support Table hints in Microsoft SQL Server
(Jarek Jarcec Cecho via Cheolsoo Park)
parent b666fe1bb6
commit dc4a82102c
@@ -39,14 +39,55 @@ it will update appropriate row instead. As a result, Sqoop is ignoring values sp
in parameter +\--update-key+, however the user needs to specify at least one valid column
to turn on update mode itself.
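
For instance, a minimal update-mode export might look like this (the column
name is illustrative):

----
$ sqoop export ... --table custom_table --update-key id
----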

Microsoft SQL Connector
~~~~~~~~~~~~~~~~~~~~~~~

Extra arguments
^^^^^^^^^^^^^^^

The list of all extra arguments supported by the Microsoft SQL Connector is shown below:

.Supported Microsoft SQL Connector extra arguments:
[grid="all"]
`----------------------------------------`---------------------------------------
Argument                                  Description
---------------------------------------------------------------------------------
+\--schema <name>+                        Schema name that Sqoop should use. \
                                          Default is "dbo".
+\--table-hints <hints>+                  Table hints that Sqoop should use for \
                                          data movement.
---------------------------------------------------------------------------------
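
Both extra arguments are passed after the +\--+ separator and can be combined;
a sketch (table, schema, and hint names are illustrative):

----
$ sqoop import ... --table custom_table -- --schema custom_schema --table-hints NOLOCK
----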

Schema support
^^^^^^^^^^^^^^
If you need to work with tables that are located in non-default schemas, you can
specify schema names via the +\--schema+ argument. Custom schemas are supported for
both import and export jobs. For example:

----
$ sqoop import ... --table custom_table -- --schema custom_schema
----
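
The same argument applies on export; a sketch under the same illustrative names:

----
$ sqoop export ... --table custom_table -- --schema custom_schema
----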

Table hints
^^^^^^^^^^^

Sqoop supports table hints in both import and export jobs. Table hints are used only
for queries that move data from/to Microsoft SQL Server, but they cannot be used for
metadata queries. You can specify a comma-separated list of table hints in the
+\--table-hints+ argument. For example:

----
$ sqoop import ... --table custom_table -- --table-hints NOLOCK
----
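
Multiple hints are passed as a single comma-separated list, matching the hints
exercised by the tests in this commit:

----
$ sqoop import ... --table custom_table -- --table-hints NOLOCK,NOWAIT
----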


PostgreSQL Connector
~~~~~~~~~~~~~~~~~~~~~

Extra arguments
^^^^^^^^^^^^^^^

The list of all extra arguments supported by the PostgreSQL Connector is shown below:

.Supported PostgreSQL extra arguments:
[grid="all"]
@@ -29,10 +29,13 @@
import org.apache.commons.logging.LogFactory;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.mapreduce.ExportBatchOutputFormat;
import com.cloudera.sqoop.mapreduce.JdbcExportJob;
import com.cloudera.sqoop.util.ExportException;
import com.cloudera.sqoop.util.ImportException;
import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.cli.RelatedOptions;
import org.apache.sqoop.mapreduce.sqlserver.SqlServerExportBatchOutputFormat;
import org.apache.sqoop.mapreduce.sqlserver.SqlServerInputFormat;

/**
 * Manages connections to SQLServer databases. Requires the SQLServer JDBC
@@ -42,6 +45,9 @@ public class SQLServerManager
    extends com.cloudera.sqoop.manager.InformationSchemaManager {

  public static final String SCHEMA = "schema";
  public static final String TABLE_HINTS = "table-hints";
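  // Configuration key under which the hints are handed off to the map-reduce
  // job; read back in SqlServerExportBatchOutputFormat and SqlServerRecordReader.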
  public static final String TABLE_HINTS_PROP
      = "org.apache.sqoop.manager.sqlserver.table.hints";

  public static final Log LOG = LogFactory.getLog(
      SQLServerManager.class.getName());
@@ -55,6 +61,11 @@ public class SQLServerManager
   */
  private String schema;

  /**
   * Optional table hints to use.
   */
  private String tableHints;

  public SQLServerManager(final SqoopOptions opts) {
    super(DRIVER_CLASS, opts);

@@ -66,6 +77,28 @@ public SQLServerManager(final SqoopOptions opts) {
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void importTable(
          com.cloudera.sqoop.manager.ImportJobContext context)
      throws IOException, ImportException {
    // We're the correct connection manager
    context.setConnManager(this);

    // Propagate table hints to job
    Configuration configuration = context.getOptions().getConf();
    if (tableHints != null) {
      configuration.set(TABLE_HINTS_PROP, tableHints);
    }

    // Set our own input format
    context.setInputFormat(SqlServerInputFormat.class);
    super.importTable(context);
  }

  /**
   * Export data stored in HDFS into a table in a database.
   */
@@ -73,8 +106,15 @@ public SQLServerManager(final SqoopOptions opts) {
  public void exportTable(com.cloudera.sqoop.manager.ExportJobContext context)
      throws IOException, ExportException {
    context.setConnManager(this);

    // Propagate table hints to job
    Configuration configuration = context.getOptions().getConf();
    if (tableHints != null) {
      configuration.set(TABLE_HINTS_PROP, tableHints);
    }

    JdbcExportJob exportJob = new JdbcExportJob(context, null, null,
        SqlServerExportBatchOutputFormat.class);
    exportJob.runExport();
  }

@@ -154,6 +194,15 @@ void parseExtraArgs(String[] args) throws ParseException {

      this.schema = schemaName;
    }

    // Apply table hints
    if (cmdLine.hasOption(TABLE_HINTS)) {
      String hints = cmdLine.getOptionValue(TABLE_HINTS);
      LOG.info("Sqoop will use the following table hints for data transfer: "
        + hints);

      this.tableHints = hints;
    }
  }

  /**
@@ -171,6 +220,10 @@ private RelatedOptions getExtraOptions() {
        .withDescription("Optional schema name")
        .withLongOpt(SCHEMA).create());

    extraOptions.addOption(OptionBuilder.withArgName("string").hasArg()
        .withDescription("Optional table hints to use")
        .withLongOpt(TABLE_HINTS).create());

    return extraOptions;
  }
}
@@ -0,0 +1,111 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.sqlserver;

import com.cloudera.sqoop.lib.SqoopRecord;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.sqoop.manager.SQLServerManager;
import org.apache.sqoop.mapreduce.ExportBatchOutputFormat;

import java.io.IOException;
import java.sql.SQLException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Output format specific for Microsoft SQL Connector.
 */
public class SqlServerExportBatchOutputFormat<K extends SqoopRecord, V>
    extends ExportBatchOutputFormat {

  private static final Log LOG =
      LogFactory.getLog(SqlServerExportBatchOutputFormat.class);

  /** {@inheritDoc} */
  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    try {
      return new SqlServerExportBatchRecordWriter(context);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /** {@inheritDoc} */
  public class SqlServerExportBatchRecordWriter extends ExportBatchRecordWriter {

    public SqlServerExportBatchRecordWriter(TaskAttemptContext context)
        throws ClassNotFoundException, SQLException {
      super(context);
    }

    /** {@inheritDoc} */
    @Override
    protected String getInsertStatement(int numRows) {
      StringBuilder sb = new StringBuilder();

      sb.append("INSERT INTO " + tableName + " ");

      String tableHints = getConf().get(SQLServerManager.TABLE_HINTS_PROP);
      if (tableHints != null) {
        LOG.info("Using table hints: " + tableHints);
        sb.append(" WITH (").append(tableHints).append(") ");
      }

      int numSlots;
      if (this.columnNames != null) {
        numSlots = this.columnNames.length;

        sb.append("(");
        boolean first = true;
        for (String col : columnNames) {
          if (!first) {
            sb.append(", ");
          }

          sb.append(col);
          first = false;
        }

        sb.append(") ");
      } else {
        numSlots = this.columnCount; // set if columnNames is null.
      }

      sb.append("VALUES ");

      // generates the (?, ?, ?...).
      sb.append("(");
      for (int i = 0; i < numSlots; i++) {
        if (i != 0) {
          sb.append(", ");
        }

        sb.append("?");
      }
      sb.append(")");

      String query = sb.toString();
      LOG.info("Using query " + query);

      return query;
    }
  }
}
@@ -0,0 +1,54 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.sqlserver;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.sqoop.mapreduce.DBWritable;

import java.io.IOException;
import java.sql.SQLException;

/**
 * Input format specific for Microsoft SQL Server.
 */
public class SqlServerInputFormat<T extends DBWritable>
    extends DataDrivenDBInputFormat {

  /** {@inheritDoc} */
  @Override
  protected RecordReader<LongWritable, T> createDBRecordReader(
      DBInputSplit split, Configuration conf) throws IOException {

    DBConfiguration dbConf = getDBConf();
    @SuppressWarnings("unchecked")
    Class<T> inputClass = (Class<T>) (dbConf.getInputClass());

    try {
      // Use Microsoft SQL Server specific db reader
      return new SqlServerRecordReader<T>(split, inputClass,
          conf, getConnection(), dbConf, dbConf.getInputConditions(),
          dbConf.getInputFieldNames(), dbConf.getInputTableName());
    } catch (SQLException ex) {
      throw new IOException(ex);
    }
  }
}
@@ -0,0 +1,122 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sqoop.mapreduce.sqlserver;

import com.cloudera.sqoop.mapreduce.db.DBConfiguration;
import com.cloudera.sqoop.mapreduce.db.DBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBRecordReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.sqoop.manager.SQLServerManager;
import org.apache.sqoop.mapreduce.DBWritable;

import java.sql.Connection;
import java.sql.SQLException;

/**
 * Microsoft SQL Server specific Record Reader.
 */
public class SqlServerRecordReader<T extends DBWritable>
    extends DataDrivenDBRecordReader {

  private static final Log LOG =
      LogFactory.getLog(SqlServerRecordReader.class);

  // CHECKSTYLE:OFF
  public SqlServerRecordReader(DBInputFormat.DBInputSplit split,
      Class<T> inputClass, Configuration conf, Connection conn,
      DBConfiguration dbConfig, String cond, String [] fields,
      String table) throws SQLException {

    super(split, inputClass, conf, conn, dbConfig, cond, fields, table,
        "MICROSOFT SQL SERVER");
  }
  // CHECKSTYLE:ON

  /**
   * {@inheritDoc}
   */
  @Override
  protected String getSelectQuery() {
    StringBuilder query = new StringBuilder();

    DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit =
        (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit();

    DBConfiguration dbConf = getDBConf();
    String [] fieldNames = getFieldNames();
    String tableName = getTableName();
    String conditions = getConditions();

    // Build the WHERE clauses associated with the data split first.
    // We need them in both branches of this function.
    StringBuilder conditionClauses = new StringBuilder();
    conditionClauses.append("( ").append(dataSplit.getLowerClause());
    conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause());
    conditionClauses.append(" )");

    if (dbConf.getInputQuery() == null) {
      // We need to generate the entire query.
      query.append("SELECT ");

      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length - 1) {
          query.append(", ");
        }
      }

      query.append(" FROM ").append(tableName);

      String tableHints =
          dbConf.getConf().get(SQLServerManager.TABLE_HINTS_PROP);
      if (tableHints != null) {
        LOG.info("Using table hints: " + tableHints);
        query.append(" WITH (").append(tableHints).append(")");
      }
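
      // For a hypothetical import with --table-hints NOLOCK, the query so
      // far reads: SELECT col1, col2 FROM some_table WITH (NOLOCK)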

      query.append(" WHERE ");
      if (conditions != null && conditions.length() > 0) {
        // Put the user's conditions first.
        query.append("( ").append(conditions).append(" ) AND ");
      }

      // Now append the conditions associated with our split.
      query.append(conditionClauses.toString());

    } else {
      // User provided the query. We replace the special token with
      // our WHERE clause.
      String inputQuery = dbConf.getInputQuery();
      if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) {
        LOG.error("Could not find the clause substitution token "
            + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: ["
            + inputQuery + "]. Parallel splits may not work correctly.");
      }

      query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN,
          conditionClauses.toString()));
    }

    LOG.info("Using query: " + query.toString());
    return query.toString();
  }
}
@@ -257,6 +257,32 @@ public void testExportCustomSchema() throws IOException, SQLException {
    );
  }

  public void testExportTableHints() throws IOException, SQLException {
    createTestFile("inputFile", new String[] {
      "2,Bob,400,sales",
      "3,Fred,15,marketing",
    });

    String[] extra = new String[] {"--", "--table-hints",
      "ROWLOCK",
    };
    runExport(getArgv(DBO_TABLE_NAME, extra));
    assertRowCount(2, escapeObjectName(DBO_TABLE_NAME), conn);
  }

  public void testExportTableHintsMultiple() throws IOException, SQLException {
    createTestFile("inputFile", new String[] {
      "2,Bob,400,sales",
      "3,Fred,15,marketing",
    });

    String[] extra = new String[] {"--", "--table-hints",
      "ROWLOCK,NOWAIT",
    };
    runExport(getArgv(DBO_TABLE_NAME, extra));
    assertRowCount(2, escapeObjectName(DBO_TABLE_NAME), conn);
  }

  public static void assertRowCount(long expected,
      String tableName,
      Connection connection) {
@@ -250,6 +250,30 @@ public void testImportDifferentSchema() throws IOException {
    doImportAndVerify(SCH_TABLE_NAME, expectedResults, extraArgs);
  }

  @Test
  public void testImportTableHints() throws IOException {
    String[] expectedResults = {
      "1,Aaron,1000000.0,engineering",
      "2,Bob,400.0,sales",
      "3,Fred,15.0,marketing",
    };

    String[] extraArgs = new String[] {"--table-hints", "NOLOCK"};
    doImportAndVerify(DBO_TABLE_NAME, expectedResults, extraArgs);
  }

  @Test
  public void testImportTableHintsMultiple() throws IOException {
    String[] expectedResults = {
      "1,Aaron,1000000.0,engineering",
      "2,Bob,400.0,sales",
      "3,Fred,15.0,marketing",
    };

    String[] extraArgs = new String[] {"--table-hints", "NOLOCK,NOWAIT"};
    doImportAndVerify(DBO_TABLE_NAME, expectedResults, extraArgs);
  }

  private String[] getArgv(String tableName, String ... extraArgs) {
    ArrayList<String> args = new ArrayList<String>();