
SQOOP-342. Allow user to override Sqoop type mapping.

(Jarek Jarcec Cecho via Arvind Prabhakar)


git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1182523 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Arvind Prabhakar 2011-10-12 18:54:31 +00:00
parent 8e0ce98551
commit c39bf2a14e
15 changed files with 302 additions and 41 deletions

View File

@ -39,3 +39,5 @@ Code generation options
--package-name (package)::
Puts auto-generated classes in the named Java package
--map-column-java (mapping)::
Override default mapping from SQL type to Java type for configured columns

View File

@ -38,3 +38,5 @@ Hive options
--hive-table (table-name)::
When used with --hive-import, overrides the destination table name
--map-column-hive (mapping)::
Override default mapping from SQL type to Hive type for configured columns

View File

@ -86,4 +86,3 @@ Import control options
--null-non-string::
The string to be written for a null value for non-string columns

View File

@ -32,5 +32,7 @@ Argument Description
+\--jar-file <file>+ Disable code generation; use specified jar
+\--outdir <dir>+ Output directory for generated code
+\--package-name <name>+ Put auto-generated classes in this package
+\--map-column-java <m>+ Override default mapping from SQL type to\
Java type for configured columns.
-------------------------------------------------------------------------

View File

@ -44,18 +44,7 @@ another.
include::common-args.txt[]
.Code generation arguments:
[grid="all"]
`------------------------`-----------------------------------------------
Argument Description
-------------------------------------------------------------------------
+\--bindir <dir>+ Output directory for compiled objects
+\--class-name <name>+ Sets the generated class name. This overrides\
+\--package-name+.
+\--outdir <dir>+ Output directory for generated code
+\--package-name <name>+ Put auto-generated classes in this package
+\--table <table-name>+ Name of the table to generate code for.
-------------------------------------------------------------------------
include::codegen-args.txt[]
include::output-args.txt[]

View File

@ -41,5 +41,7 @@ Argument Description
sharded on
+\--hive-partition-value <v>+ String-value that serves as partition key\
for this import into Hive in this job.
+\--map-column-hive <map>+ Override default mapping from SQL type to\
Hive type for configured columns.
--------------------------------------------------------------------------

View File

@ -262,6 +262,33 @@ are expected to be present in the shell path of the task process. For MySQL
the utilities +mysqldump+ and +mysqlimport+ are required, whereas for
PostgreSQL the utility +psql+ is required.
Controlling type mapping
^^^^^^^^^^^^^^^^^^^^^^^^
Sqoop is preconfigured to map most SQL types to appropriate Java or Hive
representations. However, the default mapping might not be suitable for
everyone and can be overridden with +--map-column-java+ (for changing the
mapping to Java) or +--map-column-hive+ (for changing the Hive mapping).
.Parameters for overriding mapping
[grid="all"]
`---------------------------------`--------------------------------------
Argument Description
-------------------------------------------------------------------------
+\--map-column-java <mapping>+ Override mapping from SQL to Java type\
for configured columns.
+\--map-column-hive <mapping>+ Override mapping from SQL to Hive type\
for configured columns.
-------------------------------------------------------------------------
Sqoop expects a comma-separated list of mappings in the form <name of column>=<new type>. For example:
----
$ sqoop import ... --map-column-java id=String,value=Integer
----
Sqoop will raise an exception if a configured mapping is not used.
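A similar override can be supplied for Hive imports; a minimal sketch (the column name +id+ here is illustrative):
----
$ sqoop import ... --hive-import --map-column-hive id=STRING
----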
Incremental Imports
^^^^^^^^^^^^^^^^^^^

View File

@ -159,6 +159,10 @@ public enum IncrementalMode {
@StoredAsProperty("hive.partition.key") private String hivePartitionKey;
@StoredAsProperty("hive.partition.value") private String hivePartitionValue;
// User explicit mapping of types
private Properties mapColumnJava; // stored as map.column.java
private Properties mapColumnHive; // stored as map.column.hive
// An ordered list of column names denoting what order columns are
// serialized to a PreparedStatement from a generated record type.
// Not serialized to metastore.
@ -584,6 +588,12 @@ public void loadProperties(Properties props) {
this.connectionParams =
getPropertiesAsNetstedProperties(props, "db.connect.params");
// Loading user mapping
this.mapColumnHive =
getPropertiesAsNetstedProperties(props, "map.column.hive");
this.mapColumnJava =
getPropertiesAsNetstedProperties(props, "map.column.java");
// Delimiters were previously memoized; don't let the tool override
// them with defaults.
this.areDelimsManuallySet = true;
@ -656,6 +666,10 @@ public Properties writeProperties() {
setPropertiesAsNestedProperties(props,
"db.connect.params", this.connectionParams);
setPropertiesAsNestedProperties(props,
"map.column.hive", this.mapColumnHive);
setPropertiesAsNestedProperties(props,
"map.column.java", this.mapColumnJava);
return props;
}
@ -691,6 +705,14 @@ public Object clone() {
other.setConnectionParams(this.connectionParams);
}
if (null != mapColumnHive) {
other.mapColumnHive = (Properties) this.mapColumnHive.clone();
}
if (null != mapColumnJava) {
other.mapColumnJava = (Properties) this.mapColumnJava.clone();
}
return other;
} catch (CloneNotSupportedException cnse) {
// Shouldn't happen.
@ -817,6 +839,10 @@ private void initDefaults(Configuration baseConfiguration) {
this.incrementalMode = IncrementalMode.None;
this.updateMode = UpdateMode.UpdateOnly;
// Creating instances for user specific mapping
this.mapColumnHive = new Properties();
this.mapColumnJava = new Properties();
}
/**
@ -1008,6 +1034,32 @@ public String getPassword() {
return password;
}
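/**
 * Parse a comma-separated list of column mappings of the form
 * <column>=<type> (for example "id=String,value=Integer") into the
 * supplied Properties object, keyed by column name.
 */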
protected void parseColumnMapping(String mapping,
Properties output) {
output.clear();
String[] maps = mapping.split(",");
for(String map : maps) {
String[] details = map.split("=");
output.put(details[0], details[1]);
}
}
public void setMapColumnHive(String mapColumn) {
parseColumnMapping(mapColumn, mapColumnHive);
}
public void setMapColumn(String mapColumn) {
parseColumnMapping(mapColumn, mapColumnJava);
}
public Properties getMapColumnHive() {
return mapColumnHive;
}
public Properties getMapColumnJava() {
return mapColumnJava;
}
/**
* Allow the user to enter his password on the console without printing
* characters.

View File

@ -36,6 +36,7 @@
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Properties;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -117,6 +118,7 @@ void setColumnTypes(Map<String, Integer> colTypes) {
*/
public String getCreateTableStmt() throws IOException {
Map<String, Integer> columnTypes;
Properties userMapping = options.getMapColumnHive();
if (externalColTypes != null) {
// Use pre-defined column types.
@ -139,6 +141,22 @@ public String getCreateTableStmt() throws IOException {
sb.append(outputTableName).append("` ( ");
}
// Check that all explicitly mapped columns are present in result set
for(Object column : userMapping.keySet()) {
boolean found = false;
for(String c : colNames) {
if(c.equals(column)) {
found = true;
break;
}
}
if(!found) {
throw new IllegalArgumentException("No column by the name " + column
+ "found while importing data");
}
}
boolean first = true;
for (String col : colNames) {
if (!first) {
@ -148,7 +166,8 @@ public String getCreateTableStmt() throws IOException {
first = false;
Integer colType = columnTypes.get(col);
String hiveColType = connManager.toHiveType(colType);
String hiveColType = userMapping.getProperty(col);
if(hiveColType == null) { hiveColType = connManager.toHiveType(colType); }
if (null == hiveColType) {
throw new IOException("Hive does not support the SQL type for column "
+ col);

View File

@ -43,6 +43,7 @@
import java.io.Writer;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.logging.Log;
@ -229,6 +230,20 @@ public static String toJavaIdentifier(String candidate) {
return output;
}
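// Resolves the Java type for a column: a user override supplied via
// --map-column-java takes precedence over the connection manager's
// default SQL-to-Java mapping.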
private String toJavaType(String columnName, int sqlType) {
Properties mapping = options.getMapColumnJava();
if(mapping.containsKey(columnName)) {
String type = mapping.getProperty(columnName);
if(LOG.isDebugEnabled()) {
LOG.debug("Overriding type of column " + columnName + " to " + type);
}
return type;
}
return connManager.toJavaType(sqlType);
}
/**
* @param javaType
* @return the name of the method of JdbcWritableBridge to read an entry
@ -453,7 +468,7 @@ private void generateFields(Map<String, Integer> columnTypes,
for (String col : colNames) {
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("Cannot resolve SQL type " + sqlType);
continue;
@ -496,7 +511,7 @@ private void generateEquals(Map<String, Integer> columnTypes,
sb.append(" boolean equal = true;\n");
for (String col : colNames) {
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("Cannot resolve SQL type " + sqlType);
continue;
@ -530,7 +545,7 @@ private void generateDbRead(Map<String, Integer> columnTypes,
fieldNum++;
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -570,7 +585,7 @@ private void generateLoadLargeObjects(Map<String, Integer> columnTypes,
fieldNum++;
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -614,7 +629,7 @@ private void generateDbWrite(Map<String, Integer> columnTypes,
fieldNum++;
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -650,7 +665,7 @@ private void generateHadoopRead(Map<String, Integer> columnTypes,
for (String col : colNames) {
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -687,7 +702,7 @@ private void generateCloneMethod(Map<String, Integer> columnTypes,
// For each field that is mutable, we need to perform the deep copy.
for (String colName : colNames) {
int sqlType = columnTypes.get(colName);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(colName, sqlType);
if (null == javaType) {
continue;
} else if (javaType.equals("java.sql.Date")
@ -721,7 +736,7 @@ private void generateSetField(Map<String, Integer> columnTypes,
boolean first = true;
for (String colName : colNames) {
int sqlType = columnTypes.get(colName);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(colName, sqlType);
if (null == javaType) {
continue;
} else {
@ -806,7 +821,7 @@ private void generateToString(Map<String, Integer> columnTypes,
boolean first = true;
for (String col : colNames) {
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -896,7 +911,7 @@ private void parseColumn(String colName, int colType, StringBuilder sb) {
// assume that we have __it and __cur_str vars, based on
// __loadFromFields() code.
sb.append(" __cur_str = __it.next();\n");
String javaType = connManager.toJavaType(colType);
String javaType = toJavaType(colName, colType);
parseNullVal(javaType, colName, sb);
if (javaType.equals("String")) {
@ -991,7 +1006,7 @@ private void generateHadoopWrite(Map<String, Integer> columnTypes,
for (String col : colNames) {
int sqlType = columnTypes.get(col);
String javaType = connManager.toJavaType(sqlType);
String javaType = toJavaType(col, sqlType);
if (null == javaType) {
LOG.error("No Java type for SQL type " + sqlType
+ " for column " + col);
@ -1071,6 +1086,17 @@ public void generate() throws IOException {
columnTypes.put(identifier, type);
}
// Check that all explicitly mapped columns are present in result set
Properties mapping = options.getMapColumnJava();
if(mapping != null && !mapping.isEmpty()) {
for(Object column : mapping.keySet()) {
if(!uniqColNames.contains((String)column)) {
throw new IllegalArgumentException("No column by the name " + column
+ "found while importing data");
}
}
}
// The db write() method may use column names in a different
// order. If this is set in the options, pull it out here and
// make sure we format the column names to identifiers in the same way

View File

@ -92,6 +92,8 @@ public abstract class BaseSqoopTool extends SqoopTool {
public static final String INPUT_NULL_STRING = "input-null-string";
public static final String NULL_NON_STRING = "null-non-string";
public static final String INPUT_NULL_NON_STRING = "input-null-non-string";
public static final String MAP_COLUMN_JAVA = "map-column-java";
public static final String MAP_COLUMN_HIVE = "map-column-hive";
public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile";
public static final String FMT_TEXTFILE_ARG = "as-textfile";
@ -449,6 +451,13 @@ protected RelatedOptions getHiveOptions(boolean explicitHiveImport) {
+ "to hive")
.withLongOpt(HIVE_PARTITION_VALUE_ARG)
.create());
hiveOpts.addOption(OptionBuilder
.hasArg()
.withDescription("Override mapping for specific column to hive"
+ " types.")
.withLongOpt(MAP_COLUMN_HIVE)
.create());
return hiveOpts;
}
@ -550,25 +559,31 @@ protected RelatedOptions getCodeGenOpts(boolean multiTable) {
.withLongOpt(PACKAGE_NAME_ARG)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
.hasArg()
.withDescription("Null string representation")
.withLongOpt(NULL_STRING)
.create());
.hasArg()
.withDescription("Null string representation")
.withLongOpt(NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
.hasArg()
.withDescription("Input null string representation")
.withLongOpt(INPUT_NULL_STRING)
.create());
.hasArg()
.withDescription("Input null string representation")
.withLongOpt(INPUT_NULL_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
.hasArg()
.withDescription("Null non-string representation")
.withLongOpt(NULL_NON_STRING)
.create());
.hasArg()
.withDescription("Null non-string representation")
.withLongOpt(NULL_NON_STRING)
.create());
codeGenOpts.addOption(OptionBuilder.withArgName("null-str")
.hasArg()
.withDescription("Input null non-string representation")
.withLongOpt(INPUT_NULL_NON_STRING)
.create());
.hasArg()
.withDescription("Input null non-string representation")
.withLongOpt(INPUT_NULL_NON_STRING)
.create());
codeGenOpts.addOption(OptionBuilder
.hasArg()
.withDescription("Override mapping for specific columns to java types")
.withLongOpt(MAP_COLUMN_JAVA)
.create());
if (!multiTable) {
codeGenOpts.addOption(OptionBuilder.withArgName("name")
.hasArg()
@ -753,6 +768,10 @@ protected void applyHiveOptions(CommandLine in, SqoopOptions out)
if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) {
out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG));
}
if (in.hasOption(MAP_COLUMN_HIVE)) {
out.setMapColumnHive(in.getOptionValue(MAP_COLUMN_HIVE));
}
}
protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out)
@ -843,6 +862,10 @@ protected void applyCodeGenOptions(CommandLine in, SqoopOptions out,
out.setPackageName(in.getOptionValue(PACKAGE_NAME_ARG));
}
if (in.hasOption(MAP_COLUMN_JAVA)) {
out.setMapColumn(in.getOptionValue(MAP_COLUMN_JAVA));
}
if (!multiTable && in.hasOption(CLASS_NAME_ARG)) {
out.setClassName(in.getOptionValue(CLASS_NAME_ARG));
}

View File

@ -40,6 +40,7 @@
import com.cloudera.sqoop.Sqoop;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.SqoopOptions.FileLayout;
import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException;
import com.cloudera.sqoop.cli.RelatedOptions;
import com.cloudera.sqoop.cli.ToolOptions;
@ -839,6 +840,10 @@ protected void validateImportOptions(SqoopOptions options)
"MySQL direct export currently supports only text output format."
+ "Parameters --as-sequencefile and --as-avrodatafile are not "
+ "supported with --direct params in MySQL case.");
} else if (!options.getMapColumnJava().isEmpty()
&& options.getFileLayout() == FileLayout.AvroDataFile) {
throw new InvalidOptionsException(
"Overriding column types is currently not supported with avro.");
}
}

View File

@ -257,6 +257,28 @@ public void testBoundaryQueryParams() throws Exception {
assertEquals("select 1, 2", opts.getBoundaryQuery());
}
public void testMapColumnHiveParams() throws Exception {
String[] args = {
"--map-column-hive", "id=STRING",
};
SqoopOptions opts = parse(args);
Properties mapping = opts.getMapColumnHive();
assertTrue(mapping.containsKey("id"));
assertEquals("STRING", mapping.get("id"));
}
public void testMapColumnJavaParams() throws Exception {
String[] args = {
"--map-column-java", "id=String",
};
SqoopOptions opts = parse(args);
Properties mapping = opts.getMapColumnJava();
assertTrue(mapping.containsKey("id"));
assertEquals("String", mapping.get("id"));
}
public void testPropertySerialization1() {
// Test that if we write a SqoopOptions out to a Properties,
// and then read it back in, we get all the same results.

View File

@ -31,6 +31,8 @@
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.testutil.HsqldbTestServer;
import java.sql.Types;
/**
* Test Hive DDL statement generation.
@ -136,4 +138,52 @@ public void testLzoSplitting() throws Exception {
+ "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'",
createTable);
}
public void testUserMapping() throws Exception {
String[] args = {
"--map-column-hive", "id=STRING,value=INTEGER",
};
Configuration conf = new Configuration();
SqoopOptions options =
new ImportTool().parseArguments(args, null, null, false);
TableDefWriter writer = new TableDefWriter(options,
null, HsqldbTestServer.getTableName(), "outputTable", conf, false);
Map<String, Integer> colTypes = new HashMap<String, Integer>();
colTypes.put("id", Types.INTEGER);
colTypes.put("value", Types.VARCHAR);
writer.setColumnTypes(colTypes);
String createTable = writer.getCreateTableStmt();
assertNotNull(createTable);
assertTrue(createTable.contains("`id` STRING"));
assertTrue(createTable.contains("`value` INTEGER"));
assertFalse(createTable.contains("`id` INTEGER"));
assertFalse(createTable.contains("`value` STRING"));
}
public void testUserMappingFailWhenCantBeApplied() throws Exception {
String[] args = {
"--map-column-hive", "id=STRING,value=INTEGER",
};
Configuration conf = new Configuration();
SqoopOptions options =
new ImportTool().parseArguments(args, null, null, false);
TableDefWriter writer = new TableDefWriter(options,
null, HsqldbTestServer.getTableName(), "outputTable", conf, false);
Map<String, Integer> colTypes = new HashMap<String, Integer>();
colTypes.put("id", Types.INTEGER);
writer.setColumnTypes(colTypes);
try {
String createTable = writer.getCreateTableStmt();
fail("Expected failure on non applied mapping.");
} catch(IllegalArgumentException iae) {
// Expected, ok
}
}
}

View File

@ -46,6 +46,7 @@
import com.cloudera.sqoop.testutil.ImportJobTestCase;
import com.cloudera.sqoop.tool.ImportTool;
import com.cloudera.sqoop.util.ClassLoaderStack;
import java.lang.reflect.Field;
/**
* Test that the ClassWriter generates Java classes based on the given table,
@ -425,4 +426,44 @@ public void testEqualsMethod() throws IOException, ClassNotFoundException,
}
}
private static final String USERMAPPING_CLASS_AND_PACKAGE_NAME =
"usermapping.pkg.prefix.classname";
@Test
public void testUserMapping() throws IOException, ClassNotFoundException,
InstantiationException, IllegalAccessException, NoSuchMethodException,
InvocationTargetException {
// Set the option strings in an "argv" to redirect our srcdir and bindir
String [] argv = {
"--bindir", JAR_GEN_DIR,
"--outdir", CODE_GEN_DIR,
"--class-name", USERMAPPING_CLASS_AND_PACKAGE_NAME,
"--map-column-java", "INTFIELD1=String",
};
File ormJarFile = runGenerationTest(argv,
USERMAPPING_CLASS_AND_PACKAGE_NAME);
ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(
ormJarFile.getCanonicalPath(),
USERMAPPING_CLASS_AND_PACKAGE_NAME);
Class tableClass = Class.forName(
USERMAPPING_CLASS_AND_PACKAGE_NAME,
true,
Thread.currentThread().getContextClassLoader());
try {
Field intfield = tableClass.getDeclaredField("INTFIELD1");
assertEquals(String.class, intfield.getType());
} catch (NoSuchFieldException ex) {
fail("Can't find field for INTFIELD1");
} catch (SecurityException ex) {
fail("Can't find field for INTFIELD1");
}
if (null != prevClassLoader) {
ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
}
}
}