diff --git a/src/java/com/cloudera/sqoop/orm/AvroSchemaGenerator.java b/src/java/com/cloudera/sqoop/orm/AvroSchemaGenerator.java index 3c8eca58..8ecee80a 100644 --- a/src/java/com/cloudera/sqoop/orm/AvroSchemaGenerator.java +++ b/src/java/com/cloudera/sqoop/orm/AvroSchemaGenerator.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,110 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.orm; -import java.io.IOException; -import java.sql.Types; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.avro.Schema.Type; - import com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.manager.ConnManager; /** - * Creates an Avro schema to represent a table from a database. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class AvroSchemaGenerator { - - private final SqoopOptions options; - private final ConnManager connManager; - private final String tableName; +public class AvroSchemaGenerator + extends org.apache.sqoop.orm.AvroSchemaGenerator { public AvroSchemaGenerator(final SqoopOptions opts, final ConnManager connMgr, final String table) { - this.options = opts; - this.connManager = connMgr; - this.tableName = table; - } - - public Schema generate() throws IOException { - ClassWriter classWriter = new ClassWriter(options, connManager, - tableName, null); - Map columnTypes = classWriter.getColumnTypes(); - String[] columnNames = classWriter.getColumnNames(columnTypes); - - List fields = new ArrayList(); - for (String columnName : columnNames) { - String cleanedCol = ClassWriter.toIdentifier(columnName); - int sqlType = columnTypes.get(cleanedCol); - Schema avroSchema = toAvroSchema(sqlType); - Field field = new Field(cleanedCol, avroSchema, null, null); - field.addProp("columnName", columnName); - field.addProp("sqlType", Integer.toString(sqlType)); - fields.add(field); - } - - String avroTableName = (tableName == null ? "QueryResult" : tableName); - - String doc = "Sqoop import of " + avroTableName; - Schema schema = Schema.createRecord(avroTableName, doc, null, false); - schema.setFields(fields); - schema.addProp("tableName", avroTableName); - return schema; - } - - private Type toAvroType(int sqlType) { - switch (sqlType) { - case Types.TINYINT: - case Types.SMALLINT: - case Types.INTEGER: - return Type.INT; - case Types.BIGINT: - return Type.LONG; - case Types.BIT: - case Types.BOOLEAN: - return Type.BOOLEAN; - case Types.REAL: - return Type.FLOAT; - case Types.FLOAT: - case Types.DOUBLE: - return Type.DOUBLE; - case Types.NUMERIC: - case Types.DECIMAL: - return Type.STRING; - case Types.CHAR: - case Types.VARCHAR: - case Types.LONGVARCHAR: - case Types.LONGNVARCHAR: - case Types.NVARCHAR: - case Types.NCHAR: - return Type.STRING; - case Types.DATE: - case Types.TIME: - case Types.TIMESTAMP: - return Type.LONG; - case Types.BINARY: - case Types.VARBINARY: - return Type.BYTES; - default: - throw new IllegalArgumentException("Cannot convert SQL type " - + sqlType); - } - } - - public Schema toAvroSchema(int sqlType) { - // All types are assumed nullabl;e make a union of the "true" type for - // a column and NULL. 
- List childSchemas = new ArrayList(); - childSchemas.add(Schema.create(toAvroType(sqlType))); - childSchemas.add(Schema.create(Schema.Type.NULL)); - return Schema.createUnion(childSchemas); + super(opts, connMgr, table); } } diff --git a/src/java/com/cloudera/sqoop/orm/ClassWriter.java b/src/java/com/cloudera/sqoop/orm/ClassWriter.java index 5eac3e80..61ff0544 100644 --- a/src/java/com/cloudera/sqoop/orm/ClassWriter.java +++ b/src/java/com/cloudera/sqoop/orm/ClassWriter.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,47 +15,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.orm; -import org.apache.hadoop.io.BytesWritable; +import java.util.HashSet; + import com.cloudera.sqoop.SqoopOptions; import com.cloudera.sqoop.manager.ConnManager; -import com.cloudera.sqoop.lib.BigDecimalSerializer; -import com.cloudera.sqoop.lib.BooleanParser; -import com.cloudera.sqoop.lib.DelimiterSet; -import com.cloudera.sqoop.lib.FieldFormatter; -import com.cloudera.sqoop.lib.JdbcWritableBridge; -import com.cloudera.sqoop.lib.LargeObjectLoader; -import com.cloudera.sqoop.lib.LobSerializer; -import com.cloudera.sqoop.lib.RecordParser; -import com.cloudera.sqoop.lib.BlobRef; -import com.cloudera.sqoop.lib.ClobRef; -import com.cloudera.sqoop.lib.SqoopRecord; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.util.HashSet; -import java.util.Map; -import java.util.Properties; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; /** - * Creates an ORM class to represent a table from a database. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class ClassWriter { +public class ClassWriter + extends org.apache.sqoop.orm.ClassWriter { - public static final Log LOG = LogFactory.getLog(ClassWriter.class.getName()); - - // The following are keywords and cannot be used for class, method, or field - // names. public static final HashSet JAVA_RESERVED_WORDS; static { @@ -102,1216 +72,24 @@ public class ClassWriter { JAVA_RESERVED_WORDS.add("const"); } - /** - * This version number is injected into all generated Java classes to denote - * which version of the ClassWriter's output format was used to generate the - * class. - * - * If the way that we generate classes changes, bump this number. - * This number is retrieved by the SqoopRecord.getClassFormatVersion() - * method. 
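Every file touched by this patch follows the same backward-compatibility pattern: the class in the old com.cloudera.sqoop.orm package is reduced to a deprecated shim that extends its org.apache.sqoop.orm counterpart, delegates its constructors, and re-exposes constants and static helpers so call sites compiled against the old namespace keep working. Below is a minimal sketch of that pattern; the package and class names (com.example.core, com.example.legacy, Widget) are placeholders invented for illustration, not Sqoop classes, and each class would live in its own source file.

// --- com/example/core/Widget.java: the relocated implementation ---
package com.example.core;

public class Widget {

  public static final int FORMAT_VERSION = 3;

  private final String name;

  public Widget(String name) {
    this.name = name;
  }

  // A static helper, comparable in role to ClassWriter.toIdentifier().
  public static String normalize(String candidate) {
    return candidate.trim().replace(' ', '_');
  }

  public String getName() {
    return name;
  }
}

// --- com/example/legacy/Widget.java: the deprecated shim left behind ---
package com.example.legacy;

/**
 * @deprecated Moving to use com.example.core namespace.
 */
public class Widget extends com.example.core.Widget {

  // Re-declare constants so old call sites that reference them through this
  // class still resolve to the same values.
  public static final int FORMAT_VERSION = com.example.core.Widget.FORMAT_VERSION;

  public Widget(String name) {
    super(name); // constructors only delegate; no behavior lives in the shim
  }

  // Forward static helpers explicitly so the old class remains a complete facade.
  public static String normalize(String candidate) {
    return com.example.core.Widget.normalize(candidate);
  }
}

Code that still imports the old package continues to compile and behave identically, because the shim inherits the full public API; new code can migrate to the org.apache.sqoop.orm classes at its own pace.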
- */ - public static final int CLASS_WRITER_VERSION = 3; + public static final int CLASS_WRITER_VERSION = + org.apache.sqoop.orm.ClassWriter.CLASS_WRITER_VERSION; - private SqoopOptions options; - private ConnManager connManager; - private String tableName; - private CompilationManager compileManager; + public static String toIdentifier(String candidate) { + return org.apache.sqoop.orm.ClassWriter.toIdentifier(candidate); + } + + public static String toJavaIdentifier(String candidate) { + return org.apache.sqoop.orm.ClassWriter.toJavaIdentifier(candidate); + } + + public static String getIdentifierStrForChar(char c) { + return org.apache.sqoop.orm.ClassWriter.getIdentifierStrForChar(c); + } - /** - * Creates a new ClassWriter to generate an ORM class for a table - * or arbitrary query. - * @param opts program-wide options - * @param connMgr the connection manager used to describe the table. - * @param table the name of the table to read. If null, query is taken - * from the SqoopOptions. - */ public ClassWriter(final SqoopOptions opts, final ConnManager connMgr, final String table, final CompilationManager compMgr) { - this.options = opts; - this.connManager = connMgr; - this.tableName = table; - this.compileManager = compMgr; + super(opts, connMgr, table, compMgr); } - /** - * Given some character that can't be in an identifier, - * try to map it to a string that can. - * - * @param c a character that can't be in a Java identifier - * @return a string of characters that can, or null if there's - * no good translation. - */ - static String getIdentifierStrForChar(char c) { - if (Character.isJavaIdentifierPart(c)) { - return "" + c; - } else if (Character.isWhitespace(c)) { - // Eliminate whitespace. - return null; - } else { - // All other characters map to underscore. - return "_"; - } - } - - /** - * @param word a word to test. - * @return true if 'word' is reserved the in Java language. - */ - private static boolean isReservedWord(String word) { - return JAVA_RESERVED_WORDS.contains(word); - } - - /** - * Coerce a candidate name for an identifier into one which is a valid - * Java or Avro identifier. - * - * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]* - * and is not a reserved word. - * - * @param candidate A string we want to use as an identifier - * @return A string naming an identifier which compiles and is - * similar to the candidate. - */ - public static String toIdentifier(String candidate) { - StringBuilder sb = new StringBuilder(); - boolean first = true; - for (char c : candidate.toCharArray()) { - if (Character.isJavaIdentifierStart(c) && first) { - // Ok for this to be the first character of the identifier. - sb.append(c); - first = false; - } else if (Character.isJavaIdentifierPart(c) && !first) { - // Ok for this character to be in the output identifier. - sb.append(c); - } else { - // We have a character in the original that can't be - // part of this identifier we're building. - // If it's just not allowed to be the first char, add a leading '_'. - // If we have a reasonable translation (e.g., '-' -> '_'), do that. - // Otherwise, drop it. - if (first && Character.isJavaIdentifierPart(c) - && !Character.isJavaIdentifierStart(c)) { - sb.append("_"); - sb.append(c); - first = false; - } else { - // Try to map this to a different character or string. - // If we can't just give up. 
- String translated = getIdentifierStrForChar(c); - if (null != translated) { - sb.append(translated); - first = false; - } - } - } - } - return sb.toString(); - } - - /** - * Coerce a candidate name for an identifier into one which will - * definitely compile. - * - * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]* - * and is not a reserved word. - * - * @param candidate A string we want to use as an identifier - * @return A string naming an identifier which compiles and is - * similar to the candidate. - */ - public static String toJavaIdentifier(String candidate) { - String output = toIdentifier(candidate); - if (isReservedWord(output)) { - // e.g., 'class' -> '_class'; - return "_" + output; - } - - return output; - } - - private String toJavaType(String columnName, int sqlType) { - Properties mapping = options.getMapColumnJava(); - - if(mapping.containsKey(columnName)) { - String type = mapping.getProperty(columnName); - if(LOG.isDebugEnabled()) { - LOG.info("Overriding type of column " + columnName + " to " + type); - } - return type; - } - - return connManager.toJavaType(sqlType); - } - - /** - * @param javaType - * @return the name of the method of JdbcWritableBridge to read an entry - * with a given java type. - */ - private String dbGetterForType(String javaType) { - // All Class-based types (e.g., java.math.BigDecimal) are handled with - // "readBar" where some.package.foo.Bar is the canonical class name. Turn - // the javaType string into the getter type string. - - String [] parts = javaType.split("\\."); - if (parts.length == 0) { - LOG.error("No ResultSet method for Java type " + javaType); - return null; - } - - String lastPart = parts[parts.length - 1]; - try { - String getter = "read" + Character.toUpperCase(lastPart.charAt(0)) - + lastPart.substring(1); - return getter; - } catch (StringIndexOutOfBoundsException oob) { - // lastPart.*() doesn't work on empty strings. - LOG.error("Could not infer JdbcWritableBridge getter for Java type " - + javaType); - return null; - } - } - - /** - * @param javaType - * @return the name of the method of JdbcWritableBridge to write an entry - * with a given java type. - */ - private String dbSetterForType(String javaType) { - // TODO(aaron): Lots of unit tests needed here. - // See dbGetterForType() for the logic used here; it's basically the same. - - String [] parts = javaType.split("\\."); - if (parts.length == 0) { - LOG.error("No PreparedStatement Set method for Java type " + javaType); - return null; - } - - String lastPart = parts[parts.length - 1]; - try { - String setter = "write" + Character.toUpperCase(lastPart.charAt(0)) - + lastPart.substring(1); - return setter; - } catch (StringIndexOutOfBoundsException oob) { - // lastPart.*() doesn't work on empty strings. - LOG.error("Could not infer PreparedStatement setter for Java type " - + javaType); - return null; - } - } - - private String stringifierForType(String javaType, String colName) { - if (javaType.equals("String")) { - // Check if it is null, and write the null representation in such case - String r = colName + "==null?\"" + this.options.getNullStringValue() - + "\":" + colName; - return r; - } else { - // This is an object type -- just call its toString() in a null-safe way. 
- // Also check if it is null, and instead write the null representation - // in such case - String r = colName + "==null?\"" + this.options.getNullNonStringValue() - + "\":" + "\"\" + " + colName; - return r; - } - } - - /** - * @param javaType the type to read - * @param inputObj the name of the DataInput to read from - * @param colName the column name to read - * @return the line of code involving a DataInput object to read an entry - * with a given java type. - */ - private String rpcGetterForType(String javaType, String inputObj, - String colName) { - if (javaType.equals("Integer")) { - return " this." + colName + " = Integer.valueOf(" + inputObj - + ".readInt());\n"; - } else if (javaType.equals("Long")) { - return " this." + colName + " = Long.valueOf(" + inputObj - + ".readLong());\n"; - } else if (javaType.equals("Float")) { - return " this." + colName + " = Float.valueOf(" + inputObj - + ".readFloat());\n"; - } else if (javaType.equals("Double")) { - return " this." + colName + " = Double.valueOf(" + inputObj - + ".readDouble());\n"; - } else if (javaType.equals("Boolean")) { - return " this." + colName + " = Boolean.valueOf(" + inputObj - + ".readBoolean());\n"; - } else if (javaType.equals("String")) { - return " this." + colName + " = Text.readString(" + inputObj + ");\n"; - } else if (javaType.equals("java.sql.Date")) { - return " this." + colName + " = new Date(" + inputObj - + ".readLong());\n"; - } else if (javaType.equals("java.sql.Time")) { - return " this." + colName + " = new Time(" + inputObj - + ".readLong());\n"; - } else if (javaType.equals("java.sql.Timestamp")) { - return " this." + colName + " = new Timestamp(" + inputObj - + ".readLong());\n" + " this." + colName + ".setNanos(" - + inputObj + ".readInt());\n"; - } else if (javaType.equals("java.math.BigDecimal")) { - return " this." + colName + " = " - + BigDecimalSerializer.class.getCanonicalName() - + ".readFields(" + inputObj + ");\n"; - } else if (javaType.equals(ClobRef.class.getName())) { - return " this." + colName + " = " - + LobSerializer.class.getCanonicalName() - + ".readClobFields(" + inputObj + ");\n"; - } else if (javaType.equals(BlobRef.class.getName())) { - return " this." + colName + " = " - + LobSerializer.class.getCanonicalName() - + ".readBlobFields(" + inputObj + ");\n"; - } else if (javaType.equals(BytesWritable.class.getName())) { - return " this." + colName + " = new BytesWritable();\n" - + " this." + colName + ".readFields(" + inputObj + ");\n"; - } else { - LOG.error("No ResultSet method for Java type " + javaType); - return null; - } - } - - /** - * Deserialize a possibly-null value from the DataInput stream. - * @param javaType name of the type to deserialize if it's not null. - * @param inputObj name of the DataInput to read from - * @param colName the column name to read. - * @return - */ - private String rpcGetterForMaybeNull(String javaType, String inputObj, - String colName) { - return " if (" + inputObj + ".readBoolean()) { \n" - + " this." + colName + " = null;\n" - + " } else {\n" - + rpcGetterForType(javaType, inputObj, colName) - + " }\n"; - } - - /** - * @param javaType the type to write - * @param inputObj the name of the DataOutput to write to - * @param colName the column name to write - * @return the line of code involving a DataOutput object to write an entry - * with a given java type. - */ - private String rpcSetterForType(String javaType, String outputObj, - String colName) { - if (javaType.equals("Integer")) { - return " " + outputObj + ".writeInt(this." 
+ colName + ");\n"; - } else if (javaType.equals("Long")) { - return " " + outputObj + ".writeLong(this." + colName + ");\n"; - } else if (javaType.equals("Boolean")) { - return " " + outputObj + ".writeBoolean(this." + colName + ");\n"; - } else if (javaType.equals("Float")) { - return " " + outputObj + ".writeFloat(this." + colName + ");\n"; - } else if (javaType.equals("Double")) { - return " " + outputObj + ".writeDouble(this." + colName + ");\n"; - } else if (javaType.equals("String")) { - return " Text.writeString(" + outputObj + ", " + colName + ");\n"; - } else if (javaType.equals("java.sql.Date")) { - return " " + outputObj + ".writeLong(this." + colName - + ".getTime());\n"; - } else if (javaType.equals("java.sql.Time")) { - return " " + outputObj + ".writeLong(this." + colName - + ".getTime());\n"; - } else if (javaType.equals("java.sql.Timestamp")) { - return " " + outputObj + ".writeLong(this." + colName - + ".getTime());\n" + " " + outputObj + ".writeInt(this." + colName - + ".getNanos());\n"; - } else if (javaType.equals(BytesWritable.class.getName())) { - return " this." + colName + ".write(" + outputObj + ");\n"; - } else if (javaType.equals("java.math.BigDecimal")) { - return " " + BigDecimalSerializer.class.getCanonicalName() - + ".write(this." + colName + ", " + outputObj + ");\n"; - } else if (javaType.equals(ClobRef.class.getName())) { - return " " + LobSerializer.class.getCanonicalName() - + ".writeClob(this." + colName + ", " + outputObj + ");\n"; - } else if (javaType.equals(BlobRef.class.getName())) { - return " " + LobSerializer.class.getCanonicalName() - + ".writeBlob(this." + colName + ", " + outputObj + ");\n"; - } else { - LOG.error("No ResultSet method for Java type " + javaType); - return null; - } - } - - /** - * Serialize a possibly-null value to the DataOutput stream. First a boolean - * isNull is written, followed by the contents itself (if not null). - * @param javaType name of the type to deserialize if it's not null. - * @param inputObj name of the DataInput to read from - * @param colName the column name to read. - * @return - */ - private String rpcSetterForMaybeNull(String javaType, String outputObj, - String colName) { - return " if (null == this." + colName + ") { \n" - + " " + outputObj + ".writeBoolean(true);\n" - + " } else {\n" - + " " + outputObj + ".writeBoolean(false);\n" - + rpcSetterForType(javaType, outputObj, colName) - + " }\n"; - } - - /** - * Generate a member field, getter, setter and with method for each column. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table - * @param className - name of the generated class - * @param sb - StringBuilder to append code to - */ - private void generateFields(Map columnTypes, - String [] colNames, String className, StringBuilder sb) { - - for (String col : colNames) { - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("Cannot resolve SQL type " + sqlType); - continue; - } - - sb.append(" private " + javaType + " " + col + ";\n"); - sb.append(" public " + javaType + " get_" + col + "() {\n"); - sb.append(" return " + col + ";\n"); - sb.append(" }\n"); - sb.append(" public void set_" + col + "(" + javaType + " " + col - + ") {\n"); - sb.append(" this." + col + " = " + col + ";\n"); - sb.append(" }\n"); - sb.append(" public " + className + " with_" + col + "(" + javaType + " " - + col + ") {\n"); - sb.append(" this." 
+ col + " = " + col + ";\n"); - sb.append(" return this;\n"); - sb.append(" }\n"); - } - } - - /** - * Generate an equals method that compares the fields for each column. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table - * @param className - name of the generated class - * @param sb - StringBuilder to append code to - */ - private void generateEquals(Map columnTypes, - String [] colNames, String className, StringBuilder sb) { - - sb.append(" public boolean equals(Object o) {\n"); - sb.append(" if (this == o) {\n"); - sb.append(" return true;\n"); - sb.append(" }\n"); - sb.append(" if (!(o instanceof " + className + ")) {\n"); - sb.append(" return false;\n"); - sb.append(" }\n"); - sb.append(" " + className + " that = (" + className + ") o;\n"); - sb.append(" boolean equal = true;\n"); - for (String col : colNames) { - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("Cannot resolve SQL type " + sqlType); - continue; - } - sb.append(" equal = equal && (this." + col + " == null ? that." + col - + " == null : this." + col + ".equals(that." + col + "));\n"); - } - sb.append(" return equal;\n"); - sb.append(" }\n"); - } - - /** - * Generate the readFields() method used by the database. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateDbRead(Map columnTypes, - String [] colNames, StringBuilder sb) { - - sb.append(" public void readFields(ResultSet __dbResults) "); - sb.append("throws SQLException {\n"); - - // Save ResultSet object cursor for use in LargeObjectLoader - // if necessary. - sb.append(" this.__cur_result_set = __dbResults;\n"); - - int fieldNum = 0; - - for (String col : colNames) { - fieldNum++; - - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - String getterMethod = dbGetterForType(javaType); - if (null == getterMethod) { - LOG.error("No db getter method for Java type " + javaType); - continue; - } - - sb.append(" this." + col + " = JdbcWritableBridge." + getterMethod - + "(" + fieldNum + ", __dbResults);\n"); - } - - sb.append(" }\n"); - } - - /** - * Generate the loadLargeObjects() method called by the mapper to load - * delayed objects (that require the Context from the mapper). - */ - private void generateLoadLargeObjects(Map columnTypes, - String [] colNames, StringBuilder sb) { - - // This method relies on the __cur_result_set field being set by - // readFields() method generated by generateDbRead(). - - sb.append(" public void loadLargeObjects(LargeObjectLoader __loader)\n"); - sb.append(" throws SQLException, IOException, "); - sb.append("InterruptedException {\n"); - - int fieldNum = 0; - - for (String col : colNames) { - fieldNum++; - - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - String getterMethod = dbGetterForType(javaType); - if ("readClobRef".equals(getterMethod) - || "readBlobRef".equals(getterMethod)) { - // This field is a blob/clob field with delayed loading. 
Call the - // appropriate LargeObjectLoader method (which has the same name as a - // JdbcWritableBridge method). - sb.append(" this." + col + " = __loader." + getterMethod - + "(" + fieldNum + ", this.__cur_result_set);\n"); - } - } - sb.append(" }\n"); - } - - - /** - * Generate the write() method used by the database. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateDbWrite(Map columnTypes, - String [] colNames, StringBuilder sb) { - - sb.append(" public void write(PreparedStatement __dbStmt) " - + "throws SQLException {\n"); - sb.append(" write(__dbStmt, 0);\n"); - sb.append(" }\n\n"); - - sb.append(" public int write(PreparedStatement __dbStmt, int __off) " - + "throws SQLException {\n"); - - int fieldNum = 0; - - for (String col : colNames) { - fieldNum++; - - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - String setterMethod = dbSetterForType(javaType); - if (null == setterMethod) { - LOG.error("No db setter method for Java type " + javaType); - continue; - } - - sb.append(" JdbcWritableBridge." + setterMethod + "(" + col + ", " - + fieldNum + " + __off, " + sqlType + ", __dbStmt);\n"); - } - - sb.append(" return " + fieldNum + ";\n"); - sb.append(" }\n"); - } - - - /** - * Generate the readFields() method used by the Hadoop RPC system. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateHadoopRead(Map columnTypes, - String [] colNames, StringBuilder sb) { - - sb.append(" public void readFields(DataInput __dataIn) " - + "throws IOException {\n"); - - for (String col : colNames) { - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - String getterMethod = rpcGetterForMaybeNull(javaType, "__dataIn", col); - if (null == getterMethod) { - LOG.error("No RPC getter method for Java type " + javaType); - continue; - } - - sb.append(getterMethod); - } - - sb.append(" }\n"); - } - - /** - * Generate the clone() method. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateCloneMethod(Map columnTypes, - String [] colNames, StringBuilder sb) { - - TableClassName tableNameInfo = new TableClassName(options); - String className = tableNameInfo.getShortClassForTable(tableName); - - sb.append(" public Object clone() throws CloneNotSupportedException {\n"); - sb.append(" " + className + " o = (" + className + ") super.clone();\n"); - - // For each field that is mutable, we need to perform the deep copy. - for (String colName : colNames) { - int sqlType = columnTypes.get(colName); - String javaType = toJavaType(colName, sqlType); - if (null == javaType) { - continue; - } else if (javaType.equals("java.sql.Date") - || javaType.equals("java.sql.Time") - || javaType.equals("java.sql.Timestamp") - || javaType.equals(ClobRef.class.getName()) - || javaType.equals(BlobRef.class.getName())) { - sb.append(" o." + colName + " = (o." + colName + " != null) ? 
(" - + javaType + ") o." + colName + ".clone() : null;\n"); - } else if (javaType.equals(BytesWritable.class.getName())) { - sb.append(" o." + colName + " = new BytesWritable(" - + "Arrays.copyOf(" + colName + ".getBytes(), " - + colName + ".getLength()));\n"); - } - } - - sb.append(" return o;\n"); - sb.append(" }\n\n"); - } - - /** - * Generate the setField() method. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateSetField(Map columnTypes, - String [] colNames, StringBuilder sb) { - sb.append(" public void setField(String __fieldName, Object __fieldVal) " - + "{\n"); - boolean first = true; - for (String colName : colNames) { - int sqlType = columnTypes.get(colName); - String javaType = toJavaType(colName, sqlType); - if (null == javaType) { - continue; - } else { - if (!first) { - sb.append(" else"); - } - - sb.append(" if (\"" + colName + "\".equals(__fieldName)) {\n"); - sb.append(" this." + colName + " = (" + javaType - + ") __fieldVal;\n"); - sb.append(" }\n"); - first = false; - } - } - sb.append(" else {\n"); - sb.append(" throw new RuntimeException("); - sb.append("\"No such field: \" + __fieldName);\n"); - sb.append(" }\n"); - sb.append(" }\n"); - } - - /** - * Generate the getFieldMap() method. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateGetFieldMap(Map columnTypes, - String [] colNames, StringBuilder sb) { - sb.append(" public Map getFieldMap() {\n"); - sb.append(" Map __sqoop$field_map = " - + "new TreeMap();\n"); - for (String colName : colNames) { - sb.append(" __sqoop$field_map.put(\"" + colName + "\", this." - + colName + ");\n"); - } - sb.append(" return __sqoop$field_map;\n"); - sb.append(" }\n\n"); - } - - /** - * Generate the toString() method. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateToString(Map columnTypes, - String [] colNames, StringBuilder sb) { - - // Save the delimiters to the class. - sb.append(" private final DelimiterSet __outputDelimiters = "); - sb.append(options.getOutputDelimiters().formatConstructor() + ";\n"); - - // The default toString() method itself follows. This just calls - // the delimiter-specific toString() with the default delimiters. - // Also appends an end-of-record delimiter to the line. - sb.append(" public String toString() {\n"); - sb.append(" return toString(__outputDelimiters, true);\n"); - sb.append(" }\n"); - - // This toString() variant, though, accepts delimiters as arguments. - sb.append(" public String toString(DelimiterSet delimiters) {\n"); - sb.append(" return toString(delimiters, true);\n"); - sb.append(" }\n"); - - // This variant allows the user to specify whether or not an end-of-record - // delimiter should be appended. - sb.append(" public String toString(boolean useRecordDelim) {\n"); - sb.append(" return toString(__outputDelimiters, useRecordDelim);\n"); - sb.append(" }\n"); - - - // This toString() variant allows the user to specify delimiters, as well - // as whether or not the end-of-record delimiter should be added to the - // string. Use 'false' to do reasonable things with TextOutputFormat, - // which appends its own newline. 
- sb.append(" public String toString(DelimiterSet delimiters, "); - sb.append("boolean useRecordDelim) {\n"); - sb.append(" StringBuilder __sb = new StringBuilder();\n"); - sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n"); - - boolean first = true; - for (String col : colNames) { - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - if (!first) { - // print inter-field tokens. - sb.append(" __sb.append(fieldDelim);\n"); - } - - first = false; - - String stringExpr = stringifierForType(javaType, col); - if (null == stringExpr) { - LOG.error("No toString method for Java type " + javaType); - continue; - } - - if (javaType.equals("String") && options.doHiveDropDelims()) { - sb.append(" // special case for strings hive, dropping" - + "delimiters \\n,\\r,\\01 from strings\n"); - sb.append(" __sb.append(FieldFormatter.hiveStringDropDelims(" - + stringExpr + ", delimiters));\n"); - } else if (javaType.equals("String") - && options.getHiveDelimsReplacement() != null) { - sb.append(" // special case for strings hive, replacing " - + "delimiters \\n,\\r,\\01 with '" - + options.getHiveDelimsReplacement() + "' from strings\n"); - sb.append(" __sb.append(FieldFormatter.hiveStringReplaceDelims(" - + stringExpr + ", \"" + options.getHiveDelimsReplacement() + "\", " - + "delimiters));\n"); - } else { - sb.append(" __sb.append(FieldFormatter.escapeAndEnclose(" - + stringExpr + ", delimiters));\n"); - } - } - - sb.append(" if (useRecordDelim) {\n"); - sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); - sb.append(" }\n"); - sb.append(" return __sb.toString();\n"); - sb.append(" }\n"); - } - - /** - * Helper method for generateParser(). Writes out the parse() method for one - * particular type we support as an input string-ish type. - */ - private void generateParseMethod(String typ, StringBuilder sb) { - sb.append(" public void parse(" + typ + " __record) " - + "throws RecordParser.ParseError {\n"); - sb.append(" if (null == this.__parser) {\n"); - sb.append(" this.__parser = new RecordParser(__inputDelimiters);\n"); - sb.append(" }\n"); - sb.append(" List __fields = " - + "this.__parser.parseRecord(__record);\n"); - sb.append(" __loadFromFields(__fields);\n"); - sb.append(" }\n\n"); - } - - /** - * Helper method for parseColumn(). Interpret the string null representation - * for a particular column. - */ - private void parseNullVal(String javaType, String colName, StringBuilder sb) { - if (javaType.equals("String")) { - sb.append(" if (__cur_str.equals(\"" - + this.options.getInNullStringValue() + "\")) { this."); - sb.append(colName); - sb.append(" = null; } else {\n"); - } else { - sb.append(" if (__cur_str.equals(\"" - + this.options.getInNullNonStringValue()); - sb.append("\") || __cur_str.length() == 0) { this."); - sb.append(colName); - sb.append(" = null; } else {\n"); - } - } - - /** - * Helper method for generateParser(). Generates the code that loads one - * field of a specified name and type from the next element of the field - * strings list. - */ - private void parseColumn(String colName, int colType, StringBuilder sb) { - // assume that we have __it and __cur_str vars, based on - // __loadFromFields() code. 
- sb.append(" __cur_str = __it.next();\n"); - String javaType = toJavaType(colName, colType); - - parseNullVal(javaType, colName, sb); - if (javaType.equals("String")) { - // TODO(aaron): Distinguish between 'null' and null. Currently they both - // set the actual object to null. - sb.append(" this." + colName + " = __cur_str;\n"); - } else if (javaType.equals("Integer")) { - sb.append(" this." + colName + " = Integer.valueOf(__cur_str);\n"); - } else if (javaType.equals("Long")) { - sb.append(" this." + colName + " = Long.valueOf(__cur_str);\n"); - } else if (javaType.equals("Float")) { - sb.append(" this." + colName + " = Float.valueOf(__cur_str);\n"); - } else if (javaType.equals("Double")) { - sb.append(" this." + colName + " = Double.valueOf(__cur_str);\n"); - } else if (javaType.equals("Boolean")) { - sb.append(" this." + colName - + " = BooleanParser.valueOf(__cur_str);\n"); - } else if (javaType.equals("java.sql.Date")) { - sb.append(" this." + colName - + " = java.sql.Date.valueOf(__cur_str);\n"); - } else if (javaType.equals("java.sql.Time")) { - sb.append(" this." + colName - + " = java.sql.Time.valueOf(__cur_str);\n"); - } else if (javaType.equals("java.sql.Timestamp")) { - sb.append(" this." + colName - + " = java.sql.Timestamp.valueOf(__cur_str);\n"); - } else if (javaType.equals("java.math.BigDecimal")) { - sb.append(" this." + colName - + " = new java.math.BigDecimal(__cur_str);\n"); - } else if (javaType.equals(ClobRef.class.getName())) { - sb.append(" this." + colName + " = ClobRef.parse(__cur_str);\n"); - } else if (javaType.equals(BlobRef.class.getName())) { - sb.append(" this." + colName + " = BlobRef.parse(__cur_str);\n"); - } else { - LOG.error("No parser available for Java type " + javaType); - } - - sb.append(" }\n\n"); // the closing '{' based on code in parseNullVal(); - } - - /** - * Generate the parse() method. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateParser(Map columnTypes, - String [] colNames, StringBuilder sb) { - - // Embed into the class the delimiter characters to use when parsing input - // records. Note that these can differ from the delims to use as output - // via toString(), if the user wants to use this class to convert one - // format to another. - sb.append(" private final DelimiterSet __inputDelimiters = "); - sb.append(options.getInputDelimiters().formatConstructor() + ";\n"); - - // The parser object which will do the heavy lifting for field splitting. - sb.append(" private RecordParser __parser;\n"); - - // Generate wrapper methods which will invoke the parser. - generateParseMethod("Text", sb); - generateParseMethod("CharSequence", sb); - generateParseMethod("byte []", sb); - generateParseMethod("char []", sb); - generateParseMethod("ByteBuffer", sb); - generateParseMethod("CharBuffer", sb); - - // The wrapper methods call __loadFromFields() to actually interpret the - // raw field data as string, int, boolean, etc. The generation of this - // method is type-dependent for the fields. - sb.append(" private void __loadFromFields(List fields) {\n"); - sb.append(" Iterator __it = fields.listIterator();\n"); - sb.append(" String __cur_str;\n"); - for (String colName : colNames) { - int colType = columnTypes.get(colName); - parseColumn(colName, colType, sb); - } - sb.append(" }\n\n"); - } - - /** - * Generate the write() method used by the Hadoop RPC system. 
- * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param sb - StringBuilder to append code to - */ - private void generateHadoopWrite(Map columnTypes, - String [] colNames, StringBuilder sb) { - - sb.append(" public void write(DataOutput __dataOut) " - + "throws IOException {\n"); - - for (String col : colNames) { - int sqlType = columnTypes.get(col); - String javaType = toJavaType(col, sqlType); - if (null == javaType) { - LOG.error("No Java type for SQL type " + sqlType - + " for column " + col); - continue; - } - - String setterMethod = rpcSetterForMaybeNull(javaType, "__dataOut", col); - if (null == setterMethod) { - LOG.error("No RPC setter method for Java type " + javaType); - continue; - } - - sb.append(setterMethod); - } - - sb.append(" }\n"); - } - - /** - * Create a list of identifiers to use based on the true column names - * of the table. - * @param colNames the actual column names of the table. - * @return a list of column names in the same order which are - * cleaned up to be used as identifiers in the generated Java class. - */ - private String [] cleanColNames(String [] colNames) { - String [] cleanedColNames = new String[colNames.length]; - for (int i = 0; i < colNames.length; i++) { - String col = colNames[i]; - String identifier = toJavaIdentifier(col); - cleanedColNames[i] = identifier; - } - - return cleanedColNames; - } - - - /** - * Generate the ORM code for the class. - */ - public void generate() throws IOException { - Map columnTypes = getColumnTypes(); - - String[] colNames = getColumnNames(columnTypes); - - // Translate all the column names into names that are safe to - // use as identifiers. - String [] cleanedColNames = cleanColNames(colNames); - Set uniqColNames = new HashSet(); - for (int i = 0; i < colNames.length; i++) { - String identifier = cleanedColNames[i]; - - // Name can't be blank - if(identifier.isEmpty()) { - throw new IllegalArgumentException("We found column without column " - + "name. Please verify that you've entered all column names " - + "in your query if using free form query import (consider " - + "adding clause AS if you're using column transformation)"); - } - - // Guarantee uniq col identifier - if (uniqColNames.contains(identifier)) { - throw new IllegalArgumentException("Duplicate Column identifier " - + "specified: '" + identifier + "'"); - } - uniqColNames.add(identifier); - - // Make sure the col->type mapping holds for the - // new identifier name, too. - String col = colNames[i]; - Integer type = columnTypes.get(col); - if (type == null) { - // column doesn't have a type, means that is illegal column name! - throw new IllegalArgumentException("Column name '" + col - + "' not in table"); - } - columnTypes.put(identifier, type); - } - - // Check that all explicitly mapped columns are present in result set - Properties mapping = options.getMapColumnJava(); - if(mapping != null && !mapping.isEmpty()) { - for(Object column : mapping.keySet()) { - if(!uniqColNames.contains((String)column)) { - throw new IllegalArgumentException("No column by the name " + column - + "found while importing data"); - } - } - } - - // The db write() method may use column names in a different - // order. If this is set in the options, pull it out here and - // make sure we format the column names to identifiers in the same way - // as we do for the ordinary column list. 
- String [] dbWriteColNames = options.getDbOutputColumns(); - String [] cleanedDbWriteColNames = null; - if (null == dbWriteColNames) { - cleanedDbWriteColNames = cleanedColNames; - } else { - cleanedDbWriteColNames = cleanColNames(dbWriteColNames); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("selected columns:"); - for (String col : cleanedColNames) { - LOG.debug(" " + col); - } - - if (cleanedDbWriteColNames != cleanedColNames) { - // dbWrite() has a different set of columns than the rest of the - // generators. - LOG.debug("db write column order:"); - for (String dbCol : cleanedDbWriteColNames) { - LOG.debug(" " + dbCol); - } - } - } - - // Generate the Java code. - StringBuilder sb = generateClassForColumns(columnTypes, - cleanedColNames, cleanedDbWriteColNames); - - // Write this out to a file in the jar output directory. - // We'll move it to the user-visible CodeOutputDir after compiling. - String codeOutDir = options.getJarOutputDir(); - - // Get the class name to generate, which includes package components. - String className = new TableClassName(options).getClassForTable(tableName); - // Convert the '.' characters to '/' characters. - String sourceFilename = className.replace('.', File.separatorChar) - + ".java"; - String filename = codeOutDir + sourceFilename; - - if (LOG.isDebugEnabled()) { - LOG.debug("Writing source file: " + filename); - LOG.debug("Table name: " + tableName); - StringBuilder sbColTypes = new StringBuilder(); - for (String col : colNames) { - Integer colType = columnTypes.get(col); - sbColTypes.append(col + ":" + colType + ", "); - } - String colTypeStr = sbColTypes.toString(); - LOG.debug("Columns: " + colTypeStr); - LOG.debug("sourceFilename is " + sourceFilename); - } - - compileManager.addSourceFile(sourceFilename); - - // Create any missing parent directories. - File file = new File(filename); - File dir = file.getParentFile(); - if (null != dir && !dir.exists()) { - boolean mkdirSuccess = dir.mkdirs(); - if (!mkdirSuccess) { - LOG.debug("Could not create directory tree for " + dir); - } - } - - OutputStream ostream = null; - Writer writer = null; - try { - ostream = new FileOutputStream(filename); - writer = new OutputStreamWriter(ostream); - writer.append(sb.toString()); - } finally { - if (null != writer) { - try { - writer.close(); - } catch (IOException ioe) { - // ignored because we're closing. - } - } - - if (null != ostream) { - try { - ostream.close(); - } catch (IOException ioe) { - // ignored because we're closing. - } - } - } - } - - protected String[] getColumnNames(Map columnTypes) { - String [] colNames = options.getColumns(); - if (null == colNames) { - if (null != tableName) { - // Table-based import. Read column names from table. - colNames = connManager.getColumnNames(tableName); - } else { - // Infer/assign column names for arbitrary query. - colNames = connManager.getColumnNamesForQuery( - this.options.getSqlQuery()); - } - } else { - // These column names were provided by the user. They may not be in - // the same case as the keys in the columnTypes map. So make sure - // we add the appropriate aliases in that map. - for (String userColName : colNames) { - for (Map.Entry typeEntry : columnTypes.entrySet()) { - String typeColName = typeEntry.getKey(); - if (typeColName.equalsIgnoreCase(userColName) - && !typeColName.equals(userColName)) { - // We found the correct-case equivalent. - columnTypes.put(userColName, typeEntry.getValue()); - // No need to continue iteration; only one could match. 
- // Also, the use of put() just invalidated the iterator. - break; - } - } - } - } - return colNames; - } - - protected Map getColumnTypes() throws IOException { - return connManager.getColumnTypes(tableName, options.getSqlQuery()); - } - - /** - * Generate the ORM code for a table object containing the named columns. - * @param columnTypes - mapping from column names to sql types - * @param colNames - ordered list of column names for table. - * @param dbWriteColNames - ordered list of column names for the db - * write() method of the class. - * @return - A StringBuilder that contains the text of the class code. - */ - private StringBuilder generateClassForColumns( - Map columnTypes, - String [] colNames, String [] dbWriteColNames) { - if (colNames.length ==0) { - throw new IllegalArgumentException("Attempted to generate class with " - + "no columns!"); - } - StringBuilder sb = new StringBuilder(); - sb.append("// ORM class for " + tableName + "\n"); - sb.append("// WARNING: This class is AUTO-GENERATED. " - + "Modify at your own risk.\n"); - - TableClassName tableNameInfo = new TableClassName(options); - - String packageName = tableNameInfo.getPackageForTable(); - if (null != packageName) { - sb.append("package "); - sb.append(packageName); - sb.append(";\n"); - } - - sb.append("import org.apache.hadoop.io.BytesWritable;\n"); - sb.append("import org.apache.hadoop.io.Text;\n"); - sb.append("import org.apache.hadoop.io.Writable;\n"); - sb.append("import org.apache.hadoop.mapred.lib.db.DBWritable;\n"); - sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n"); - sb.append("import " + DelimiterSet.class.getCanonicalName() + ";\n"); - sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n"); - sb.append("import " + RecordParser.class.getCanonicalName() + ";\n"); - sb.append("import " + BooleanParser.class.getCanonicalName() + ";\n"); - sb.append("import " + BlobRef.class.getCanonicalName() + ";\n"); - sb.append("import " + ClobRef.class.getCanonicalName() + ";\n"); - sb.append("import " + LargeObjectLoader.class.getCanonicalName() + ";\n"); - sb.append("import " + SqoopRecord.class.getCanonicalName() + ";\n"); - sb.append("import java.sql.PreparedStatement;\n"); - sb.append("import java.sql.ResultSet;\n"); - sb.append("import java.sql.SQLException;\n"); - sb.append("import java.io.DataInput;\n"); - sb.append("import java.io.DataOutput;\n"); - sb.append("import java.io.IOException;\n"); - sb.append("import java.nio.ByteBuffer;\n"); - sb.append("import java.nio.CharBuffer;\n"); - sb.append("import java.sql.Date;\n"); - sb.append("import java.sql.Time;\n"); - sb.append("import java.sql.Timestamp;\n"); - sb.append("import java.util.Arrays;\n"); - sb.append("import java.util.Iterator;\n"); - sb.append("import java.util.List;\n"); - sb.append("import java.util.Map;\n"); - sb.append("import java.util.TreeMap;\n"); - sb.append("\n"); - - String className = tableNameInfo.getShortClassForTable(tableName); - sb.append("public class " + className + " extends SqoopRecord " - + " implements DBWritable, Writable {\n"); - sb.append(" private final int PROTOCOL_VERSION = " - + CLASS_WRITER_VERSION + ";\n"); - sb.append( - " public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n"); - sb.append(" protected ResultSet __cur_result_set;\n"); - generateFields(columnTypes, colNames, className, sb); - generateEquals(columnTypes, colNames, className, sb); - generateDbRead(columnTypes, colNames, sb); - generateLoadLargeObjects(columnTypes, colNames, sb); - 
generateDbWrite(columnTypes, dbWriteColNames, sb); - generateHadoopRead(columnTypes, colNames, sb); - generateHadoopWrite(columnTypes, colNames, sb); - generateToString(columnTypes, colNames, sb); - generateParser(columnTypes, colNames, sb); - generateCloneMethod(columnTypes, colNames, sb); - generateGetFieldMap(columnTypes, colNames, sb); - generateSetField(columnTypes, colNames, sb); - - // TODO(aaron): Generate hashCode(), compareTo(), equals() so it can be a - // WritableComparable - - sb.append("}\n"); - - return sb; - } } diff --git a/src/java/com/cloudera/sqoop/orm/CompilationManager.java b/src/java/com/cloudera/sqoop/orm/CompilationManager.java index c9b93a09..152d8a43 100644 --- a/src/java/com/cloudera/sqoop/orm/CompilationManager.java +++ b/src/java/com/cloudera/sqoop/orm/CompilationManager.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,359 +15,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.orm; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.jar.JarOutputStream; -import java.util.zip.ZipEntry; - -import javax.tools.JavaCompiler; -import javax.tools.JavaFileObject; -import javax.tools.StandardJavaFileManager; -import javax.tools.ToolProvider; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.mapred.JobConf; - import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.util.FileListing; -import com.cloudera.sqoop.util.Jars; /** - * Manages the compilation of a bunch of .java files into .class files - * and eventually a jar. - * - * Also embeds this program's jar into the lib/ directory inside the compiled - * jar to ensure that the job runs correctly. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class CompilationManager { +public class CompilationManager + extends org.apache.sqoop.orm.CompilationManager { - /** If we cannot infer a jar name from a table name, etc., use this. */ public static final String DEFAULT_CODEGEN_JAR_NAME = - "sqoop-codegen-created.jar"; - - public static final Log LOG = LogFactory.getLog( - CompilationManager.class.getName()); - - private SqoopOptions options; - private List sources; + org.apache.sqoop.orm.CompilationManager.DEFAULT_CODEGEN_JAR_NAME; public CompilationManager(final SqoopOptions opts) { - options = opts; - sources = new ArrayList(); + super(opts); } - public void addSourceFile(String sourceName) { - sources.add(sourceName); - } - - /** - * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home. - * If that doesn't work, check our classpath. - * @return the filename of the hadoop-*-core.jar file. 
- */ - private String findHadoopCoreJar() { - String hadoopHome = options.getHadoopHome(); - - if (null == hadoopHome) { - LOG.info("$HADOOP_HOME is not set"); - return Jars.getJarPathForClass(JobConf.class); - } - - if (!hadoopHome.endsWith(File.separator)) { - hadoopHome = hadoopHome + File.separator; - } - - File hadoopHomeFile = new File(hadoopHome); - LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath()); - File [] entries = hadoopHomeFile.listFiles(); - - if (null == entries) { - LOG.warn("HADOOP_HOME appears empty or missing"); - return Jars.getJarPathForClass(JobConf.class); - } - - for (File f : entries) { - if (f.getName().startsWith("hadoop-") - && f.getName().endsWith("-core.jar")) { - LOG.info("Found hadoop core jar at: " + f.getAbsolutePath()); - return f.getAbsolutePath(); - } - } - - return Jars.getJarPathForClass(JobConf.class); - } - - /** - * Compile the .java files into .class files via embedded javac call. - * On success, move .java files to the code output dir. - */ - public void compile() throws IOException { - List args = new ArrayList(); - - // ensure that the jar output dir exists. - String jarOutDir = options.getJarOutputDir(); - File jarOutDirObj = new File(jarOutDir); - if (!jarOutDirObj.exists()) { - boolean mkdirSuccess = jarOutDirObj.mkdirs(); - if (!mkdirSuccess) { - LOG.debug("Warning: Could not make directories for " + jarOutDir); - } - } else if (LOG.isDebugEnabled()) { - LOG.debug("Found existing " + jarOutDir); - } - - // Make sure jarOutDir ends with a '/'. - if (!jarOutDir.endsWith(File.separator)) { - jarOutDir = jarOutDir + File.separator; - } - - // find hadoop-*-core.jar for classpath. - String coreJar = findHadoopCoreJar(); - if (null == coreJar) { - // Couldn't find a core jar to insert into the CP for compilation. If, - // however, we're running this from a unit test, then the path to the - // .class files might be set via the hadoop.alt.classpath property - // instead. Check there first. - String coreClassesPath = System.getProperty("hadoop.alt.classpath"); - if (null == coreClassesPath) { - // no -- we're out of options. Fail. 
- throw new IOException("Could not find hadoop core jar!"); - } else { - coreJar = coreClassesPath; - } - } - - // find sqoop jar for compilation classpath - String sqoopJar = Jars.getSqoopJarPath(); - if (null != sqoopJar) { - sqoopJar = File.pathSeparator + sqoopJar; - } else { - LOG.warn("Could not find sqoop jar; child compilation may fail"); - sqoopJar = ""; - } - - String curClasspath = System.getProperty("java.class.path"); - - args.add("-sourcepath"); - args.add(jarOutDir); - - args.add("-d"); - args.add(jarOutDir); - - args.add("-classpath"); - args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar); - - JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); - if (null == compiler) { - LOG.error("It seems as though you are running sqoop with a JRE."); - LOG.error("Sqoop requires a JDK that can compile Java code."); - LOG.error("Please install a JDK and set $JAVA_HOME to use it."); - throw new IOException("Could not start Java compiler."); - } - StandardJavaFileManager fileManager = - compiler.getStandardFileManager(null, null, null); - - ArrayList srcFileNames = new ArrayList(); - for (String srcfile : sources) { - srcFileNames.add(jarOutDir + srcfile); - LOG.debug("Adding source file: " + jarOutDir + srcfile); - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Invoking javac with args:"); - for (String arg : args) { - LOG.debug(" " + arg); - } - } - - Iterable srcFileObjs = - fileManager.getJavaFileObjectsFromStrings(srcFileNames); - JavaCompiler.CompilationTask task = compiler.getTask( - null, // Write to stderr - fileManager, - null, // No special diagnostic handling - args, - null, // Compile all classes in the source compilation units - srcFileObjs); - - boolean result = task.call(); - if (!result) { - throw new IOException("Error returned by javac"); - } - - // Where we should move source files after compilation. - String srcOutDir = new File(options.getCodeOutputDir()).getAbsolutePath(); - if (!srcOutDir.endsWith(File.separator)) { - srcOutDir = srcOutDir + File.separator; - } - - // Move these files to the srcOutDir. - for (String srcFileName : sources) { - String orig = jarOutDir + srcFileName; - String dest = srcOutDir + srcFileName; - File fOrig = new File(orig); - File fDest = new File(dest); - File fDestParent = fDest.getParentFile(); - if (null != fDestParent && !fDestParent.exists()) { - if (!fDestParent.mkdirs()) { - LOG.error("Could not make directory: " + fDestParent); - } - } - try { - FileUtils.moveFile(fOrig, fDest); - } catch (IOException e) { - LOG.error("Could not rename " + orig + " to " + dest, e); - } - } - } - - /** - * @return the complete filename of the .jar file to generate. 
*/ - public String getJarFilename() { - String jarOutDir = options.getJarOutputDir(); - String tableName = options.getTableName(); - String specificClassName = options.getClassName(); - - if (specificClassName != null && specificClassName.length() > 0) { - return jarOutDir + specificClassName + ".jar"; - } else if (null != tableName && tableName.length() > 0) { - return jarOutDir + tableName + ".jar"; - } else if (this.sources.size() == 1) { - // if we only have one source file, find it's base name, - // turn "foo.java" into "foo", and then return jarDir + "foo" + ".jar" - String srcFileName = this.sources.get(0); - String basename = new File(srcFileName).getName(); - String [] parts = basename.split("\\."); - String preExtPart = parts[0]; - return jarOutDir + preExtPart + ".jar"; - } else { - return jarOutDir + DEFAULT_CODEGEN_JAR_NAME; - } - } - - /** - * Searches through a directory and its children for .class - * files to add to a jar. - * - * @param dir - The root directory to scan with this algorithm. - * @param jstream - The JarOutputStream to write .class files to. - */ - private void addClassFilesFromDir(File dir, JarOutputStream jstream) - throws IOException { - LOG.debug("Scanning for .class files in directory: " + dir); - List dirEntries = FileListing.getFileListing(dir); - String baseDirName = dir.getAbsolutePath(); - if (!baseDirName.endsWith(File.separator)) { - baseDirName = baseDirName + File.separator; - } - - // For each input class file, create a zipfile entry for it, - // read the file into a buffer, and write it to the jar file. - for (File entry : dirEntries) { - if (!entry.isDirectory()) { - // Chomp off the portion of the full path that is shared - // with the base directory where class files were put; - // we only record the subdir parts in the zip entry. - String fullPath = entry.getAbsolutePath(); - String chompedPath = fullPath.substring(baseDirName.length()); - - boolean include = chompedPath.endsWith(".class") - && sources.contains( - chompedPath.substring(0, chompedPath.length() - ".class".length()) - + ".java"); - - if (include) { - // include this file. - LOG.debug("Got classfile: " + entry.getPath() + " -> " + chompedPath); - ZipEntry ze = new ZipEntry(chompedPath); - jstream.putNextEntry(ze); - copyFileToStream(entry, jstream); - jstream.closeEntry(); - } - } - } - } - - /** - * Create an output jar file to use when executing MapReduce jobs. 
- */ - public void jar() throws IOException { - String jarOutDir = options.getJarOutputDir(); - - String jarFilename = getJarFilename(); - - LOG.info("Writing jar file: " + jarFilename); - - File jarFileObj = new File(jarFilename); - if (jarFileObj.exists()) { - LOG.debug("Found existing jar (" + jarFilename + "); removing."); - if (!jarFileObj.delete()) { - LOG.warn("Could not remove existing jar file: " + jarFilename); - } - } - - FileOutputStream fstream = null; - JarOutputStream jstream = null; - try { - fstream = new FileOutputStream(jarFilename); - jstream = new JarOutputStream(fstream); - - addClassFilesFromDir(new File(jarOutDir), jstream); - jstream.finish(); - } finally { - if (null != jstream) { - try { - jstream.close(); - } catch (IOException ioe) { - LOG.warn("IOException closing jar stream: " + ioe.toString()); - } - } - - if (null != fstream) { - try { - fstream.close(); - } catch (IOException ioe) { - LOG.warn("IOException closing file stream: " + ioe.toString()); - } - } - } - - LOG.debug("Finished writing jar file " + jarFilename); - } - - private static final int BUFFER_SZ = 4096; - - /** - * Utility method to copy a .class file into the jar stream. - * @param f - * @param ostream - * @throws IOException - */ - private void copyFileToStream(File f, OutputStream ostream) - throws IOException { - FileInputStream fis = new FileInputStream(f); - byte [] buffer = new byte[BUFFER_SZ]; - try { - while (true) { - int bytesReceived = fis.read(buffer); - if (bytesReceived < 1) { - break; - } - - ostream.write(buffer, 0, bytesReceived); - } - } finally { - fis.close(); - } - } } diff --git a/src/java/com/cloudera/sqoop/orm/TableClassName.java b/src/java/com/cloudera/sqoop/orm/TableClassName.java index 2be57b53..20c82af4 100644 --- a/src/java/com/cloudera/sqoop/orm/TableClassName.java +++ b/src/java/com/cloudera/sqoop/orm/TableClassName.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,104 +15,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.orm; import com.cloudera.sqoop.SqoopOptions; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - /** - * Reconciles the table name being imported with the class naming information - * specified in SqoopOptions to determine the actual package and class name to - * use for a table. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class TableClassName { - - public static final Log LOG = LogFactory.getLog( - TableClassName.class.getName()); - - private final SqoopOptions options; +public class TableClassName + extends org.apache.sqoop.orm.TableClassName { public TableClassName(final SqoopOptions opts) { - if (null == opts) { - throw new NullPointerException( - "Cannot instantiate a TableClassName on null options."); - } else { - this.options = opts; - } + super(opts); } - /** - * Taking into account --class-name and --package-name, return the actual - * package-part which will be used for a class. The actual table name being - * generated-for is irrelevant; so not an argument. - * - * @return the package where generated ORM classes go. Will be null for - * top-level. 
- */ - public String getPackageForTable() { - String predefinedClass = options.getClassName(); - if (null != predefinedClass) { - // If the predefined classname contains a package-part, return that. - int lastDot = predefinedClass.lastIndexOf('.'); - if (-1 == lastDot) { - // No package part. - return null; - } else { - // Return the string up to but not including the last dot. - return predefinedClass.substring(0, lastDot); - } - } else { - // If the user has specified a package name, return it. - // This will be null if the user hasn't specified one -- as we expect. - return options.getPackageName(); - } - } - - /** - * @param tableName the name of the table being imported. - * @return the full name of the class to generate/use to import a table. - */ - public String getClassForTable(String tableName) { - String predefinedClass = options.getClassName(); - if (predefinedClass != null) { - // The user's chosen a specific class name for this job. - return predefinedClass; - } - - String queryName = tableName; - if (null == queryName) { - queryName = "QueryResult"; - } - - String packageName = options.getPackageName(); - if (null != packageName) { - // return packageName.queryName. - return packageName + "." + queryName; - } - - // no specific class; no specific package. - // Just make sure it's a legal identifier. - return ClassWriter.toJavaIdentifier(queryName); - } - - /** - * @return just the last segment of the class name -- all package info - * stripped. - */ - public String getShortClassForTable(String tableName) { - String fullClass = getClassForTable(tableName); - if (null == fullClass) { - return null; - } - - int lastDot = fullClass.lastIndexOf('.'); - if (-1 == lastDot) { - return fullClass; - } else { - return fullClass.substring(lastDot + 1, fullClass.length()); - } - } } diff --git a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java index 13a4d488..a5f72f7a 100644 --- a/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java +++ b/src/java/com/cloudera/sqoop/tool/BaseSqoopTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,162 +15,188 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package com.cloudera.sqoop.tool; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Properties; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.OptionGroup; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringUtils; -import org.apache.log4j.Category; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import com.cloudera.sqoop.ConnFactory; -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.lib.DelimiterSet; -import com.cloudera.sqoop.manager.ConnManager; -import com.cloudera.sqoop.metastore.JobData; - /** - * Layer on top of SqoopTool that provides some basic common code - * that most SqoopTool implementations will use. - * - * Subclasses should call init() at the top of their run() method, - * and call destroy() at the end in a finally block. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public abstract class BaseSqoopTool extends SqoopTool { +public abstract class BaseSqoopTool + extends org.apache.sqoop.tool.BaseSqoopTool { - public static final Log LOG = LogFactory.getLog( - BaseSqoopTool.class.getName()); - - public static final String HELP_STR = "\nTry --help for usage instructions."; - - // Here are all the arguments that are used by the standard sqoop tools. - // Their names are recorded here so that tools can share them and their - // use consistently. The argument parser applies the leading '--' to each - // string. 
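// A minimal sketch (not taken from this patch) of what the aliased constants
// below preserve for downstream code: the old com.cloudera.sqoop names now
// simply re-export the org.apache.sqoop values, so callers compiled against
// the deprecated class keep seeing identical strings. Assumes the Sqoop jar
// is on the classpath; the class name here is hypothetical.
public final class DeprecatedConstantCheck {
  public static void main(String[] args) {
    String oldName = com.cloudera.sqoop.tool.BaseSqoopTool.CONNECT_STRING_ARG;
    String newName = org.apache.sqoop.tool.BaseSqoopTool.CONNECT_STRING_ARG;
    System.out.println(oldName.equals(newName)); // expected: true ("connect")
  }
}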
- public static final String CONNECT_STRING_ARG = "connect"; + public static final String HELP_STR = + org.apache.sqoop.tool.BaseSqoopTool.HELP_STR; + public static final String CONNECT_STRING_ARG = + org.apache.sqoop.tool.BaseSqoopTool.CONNECT_STRING_ARG; public static final String CONN_MANAGER_CLASS_NAME = - "connection-manager"; - public static final String CONNECT_PARAM_FILE = "connection-param-file"; - public static final String DRIVER_ARG = "driver"; - public static final String USERNAME_ARG = "username"; - public static final String PASSWORD_ARG = "password"; - public static final String PASSWORD_PROMPT_ARG = "P"; - public static final String DIRECT_ARG = "direct"; - public static final String BATCH_ARG = "batch"; - public static final String TABLE_ARG = "table"; - public static final String STAGING_TABLE_ARG = "staging-table"; - public static final String CLEAR_STAGING_TABLE_ARG = "clear-staging-table"; - public static final String COLUMNS_ARG = "columns"; - public static final String SPLIT_BY_ARG = "split-by"; - public static final String WHERE_ARG = "where"; - public static final String HADOOP_HOME_ARG = "hadoop-home"; - public static final String HIVE_HOME_ARG = "hive-home"; - public static final String WAREHOUSE_DIR_ARG = "warehouse-dir"; - public static final String TARGET_DIR_ARG = "target-dir"; - public static final String APPEND_ARG = "append"; - public static final String NULL_STRING = "null-string"; - public static final String INPUT_NULL_STRING = "input-null-string"; - public static final String NULL_NON_STRING = "null-non-string"; - public static final String INPUT_NULL_NON_STRING = "input-null-non-string"; - public static final String MAP_COLUMN_JAVA = "map-column-java"; - public static final String MAP_COLUMN_HIVE = "map-column-hive"; - - public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile"; - public static final String FMT_TEXTFILE_ARG = "as-textfile"; - public static final String FMT_AVRODATAFILE_ARG = "as-avrodatafile"; - public static final String HIVE_IMPORT_ARG = "hive-import"; - public static final String HIVE_TABLE_ARG = "hive-table"; - public static final String HIVE_OVERWRITE_ARG = "hive-overwrite"; - public static final String HIVE_DROP_DELIMS_ARG = "hive-drop-import-delims"; + org.apache.sqoop.tool.BaseSqoopTool.CONN_MANAGER_CLASS_NAME; + public static final String CONNECT_PARAM_FILE = + org.apache.sqoop.tool.BaseSqoopTool.CONNECT_PARAM_FILE; + public static final String DRIVER_ARG = + org.apache.sqoop.tool.BaseSqoopTool.DRIVER_ARG; + public static final String USERNAME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.USERNAME_ARG; + public static final String PASSWORD_ARG = + org.apache.sqoop.tool.BaseSqoopTool.PASSWORD_ARG; + public static final String PASSWORD_PROMPT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.PASSWORD_PROMPT_ARG; + public static final String DIRECT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.DIRECT_ARG; + public static final String BATCH_ARG = + org.apache.sqoop.tool.BaseSqoopTool.BATCH_ARG; + public static final String TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.TABLE_ARG; + public static final String STAGING_TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.STAGING_TABLE_ARG; + public static final String CLEAR_STAGING_TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.CLEAR_STAGING_TABLE_ARG; + public static final String COLUMNS_ARG = + org.apache.sqoop.tool.BaseSqoopTool.COLUMNS_ARG; + public static final String SPLIT_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.SPLIT_BY_ARG; + public static final String WHERE_ARG = 
+ org.apache.sqoop.tool.BaseSqoopTool.WHERE_ARG; + public static final String HADOOP_HOME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HADOOP_HOME_ARG; + public static final String HIVE_HOME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_HOME_ARG; + public static final String WAREHOUSE_DIR_ARG = + org.apache.sqoop.tool.BaseSqoopTool.WAREHOUSE_DIR_ARG; + public static final String TARGET_DIR_ARG = + org.apache.sqoop.tool.BaseSqoopTool.TARGET_DIR_ARG; + public static final String APPEND_ARG = + org.apache.sqoop.tool.BaseSqoopTool.APPEND_ARG; + public static final String NULL_STRING = + org.apache.sqoop.tool.BaseSqoopTool.NULL_STRING; + public static final String INPUT_NULL_STRING = + org.apache.sqoop.tool.BaseSqoopTool.INPUT_NULL_STRING; + public static final String NULL_NON_STRING = + org.apache.sqoop.tool.BaseSqoopTool.NULL_NON_STRING; + public static final String INPUT_NULL_NON_STRING = + org.apache.sqoop.tool.BaseSqoopTool.INPUT_NULL_NON_STRING; + public static final String MAP_COLUMN_JAVA = + org.apache.sqoop.tool.BaseSqoopTool.MAP_COLUMN_JAVA; + public static final String MAP_COLUMN_HIVE = + org.apache.sqoop.tool.BaseSqoopTool.MAP_COLUMN_HIVE; + public static final String FMT_SEQUENCEFILE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.FMT_SEQUENCEFILE_ARG; + public static final String FMT_TEXTFILE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.FMT_TEXTFILE_ARG; + public static final String FMT_AVRODATAFILE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.FMT_AVRODATAFILE_ARG; + public static final String HIVE_IMPORT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_IMPORT_ARG; + public static final String HIVE_TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_TABLE_ARG; + public static final String HIVE_OVERWRITE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_OVERWRITE_ARG; + public static final String HIVE_DROP_DELIMS_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_DROP_DELIMS_ARG; public static final String HIVE_DELIMS_REPLACEMENT_ARG = - "hive-delims-replacement"; - public static final String HIVE_PARTITION_KEY_ARG = "hive-partition-key"; - public static final String HIVE_PARTITION_VALUE_ARG = "hive-partition-value"; + org.apache.sqoop.tool.BaseSqoopTool.HIVE_DELIMS_REPLACEMENT_ARG; + public static final String HIVE_PARTITION_KEY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_PARTITION_KEY_ARG; + public static final String HIVE_PARTITION_VALUE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HIVE_PARTITION_VALUE_ARG; public static final String CREATE_HIVE_TABLE_ARG = - "create-hive-table"; - public static final String NUM_MAPPERS_ARG = "num-mappers"; - public static final String NUM_MAPPERS_SHORT_ARG = "m"; - public static final String COMPRESS_ARG = "compress"; - public static final String COMPRESSION_CODEC_ARG = "compression-codec"; - public static final String COMPRESS_SHORT_ARG = "z"; - public static final String DIRECT_SPLIT_SIZE_ARG = "direct-split-size"; - public static final String INLINE_LOB_LIMIT_ARG = "inline-lob-limit"; - public static final String FETCH_SIZE_ARG = "fetch-size"; - public static final String EXPORT_PATH_ARG = "export-dir"; - public static final String FIELDS_TERMINATED_BY_ARG = "fields-terminated-by"; - public static final String LINES_TERMINATED_BY_ARG = "lines-terminated-by"; + org.apache.sqoop.tool.BaseSqoopTool.CREATE_HIVE_TABLE_ARG; + public static final String NUM_MAPPERS_ARG = + org.apache.sqoop.tool.BaseSqoopTool.NUM_MAPPERS_ARG; + public static final String NUM_MAPPERS_SHORT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.NUM_MAPPERS_SHORT_ARG; + public 
static final String COMPRESS_ARG = + org.apache.sqoop.tool.BaseSqoopTool.COMPRESS_ARG; + public static final String COMPRESSION_CODEC_ARG = + org.apache.sqoop.tool.BaseSqoopTool.COMPRESSION_CODEC_ARG; + public static final String COMPRESS_SHORT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.COMPRESS_SHORT_ARG; + public static final String DIRECT_SPLIT_SIZE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.DIRECT_SPLIT_SIZE_ARG; + public static final String INLINE_LOB_LIMIT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INLINE_LOB_LIMIT_ARG; + public static final String FETCH_SIZE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.FETCH_SIZE_ARG; + public static final String EXPORT_PATH_ARG = + org.apache.sqoop.tool.BaseSqoopTool.EXPORT_PATH_ARG; + public static final String FIELDS_TERMINATED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.FIELDS_TERMINATED_BY_ARG; + public static final String LINES_TERMINATED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.LINES_TERMINATED_BY_ARG; public static final String OPTIONALLY_ENCLOSED_BY_ARG = - "optionally-enclosed-by"; - public static final String ENCLOSED_BY_ARG = "enclosed-by"; - public static final String ESCAPED_BY_ARG = "escaped-by"; - public static final String MYSQL_DELIMITERS_ARG = "mysql-delimiters"; + org.apache.sqoop.tool.BaseSqoopTool.OPTIONALLY_ENCLOSED_BY_ARG; + public static final String ENCLOSED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.ENCLOSED_BY_ARG; + public static final String ESCAPED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.ESCAPED_BY_ARG; + public static final String MYSQL_DELIMITERS_ARG = + org.apache.sqoop.tool.BaseSqoopTool.MYSQL_DELIMITERS_ARG; public static final String INPUT_FIELDS_TERMINATED_BY_ARG = - "input-fields-terminated-by"; + org.apache.sqoop.tool.BaseSqoopTool.INPUT_FIELDS_TERMINATED_BY_ARG; public static final String INPUT_LINES_TERMINATED_BY_ARG = - "input-lines-terminated-by"; + org.apache.sqoop.tool.BaseSqoopTool.INPUT_LINES_TERMINATED_BY_ARG; public static final String INPUT_OPTIONALLY_ENCLOSED_BY_ARG = - "input-optionally-enclosed-by"; - public static final String INPUT_ENCLOSED_BY_ARG = "input-enclosed-by"; - public static final String INPUT_ESCAPED_BY_ARG = "input-escaped-by"; - public static final String CODE_OUT_DIR_ARG = "outdir"; - public static final String BIN_OUT_DIR_ARG = "bindir"; - public static final String PACKAGE_NAME_ARG = "package-name"; - public static final String CLASS_NAME_ARG = "class-name"; - public static final String JAR_FILE_NAME_ARG = "jar-file"; - public static final String SQL_QUERY_ARG = "query"; - public static final String SQL_QUERY_BOUNDARY = "boundary-query"; - public static final String SQL_QUERY_SHORT_ARG = "e"; - public static final String VERBOSE_ARG = "verbose"; - public static final String HELP_ARG = "help"; - public static final String UPDATE_KEY_ARG = "update-key"; - public static final String UPDATE_MODE_ARG = "update-mode"; - - // Arguments for incremental imports. - public static final String INCREMENT_TYPE_ARG = "incremental"; - public static final String INCREMENT_COL_ARG = "check-column"; - public static final String INCREMENT_LAST_VAL_ARG = "last-value"; - - // HBase arguments. - public static final String HBASE_TABLE_ARG = "hbase-table"; - public static final String HBASE_COL_FAM_ARG = "column-family"; - public static final String HBASE_ROW_KEY_ARG = "hbase-row-key"; - public static final String HBASE_CREATE_TABLE_ARG = "hbase-create-table"; - - - // Arguments for the saved job management system. 
- public static final String STORAGE_METASTORE_ARG = "meta-connect"; - public static final String JOB_CMD_CREATE_ARG = "create"; - public static final String JOB_CMD_DELETE_ARG = "delete"; - public static final String JOB_CMD_EXEC_ARG = "exec"; - public static final String JOB_CMD_LIST_ARG = "list"; - public static final String JOB_CMD_SHOW_ARG = "show"; - - // Arguments for the metastore. - public static final String METASTORE_SHUTDOWN_ARG = "shutdown"; - - - // Arguments for merging datasets. - public static final String NEW_DATASET_ARG = "new-data"; - public static final String OLD_DATASET_ARG = "onto"; - public static final String MERGE_KEY_ARG = "merge-key"; + org.apache.sqoop.tool.BaseSqoopTool.INPUT_OPTIONALLY_ENCLOSED_BY_ARG; + public static final String INPUT_ENCLOSED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INPUT_ENCLOSED_BY_ARG; + public static final String INPUT_ESCAPED_BY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INPUT_ESCAPED_BY_ARG; + public static final String CODE_OUT_DIR_ARG = + org.apache.sqoop.tool.BaseSqoopTool.CODE_OUT_DIR_ARG; + public static final String BIN_OUT_DIR_ARG = + org.apache.sqoop.tool.BaseSqoopTool.BIN_OUT_DIR_ARG; + public static final String PACKAGE_NAME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.PACKAGE_NAME_ARG; + public static final String CLASS_NAME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.CLASS_NAME_ARG; + public static final String JAR_FILE_NAME_ARG = + org.apache.sqoop.tool.BaseSqoopTool.JAR_FILE_NAME_ARG; + public static final String SQL_QUERY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.SQL_QUERY_ARG; + public static final String SQL_QUERY_BOUNDARY = + org.apache.sqoop.tool.BaseSqoopTool.SQL_QUERY_BOUNDARY; + public static final String SQL_QUERY_SHORT_ARG = + org.apache.sqoop.tool.BaseSqoopTool.SQL_QUERY_SHORT_ARG; + public static final String VERBOSE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.VERBOSE_ARG; + public static final String HELP_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HELP_ARG; + public static final String UPDATE_KEY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.UPDATE_KEY_ARG; + public static final String UPDATE_MODE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.UPDATE_MODE_ARG; + public static final String INCREMENT_TYPE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INCREMENT_TYPE_ARG; + public static final String INCREMENT_COL_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INCREMENT_COL_ARG; + public static final String INCREMENT_LAST_VAL_ARG = + org.apache.sqoop.tool.BaseSqoopTool.INCREMENT_LAST_VAL_ARG; + public static final String HBASE_TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HBASE_TABLE_ARG; + public static final String HBASE_COL_FAM_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HBASE_COL_FAM_ARG; + public static final String HBASE_ROW_KEY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HBASE_ROW_KEY_ARG; + public static final String HBASE_CREATE_TABLE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.HBASE_CREATE_TABLE_ARG; + public static final String STORAGE_METASTORE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.STORAGE_METASTORE_ARG; + public static final String JOB_CMD_CREATE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.JOB_CMD_CREATE_ARG; + public static final String JOB_CMD_DELETE_ARG = + org.apache.sqoop.tool.BaseSqoopTool.JOB_CMD_DELETE_ARG; + public static final String JOB_CMD_EXEC_ARG = + org.apache.sqoop.tool.BaseSqoopTool.JOB_CMD_EXEC_ARG; + public static final String JOB_CMD_LIST_ARG = + org.apache.sqoop.tool.BaseSqoopTool.JOB_CMD_LIST_ARG; + public static final String JOB_CMD_SHOW_ARG = + 
org.apache.sqoop.tool.BaseSqoopTool.JOB_CMD_SHOW_ARG; + public static final String METASTORE_SHUTDOWN_ARG = + org.apache.sqoop.tool.BaseSqoopTool.METASTORE_SHUTDOWN_ARG; + public static final String NEW_DATASET_ARG = + org.apache.sqoop.tool.BaseSqoopTool.NEW_DATASET_ARG; + public static final String OLD_DATASET_ARG = + org.apache.sqoop.tool.BaseSqoopTool.OLD_DATASET_ARG; + public static final String MERGE_KEY_ARG = + org.apache.sqoop.tool.BaseSqoopTool.MERGE_KEY_ARG; public BaseSqoopTool() { } @@ -181,793 +205,4 @@ public BaseSqoopTool(String toolName) { super(toolName); } - protected ConnManager manager; - - public ConnManager getManager() { - return manager; - } - - public void setManager(ConnManager mgr) { - this.manager = mgr; - } - - /** - * Should be called at the beginning of the run() method to initialize - * the connection manager, etc. If this succeeds (returns true), it should - * be paired with a call to destroy(). - * @return true on success, false on failure. - */ - protected boolean init(SqoopOptions sqoopOpts) { - // Get the connection to the database. - try { - JobData data = new JobData(sqoopOpts, this); - this.manager = new ConnFactory(sqoopOpts.getConf()).getManager(data); - return true; - } catch (Exception e) { - LOG.error("Got error creating database manager: " - + StringUtils.stringifyException(e)); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(e); - } - } - - return false; - } - - /** - * Should be called in a 'finally' block at the end of the run() method. - */ - protected void destroy(SqoopOptions sqoopOpts) { - if (null != manager) { - try { - manager.close(); - } catch (SQLException sqlE) { - LOG.warn("Error while closing connection: " + sqlE); - } - } - } - - /** - * Examines a subset of the arrray presented, and determines if it - * contains any non-empty arguments. If so, logs the arguments - * and returns true. - * - * @param argv an array of strings to check. - * @param offset the first element of the array to check - * @param len the number of elements to check - * @return true if there are any non-null, non-empty argument strings - * present. - */ - protected boolean hasUnrecognizedArgs(String [] argv, int offset, int len) { - if (argv == null) { - return false; - } - - boolean unrecognized = false; - boolean printedBanner = false; - for (int i = offset; i < Math.min(argv.length, offset + len); i++) { - if (argv[i] != null && argv[i].length() > 0) { - if (!printedBanner) { - LOG.error("Error parsing arguments for " + getToolName() + ":"); - printedBanner = true; - } - LOG.error("Unrecognized argument: " + argv[i]); - unrecognized = true; - } - } - - return unrecognized; - } - - protected boolean hasUnrecognizedArgs(String [] argv) { - if (null == argv) { - return false; - } - return hasUnrecognizedArgs(argv, 0, argv.length); - } - - - /** - * If argv contains an entry "--", return an array containing all elements - * after the "--" separator. Otherwise, return null. - * @param argv a set of arguments to scan for the subcommand arguments. - */ - protected String [] getSubcommandArgs(String [] argv) { - if (null == argv) { - return null; - } - - for (int i = 0; i < argv.length; i++) { - if (argv[i].equals("--")) { - return Arrays.copyOfRange(argv, i + 1, argv.length); - } - } - - return null; - } - - /** - * @return RelatedOptions used by job management tools. 
- */ - protected RelatedOptions getJobOptions() { - RelatedOptions relatedOpts = new RelatedOptions( - "Job management arguments"); - relatedOpts.addOption(OptionBuilder.withArgName("jdbc-uri") - .hasArg() - .withDescription("Specify JDBC connect string for the metastore") - .withLongOpt(STORAGE_METASTORE_ARG) - .create()); - - // Create an option-group surrounding the operations a user - // can perform on jobs. - OptionGroup group = new OptionGroup(); - group.addOption(OptionBuilder.withArgName("job-id") - .hasArg() - .withDescription("Create a new saved job") - .withLongOpt(JOB_CMD_CREATE_ARG) - .create()); - group.addOption(OptionBuilder.withArgName("job-id") - .hasArg() - .withDescription("Delete a saved job") - .withLongOpt(JOB_CMD_DELETE_ARG) - .create()); - group.addOption(OptionBuilder.withArgName("job-id") - .hasArg() - .withDescription("Show the parameters for a saved job") - .withLongOpt(JOB_CMD_SHOW_ARG) - .create()); - - Option execOption = OptionBuilder.withArgName("job-id") - .hasArg() - .withDescription("Run a saved job") - .withLongOpt(JOB_CMD_EXEC_ARG) - .create(); - group.addOption(execOption); - - group.addOption(OptionBuilder - .withDescription("List saved jobs") - .withLongOpt(JOB_CMD_LIST_ARG) - .create()); - - relatedOpts.addOptionGroup(group); - - // Since the "common" options aren't used in the job tool, - // add these settings here. - relatedOpts.addOption(OptionBuilder - .withDescription("Print more information while working") - .withLongOpt(VERBOSE_ARG) - .create()); - relatedOpts.addOption(OptionBuilder - .withDescription("Print usage instructions") - .withLongOpt(HELP_ARG) - .create()); - - return relatedOpts; - } - - /** - * @return RelatedOptions used by most/all Sqoop tools. - */ - protected RelatedOptions getCommonOptions() { - // Connection args (common) - RelatedOptions commonOpts = new RelatedOptions("Common arguments"); - commonOpts.addOption(OptionBuilder.withArgName("jdbc-uri") - .hasArg().withDescription("Specify JDBC connect string") - .withLongOpt(CONNECT_STRING_ARG) - .create()); - commonOpts.addOption(OptionBuilder.withArgName("class-name") - .hasArg().withDescription("Specify connection manager class name") - .withLongOpt(CONN_MANAGER_CLASS_NAME) - .create()); - commonOpts.addOption(OptionBuilder.withArgName("properties-file") - .hasArg().withDescription("Specify connection parameters file") - .withLongOpt(CONNECT_PARAM_FILE) - .create()); - commonOpts.addOption(OptionBuilder.withArgName("class-name") - .hasArg().withDescription("Manually specify JDBC driver class to use") - .withLongOpt(DRIVER_ARG) - .create()); - commonOpts.addOption(OptionBuilder.withArgName("username") - .hasArg().withDescription("Set authentication username") - .withLongOpt(USERNAME_ARG) - .create()); - commonOpts.addOption(OptionBuilder.withArgName("password") - .hasArg().withDescription("Set authentication password") - .withLongOpt(PASSWORD_ARG) - .create()); - commonOpts.addOption(OptionBuilder - .withDescription("Read password from console") - .create(PASSWORD_PROMPT_ARG)); - - commonOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg().withDescription("Override $HADOOP_HOME") - .withLongOpt(HADOOP_HOME_ARG) - .create()); - - // misc (common) - commonOpts.addOption(OptionBuilder - .withDescription("Print more information while working") - .withLongOpt(VERBOSE_ARG) - .create()); - commonOpts.addOption(OptionBuilder - .withDescription("Print usage instructions") - .withLongOpt(HELP_ARG) - .create()); - - return commonOpts; - } - - /** - * @param explicitHiveImport 
true if the user has an explicit --hive-import - * available, or false if this is implied by the tool. - * @return options governing interaction with Hive - */ - protected RelatedOptions getHiveOptions(boolean explicitHiveImport) { - RelatedOptions hiveOpts = new RelatedOptions("Hive arguments"); - if (explicitHiveImport) { - hiveOpts.addOption(OptionBuilder - .withDescription("Import tables into Hive " - + "(Uses Hive's default delimiters if none are set.)") - .withLongOpt(HIVE_IMPORT_ARG) - .create()); - } - - hiveOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg().withDescription("Override $HIVE_HOME") - .withLongOpt(HIVE_HOME_ARG) - .create()); - hiveOpts.addOption(OptionBuilder - .withDescription("Overwrite existing data in the Hive table") - .withLongOpt(HIVE_OVERWRITE_ARG) - .create()); - hiveOpts.addOption(OptionBuilder - .withDescription("Fail if the target hive table exists") - .withLongOpt(CREATE_HIVE_TABLE_ARG) - .create()); - hiveOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg() - .withDescription("Sets the table name to use when importing to hive") - .withLongOpt(HIVE_TABLE_ARG) - .create()); - hiveOpts.addOption(OptionBuilder - .withDescription("Drop Hive record \\0x01 and row delimiters " - + "(\\n\\r) from imported string fields") - .withLongOpt(HIVE_DROP_DELIMS_ARG) - .create()); - hiveOpts.addOption(OptionBuilder - .hasArg() - .withDescription("Replace Hive record \\0x01 and row delimiters " - + "(\\n\\r) from imported string fields with user-defined string") - .withLongOpt(HIVE_DELIMS_REPLACEMENT_ARG) - .create()); - hiveOpts.addOption(OptionBuilder.withArgName("partition-key") - .hasArg() - .withDescription("Sets the partition key to use when importing to hive") - .withLongOpt(HIVE_PARTITION_KEY_ARG) - .create()); - hiveOpts.addOption(OptionBuilder.withArgName("partition-value") - .hasArg() - .withDescription("Sets the partition value to use when importing " - + "to hive") - .withLongOpt(HIVE_PARTITION_VALUE_ARG) - .create()); - hiveOpts.addOption(OptionBuilder - .hasArg() - .withDescription("Override mapping for specific column to hive" - + " types.") - .withLongOpt(MAP_COLUMN_HIVE) - .create()); - - return hiveOpts; - } - - /** - * @return options governing output format delimiters - */ - protected RelatedOptions getOutputFormatOptions() { - RelatedOptions formatOpts = new RelatedOptions( - "Output line formatting arguments"); - formatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the field separator character") - .withLongOpt(FIELDS_TERMINATED_BY_ARG) - .create()); - formatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the end-of-line character") - .withLongOpt(LINES_TERMINATED_BY_ARG) - .create()); - formatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets a field enclosing character") - .withLongOpt(OPTIONALLY_ENCLOSED_BY_ARG) - .create()); - formatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets a required field enclosing character") - .withLongOpt(ENCLOSED_BY_ARG) - .create()); - formatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the escape character") - .withLongOpt(ESCAPED_BY_ARG) - .create()); - formatOpts.addOption(OptionBuilder - .withDescription("Uses MySQL's default delimiter set: " - + "fields: , lines: \\n escaped-by: \\ optionally-enclosed-by: '") - .withLongOpt(MYSQL_DELIMITERS_ARG) - .create()); - - return formatOpts; - } - - /** - * 
@return options governing input format delimiters. - */ - protected RelatedOptions getInputFormatOptions() { - RelatedOptions inputFormatOpts = - new RelatedOptions("Input parsing arguments"); - inputFormatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the input field separator") - .withLongOpt(INPUT_FIELDS_TERMINATED_BY_ARG) - .create()); - inputFormatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the input end-of-line char") - .withLongOpt(INPUT_LINES_TERMINATED_BY_ARG) - .create()); - inputFormatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets a field enclosing character") - .withLongOpt(INPUT_OPTIONALLY_ENCLOSED_BY_ARG) - .create()); - inputFormatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets a required field encloser") - .withLongOpt(INPUT_ENCLOSED_BY_ARG) - .create()); - inputFormatOpts.addOption(OptionBuilder.withArgName("char") - .hasArg() - .withDescription("Sets the input escape character") - .withLongOpt(INPUT_ESCAPED_BY_ARG) - .create()); - - return inputFormatOpts; - } - - /** - * @param multiTable true if these options will be used for bulk code-gen. - * @return options related to code generation. - */ - protected RelatedOptions getCodeGenOpts(boolean multiTable) { - RelatedOptions codeGenOpts = - new RelatedOptions("Code generation arguments"); - codeGenOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg() - .withDescription("Output directory for generated code") - .withLongOpt(CODE_OUT_DIR_ARG) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg() - .withDescription("Output directory for compiled objects") - .withLongOpt(BIN_OUT_DIR_ARG) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("name") - .hasArg() - .withDescription("Put auto-generated classes in this package") - .withLongOpt(PACKAGE_NAME_ARG) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("null-str") - .hasArg() - .withDescription("Null string representation") - .withLongOpt(NULL_STRING) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("null-str") - .hasArg() - .withDescription("Input null string representation") - .withLongOpt(INPUT_NULL_STRING) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("null-str") - .hasArg() - .withDescription("Null non-string representation") - .withLongOpt(NULL_NON_STRING) - .create()); - codeGenOpts.addOption(OptionBuilder.withArgName("null-str") - .hasArg() - .withDescription("Input null non-string representation") - .withLongOpt(INPUT_NULL_NON_STRING) - .create()); - codeGenOpts.addOption(OptionBuilder - .hasArg() - .withDescription("Override mapping for specific columns to java types") - .withLongOpt(MAP_COLUMN_JAVA) - .create()); - - if (!multiTable) { - codeGenOpts.addOption(OptionBuilder.withArgName("name") - .hasArg() - .withDescription("Sets the generated class name. " - + "This overrides --" + PACKAGE_NAME_ARG + ". 
When combined " - + "with --" + JAR_FILE_NAME_ARG + ", sets the input class.") - .withLongOpt(CLASS_NAME_ARG) - .create()); - } - return codeGenOpts; - } - - protected RelatedOptions getHBaseOptions() { - RelatedOptions hbaseOpts = - new RelatedOptions("HBase arguments"); - hbaseOpts.addOption(OptionBuilder.withArgName("table") - .hasArg() - .withDescription("Import to in HBase") - .withLongOpt(HBASE_TABLE_ARG) - .create()); - hbaseOpts.addOption(OptionBuilder.withArgName("family") - .hasArg() - .withDescription("Sets the target column family for the import") - .withLongOpt(HBASE_COL_FAM_ARG) - .create()); - hbaseOpts.addOption(OptionBuilder.withArgName("col") - .hasArg() - .withDescription("Specifies which input column to use as the row key") - .withLongOpt(HBASE_ROW_KEY_ARG) - .create()); - hbaseOpts.addOption(OptionBuilder - .withDescription("If specified, create missing HBase tables") - .withLongOpt(HBASE_CREATE_TABLE_ARG) - .create()); - - return hbaseOpts; - } - - - - /** - * Apply common command-line to the state. - */ - protected void applyCommonOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - // common options. - if (in.hasOption(VERBOSE_ARG)) { - // Immediately switch into DEBUG logging. - Category sqoopLogger = Logger.getLogger( - Sqoop.class.getName()).getParent(); - sqoopLogger.setLevel(Level.DEBUG); - LOG.debug("Enabled debug logging."); - } - - if (in.hasOption(HELP_ARG)) { - ToolOptions toolOpts = new ToolOptions(); - configureOptions(toolOpts); - printHelp(toolOpts); - throw new InvalidOptionsException(""); - } - - if (in.hasOption(CONNECT_STRING_ARG)) { - out.setConnectString(in.getOptionValue(CONNECT_STRING_ARG)); - } - - if (in.hasOption(CONN_MANAGER_CLASS_NAME)) { - out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME)); - } - - if (in.hasOption(CONNECT_PARAM_FILE)) { - File paramFile = new File(in.getOptionValue(CONNECT_PARAM_FILE)); - if (!paramFile.exists()) { - throw new InvalidOptionsException( - "Specified connection parameter file not found: " + paramFile); - } - InputStream inStream = null; - Properties connectionParams = new Properties(); - try { - inStream = new FileInputStream( - new File(in.getOptionValue(CONNECT_PARAM_FILE))); - connectionParams.load(inStream); - } catch (IOException ex) { - LOG.warn("Failed to load connection parameter file", ex); - throw new InvalidOptionsException( - "Error while loading connection parameter file: " - + ex.getMessage()); - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException ex) { - LOG.warn("Failed to close input stream", ex); - } - } - } - LOG.debug("Loaded connection parameters: " + connectionParams); - out.setConnectionParams(connectionParams); - } - - if (in.hasOption(NULL_STRING)) { - out.setNullStringValue(in.getOptionValue(NULL_STRING)); - } - - if (in.hasOption(INPUT_NULL_STRING)) { - out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING)); - } - - if (in.hasOption(NULL_NON_STRING)) { - out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING)); - } - - if (in.hasOption(INPUT_NULL_NON_STRING)) { - out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING)); - } - - if (in.hasOption(DRIVER_ARG)) { - out.setDriverClassName(in.getOptionValue(DRIVER_ARG)); - } - - if (in.hasOption(USERNAME_ARG)) { - out.setUsername(in.getOptionValue(USERNAME_ARG)); - if (null == out.getPassword()) { - // Set password to empty if the username is set first, - // to ensure that they're either both null or neither is. 
- out.setPassword(""); - } - } - - if (in.hasOption(PASSWORD_ARG)) { - LOG.warn("Setting your password on the command-line is insecure. " - + "Consider using -" + PASSWORD_PROMPT_ARG + " instead."); - out.setPassword(in.getOptionValue(PASSWORD_ARG)); - } - - if (in.hasOption(PASSWORD_PROMPT_ARG)) { - out.setPasswordFromConsole(); - } - - if (in.hasOption(HADOOP_HOME_ARG)) { - out.setHadoopHome(in.getOptionValue(HADOOP_HOME_ARG)); - } - - } - - protected void applyHiveOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - if (in.hasOption(HIVE_HOME_ARG)) { - out.setHiveHome(in.getOptionValue(HIVE_HOME_ARG)); - } - - if (in.hasOption(HIVE_IMPORT_ARG)) { - out.setHiveImport(true); - } - - if (in.hasOption(HIVE_OVERWRITE_ARG)) { - out.setOverwriteHiveTable(true); - } - - if (in.hasOption(CREATE_HIVE_TABLE_ARG)) { - out.setFailIfHiveTableExists(true); - } - - if (in.hasOption(HIVE_TABLE_ARG)) { - out.setHiveTableName(in.getOptionValue(HIVE_TABLE_ARG)); - } - - if (in.hasOption(HIVE_DROP_DELIMS_ARG)) { - out.setHiveDropDelims(true); - } - - if (in.hasOption(HIVE_DELIMS_REPLACEMENT_ARG)) { - out.setHiveDelimsReplacement( - in.getOptionValue(HIVE_DELIMS_REPLACEMENT_ARG)); - } - - if (in.hasOption(HIVE_PARTITION_KEY_ARG)) { - out.setHivePartitionKey(in.getOptionValue(HIVE_PARTITION_KEY_ARG)); - } - - if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) { - out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG)); - } - - if (in.hasOption(MAP_COLUMN_HIVE)) { - out.setMapColumnHive(in.getOptionValue(MAP_COLUMN_HIVE)); - } - } - - protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - if (in.hasOption(FIELDS_TERMINATED_BY_ARG)) { - out.setFieldsTerminatedBy(SqoopOptions.toChar( - in.getOptionValue(FIELDS_TERMINATED_BY_ARG))); - out.setExplicitDelims(true); - } - - if (in.hasOption(LINES_TERMINATED_BY_ARG)) { - out.setLinesTerminatedBy(SqoopOptions.toChar( - in.getOptionValue(LINES_TERMINATED_BY_ARG))); - out.setExplicitDelims(true); - } - - if (in.hasOption(OPTIONALLY_ENCLOSED_BY_ARG)) { - out.setEnclosedBy(SqoopOptions.toChar( - in.getOptionValue(OPTIONALLY_ENCLOSED_BY_ARG))); - out.setOutputEncloseRequired(false); - out.setExplicitDelims(true); - } - - if (in.hasOption(ENCLOSED_BY_ARG)) { - out.setEnclosedBy(SqoopOptions.toChar( - in.getOptionValue(ENCLOSED_BY_ARG))); - out.setOutputEncloseRequired(true); - out.setExplicitDelims(true); - } - - if (in.hasOption(ESCAPED_BY_ARG)) { - out.setEscapedBy(SqoopOptions.toChar( - in.getOptionValue(ESCAPED_BY_ARG))); - out.setExplicitDelims(true); - } - - if (in.hasOption(MYSQL_DELIMITERS_ARG)) { - out.setOutputEncloseRequired(false); - out.setFieldsTerminatedBy(','); - out.setLinesTerminatedBy('\n'); - out.setEscapedBy('\\'); - out.setEnclosedBy('\''); - out.setExplicitDelims(true); - } - } - - protected void applyInputFormatOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - if (in.hasOption(INPUT_FIELDS_TERMINATED_BY_ARG)) { - out.setInputFieldsTerminatedBy(SqoopOptions.toChar( - in.getOptionValue(INPUT_FIELDS_TERMINATED_BY_ARG))); - } - - if (in.hasOption(INPUT_LINES_TERMINATED_BY_ARG)) { - out.setInputLinesTerminatedBy(SqoopOptions.toChar( - in.getOptionValue(INPUT_LINES_TERMINATED_BY_ARG))); - } - - if (in.hasOption(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)) { - out.setInputEnclosedBy(SqoopOptions.toChar( - in.getOptionValue(INPUT_OPTIONALLY_ENCLOSED_BY_ARG))); - out.setInputEncloseRequired(false); - } - - if (in.hasOption(INPUT_ENCLOSED_BY_ARG)) { - 
out.setInputEnclosedBy(SqoopOptions.toChar( - in.getOptionValue(INPUT_ENCLOSED_BY_ARG))); - out.setInputEncloseRequired(true); - } - - if (in.hasOption(INPUT_ESCAPED_BY_ARG)) { - out.setInputEscapedBy(SqoopOptions.toChar( - in.getOptionValue(INPUT_ESCAPED_BY_ARG))); - } - } - - protected void applyCodeGenOptions(CommandLine in, SqoopOptions out, - boolean multiTable) throws InvalidOptionsException { - if (in.hasOption(CODE_OUT_DIR_ARG)) { - out.setCodeOutputDir(in.getOptionValue(CODE_OUT_DIR_ARG)); - } - - if (in.hasOption(BIN_OUT_DIR_ARG)) { - out.setJarOutputDir(in.getOptionValue(BIN_OUT_DIR_ARG)); - } - - if (in.hasOption(PACKAGE_NAME_ARG)) { - out.setPackageName(in.getOptionValue(PACKAGE_NAME_ARG)); - } - - if (in.hasOption(MAP_COLUMN_JAVA)) { - out.setMapColumn(in.getOptionValue(MAP_COLUMN_JAVA)); - } - - if (!multiTable && in.hasOption(CLASS_NAME_ARG)) { - out.setClassName(in.getOptionValue(CLASS_NAME_ARG)); - } - } - - protected void applyHBaseOptions(CommandLine in, SqoopOptions out) { - if (in.hasOption(HBASE_TABLE_ARG)) { - out.setHBaseTable(in.getOptionValue(HBASE_TABLE_ARG)); - } - - if (in.hasOption(HBASE_COL_FAM_ARG)) { - out.setHBaseColFamily(in.getOptionValue(HBASE_COL_FAM_ARG)); - } - - if (in.hasOption(HBASE_ROW_KEY_ARG)) { - out.setHBaseRowKeyColumn(in.getOptionValue(HBASE_ROW_KEY_ARG)); - } - - if (in.hasOption(HBASE_CREATE_TABLE_ARG)) { - out.setCreateHBaseTable(true); - } - } - - protected void validateCommonOptions(SqoopOptions options) - throws InvalidOptionsException { - if (options.getConnectString() == null) { - throw new InvalidOptionsException( - "Error: Required argument --connect is missing." - + HELP_STR); - } - } - - protected void validateCodeGenOptions(SqoopOptions options) - throws InvalidOptionsException { - if (options.getClassName() != null && options.getPackageName() != null) { - throw new InvalidOptionsException( - "--class-name overrides --package-name. You cannot use both." - + HELP_STR); - } - } - - protected void validateOutputFormatOptions(SqoopOptions options) - throws InvalidOptionsException { - if (options.doHiveImport()) { - if (!options.explicitDelims()) { - // user hasn't manually specified delimiters, and wants to import - // straight to Hive. Use Hive-style delimiters. - LOG.info("Using Hive-specific delimiters for output. You can override"); - LOG.info("delimiters with --fields-terminated-by, etc."); - options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS); - } - - if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) { - LOG.warn("Hive does not support escape characters in fields;"); - LOG.warn("parse errors in Hive may result from using --escaped-by."); - } - - if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) { - LOG.warn("Hive does not support quoted strings; parse errors"); - LOG.warn("in Hive may result from using --enclosed-by."); - } - } - } - - protected void validateHiveOptions(SqoopOptions options) - throws InvalidOptionsException { - // Empty; this method is present to maintain API consistency, and - // is reserved for future constraints on Hive options. - if (options.getHiveDelimsReplacement() != null - && options.doHiveDropDelims()) { - throw new InvalidOptionsException("The " + HIVE_DROP_DELIMS_ARG - + " option conflicts with the " + HIVE_DELIMS_REPLACEMENT_ARG - + " option." 
+ HELP_STR); - } - } - - protected void validateHBaseOptions(SqoopOptions options) - throws InvalidOptionsException { - if ((options.getHBaseColFamily() != null && options.getHBaseTable() == null) - || (options.getHBaseColFamily() == null - && options.getHBaseTable() != null)) { - throw new InvalidOptionsException( - "Both --hbase-table and --column-family must be set together." - + HELP_STR); - } - } - - /** - * Given an array of extra arguments (usually populated via - * this.extraArguments), determine the offset of the first '--' - * argument in the list. Return 'extra.length' if there is none. - */ - protected int getDashPosition(String [] extra) { - int dashPos = extra.length; - for (int i = 0; i < extra.length; i++) { - if (extra[i].equals("--")) { - dashPos = i; - break; - } - } - - return dashPos; - } } - diff --git a/src/java/com/cloudera/sqoop/tool/CodeGenTool.java b/src/java/com/cloudera/sqoop/tool/CodeGenTool.java index 4e4eaefe..efed4810 100644 --- a/src/java/com/cloudera/sqoop/tool/CodeGenTool.java +++ b/src/java/com/cloudera/sqoop/tool/CodeGenTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,177 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringUtils; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.hive.HiveImport; -import com.cloudera.sqoop.orm.ClassWriter; -import com.cloudera.sqoop.orm.CompilationManager; - /** - * Tool that generates code from a database schema. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class CodeGenTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog(CodeGenTool.class.getName()); - - private List generatedJarFiles; - - public CodeGenTool() { - super("codegen"); - generatedJarFiles = new ArrayList(); - } - - /** - * @return a list of jar files generated as part of this import process - */ - public List getGeneratedJarFiles() { - ArrayList out = new ArrayList(generatedJarFiles); - return out; - } - - /** - * Generate the .class and .jar files. - * @return the filename of the emitted jar file. - * @throws IOException - */ - public String generateORM(SqoopOptions options, String tableName) - throws IOException { - String existingJar = options.getExistingJarName(); - if (existingJar != null) { - // This code generator is being invoked as part of an import or export - // process, and the user has pre-specified a jar and class to use. - // Don't generate. 
- LOG.info("Using existing jar: " + existingJar); - return existingJar; - } - - LOG.info("Beginning code generation"); - CompilationManager compileMgr = new CompilationManager(options); - ClassWriter classWriter = new ClassWriter(options, manager, tableName, - compileMgr); - classWriter.generate(); - compileMgr.compile(); - compileMgr.jar(); - String jarFile = compileMgr.getJarFilename(); - this.generatedJarFiles.add(jarFile); - return jarFile; - } - - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - if (!init(options)) { - return 1; - } - - try { - generateORM(options, options.getTableName()); - - // If the user has also specified Hive import code generation, - // use a HiveImport to generate the DDL statements and write - // them to files (but don't actually perform the import -- thus - // the generateOnly=true in the constructor). - if (options.doHiveImport()) { - HiveImport hiveImport = new HiveImport(options, manager, - options.getConf(), true); - hiveImport.importTable(options.getTableName(), - options.getHiveTableName(), true); - } - - } catch (IOException ioe) { - LOG.error("Encountered IOException running codegen job: " - + StringUtils.stringifyException(ioe)); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } finally { - destroy(options); - } - - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - - toolOptions.addUniqueOptions(getCommonOptions()); - - RelatedOptions codeGenOpts = getCodeGenOpts(false); - codeGenOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg() - .withDescription("Table to generate code for") - .withLongOpt(TABLE_ARG) - .create()); - toolOptions.addUniqueOptions(codeGenOpts); - - toolOptions.addUniqueOptions(getOutputFormatOptions()); - toolOptions.addUniqueOptions(getInputFormatOptions()); - toolOptions.addUniqueOptions(getHiveOptions(true)); - } - - @Override - /** {@inheritDoc} */ - public void printHelp(ToolOptions toolOptions) { - super.printHelp(toolOptions); - System.out.println(""); - System.out.println( - "At minimum, you must specify --connect and --table"); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - if (in.hasOption(TABLE_ARG)) { - out.setTableName(in.getOptionValue(TABLE_ARG)); - } - - applyCommonOptions(in, out); - applyOutputFormatOptions(in, out); - applyInputFormatOptions(in, out); - applyCodeGenOptions(in, out, false); - applyHiveOptions(in, out); - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - if (hasUnrecognizedArgs(extraArguments)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateCommonOptions(options); - validateCodeGenOptions(options); - validateOutputFormatOptions(options); - validateHiveOptions(options); - - if (options.getTableName() == null) { - throw new InvalidOptionsException( - "--table is required for code generation." 
+ HELP_STR); - } - } +public class CodeGenTool + extends org.apache.sqoop.tool.CodeGenTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/CreateHiveTableTool.java b/src/java/com/cloudera/sqoop/tool/CreateHiveTableTool.java index 123b3362..103b66ec 100644 --- a/src/java/com/cloudera/sqoop/tool/CreateHiveTableTool.java +++ b/src/java/com/cloudera/sqoop/tool/CreateHiveTableTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,122 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.util.StringUtils; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.hive.HiveImport; - /** - * Tool that creates a Hive table definition. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class CreateHiveTableTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog( - CreateHiveTableTool.class.getName()); - - public CreateHiveTableTool() { - super("create-hive-table"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - if (!init(options)) { - return 1; - } - - try { - HiveImport hiveImport = new HiveImport(options, manager, - options.getConf(), false); - hiveImport.importTable(options.getTableName(), - options.getHiveTableName(), true); - } catch (IOException ioe) { - LOG.error("Encountered IOException running create table job: " - + StringUtils.stringifyException(ioe)); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } finally { - destroy(options); - } - - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - - toolOptions.addUniqueOptions(getCommonOptions()); - - RelatedOptions hiveOpts = getHiveOptions(false); - hiveOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg() - .withDescription("The db table to read the definition from") - .withLongOpt(TABLE_ARG) - .create()); - toolOptions.addUniqueOptions(hiveOpts); - - toolOptions.addUniqueOptions(getOutputFormatOptions()); - } - - @Override - /** {@inheritDoc} */ - public void printHelp(ToolOptions toolOptions) { - super.printHelp(toolOptions); - System.out.println(""); - System.out.println( - "At minimum, you must specify --connect and --table"); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - if (in.hasOption(TABLE_ARG)) { - out.setTableName(in.getOptionValue(TABLE_ARG)); - } - - out.setHiveImport(true); - - applyCommonOptions(in, out); - applyHiveOptions(in, out); - applyOutputFormatOptions(in, out); - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - if 
(hasUnrecognizedArgs(extraArguments)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateCommonOptions(options); - validateOutputFormatOptions(options); - validateHiveOptions(options); - - if (options.getTableName() == null) { - throw new InvalidOptionsException( - "--table is required for table definition importing." + HELP_STR); - } - } +public class CreateHiveTableTool + extends org.apache.sqoop.tool.CreateHiveTableTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/EvalSqlTool.java b/src/java/com/cloudera/sqoop/tool/EvalSqlTool.java index 358a2a8a..fcba5350 100644 --- a/src/java/com/cloudera/sqoop/tool/EvalSqlTool.java +++ b/src/java/com/cloudera/sqoop/tool/EvalSqlTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,159 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; -import java.io.PrintWriter; - -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.util.StringUtils; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.util.ResultSetPrinter; - /** - * Tool that evaluates a SQL statement and displays the results. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class EvalSqlTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog(EvalSqlTool.class.getName()); - - public EvalSqlTool() { - super("eval"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - if (!init(options)) { - return 1; - } - - PreparedStatement stmt = null; - ResultSet rs = null; - PrintWriter pw = null; - try { - Connection c = manager.getConnection(); - String query = options.getSqlQuery(); - LOG.debug("SQL query: " + query); - stmt = c.prepareStatement(query); - boolean resultType = stmt.execute(); - // Iterate over all the results from this statement. - while (true) { - LOG.debug("resultType=" + resultType); - if (!resultType) { - // This result was an update count. - int updateCount = stmt.getUpdateCount(); - LOG.debug("updateCount=" + updateCount); - if (updateCount == -1) { - // We are done iterating over results from this statement. - c.commit(); - break; - } else { - LOG.info(updateCount + " row(s) updated."); - } - } else { - // This yields a ResultSet. 
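// A self-contained sketch of the JDBC pattern the surrounding EvalSqlTool code
// relies on: Statement.execute() reports whether the first result is a
// ResultSet or an update count, and getMoreResults()/getUpdateCount() walk any
// remaining results until both are exhausted. The JDBC URL and query are
// placeholders (assumes an in-memory HSQLDB driver on the classpath).
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class MultiResultDemo {
  public static void main(String[] args) throws SQLException {
    try (Connection c = DriverManager.getConnection("jdbc:hsqldb:mem:demo");
         Statement stmt = c.createStatement()) {
      boolean isResultSet = stmt.execute("VALUES (1)");
      while (true) {
        if (isResultSet) {
          try (ResultSet rs = stmt.getResultSet()) {
            while (rs.next()) {
              System.out.println(rs.getObject(1));
            }
          }
        } else {
          int updateCount = stmt.getUpdateCount();
          if (updateCount == -1) {
            break; // no more result sets and no more update counts
          }
          System.out.println(updateCount + " row(s) updated.");
        }
        isResultSet = stmt.getMoreResults();
      }
    }
  }
}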
- rs = stmt.getResultSet(); - pw = new PrintWriter(System.out, true); - new ResultSetPrinter().printResultSet(pw, rs); - pw.close(); - pw = null; - } - - resultType = stmt.getMoreResults(); - } - } catch (IOException ioe) { - LOG.warn("IOException formatting results: " - + StringUtils.stringifyException(ioe)); - return 1; - } catch (SQLException sqlE) { - LOG.warn("SQL exception executing statement: " - + StringUtils.stringifyException(sqlE)); - return 1; - } finally { - if (null != pw) { - pw.close(); - } - if (null != rs) { - try { - rs.close(); - } catch (SQLException sqlE) { - LOG.warn("SQL exception closing ResultSet: " - + StringUtils.stringifyException(sqlE)); - } - } - if (null != stmt) { - try { - stmt.close(); - } catch (SQLException sqlE) { - LOG.warn("SQL exception closing statement: " - + StringUtils.stringifyException(sqlE)); - } - } - destroy(options); - } - - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - toolOptions.addUniqueOptions(getCommonOptions()); - - RelatedOptions evalOpts = new RelatedOptions("SQL evaluation arguments"); - evalOpts.addOption(OptionBuilder.withArgName("statement") - .hasArg() - .withDescription("Execute 'statement' in SQL and exit") - .withLongOpt(SQL_QUERY_ARG) - .create(SQL_QUERY_SHORT_ARG)); - - toolOptions.addUniqueOptions(evalOpts); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - applyCommonOptions(in, out); - if (in.hasOption(SQL_QUERY_ARG)) { - out.setSqlQuery(in.getOptionValue(SQL_QUERY_ARG)); - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - if (hasUnrecognizedArgs(extraArguments)) { - throw new InvalidOptionsException(HELP_STR); - } - - String sqlCmd = options.getSqlQuery(); - if (null == sqlCmd || sqlCmd.length() == 0) { - throw new InvalidOptionsException( - "This command requires the " + SQL_QUERY_ARG + " argument." - + HELP_STR); - } - - validateCommonOptions(options); - } +public class EvalSqlTool + extends org.apache.sqoop.tool.EvalSqlTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/ExportTool.java b/src/java/com/cloudera/sqoop/tool/ExportTool.java index dc9b3b1e..597f0fb2 100644 --- a/src/java/com/cloudera/sqoop/tool/ExportTool.java +++ b/src/java/com/cloudera/sqoop/tool/ExportTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,336 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package com.cloudera.sqoop.tool; -import java.io.IOException; -import java.util.List; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.SqoopOptions.UpdateMode; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.manager.ExportJobContext; -import com.cloudera.sqoop.util.ExportException; - /** - * Tool that performs HDFS exports to databases. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class ExportTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog(ExportTool.class.getName()); - - private CodeGenTool codeGenerator; - - public ExportTool() { - super("export"); - this.codeGenerator = new CodeGenTool(); - } - - /** - * @return a list of jar files generated as part of this im/export process - */ - public List getGeneratedJarFiles() { - return codeGenerator.getGeneratedJarFiles(); - } - - private void exportTable(SqoopOptions options, String tableName) - throws ExportException, IOException { - String jarFile = null; - - // Generate the ORM code for the tables. - jarFile = codeGenerator.generateORM(options, tableName); - - ExportJobContext context = new ExportJobContext(tableName, jarFile, - options); - if (options.getUpdateKeyCol() != null) { - if (options.getUpdateMode() == UpdateMode.UpdateOnly) { - // UPDATE-based export. - manager.updateTable(context); - } else { - // Mixed update/insert export - manager.upsertTable(context); - } - } else { - // INSERT-based export. - manager.exportTable(context); - } - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - - if (!init(options)) { - return 1; - } - - codeGenerator.setManager(manager); - - if (options.getUpdateKeyCol() != null) { - manager.configureDbOutputColumns(options); - } - - try { - exportTable(options, options.getTableName()); - } catch (IOException ioe) { - LOG.error("Encountered IOException running export job: " - + ioe.toString()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } catch (ExportException ee) { - LOG.error("Error during export: " + ee.toString()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ee); - } else { - return 1; - } - } finally { - destroy(options); - } - - return 0; - } - - /** - * Construct the set of options that control exports. - * @return the RelatedOptions that can be used to parse the export - * arguments. 
- */ - protected RelatedOptions getExportOptions() { - RelatedOptions exportOpts = new RelatedOptions("Export control arguments"); - - exportOpts.addOption(OptionBuilder - .withDescription("Use direct export fast path") - .withLongOpt(DIRECT_ARG) - .create()); - exportOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg().withDescription("Table to populate") - .withLongOpt(TABLE_ARG) - .create()); - exportOpts.addOption(OptionBuilder.withArgName("n") - .hasArg().withDescription("Use 'n' map tasks to export in parallel") - .withLongOpt(NUM_MAPPERS_ARG) - .create(NUM_MAPPERS_SHORT_ARG)); - exportOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg() - .withDescription("HDFS source path for the export") - .withLongOpt(EXPORT_PATH_ARG) - .create()); - exportOpts.addOption(OptionBuilder.withArgName("key") - .hasArg() - .withDescription("Update records by specified key column") - .withLongOpt(UPDATE_KEY_ARG) - .create()); - exportOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg().withDescription("Intermediate staging table") - .withLongOpt(STAGING_TABLE_ARG) - .create()); - exportOpts.addOption(OptionBuilder - .withDescription("Indicates that any data in " - + "staging table can be deleted") - .withLongOpt(CLEAR_STAGING_TABLE_ARG) - .create()); - exportOpts.addOption(OptionBuilder - .withDescription("Indicates underlying statements " - + "to be executed in batch mode") - .withLongOpt(BATCH_ARG) - .create()); - exportOpts.addOption(OptionBuilder - .withArgName("mode") - .hasArg() - .withDescription("Specifies how updates are performed when " - + "new rows are found with non-matching keys in database") - .withLongOpt(UPDATE_MODE_ARG) - .create()); - - return exportOpts; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - - toolOptions.addUniqueOptions(getCommonOptions()); - toolOptions.addUniqueOptions(getExportOptions()); - - // Input parsing delimiters - toolOptions.addUniqueOptions(getInputFormatOptions()); - - // Used when sending data to a direct-mode export. - toolOptions.addUniqueOptions(getOutputFormatOptions()); - - // get common codegen opts. 
- RelatedOptions codeGenOpts = getCodeGenOpts(false); - - // add export-specific codegen opts: - codeGenOpts.addOption(OptionBuilder.withArgName("file") - .hasArg() - .withDescription("Disable code generation; use specified jar") - .withLongOpt(JAR_FILE_NAME_ARG) - .create()); - - toolOptions.addUniqueOptions(codeGenOpts); - } - - @Override - /** {@inheritDoc} */ - public void printHelp(ToolOptions toolOptions) { - super.printHelp(toolOptions); - System.out.println(""); - System.out.println( - "At minimum, you must specify --connect, --export-dir, and --table"); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - try { - applyCommonOptions(in, out); - - if (in.hasOption(DIRECT_ARG)) { - out.setDirectMode(true); - } - - if (in.hasOption(BATCH_ARG)) { - out.setBatchMode(true); - } - - if (in.hasOption(TABLE_ARG)) { - out.setTableName(in.getOptionValue(TABLE_ARG)); - } - - if (in.hasOption(NUM_MAPPERS_ARG)) { - out.setNumMappers(Integer.parseInt(in.getOptionValue(NUM_MAPPERS_ARG))); - } - - if (in.hasOption(EXPORT_PATH_ARG)) { - out.setExportDir(in.getOptionValue(EXPORT_PATH_ARG)); - } - - if (in.hasOption(JAR_FILE_NAME_ARG)) { - out.setExistingJarName(in.getOptionValue(JAR_FILE_NAME_ARG)); - } - - if (in.hasOption(UPDATE_KEY_ARG)) { - out.setUpdateKeyCol(in.getOptionValue(UPDATE_KEY_ARG)); - } - - if (in.hasOption(STAGING_TABLE_ARG)) { - out.setStagingTableName(in.getOptionValue(STAGING_TABLE_ARG)); - } - - if (in.hasOption(CLEAR_STAGING_TABLE_ARG)) { - out.setClearStagingTable(true); - } - - applyNewUpdateOptions(in, out); - applyInputFormatOptions(in, out); - applyOutputFormatOptions(in, out); - applyOutputFormatOptions(in, out); - applyCodeGenOptions(in, out, false); - } catch (NumberFormatException nfe) { - throw new InvalidOptionsException("Error: expected numeric argument.\n" - + "Try --help for usage."); - } - } - - /** - * Validate export-specific arguments. - * @param options the configured SqoopOptions to check - */ - protected void validateExportOptions(SqoopOptions options) - throws InvalidOptionsException { - if (options.getTableName() == null) { - throw new InvalidOptionsException("Export requires a --table argument." - + HELP_STR); - } else if (options.getExportDir() == null) { - throw new InvalidOptionsException( - "Export requires an --export-dir argument." - + HELP_STR); - } else if (options.getExistingJarName() != null - && options.getClassName() == null) { - throw new InvalidOptionsException("Jar specified with --jar-file, but no " - + "class specified with --class-name." + HELP_STR); - } else if (options.getExistingJarName() != null - && options.getUpdateKeyCol() != null) { - // We need to regenerate the class with the output column order set - // correctly for the update-based export. So we can't use a premade - // class. 
- throw new InvalidOptionsException("Jar cannot be specified with " - + "--jar-file when export is running in update mode."); - } else if (options.getStagingTableName() != null - && options.getUpdateKeyCol() != null) { - // Staging table may not be used when export is running in update mode - throw new InvalidOptionsException("Staging table cannot be used when " - + "export is running in update mode."); - } else if (options.getStagingTableName() != null - && options.getStagingTableName().equalsIgnoreCase( - options.getTableName())) { - // Name of staging table and destination table cannot be the same - throw new InvalidOptionsException("Staging table cannot be the same as " - + "the destination table. Name comparison used is case-insensitive."); - } else if (options.doClearStagingTable() - && options.getStagingTableName() == null) { - // Option to clear staging table specified but not the staging table name - throw new InvalidOptionsException("Option to clear the staging table is " - + "specified but the staging table name is not."); - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - // If extraArguments is full, check for '--' followed by args for - // mysqldump or other commands we rely on. - options.setExtraArgs(getSubcommandArgs(extraArguments)); - int dashPos = extraArguments.length; - for (int i = 0; i < extraArguments.length; i++) { - if (extraArguments[i].equals("--")) { - dashPos = i; - break; - } - } - - if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateExportOptions(options); - validateOutputFormatOptions(options); - validateCommonOptions(options); - validateCodeGenOptions(options); - } - - private void applyNewUpdateOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - if (in.hasOption(UPDATE_MODE_ARG)) { - String updateTypeStr = in.getOptionValue(UPDATE_MODE_ARG); - if ("updateonly".equals(updateTypeStr)) { - out.setUpdateMode(UpdateMode.UpdateOnly); - } else if ("allowinsert".equals(updateTypeStr)) { - out.setUpdateMode(UpdateMode.AllowInsert); - } else { - throw new InvalidOptionsException("Unknown new update mode: " - + updateTypeStr + ". Use 'updateonly' or 'allowinsert'." - + HELP_STR); - } - } - } +public class ExportTool + extends org.apache.sqoop.tool.ExportTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/HelpTool.java b/src/java/com/cloudera/sqoop/tool/HelpTool.java index 13f8131c..dc2933af 100644 --- a/src/java/com/cloudera/sqoop/tool/HelpTool.java +++ b/src/java/com/cloudera/sqoop/tool/HelpTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,96 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.util.Set; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.cli.ToolOptions; - /** - * Tool that explains the usage of Sqoop. + * @deprecated Moving to use org.apache.sqoop namespace. 
*/ -public class HelpTool extends BaseSqoopTool { - - public HelpTool() { - super("help"); - } - - /** - * @param str the string to right-side pad - * @param num the minimum number of characters to return - * @return 'str' with enough right padding to make it num characters long. - */ - private static String padRight(String str, int num) { - StringBuilder sb = new StringBuilder(); - sb.append(str); - for (int count = str.length(); count < num; count++) { - sb.append(" "); - } - - return sb.toString(); - } - - /** - * Print out a list of all SqoopTool implementations and their - * descriptions. - */ - private void printAvailableTools() { - System.out.println("usage: sqoop COMMAND [ARGS]"); - System.out.println(""); - System.out.println("Available commands:"); - - Set toolNames = getToolNames(); - - int maxWidth = 0; - for (String tool : toolNames) { - maxWidth = Math.max(maxWidth, tool.length()); - } - - for (String tool : toolNames) { - System.out.println(" " + padRight(tool, maxWidth+2) - + getToolDescription(tool)); - } - - System.out.println(""); - System.out.println( - "See 'sqoop help COMMAND' for information on a specific command."); - } - - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - - if (this.extraArguments != null && this.extraArguments.length > 0) { - if (hasUnrecognizedArgs(extraArguments, 1, extraArguments.length)) { - return 1; - } - - SqoopTool subTool = SqoopTool.getTool(extraArguments[0]); - if (null == subTool) { - System.out.println("No such tool: " + extraArguments[0]); - System.out.println( - "Try 'sqoop help' for a list of available commands."); - return 1; - } else { - ToolOptions toolOpts = new ToolOptions(); - subTool.configureOptions(toolOpts); - subTool.printHelp(toolOpts); - return 0; - } - } else { - printAvailableTools(); - } - - return 0; - } - - @Override - public void printHelp(ToolOptions opts) { - System.out.println("usage: sqoop " + getToolName() + " [COMMAND]"); - } +public class HelpTool + extends org.apache.sqoop.tool.HelpTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/ImportAllTablesTool.java b/src/java/com/cloudera/sqoop/tool/ImportAllTablesTool.java index bb961b31..0c7724b8 100644 --- a/src/java/com/cloudera/sqoop/tool/ImportAllTablesTool.java +++ b/src/java/com/cloudera/sqoop/tool/ImportAllTablesTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,76 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.hive.HiveImport; -import com.cloudera.sqoop.util.ImportException; - /** - * Tool that performs database imports of all tables in a database to HDFS. + * @deprecated Moving to use org.apache.sqoop namespace. 
*/ -public class ImportAllTablesTool extends ImportTool { - - public static final Log LOG = LogFactory.getLog( - ImportAllTablesTool.class.getName()); - - public ImportAllTablesTool() { - super("import-all-tables", true); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - HiveImport hiveImport = null; - - if (!init(options)) { - return 1; - } - - try { - if (options.doHiveImport()) { - hiveImport = new HiveImport(options, manager, options.getConf(), false); - } - - String [] tables = manager.listTables(); - if (null == tables) { - System.err.println("Could not retrieve tables list from server"); - LOG.error("manager.listTables() returned null"); - return 1; - } else { - for (String tableName : tables) { - importTable(options, tableName, hiveImport); - } - } - } catch (IOException ioe) { - LOG.error("Encountered IOException running import job: " - + ioe.toString()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } catch (ImportException ie) { - LOG.error("Error during import: " + ie.toString()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ie); - } else { - return 1; - } - } finally { - destroy(options); - } - - return 0; - } - +public class ImportAllTablesTool + extends org.apache.sqoop.tool.ImportAllTablesTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/ImportTool.java b/src/java/com/cloudera/sqoop/tool/ImportTool.java index 4253ac6b..8f00741a 100644 --- a/src/java/com/cloudera/sqoop/tool/ImportTool.java +++ b/src/java/com/cloudera/sqoop/tool/ImportTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,883 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; - -import java.sql.Connection; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Statement; -import java.sql.Types; -import java.util.List; -import java.util.Map; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.util.StringUtils; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.FileLayout; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.hive.HiveImport; -import com.cloudera.sqoop.manager.ImportJobContext; - -import com.cloudera.sqoop.metastore.JobData; -import com.cloudera.sqoop.metastore.JobStorage; -import com.cloudera.sqoop.metastore.JobStorageFactory; -import com.cloudera.sqoop.util.AppendUtils; -import com.cloudera.sqoop.util.ImportException; -import org.apache.hadoop.fs.Path; - /** - * Tool that performs database imports to HDFS. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class ImportTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog(ImportTool.class.getName()); - - private CodeGenTool codeGenerator; - - // true if this is an all-tables import. 
Set by a subclass which - // overrides the run() method of this tool (which can only do - // a single table). - private boolean allTables; - - // store check column type for incremental option - private int checkColumnType; +public class ImportTool + extends org.apache.sqoop.tool.ImportTool { public ImportTool() { - this("import", false); + super(); } public ImportTool(String toolName, boolean allTables) { - super(toolName); - this.codeGenerator = new CodeGenTool(); - this.allTables = allTables; + super(toolName, allTables); } - @Override - protected boolean init(SqoopOptions sqoopOpts) { - boolean ret = super.init(sqoopOpts); - codeGenerator.setManager(manager); - return ret; - } - - /** - * @return a list of jar files generated as part of this import process - */ - public List getGeneratedJarFiles() { - return this.codeGenerator.getGeneratedJarFiles(); - } - - /** - * @return true if the supplied options specify an incremental import. - */ - private boolean isIncremental(SqoopOptions options) { - return !options.getIncrementalMode().equals( - SqoopOptions.IncrementalMode.None); - } - - /** - * If this is an incremental import, then we should save the - * user's state back to the metastore (if this job was run - * from the metastore). Otherwise, log to the user what data - * they need to supply next time. - */ - private void saveIncrementalState(SqoopOptions options) - throws IOException { - if (!isIncremental(options)) { - return; - } - - Map descriptor = options.getStorageDescriptor(); - String jobName = options.getJobName(); - - if (null != jobName && null != descriptor) { - // Actually save it back to the metastore. - LOG.info("Saving incremental import state to the metastore"); - JobStorageFactory ssf = new JobStorageFactory(options.getConf()); - JobStorage storage = ssf.getJobStorage(descriptor); - storage.open(descriptor); - try { - // Save the 'parent' SqoopOptions; this does not contain the mutations - // to the SqoopOptions state that occurred over the course of this - // execution, except for the one we specifically want to memorize: - // the latest value of the check column. - JobData data = new JobData(options.getParent(), this); - storage.update(jobName, data); - LOG.info("Updated data for job: " + jobName); - } finally { - storage.close(); - } - } else { - // If there wasn't a parent SqoopOptions, then the incremental - // state data was stored in the current SqoopOptions. - LOG.info("Incremental import complete! To run another incremental " - + "import of all data following this import, supply the " - + "following arguments:"); - SqoopOptions.IncrementalMode incrementalMode = - options.getIncrementalMode(); - switch (incrementalMode) { - case AppendRows: - LOG.info(" --incremental append"); - break; - case DateLastModified: - LOG.info(" --incremental lastmodified"); - break; - default: - LOG.warn("Undefined incremental mode: " + incrementalMode); - break; - } - LOG.info(" --check-column " + options.getIncrementalTestColumn()); - LOG.info(" --last-value " + options.getIncrementalLastValue()); - LOG.info("(Consider saving this with 'sqoop job --create')"); - } - } - - /** - * Return the max value in the incremental-import test column. This - * value must be numeric. 
- */ - private Object getMaxColumnId(SqoopOptions options) throws SQLException { - StringBuilder sb = new StringBuilder(); - sb.append("SELECT MAX("); - sb.append(options.getIncrementalTestColumn()); - sb.append(") FROM "); - sb.append(options.getTableName()); - - String where = options.getWhereClause(); - if (null != where) { - sb.append(" WHERE "); - sb.append(where); - } - - Connection conn = manager.getConnection(); - Statement s = null; - ResultSet rs = null; - try { - s = conn.createStatement(); - rs = s.executeQuery(sb.toString()); - if (!rs.next()) { - // This probably means the table is empty. - LOG.warn("Unexpected: empty results for max value query?"); - return null; - } - - ResultSetMetaData rsmd = rs.getMetaData(); - checkColumnType = rsmd.getColumnType(1); - if (checkColumnType == Types.TIMESTAMP) { - return rs.getTimestamp(1); - } else if (checkColumnType == Types.DATE) { - return rs.getDate(1); - } else if (checkColumnType == Types.TIME) { - return rs.getTime(1); - } else { - return rs.getObject(1); - } - } finally { - try { - if (null != rs) { - rs.close(); - } - } catch (SQLException sqlE) { - LOG.warn("SQL Exception closing resultset: " + sqlE); - } - - try { - if (null != s) { - s.close(); - } - } catch (SQLException sqlE) { - LOG.warn("SQL Exception closing statement: " + sqlE); - } - } - } - - /** - * Determine if a column is date/time. - * @return true if column type is TIMESTAMP, DATE, or TIME. - */ - private boolean isDateTimeColumn(int columnType) { - return (columnType == Types.TIMESTAMP) - || (columnType == Types.DATE) - || (columnType == Types.TIME); - } - - /** - * Initialize the constraints which set the incremental import range. - * @return false if an import is not necessary, because the dataset has not - * changed. - */ - private boolean initIncrementalConstraints(SqoopOptions options, - ImportJobContext context) throws ImportException, IOException { - - // If this is an incremental import, determine the constraints - // to inject in the WHERE clause or $CONDITIONS for a query. - // Also modify the 'last value' field of the SqoopOptions to - // specify the current job start time / start row. - - if (!isIncremental(options)) { - return true; - } - - SqoopOptions.IncrementalMode incrementalMode = options.getIncrementalMode(); - String nextIncrementalValue = null; - - Object nextVal; - switch (incrementalMode) { - case AppendRows: - try { - nextVal = getMaxColumnId(options); - if (isDateTimeColumn(checkColumnType)) { - nextIncrementalValue = (nextVal == null) ? null - : manager.datetimeToQueryString(nextVal.toString(), - checkColumnType); - } else { - nextIncrementalValue = (nextVal == null) ? null : nextVal.toString(); - } - } catch (SQLException sqlE) { - throw new IOException(sqlE); - } - break; - case DateLastModified: - checkColumnType = Types.TIMESTAMP; - nextVal = manager.getCurrentDbTimestamp(); - if (null == nextVal) { - throw new IOException("Could not get current time from database"); - } - nextIncrementalValue = manager.datetimeToQueryString(nextVal.toString(), - checkColumnType); - break; - default: - throw new ImportException("Undefined incremental import type: " - + incrementalMode); - } - - // Build the WHERE clause components that are used to import - // only this incremental section. 
- StringBuilder sb = new StringBuilder(); - String prevEndpoint = options.getIncrementalLastValue(); - - if (isDateTimeColumn(checkColumnType) && null != prevEndpoint - && !prevEndpoint.startsWith("\'") && !prevEndpoint.endsWith("\'")) { - // Incremental imports based on date/time should be 'quoted' in - // ANSI SQL. If the user didn't specify single-quotes, put them - // around, here. - prevEndpoint = manager.datetimeToQueryString(prevEndpoint, - checkColumnType); - } - - String checkColName = manager.escapeColName( - options.getIncrementalTestColumn()); - LOG.info("Incremental import based on column " + checkColName); - if (null != prevEndpoint) { - if (prevEndpoint.equals(nextIncrementalValue)) { - LOG.info("No new rows detected since last import."); - return false; - } - LOG.info("Lower bound value: " + prevEndpoint); - sb.append(checkColName); - switch (incrementalMode) { - case AppendRows: - sb.append(" > "); - break; - case DateLastModified: - sb.append(" >= "); - break; - default: - throw new ImportException("Undefined comparison"); - } - sb.append(prevEndpoint); - sb.append(" AND "); - } - - if (null != nextIncrementalValue) { - sb.append(checkColName); - switch (incrementalMode) { - case AppendRows: - sb.append(" <= "); - break; - case DateLastModified: - sb.append(" < "); - break; - default: - throw new ImportException("Undefined comparison"); - } - sb.append(nextIncrementalValue); - } else { - sb.append(checkColName); - sb.append(" IS NULL "); - } - - LOG.info("Upper bound value: " + nextIncrementalValue); - - String prevWhereClause = options.getWhereClause(); - if (null != prevWhereClause) { - sb.append(" AND ("); - sb.append(prevWhereClause); - sb.append(")"); - } - - String newConstraints = sb.toString(); - options.setWhereClause(newConstraints); - - // Save this state for next time. - SqoopOptions recordOptions = options.getParent(); - if (null == recordOptions) { - recordOptions = options; - } - recordOptions.setIncrementalLastValue( - (nextVal == null) ? null : nextVal.toString()); - - return true; - } - - /** - * Import a table or query. - * @return true if an import was performed, false otherwise. - */ - protected boolean importTable(SqoopOptions options, String tableName, - HiveImport hiveImport) throws IOException, ImportException { - String jarFile = null; - - // Generate the ORM code for the tables. - jarFile = codeGenerator.generateORM(options, tableName); - - // Do the actual import. - ImportJobContext context = new ImportJobContext(tableName, jarFile, - options, getOutputPath(options, tableName)); - - // If we're doing an incremental import, set up the - // filtering conditions used to get the latest records. - if (!initIncrementalConstraints(options, context)) { - return false; - } - - if (null != tableName) { - manager.importTable(context); - } else { - manager.importQuery(context); - } - - if (options.isAppendMode()) { - AppendUtils app = new AppendUtils(context); - app.append(); - } - - // If the user wants this table to be in Hive, perform that post-load. - if (options.doHiveImport()) { - hiveImport.importTable(tableName, options.getHiveTableName(), false); - } - - saveIncrementalState(options); - - return true; - } - - /** - * @return the output path for the imported files; - * in append mode this will point to a temporary folder. - * if importing to hbase, this may return null. 
- */ - private Path getOutputPath(SqoopOptions options, String tableName) { - // Get output directory - String hdfsWarehouseDir = options.getWarehouseDir(); - String hdfsTargetDir = options.getTargetDir(); - Path outputPath = null; - if (options.isAppendMode()) { - // Use temporary path, later removed when appending - outputPath = AppendUtils.getTempAppendDir(tableName); - LOG.debug("Using temporary folder: " + outputPath.getName()); - } else { - // Try in this order: target-dir or warehouse-dir - if (hdfsTargetDir != null) { - outputPath = new Path(hdfsTargetDir); - } else if (hdfsWarehouseDir != null) { - outputPath = new Path(hdfsWarehouseDir, tableName); - } else if (null != tableName) { - outputPath = new Path(tableName); - } - } - - return outputPath; - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - HiveImport hiveImport = null; - - if (allTables) { - // We got into this method, but we should be in a subclass. - // (This method only handles a single table) - // This should not be reached, but for sanity's sake, test here. - LOG.error("ImportTool.run() can only handle a single table."); - return 1; - } - - if (!init(options)) { - return 1; - } - - codeGenerator.setManager(manager); - - try { - if (options.doHiveImport()) { - hiveImport = new HiveImport(options, manager, options.getConf(), false); - } - - // Import a single table (or query) the user specified. - importTable(options, options.getTableName(), hiveImport); - } catch (IllegalArgumentException iea) { - LOG.error("Imported Failed: " + iea.getMessage()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw iea; - } - return 1; - } catch (IOException ioe) { - LOG.error("Encountered IOException running import job: " - + StringUtils.stringifyException(ioe)); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } catch (ImportException ie) { - LOG.error("Error during import: " + ie.toString()); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ie); - } else { - return 1; - } - } finally { - destroy(options); - } - - return 0; - } - - /** - * Construct the set of options that control imports, either of one - * table or a batch of tables. - * @return the RelatedOptions that can be used to parse the import - * arguments. 
- */ - protected RelatedOptions getImportOptions() { - // Imports - RelatedOptions importOpts = new RelatedOptions("Import control arguments"); - - importOpts.addOption(OptionBuilder - .withDescription("Use direct import fast path") - .withLongOpt(DIRECT_ARG) - .create()); - - if (!allTables) { - importOpts.addOption(OptionBuilder.withArgName("table-name") - .hasArg().withDescription("Table to read") - .withLongOpt(TABLE_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("col,col,col...") - .hasArg().withDescription("Columns to import from table") - .withLongOpt(COLUMNS_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("column-name") - .hasArg() - .withDescription("Column of the table used to split work units") - .withLongOpt(SPLIT_BY_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("where clause") - .hasArg().withDescription("WHERE clause to use during import") - .withLongOpt(WHERE_ARG) - .create()); - importOpts.addOption(OptionBuilder - .withDescription("Imports data in append mode") - .withLongOpt(APPEND_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg().withDescription("HDFS plain table destination") - .withLongOpt(TARGET_DIR_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("statement") - .hasArg() - .withDescription("Import results of SQL 'statement'") - .withLongOpt(SQL_QUERY_ARG) - .create(SQL_QUERY_SHORT_ARG)); - importOpts.addOption(OptionBuilder.withArgName("statement") - .hasArg() - .withDescription("Set boundary query for retrieving max and min" - + " value of the primary key") - .withLongOpt(SQL_QUERY_BOUNDARY) - .create()); - } - - importOpts.addOption(OptionBuilder.withArgName("dir") - .hasArg().withDescription("HDFS parent for table destination") - .withLongOpt(WAREHOUSE_DIR_ARG) - .create()); - importOpts.addOption(OptionBuilder - .withDescription("Imports data to SequenceFiles") - .withLongOpt(FMT_SEQUENCEFILE_ARG) - .create()); - importOpts.addOption(OptionBuilder - .withDescription("Imports data as plain text (default)") - .withLongOpt(FMT_TEXTFILE_ARG) - .create()); - importOpts.addOption(OptionBuilder - .withDescription("Imports data to Avro data files") - .withLongOpt(FMT_AVRODATAFILE_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("n") - .hasArg().withDescription("Use 'n' map tasks to import in parallel") - .withLongOpt(NUM_MAPPERS_ARG) - .create(NUM_MAPPERS_SHORT_ARG)); - importOpts.addOption(OptionBuilder - .withDescription("Enable compression") - .withLongOpt(COMPRESS_ARG) - .create(COMPRESS_SHORT_ARG)); - importOpts.addOption(OptionBuilder.withArgName("codec") - .hasArg() - .withDescription("Compression codec to use for import") - .withLongOpt(COMPRESSION_CODEC_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("n") - .hasArg() - .withDescription("Split the input stream every 'n' bytes " - + "when importing in direct mode") - .withLongOpt(DIRECT_SPLIT_SIZE_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("n") - .hasArg() - .withDescription("Set the maximum size for an inline LOB") - .withLongOpt(INLINE_LOB_LIMIT_ARG) - .create()); - importOpts.addOption(OptionBuilder.withArgName("n") - .hasArg() - .withDescription("Set number 'n' of rows to fetch from the " - + "database when more rows are needed") - .withLongOpt(FETCH_SIZE_ARG) - .create()); - - return importOpts; - } - - /** - * Return options for incremental import. 
- */ - protected RelatedOptions getIncrementalOptions() { - RelatedOptions incrementalOpts = - new RelatedOptions("Incremental import arguments"); - - incrementalOpts.addOption(OptionBuilder.withArgName("import-type") - .hasArg() - .withDescription( - "Define an incremental import of type 'append' or 'lastmodified'") - .withLongOpt(INCREMENT_TYPE_ARG) - .create()); - incrementalOpts.addOption(OptionBuilder.withArgName("column") - .hasArg() - .withDescription("Source column to check for incremental change") - .withLongOpt(INCREMENT_COL_ARG) - .create()); - incrementalOpts.addOption(OptionBuilder.withArgName("value") - .hasArg() - .withDescription("Last imported value in the incremental check column") - .withLongOpt(INCREMENT_LAST_VAL_ARG) - .create()); - - return incrementalOpts; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - - toolOptions.addUniqueOptions(getCommonOptions()); - toolOptions.addUniqueOptions(getImportOptions()); - if (!allTables) { - toolOptions.addUniqueOptions(getIncrementalOptions()); - } - toolOptions.addUniqueOptions(getOutputFormatOptions()); - toolOptions.addUniqueOptions(getInputFormatOptions()); - toolOptions.addUniqueOptions(getHiveOptions(true)); - toolOptions.addUniqueOptions(getHBaseOptions()); - - // get common codegen opts. - RelatedOptions codeGenOpts = getCodeGenOpts(allTables); - - // add import-specific codegen opts: - codeGenOpts.addOption(OptionBuilder.withArgName("file") - .hasArg() - .withDescription("Disable code generation; use specified jar") - .withLongOpt(JAR_FILE_NAME_ARG) - .create()); - - toolOptions.addUniqueOptions(codeGenOpts); - } - - @Override - /** {@inheritDoc} */ - public void printHelp(ToolOptions toolOptions) { - super.printHelp(toolOptions); - System.out.println(""); - if (allTables) { - System.out.println("At minimum, you must specify --connect"); - } else { - System.out.println( - "At minimum, you must specify --connect and --table"); - } - - System.out.println( - "Arguments to mysqldump and other subprograms may be supplied"); - System.out.println( - "after a '--' on the command line."); - } - - private void applyIncrementalOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - if (in.hasOption(INCREMENT_TYPE_ARG)) { - String incrementalTypeStr = in.getOptionValue(INCREMENT_TYPE_ARG); - if ("append".equals(incrementalTypeStr)) { - out.setIncrementalMode(SqoopOptions.IncrementalMode.AppendRows); - // This argument implies ability to append to the same directory. - out.setAppendMode(true); - } else if ("lastmodified".equals(incrementalTypeStr)) { - out.setIncrementalMode(SqoopOptions.IncrementalMode.DateLastModified); - } else { - throw new InvalidOptionsException("Unknown incremental import mode: " - + incrementalTypeStr + ". Use 'append' or 'lastmodified'." 
- + HELP_STR); - } - } - - if (in.hasOption(INCREMENT_COL_ARG)) { - out.setIncrementalTestColumn(in.getOptionValue(INCREMENT_COL_ARG)); - } - - if (in.hasOption(INCREMENT_LAST_VAL_ARG)) { - out.setIncrementalLastValue(in.getOptionValue(INCREMENT_LAST_VAL_ARG)); - } - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - try { - applyCommonOptions(in, out); - - if (in.hasOption(DIRECT_ARG)) { - out.setDirectMode(true); - } - - if (!allTables) { - if (in.hasOption(TABLE_ARG)) { - out.setTableName(in.getOptionValue(TABLE_ARG)); - } - - if (in.hasOption(COLUMNS_ARG)) { - String[] cols= in.getOptionValue(COLUMNS_ARG).split(","); - for (int i=0; i 1 - && options.getSplitByCol() == null) { - throw new InvalidOptionsException( - "When importing query results in parallel, you must specify --" - + SPLIT_BY_ARG + "." + HELP_STR); - } else if (options.isDirect() - && options.getFileLayout() != SqoopOptions.FileLayout.TextFile - && options.getConnectString().contains("jdbc:mysql://")) { - throw new InvalidOptionsException( - "MySQL direct export currently supports only text output format." - + "Parameters --as-sequencefile and --as-avrodatafile are not " - + "supported with --direct params in MySQL case."); - } else if (!options.getMapColumnJava().isEmpty() - && options.getFileLayout() == FileLayout.AvroDataFile) { - throw new InvalidOptionsException( - "Overriding column types is currently not supported with avro."); - } - } - - /** - * Validate the incremental import options. - */ - private void validateIncrementalOptions(SqoopOptions options) - throws InvalidOptionsException { - if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None - && options.getIncrementalTestColumn() == null) { - throw new InvalidOptionsException( - "For an incremental import, the check column must be specified " - + "with --" + INCREMENT_COL_ARG + ". " + HELP_STR); - } - - if (options.getIncrementalMode() == SqoopOptions.IncrementalMode.None - && options.getIncrementalTestColumn() != null) { - throw new InvalidOptionsException( - "You must specify an incremental import mode with --" - + INCREMENT_TYPE_ARG + ". " + HELP_STR); - } - - if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None - && options.getTableName() == null) { - throw new InvalidOptionsException("Incremental imports require a table." - + HELP_STR); - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - // If extraArguments is full, check for '--' followed by args for - // mysqldump or other commands we rely on. 
- options.setExtraArgs(getSubcommandArgs(extraArguments)); - int dashPos = getDashPosition(extraArguments); - - if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateImportOptions(options); - validateIncrementalOptions(options); - validateCommonOptions(options); - validateCodeGenOptions(options); - validateOutputFormatOptions(options); - validateHBaseOptions(options); - validateHiveOptions(options); - } } - diff --git a/src/java/com/cloudera/sqoop/tool/JobTool.java b/src/java/com/cloudera/sqoop/tool/JobTool.java index 3158a957..f1554d0c 100644 --- a/src/java/com/cloudera/sqoop/tool/JobTool.java +++ b/src/java/com/cloudera/sqoop/tool/JobTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,390 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.ParseException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolRunner; - -import org.apache.log4j.Category; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.metastore.hsqldb.HsqldbJobStorage; -import com.cloudera.sqoop.metastore.JobData; -import com.cloudera.sqoop.metastore.JobStorage; -import com.cloudera.sqoop.metastore.JobStorageFactory; - /** - * Tool that creates and executes saved jobs. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class JobTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog( - JobTool.class.getName()); - - private enum JobOp { - JobCreate, - JobDelete, - JobExecute, - JobList, - JobShow, - }; - - private Map storageDescriptor; - private String jobName; - private JobOp operation; - private JobStorage storage; - - public JobTool() { - super("job"); - } - - /** - * Given an array of strings, return all elements of this - * array up to (but not including) the first instance of "--". - */ - private String [] getElementsUpToDoubleDash(String [] array) { - String [] parseableChildArgv = null; - for (int i = 0; i < array.length; i++) { - if ("--".equals(array[i])) { - parseableChildArgv = Arrays.copyOfRange(array, 0, i); - break; - } - } - - if (parseableChildArgv == null) { - // Didn't find any nested '--'. - parseableChildArgv = array; - } - - return parseableChildArgv; - } - - /** - * Given an array of strings, return the first instance - * of "--" and all following elements. - * If no "--" exists, return null. 
- */ - private String [] getElementsAfterDoubleDash(String [] array) { - String [] extraChildArgv = null; - for (int i = 0; i < array.length; i++) { - if ("--".equals(array[i])) { - extraChildArgv = Arrays.copyOfRange(array, i, array.length); - break; - } - } - - return extraChildArgv; - } - - private int configureChildTool(SqoopOptions childOptions, - SqoopTool childTool, String [] childArgv) { - // Within the child arguments there may be a '--' followed by - // dependent args. Stash them off to the side. - - // Everything up to the '--'. - String [] parseableChildArgv = getElementsUpToDoubleDash(childArgv); - - // The '--' and any subsequent args. - String [] extraChildArgv = getElementsAfterDoubleDash(childArgv); - - // Now feed the arguments into the tool itself. - try { - childOptions = childTool.parseArguments(parseableChildArgv, - null, childOptions, false); - childTool.appendArgs(extraChildArgv); - childTool.validateOptions(childOptions); - } catch (ParseException pe) { - LOG.error("Error parsing arguments to the job-specific tool."); - LOG.error("See 'sqoop help ' for usage."); - return 1; - } catch (SqoopOptions.InvalidOptionsException e) { - System.err.println(e.getMessage()); - return 1; - } - - return 0; // Success. - } - - private int createJob(SqoopOptions options) throws IOException { - // In our extraArguments array, we should have a '--' followed by - // a tool name, and any tool-specific arguments. - // Create an instance of the named tool and then configure it to - // get a SqoopOptions out which we will serialize into a job. - int dashPos = getDashPosition(extraArguments); - int toolArgPos = dashPos + 1; - if (null == extraArguments || toolArgPos < 0 - || toolArgPos >= extraArguments.length) { - LOG.error("No tool specified; cannot create a job."); - LOG.error("Use: sqoop job --create " - + "-- [tool-args]"); - return 1; - } - - String jobToolName = extraArguments[toolArgPos]; - SqoopTool jobTool = SqoopTool.getTool(jobToolName); - if (null == jobTool) { - LOG.error("No such tool available: " + jobToolName); - return 1; - } - - // Create a SqoopOptions and Configuration based on the current one, - // but deep-copied. This will be populated within the job. - SqoopOptions jobOptions = new SqoopOptions(); - jobOptions.setConf(new Configuration(options.getConf())); - - // Get the arguments to feed to the child tool. - String [] childArgs = Arrays.copyOfRange(extraArguments, toolArgPos + 1, - extraArguments.length); - - int confRet = configureChildTool(jobOptions, jobTool, childArgs); - if (0 != confRet) { - // Error. - return confRet; - } - - // Now that the tool is fully configured, materialize the job. - JobData jobData = new JobData(jobOptions, jobTool); - this.storage.create(jobName, jobData); - return 0; // Success. - } - - private int listJobs(SqoopOptions opts) throws IOException { - List jobNames = storage.list(); - System.out.println("Available jobs:"); - for (String name : jobNames) { - System.out.println(" " + name); - } - return 0; - } - - private int deleteJob(SqoopOptions opts) throws IOException { - this.storage.delete(jobName); - return 0; - } - - private int execJob(SqoopOptions opts) throws IOException { - JobData data = this.storage.read(jobName); - if (null == data) { - LOG.error("No such job: " + jobName); - return 1; - } - - SqoopOptions childOpts = data.getSqoopOptions(); - SqoopTool childTool = data.getSqoopTool(); - - // Don't overwrite the original SqoopOptions with the - // arguments; make a child options. 
- - SqoopOptions clonedOpts = (SqoopOptions) childOpts.clone(); - clonedOpts.setParent(childOpts); - - int dashPos = getDashPosition(extraArguments); - String [] childArgv; - if (dashPos >= extraArguments.length) { - childArgv = new String[0]; - } else { - childArgv = Arrays.copyOfRange(extraArguments, dashPos + 1, - extraArguments.length); - } - - int confRet = configureChildTool(clonedOpts, childTool, childArgv); - if (0 != confRet) { - // Error. - return confRet; - } - - return childTool.run(clonedOpts); - } - - private int showJob(SqoopOptions opts) throws IOException { - JobData data = this.storage.read(jobName); - if (null == data) { - LOG.error("No such job: " + jobName); - return 1; - } - - SqoopOptions childOpts = data.getSqoopOptions(); - SqoopTool childTool = data.getSqoopTool(); - - System.out.println("Job: " + jobName); - System.out.println("Tool: " + childTool.getToolName()); - - System.out.println("Options:"); - System.out.println("----------------------------"); - Properties props = childOpts.writeProperties(); - for (Map.Entry entry : props.entrySet()) { - System.out.println(entry.getKey().toString() + " = " + entry.getValue()); - } - - // TODO: This does not show entries in the Configuration - // (SqoopOptions.getConf()) which were stored as different from the - // default. - - return 0; - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - // Get a JobStorage instance to use to materialize this job. - JobStorageFactory ssf = new JobStorageFactory(options.getConf()); - this.storage = ssf.getJobStorage(storageDescriptor); - if (null == this.storage) { - LOG.error("There is no JobStorage implementation available"); - LOG.error("that can read your specified storage descriptor."); - LOG.error("Don't know where to save this job info! You may"); - LOG.error("need to specify the connect string with --meta-connect."); - return 1; - } - - try { - // Open the storage layer. - this.storage.open(this.storageDescriptor); - - // And now determine what operation to perform with it. - switch (operation) { - case JobCreate: - return createJob(options); - case JobDelete: - return deleteJob(options); - case JobExecute: - return execJob(options); - case JobList: - return listJobs(options); - case JobShow: - return showJob(options); - default: - LOG.error("Undefined job operation: " + operation); - return 1; - } - } catch (IOException ioe) { - LOG.error("I/O error performing job operation: " - + StringUtils.stringifyException(ioe)); - return 1; - } finally { - if (null != this.storage) { - try { - storage.close(); - } catch (IOException ioe) { - LOG.warn("IOException closing JobStorage: " - + StringUtils.stringifyException(ioe)); - } - } - } - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - toolOptions.addUniqueOptions(getJobOptions()); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - if (in.hasOption(VERBOSE_ARG)) { - // Immediately switch into DEBUG logging. 
- Category sqoopLogger = Logger.getLogger( - Sqoop.class.getName()).getParent(); - sqoopLogger.setLevel(Level.DEBUG); - LOG.debug("Enabled debug logging."); - } - - if (in.hasOption(HELP_ARG)) { - ToolOptions toolOpts = new ToolOptions(); - configureOptions(toolOpts); - printHelp(toolOpts); - throw new InvalidOptionsException(""); - } - - this.storageDescriptor = new TreeMap(); - - if (in.hasOption(STORAGE_METASTORE_ARG)) { - this.storageDescriptor.put(HsqldbJobStorage.META_CONNECT_KEY, - in.getOptionValue(STORAGE_METASTORE_ARG)); - } - - // These are generated via an option group; exactly one - // of this exhaustive list will always be selected. - if (in.hasOption(JOB_CMD_CREATE_ARG)) { - this.operation = JobOp.JobCreate; - this.jobName = in.getOptionValue(JOB_CMD_CREATE_ARG); - } else if (in.hasOption(JOB_CMD_DELETE_ARG)) { - this.operation = JobOp.JobDelete; - this.jobName = in.getOptionValue(JOB_CMD_DELETE_ARG); - } else if (in.hasOption(JOB_CMD_EXEC_ARG)) { - this.operation = JobOp.JobExecute; - this.jobName = in.getOptionValue(JOB_CMD_EXEC_ARG); - } else if (in.hasOption(JOB_CMD_LIST_ARG)) { - this.operation = JobOp.JobList; - } else if (in.hasOption(JOB_CMD_SHOW_ARG)) { - this.operation = JobOp.JobShow; - this.jobName = in.getOptionValue(JOB_CMD_SHOW_ARG); - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - if (null == operation - || (null == this.jobName && operation != JobOp.JobList)) { - throw new InvalidOptionsException("No job operation specified" - + HELP_STR); - } - - if (operation == JobOp.JobCreate) { - // Check that we have a '--' followed by at least a tool name. - if (extraArguments == null || extraArguments.length == 0) { - throw new InvalidOptionsException( - "Expected: -- [tool-args] " - + HELP_STR); - } - } - - int dashPos = getDashPosition(extraArguments); - if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { - throw new InvalidOptionsException(HELP_STR); - } - } - - @Override - /** {@inheritDoc} */ - public void printHelp(ToolOptions opts) { - System.out.println("usage: sqoop " + getToolName() - + " [GENERIC-ARGS] [JOB-ARGS] [-- [] [TOOL-ARGS]]"); - System.out.println(""); - - opts.printHelp(); - - System.out.println(""); - System.out.println("Generic Hadoop command-line arguments:"); - System.out.println("(must preceed any tool-specific arguments)"); - ToolRunner.printGenericCommandUsage(System.out); - } +public class JobTool + extends org.apache.sqoop.tool.JobTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/ListDatabasesTool.java b/src/java/com/cloudera/sqoop/tool/ListDatabasesTool.java index 1ac5c2c4..29d8923a 100644 --- a/src/java/com/cloudera/sqoop/tool/ListDatabasesTool.java +++ b/src/java/com/cloudera/sqoop/tool/ListDatabasesTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,76 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package com.cloudera.sqoop.tool; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.ToolOptions; - /** - * Tool that lists available databases on a server. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class ListDatabasesTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog( - ListDatabasesTool.class.getName()); - - public ListDatabasesTool() { - super("list-databases"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - if (!init(options)) { - return 1; - } - - try { - String [] databases = manager.listDatabases(); - if (null == databases) { - System.err.println("Could not retrieve database list from server"); - LOG.error("manager.listDatabases() returned null"); - return 1; - } else { - for (String db : databases) { - System.out.println(db); - } - } - } finally { - destroy(options); - } - - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - toolOptions.addUniqueOptions(getCommonOptions()); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - applyCommonOptions(in, out); - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - if (hasUnrecognizedArgs(extraArguments)) { - throw new InvalidOptionsException(HELP_STR); - } - validateCommonOptions(options); - } +public class ListDatabasesTool + extends org.apache.sqoop.tool.ListDatabasesTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/ListTablesTool.java b/src/java/com/cloudera/sqoop/tool/ListTablesTool.java index 295a16e0..08392efb 100644 --- a/src/java/com/cloudera/sqoop/tool/ListTablesTool.java +++ b/src/java/com/cloudera/sqoop/tool/ListTablesTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,76 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.ToolOptions; - /** - * Tool that lists available tables in a database. + * @deprecated Moving to use org.apache.sqoop namespace. 
*/ -public class ListTablesTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog( - ListTablesTool.class.getName()); - - public ListTablesTool() { - super("list-tables"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - if (!init(options)) { - return 1; - } - - try { - String [] tables = manager.listTables(); - if (null == tables) { - System.err.println("Could not retrieve tables list from server"); - LOG.error("manager.listTables() returned null"); - return 1; - } else { - for (String tbl : tables) { - System.out.println(tbl); - } - } - } finally { - destroy(options); - } - - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - toolOptions.addUniqueOptions(getCommonOptions()); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - applyCommonOptions(in, out); - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - if (hasUnrecognizedArgs(extraArguments)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateCommonOptions(options); - } +public class ListTablesTool + extends org.apache.sqoop.tool.ListTablesTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/MergeTool.java b/src/java/com/cloudera/sqoop/tool/MergeTool.java index ae85524f..1ae2a4c0 100644 --- a/src/java/com/cloudera/sqoop/tool/MergeTool.java +++ b/src/java/com/cloudera/sqoop/tool/MergeTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,223 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.IOException; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.hadoop.util.StringUtils; - -import org.apache.log4j.Category; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import com.cloudera.sqoop.Sqoop; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.mapreduce.MergeJob; - /** - * Tool that merges a more recent dataset on top of an older one. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class MergeTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog(MergeTool.class.getName()); - - public MergeTool() { - this("merge"); - } - - public MergeTool(String toolName) { - super(toolName); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - try { - // Configure and execute a MapReduce job to merge these datasets. 
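// A brief aside on this step (a sketch of the flow, not part of the patch):
// MergeJob receives the fully populated SqoopOptions and drives a MapReduce
// job that lays the NEW_DATASET_ARG path over the OLD_DATASET_ARG path,
// joining records on the MERGE_KEY_ARG column; runMergeJob() returns false
// when that job fails, which is why the lines below map a false return to
// exit code 1.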
- MergeJob mergeJob = new MergeJob(options); - if (!mergeJob.runMergeJob()) { - LOG.error("MapReduce job failed!"); - return 1; - } - } catch (IOException ioe) { - LOG.error("Encountered IOException running import job: " - + StringUtils.stringifyException(ioe)); - if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { - throw new RuntimeException(ioe); - } else { - return 1; - } - } - - return 0; - } - - /** - * Construct the set of options that control imports, either of one - * table or a batch of tables. - * @return the RelatedOptions that can be used to parse the import - * arguments. - */ - protected RelatedOptions getMergeOptions() { - // Imports - RelatedOptions mergeOpts = new RelatedOptions("Merge arguments"); - - mergeOpts.addOption(OptionBuilder.withArgName("file") - .hasArg().withDescription("Load class from specified jar file") - .withLongOpt(JAR_FILE_NAME_ARG) - .create()); - - mergeOpts.addOption(OptionBuilder.withArgName("name") - .hasArg().withDescription("Specify record class name to load") - .withLongOpt(CLASS_NAME_ARG) - .create()); - - mergeOpts.addOption(OptionBuilder.withArgName("path") - .hasArg().withDescription("Path to the more recent data set") - .withLongOpt(NEW_DATASET_ARG) - .create()); - - mergeOpts.addOption(OptionBuilder.withArgName("path") - .hasArg().withDescription("Path to the older data set") - .withLongOpt(OLD_DATASET_ARG) - .create()); - - mergeOpts.addOption(OptionBuilder.withArgName("path") - .hasArg().withDescription("Destination path for merged results") - .withLongOpt(TARGET_DIR_ARG) - .create()); - - mergeOpts.addOption(OptionBuilder.withArgName("column") - .hasArg().withDescription("Key column to use to join results") - .withLongOpt(MERGE_KEY_ARG) - .create()); - - // Since the "common" options aren't used in the merge tool, - // add these settings here. - mergeOpts.addOption(OptionBuilder - .withDescription("Print more information while working") - .withLongOpt(VERBOSE_ARG) - .create()); - mergeOpts.addOption(OptionBuilder - .withDescription("Print usage instructions") - .withLongOpt(HELP_ARG) - .create()); - - return mergeOpts; - } - - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - toolOptions.addUniqueOptions(getMergeOptions()); - } - - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - - if (in.hasOption(VERBOSE_ARG)) { - // Immediately switch into DEBUG logging. - Category sqoopLogger = Logger.getLogger( - Sqoop.class.getName()).getParent(); - sqoopLogger.setLevel(Level.DEBUG); - LOG.debug("Enabled debug logging."); - } - - if (in.hasOption(HELP_ARG)) { - ToolOptions toolOpts = new ToolOptions(); - configureOptions(toolOpts); - printHelp(toolOpts); - throw new InvalidOptionsException(""); - } - - if (in.hasOption(JAR_FILE_NAME_ARG)) { - out.setExistingJarName(in.getOptionValue(JAR_FILE_NAME_ARG)); - } - - if (in.hasOption(CLASS_NAME_ARG)) { - out.setClassName(in.getOptionValue(CLASS_NAME_ARG)); - } - - if (in.hasOption(NEW_DATASET_ARG)) { - out.setMergeNewPath(in.getOptionValue(NEW_DATASET_ARG)); - } - - if (in.hasOption(OLD_DATASET_ARG)) { - out.setMergeOldPath(in.getOptionValue(OLD_DATASET_ARG)); - } - - if (in.hasOption(TARGET_DIR_ARG)) { - out.setTargetDir(in.getOptionValue(TARGET_DIR_ARG)); - } - - if (in.hasOption(MERGE_KEY_ARG)) { - out.setMergeKeyCol(in.getOptionValue(MERGE_KEY_ARG)); - } - } - - /** - * Validate merge-specific arguments. 
- * @param options the configured SqoopOptions to check - */ - protected void validateMergeOptions(SqoopOptions options) - throws InvalidOptionsException { - - if (options.getMergeNewPath() == null) { - throw new InvalidOptionsException("Must set the new dataset path with --" - + NEW_DATASET_ARG + "." + HELP_STR); - } - - if (options.getMergeOldPath() == null) { - throw new InvalidOptionsException("Must set the old dataset path with --" - + OLD_DATASET_ARG + "." + HELP_STR); - } - - if (options.getMergeKeyCol() == null) { - throw new InvalidOptionsException("Must set the merge key column with --" - + MERGE_KEY_ARG + "." + HELP_STR); - } - - if (options.getTargetDir() == null) { - throw new InvalidOptionsException("Must set the target directory with --" - + TARGET_DIR_ARG + "." + HELP_STR); - } - - if (options.getClassName() == null) { - throw new InvalidOptionsException("Must set the SqoopRecord class " - + "implementation to use with --" + CLASS_NAME_ARG + "." - + HELP_STR); - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - - // If extraArguments is full, check for '--' followed by args for - // mysqldump or other commands we rely on. - options.setExtraArgs(getSubcommandArgs(extraArguments)); - int dashPos = getDashPosition(extraArguments); - - if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { - throw new InvalidOptionsException(HELP_STR); - } - - validateMergeOptions(options); - } +public class MergeTool + extends org.apache.sqoop.tool.MergeTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/MetastoreTool.java b/src/java/com/cloudera/sqoop/tool/MetastoreTool.java index 9bf34ce6..210f9cc9 100644 --- a/src/java/com/cloudera/sqoop/tool/MetastoreTool.java +++ b/src/java/com/cloudera/sqoop/tool/MetastoreTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,78 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.RelatedOptions; -import com.cloudera.sqoop.cli.ToolOptions; - -import com.cloudera.sqoop.metastore.hsqldb.HsqldbMetaStore; - /** - * Tool that runs a standalone Sqoop metastore. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class MetastoreTool extends BaseSqoopTool { - - public static final Log LOG = LogFactory.getLog( - MetastoreTool.class.getName()); - - private HsqldbMetaStore metastore; - - // If set to true, shut an existing metastore down. 
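// As run() below illustrates, this flag selects between the tool's two modes:
// with METASTORE_SHUTDOWN_ARG given, it asks an already running HsqldbMetaStore
// to shut down; otherwise it starts the metastore and blocks in waitForServer()
// until the server thread exits.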
- private boolean shutdown = false; - - public MetastoreTool() { - super("metastore"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - metastore = new HsqldbMetaStore(options.getConf()); - if (shutdown) { - LOG.info("Shutting down metastore..."); - metastore.shutdown(); - } else { - metastore.start(); - metastore.waitForServer(); - LOG.info("Server thread has quit."); - } - return 0; - } - - @Override - /** Configure the command-line arguments we expect to receive */ - public void configureOptions(ToolOptions toolOptions) { - RelatedOptions opts = new RelatedOptions("metastore arguments"); - opts.addOption(OptionBuilder - .withDescription("Cleanly shut down a running metastore") - .withLongOpt(METASTORE_SHUTDOWN_ARG) - .create()); - - toolOptions.addUniqueOptions(opts); - } - - @Override - /** {@inheritDoc} */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - if (in.hasOption(METASTORE_SHUTDOWN_ARG)) { - this.shutdown = true; - } - } - - @Override - /** {@inheritDoc} */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - } +public class MetastoreTool + extends org.apache.sqoop.tool.MetastoreTool { } - diff --git a/src/java/com/cloudera/sqoop/tool/SqoopTool.java b/src/java/com/cloudera/sqoop/tool/SqoopTool.java index 87befd30..c519d209 100644 --- a/src/java/com/cloudera/sqoop/tool/SqoopTool.java +++ b/src/java/com/cloudera/sqoop/tool/SqoopTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,491 +15,39 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Properties; import java.util.Set; -import java.util.TreeMap; -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.ParseException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolRunner; - -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; -import com.cloudera.sqoop.cli.SqoopParser; -import com.cloudera.sqoop.cli.ToolOptions; -import com.cloudera.sqoop.config.ConfigurationHelper; - -import com.cloudera.sqoop.util.ClassLoaderStack; /** - * Base class for Sqoop subprograms (e.g., SqoopImport, SqoopExport, etc.) - * Allows subprograms to configure the arguments they accept and - * provides an entry-point to the subprogram. + * @deprecated Moving to use org.apache.sqoop namespace. 
*/ -public abstract class SqoopTool { +public abstract class SqoopTool + extends org.apache.sqoop.tool.SqoopTool { - public static final Log LOG = LogFactory.getLog(SqoopTool.class.getName()); + public static final String TOOL_PLUGINS_KEY = + org.apache.sqoop.tool.SqoopTool.TOOL_PLUGINS_KEY; - /** - * Configuration key that specifies the set of ToolPlugin instances to load - * before determining which SqoopTool instance to load. - */ - public static final String TOOL_PLUGINS_KEY = "sqoop.tool.plugins"; - - private static final Map> TOOLS; - private static final Map DESCRIPTIONS; - - static { - // All SqoopTool instances should be registered here so that - // they can be found internally. - TOOLS = new TreeMap>(); - DESCRIPTIONS = new TreeMap(); - - registerTool("codegen", CodeGenTool.class, - "Generate code to interact with database records"); - registerTool("create-hive-table", CreateHiveTableTool.class, - "Import a table definition into Hive"); - registerTool("eval", EvalSqlTool.class, - "Evaluate a SQL statement and display the results"); - registerTool("export", ExportTool.class, - "Export an HDFS directory to a database table"); - registerTool("import", ImportTool.class, - "Import a table from a database to HDFS"); - registerTool("import-all-tables", ImportAllTablesTool.class, - "Import tables from a database to HDFS"); - registerTool("help", HelpTool.class, "List available commands"); - registerTool("list-databases", ListDatabasesTool.class, - "List available databases on a server"); - registerTool("list-tables", ListTablesTool.class, - "List available tables in a database"); - registerTool("merge", MergeTool.class, - "Merge results of incremental imports"); - registerTool("metastore", MetastoreTool.class, - "Run a standalone Sqoop metastore"); - registerTool("job", JobTool.class, - "Work with saved jobs"); - registerTool("version", VersionTool.class, - "Display version information"); - } - - /** - * Add a tool to the available set of SqoopTool instances. - * @param toolName the name the user access the tool through. - * @param cls the class providing the tool. - * @param description a user-friendly description of the tool's function. - */ - private static void registerTool(String toolName, - Class cls, String description) { - Class existing = TOOLS.get(toolName); - if (null != existing) { - // Already have a tool with this name. Refuse to start. - throw new RuntimeException("A plugin is attempting to register a tool " - + "with name " + toolName + ", but this tool already exists (" - + existing.getName() + ")"); - } - - TOOLS.put(toolName, cls); - DESCRIPTIONS.put(toolName, description); - } - - /** - * Add tool to available set of SqoopTool instances using the ToolDesc - * struct as the sole argument. - */ - private static void registerTool(ToolDesc toolDescription) { - registerTool(toolDescription.getName(), toolDescription.getToolClass(), - toolDescription.getDesc()); - } - - /** - * Load plugins referenced in sqoop-site.xml or other config (e.g., tools.d/), - * to allow external tool definitions. - * - * @return the Configuration used to load the plugins. 
- */ - public static Configuration loadPlugins(Configuration conf) { - conf = loadPluginsFromConfDir(conf); - List plugins = conf.getInstances(TOOL_PLUGINS_KEY, - ToolPlugin.class); - for (ToolPlugin plugin : plugins) { - LOG.debug("Loading plugin: " + plugin.getClass().getName()); - List descriptions = plugin.getTools(); - for (ToolDesc desc : descriptions) { - LOG.debug(" Adding tool: " + desc.getName() - + " -> " + desc.getToolClass().getName()); - registerTool(desc); - } - } - - return conf; - } - - /** - * If $SQOOP_CONF_DIR/tools.d/ exists and sqoop.tool.plugins is not set, - * then we look through the files in that directory; they should contain - * lines of the form 'plugin.class.name[=/path/to/containing.jar]'. - * - *
- * Put all plugin.class.names into the Configuration, and load any - * specified jars into the ClassLoader. - *
- * - * @param conf the current configuration to populate with class names. - * @return conf again, after possibly populating sqoop.tool.plugins. - */ - private static Configuration loadPluginsFromConfDir(Configuration conf) { - if (conf.get(TOOL_PLUGINS_KEY) != null) { - LOG.debug(TOOL_PLUGINS_KEY + " is set; ignoring tools.d"); - return conf; - } - - String confDirName = System.getenv("SQOOP_CONF_DIR"); - if (null == confDirName) { - LOG.warn("$SQOOP_CONF_DIR has not been set in the environment. " - + "Cannot check for additional configuration."); - return conf; - } - - File confDir = new File(confDirName); - File toolsDir = new File(confDir, "tools.d"); - - if (toolsDir.exists() && toolsDir.isDirectory()) { - // We have a tools.d subdirectory. Get the file list, sort it, - // and process them in order. - String [] fileNames = toolsDir.list(); - Arrays.sort(fileNames); - - for (String fileName : fileNames) { - File f = new File(toolsDir, fileName); - if (f.isFile()) { - loadPluginsFromFile(conf, f); - } - } - } - - // Set the classloader in this configuration so that it will use - // the jars we just loaded in. - conf.setClassLoader(Thread.currentThread().getContextClassLoader()); - return conf; - } - - /** - * Read the specified file and extract any ToolPlugin implementation - * names from there. - * @param conf the configuration to populate. - * @param f the file containing the configuration data to add. - */ - private static void loadPluginsFromFile(Configuration conf, File f) { - Reader r = null; - try { - // The file format is actually Java properties-file syntax. - r = new InputStreamReader(new FileInputStream(f)); - Properties props = new Properties(); - props.load(r); - - for (Map.Entry entry : props.entrySet()) { - // Each key is a ToolPlugin class name. - // Each value, if set, is the jar that contains it. - String plugin = entry.getKey().toString(); - addPlugin(conf, plugin); - - String jarName = entry.getValue().toString(); - if (jarName.length() > 0) { - ClassLoaderStack.addJarFile(jarName, plugin); - LOG.debug("Added plugin " + plugin + " in jar " + jarName - + " specified by " + f); - } else if (LOG.isDebugEnabled()) { - LOG.debug("Added plugin " + plugin + " specified by " + f); - } - } - } catch (IOException ioe) { - LOG.error("Error loading ToolPlugin information from file " - + f + ": " + StringUtils.stringifyException(ioe)); - } finally { - if (null != r) { - try { - r.close(); - } catch (IOException ioe) { - LOG.warn("Error closing file " + f + ": " + ioe); - } - } - } - } - - /** - * Add the specified plugin class name to the configuration string - * listing plugin classes. - */ - private static void addPlugin(Configuration conf, String pluginName) { - String existingPlugins = conf.get(TOOL_PLUGINS_KEY); - String newPlugins = null; - if (null == existingPlugins || existingPlugins.length() == 0) { - newPlugins = pluginName; - } else { - newPlugins = existingPlugins + "," + pluginName; - } - - conf.set(TOOL_PLUGINS_KEY, newPlugins); - } - - /** - * @return the list of available tools. - */ public static final Set getToolNames() { - return TOOLS.keySet(); + return org.apache.sqoop.tool.SqoopTool.getToolNames(); } - /** - * @return the SqoopTool instance with the provided name, or null - * if no such tool exists. 
- */ public static final SqoopTool getTool(String toolName) { - Class cls = TOOLS.get(toolName); - try { - if (null != cls) { - SqoopTool tool = cls.newInstance(); - tool.setToolName(toolName); - return tool; - } - } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); - return null; - } - - return null; + return (SqoopTool)org.apache.sqoop.tool.SqoopTool.getTool(toolName); } - /** - * @return the user-friendly description for a tool, or null if the tool - * cannot be found. - */ public static final String getToolDescription(String toolName) { - return DESCRIPTIONS.get(toolName); + return org.apache.sqoop.tool.SqoopTool.getToolDescription(toolName); } - /** The name of the current tool. */ - private String toolName; - - /** Arguments that remained unparsed after parseArguments. */ - protected String [] extraArguments; - public SqoopTool() { - this.toolName = "<" + this.getClass().getName() + ">"; + super(); } public SqoopTool(String name) { - this.toolName = name; + super(name); } - public String getToolName() { - return this.toolName; - } - - protected void setToolName(String name) { - this.toolName = name; - } - - /** - * Main body of code to run the tool. - * @param options the SqoopOptions configured via - * configureOptions()/applyOptions(). - * @return an integer return code for external programs to consume. 0 - * represents success; nonzero means failure. - */ - public abstract int run(SqoopOptions options); - - /** - * Configure the command-line arguments we expect to receive. - * @param opts a ToolOptions that should be populated with sets of - * RelatedOptions for the tool. - */ - public void configureOptions(ToolOptions opts) { - // Default implementation does nothing. - } - - /** - * Print the help message for this tool. - * @param opts the configured tool options - */ - public void printHelp(ToolOptions opts) { - System.out.println("usage: sqoop " + getToolName() - + " [GENERIC-ARGS] [TOOL-ARGS]"); - System.out.println(""); - - opts.printHelp(); - - System.out.println(""); - System.out.println("Generic Hadoop command-line arguments:"); - System.out.println("(must preceed any tool-specific arguments)"); - ToolRunner.printGenericCommandUsage(System.out); - } - - /** Generate the SqoopOptions containing actual argument values from - * the extracted CommandLine arguments. - * @param in the CLI CommandLine that contain the user's set Options. - * @param out the SqoopOptions with all fields applied. - * @throws InvalidOptionsException if there's a problem. - */ - public void applyOptions(CommandLine in, SqoopOptions out) - throws InvalidOptionsException { - // Default implementation does nothing. - } - - /** - * Validates options and ensures that any required options are - * present and that any mutually-exclusive options are not selected. - * @throws InvalidOptionsException if there's a problem. - */ - public void validateOptions(SqoopOptions options) - throws InvalidOptionsException { - // Default implementation does nothing. - } - - /** - * Configures a SqoopOptions according to the specified arguments. - * Reads a set of arguments and uses them to configure a SqoopOptions - * and its embedded configuration (i.e., through GenericOptionsParser.) - * Stores any unparsed arguments in the extraArguments field. - * - * @param args the arguments to parse. - * @param conf if non-null, set as the configuration for the returned - * SqoopOptions. - * @param in a (perhaps partially-configured) SqoopOptions. If null, - * then a new SqoopOptions will be used. 
If this has a null configuration - * and conf is null, then a new Configuration will be inserted in this. - * @param useGenericOptions if true, will also parse generic Hadoop - * options into the Configuration. - * @return a SqoopOptions that is fully configured by a given tool. - */ - public SqoopOptions parseArguments(String [] args, - Configuration conf, SqoopOptions in, boolean useGenericOptions) - throws ParseException, SqoopOptions.InvalidOptionsException { - SqoopOptions out = in; - - if (null == out) { - out = new SqoopOptions(); - } - - if (null != conf) { - // User specified a configuration; use it and override any conf - // that may have been in the SqoopOptions. - out.setConf(conf); - } else if (null == out.getConf()) { - // User did not specify a configuration, but neither did the - // SqoopOptions. Fabricate a new one. - out.setConf(new Configuration()); - } - - // This tool is the "active" tool; bind it in the SqoopOptions. - out.setActiveSqoopTool(this); - - String [] toolArgs = args; // args after generic parser is done. - if (useGenericOptions) { - try { - toolArgs = ConfigurationHelper.parseGenericOptions( - out.getConf(), args); - } catch (IOException ioe) { - ParseException pe = new ParseException( - "Could not parse generic arguments"); - pe.initCause(ioe); - throw pe; - } - } - - // Parse tool-specific arguments. - ToolOptions toolOptions = new ToolOptions(); - configureOptions(toolOptions); - CommandLineParser parser = new SqoopParser(); - CommandLine cmdLine = parser.parse(toolOptions.merge(), toolArgs, true); - applyOptions(cmdLine, out); - this.extraArguments = cmdLine.getArgs(); - return out; - } - - /** - * Append 'extra' to extraArguments. - */ - public void appendArgs(String [] extra) { - int existingLen = - (this.extraArguments == null) ? 0 : this.extraArguments.length; - int newLen = (extra == null) ? 0 : extra.length; - String [] newExtra = new String[existingLen + newLen]; - - if (null != this.extraArguments) { - System.arraycopy(this.extraArguments, 0, newExtra, 0, existingLen); - } - - if (null != extra) { - System.arraycopy(extra, 0, newExtra, existingLen, newLen); - } - - this.extraArguments = newExtra; - } - - /** - * Allow a tool to specify a set of dependency jar filenames. This is used - * to allow tools to bundle arbitrary dependency jars necessary for a - * MapReduce job executed by Sqoop. The jar containing the SqoopTool - * instance itself will already be handled by Sqoop. - * - *
- * Called by JobBase.cacheJars().
- * - *
- * This does not load the jars into the current VM; they are assumed to be - * already on the classpath if they are needed on the client side (or - * otherwise classloaded by the tool itself). This is purely to specify jars - * necessary to be added to the distributed cache. The tool itself can - * classload these jars by running loadDependencyJars(). - *
- * - *
- * See also: c.c.s.util.Jars.getJarPathForClass()
- */ - public List getDependencyJars() { - // Default behavior: no additional dependencies. - return Collections.emptyList(); - } - - /** - * Loads dependency jars specified by getDependencyJars() into the current - * classloader stack. May optionally be called by a [third-party] tool - * before doing work, to ensure that all of its dependencies get classloaded - * properly. Note that dependencies will not be available until after the - * tool is already constructed. - */ - protected void loadDependencyJars(SqoopOptions options) throws IOException { - List deps = getDependencyJars(); - if (null == deps) { - return; - } - - for (String depFilename : deps) { - LOG.debug("Loading dependency: " + depFilename); - ClassLoaderStack.addJarFile(depFilename, null); - } - - options.getConf().setClassLoader( - Thread.currentThread().getContextClassLoader()); - } - - @Override - public String toString() { - return getToolName(); - } } - diff --git a/src/java/com/cloudera/sqoop/tool/ToolDesc.java b/src/java/com/cloudera/sqoop/tool/ToolDesc.java index cff7e74e..49e8274f 100644 --- a/src/java/com/cloudera/sqoop/tool/ToolDesc.java +++ b/src/java/com/cloudera/sqoop/tool/ToolDesc.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,46 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; /** - * Describes a SqoopTool. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public final class ToolDesc { - private final String toolName; - private final Class toolClass; - private final String description; +public class ToolDesc + extends org.apache.sqoop.tool.ToolDesc { - - /** - * Main c'tor; sets all fields that describe a SqoopTool. - */ public ToolDesc(String name, Class cls, String desc) { - this.toolName = name; - this.toolClass = cls; - this.description = desc; - } - - /** - * @return the name used to invoke the tool (e.g., 'sqoop <foo>') - */ - public String getName() { - return toolName; - } - - /** - * @return a human-readable description of what the tool does. - */ - public String getDesc() { - return description; - } - - /** - * @return the class that implements SqoopTool. - */ - public Class getToolClass() { - return toolClass; + super(name, cls, desc); } } diff --git a/src/java/com/cloudera/sqoop/tool/ToolPlugin.java b/src/java/com/cloudera/sqoop/tool/ToolPlugin.java index db46e55e..46b69fb5 100644 --- a/src/java/com/cloudera/sqoop/tool/ToolPlugin.java +++ b/src/java/com/cloudera/sqoop/tool/ToolPlugin.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,20 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import java.util.List; - /** - * Abstract base class that defines the ToolPlugin API; additional SqoopTool - * implementations may be registered with the system via ToolPlugin classes. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public abstract class ToolPlugin { - /** - * Describes the tools made available by this plugin. 
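// A minimal sketch of how a third-party plugin uses this API; MyTool,
// MyToolPlugin and com.example are hypothetical names, not part of Sqoop:
//
//   public class MyToolPlugin extends ToolPlugin {
//     @Override
//     public List<ToolDesc> getTools() {
//       return Collections.singletonList(
//           new ToolDesc("my-tool", MyTool.class, "Run my custom tool"));
//     }
//   }
//
// The plugin is then named either in the sqoop.tool.plugins configuration key
// or in a $SQOOP_CONF_DIR/tools.d file with a line such as
//   com.example.MyToolPlugin=/path/to/my-tool.jar
// after which loadPlugins() registers "my-tool" alongside the built-in tools.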
- * @return a list of ToolDesc objects containing the tool name, class, - * and description. - */ - public abstract List getTools(); +public abstract class ToolPlugin + extends org.apache.sqoop.tool.ToolPlugin { } diff --git a/src/java/com/cloudera/sqoop/tool/VersionTool.java b/src/java/com/cloudera/sqoop/tool/VersionTool.java index f511d980..ab85b89e 100644 --- a/src/java/com/cloudera/sqoop/tool/VersionTool.java +++ b/src/java/com/cloudera/sqoop/tool/VersionTool.java @@ -1,6 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,32 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package com.cloudera.sqoop.tool; -import com.cloudera.sqoop.SqoopOptions; -import com.cloudera.sqoop.cli.ToolOptions; - /** - * Tool that prints Sqoop's version. + * @deprecated Moving to use org.apache.sqoop namespace. */ -public class VersionTool extends BaseSqoopTool { - - public VersionTool() { - super("version"); - } - - @Override - /** {@inheritDoc} */ - public int run(SqoopOptions options) { - // FIXME with maven buildnumber plugin - System.out.print("FIXME "); - return 0; - } - - @Override - public void printHelp(ToolOptions opts) { - System.out.println("usage: sqoop " + getToolName()); - } +public class VersionTool + extends org.apache.sqoop.tool.VersionTool { } - diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java new file mode 100644 index 00000000..2517418a --- /dev/null +++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.orm; + +import java.io.IOException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.avro.Schema.Type; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.manager.ConnManager; + +/** + * Creates an Avro schema to represent a table from a database. 
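// A minimal usage sketch for the class defined just below; options, connManager
// and "employees" stand in for whatever the caller already holds:
//   AvroSchemaGenerator gen =
//       new AvroSchemaGenerator(options, connManager, "employees");
//   Schema schema = gen.generate();   // may throw IOException
//   // Every column becomes a union of its mapped Avro type with NULL; the
//   // record carries a "tableName" property and each field its original
//   // "columnName" and "sqlType".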
+ */ +public class AvroSchemaGenerator { + + private final SqoopOptions options; + private final ConnManager connManager; + private final String tableName; + + public AvroSchemaGenerator(final SqoopOptions opts, final ConnManager connMgr, + final String table) { + this.options = opts; + this.connManager = connMgr; + this.tableName = table; + } + + public Schema generate() throws IOException { + ClassWriter classWriter = new ClassWriter(options, connManager, + tableName, null); + Map columnTypes = classWriter.getColumnTypes(); + String[] columnNames = classWriter.getColumnNames(columnTypes); + + List fields = new ArrayList(); + for (String columnName : columnNames) { + String cleanedCol = ClassWriter.toIdentifier(columnName); + int sqlType = columnTypes.get(cleanedCol); + Schema avroSchema = toAvroSchema(sqlType); + Field field = new Field(cleanedCol, avroSchema, null, null); + field.addProp("columnName", columnName); + field.addProp("sqlType", Integer.toString(sqlType)); + fields.add(field); + } + + String avroTableName = (tableName == null ? "QueryResult" : tableName); + + String doc = "Sqoop import of " + avroTableName; + Schema schema = Schema.createRecord(avroTableName, doc, null, false); + schema.setFields(fields); + schema.addProp("tableName", avroTableName); + return schema; + } + + private Type toAvroType(int sqlType) { + switch (sqlType) { + case Types.TINYINT: + case Types.SMALLINT: + case Types.INTEGER: + return Type.INT; + case Types.BIGINT: + return Type.LONG; + case Types.BIT: + case Types.BOOLEAN: + return Type.BOOLEAN; + case Types.REAL: + return Type.FLOAT; + case Types.FLOAT: + case Types.DOUBLE: + return Type.DOUBLE; + case Types.NUMERIC: + case Types.DECIMAL: + return Type.STRING; + case Types.CHAR: + case Types.VARCHAR: + case Types.LONGVARCHAR: + case Types.LONGNVARCHAR: + case Types.NVARCHAR: + case Types.NCHAR: + return Type.STRING; + case Types.DATE: + case Types.TIME: + case Types.TIMESTAMP: + return Type.LONG; + case Types.BINARY: + case Types.VARBINARY: + return Type.BYTES; + default: + throw new IllegalArgumentException("Cannot convert SQL type " + + sqlType); + } + } + + public Schema toAvroSchema(int sqlType) { + // All types are assumed nullabl;e make a union of the "true" type for + // a column and NULL. + List childSchemas = new ArrayList(); + childSchemas.add(Schema.create(toAvroType(sqlType))); + childSchemas.add(Schema.create(Schema.Type.NULL)); + return Schema.createUnion(childSchemas); + } + +} diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java new file mode 100644 index 00000000..75848546 --- /dev/null +++ b/src/java/org/apache/sqoop/orm/ClassWriter.java @@ -0,0 +1,1315 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.orm; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.HashSet; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.BytesWritable; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.manager.ConnManager; +import com.cloudera.sqoop.lib.BigDecimalSerializer; +import com.cloudera.sqoop.lib.BooleanParser; +import com.cloudera.sqoop.lib.DelimiterSet; +import com.cloudera.sqoop.lib.FieldFormatter; +import com.cloudera.sqoop.lib.JdbcWritableBridge; +import com.cloudera.sqoop.lib.LargeObjectLoader; +import com.cloudera.sqoop.lib.LobSerializer; +import com.cloudera.sqoop.lib.RecordParser; +import com.cloudera.sqoop.lib.BlobRef; +import com.cloudera.sqoop.lib.ClobRef; +import com.cloudera.sqoop.lib.SqoopRecord; + +/** + * Creates an ORM class to represent a table from a database. + */ +public class ClassWriter { + + public static final Log LOG = LogFactory.getLog(ClassWriter.class.getName()); + + // The following are keywords and cannot be used for class, method, or field + // names. + public static final HashSet JAVA_RESERVED_WORDS; + + static { + JAVA_RESERVED_WORDS = new HashSet(); + + JAVA_RESERVED_WORDS.add("abstract"); + JAVA_RESERVED_WORDS.add("else"); + JAVA_RESERVED_WORDS.add("int"); + JAVA_RESERVED_WORDS.add("strictfp"); + JAVA_RESERVED_WORDS.add("assert"); + JAVA_RESERVED_WORDS.add("enum"); + JAVA_RESERVED_WORDS.add("interface"); + JAVA_RESERVED_WORDS.add("super"); + JAVA_RESERVED_WORDS.add("boolean"); + JAVA_RESERVED_WORDS.add("extends"); + JAVA_RESERVED_WORDS.add("long"); + JAVA_RESERVED_WORDS.add("switch"); + JAVA_RESERVED_WORDS.add("break"); + JAVA_RESERVED_WORDS.add("false"); + JAVA_RESERVED_WORDS.add("native"); + JAVA_RESERVED_WORDS.add("synchronized"); + JAVA_RESERVED_WORDS.add("byte"); + JAVA_RESERVED_WORDS.add("final"); + JAVA_RESERVED_WORDS.add("new"); + JAVA_RESERVED_WORDS.add("this"); + JAVA_RESERVED_WORDS.add("case"); + JAVA_RESERVED_WORDS.add("finally"); + JAVA_RESERVED_WORDS.add("null"); + JAVA_RESERVED_WORDS.add("throw"); + JAVA_RESERVED_WORDS.add("catch"); + JAVA_RESERVED_WORDS.add("float"); + JAVA_RESERVED_WORDS.add("package"); + JAVA_RESERVED_WORDS.add("throws"); + JAVA_RESERVED_WORDS.add("char"); + JAVA_RESERVED_WORDS.add("for"); + JAVA_RESERVED_WORDS.add("private"); + JAVA_RESERVED_WORDS.add("transient"); + JAVA_RESERVED_WORDS.add("class"); + JAVA_RESERVED_WORDS.add("goto"); + JAVA_RESERVED_WORDS.add("protected"); + JAVA_RESERVED_WORDS.add("true"); + JAVA_RESERVED_WORDS.add("const"); + } + + /** + * This version number is injected into all generated Java classes to denote + * which version of the ClassWriter's output format was used to generate the + * class. + * + * If the way that we generate classes changes, bump this number. + * This number is retrieved by the SqoopRecord.getClassFormatVersion() + * method. + */ + public static final int CLASS_WRITER_VERSION = 3; + + private SqoopOptions options; + private ConnManager connManager; + private String tableName; + private CompilationManager compileManager; + + /** + * Creates a new ClassWriter to generate an ORM class for a table + * or arbitrary query. + * @param opts program-wide options + * @param connMgr the connection manager used to describe the table. 
+ * @param table the name of the table to read. If null, query is taken + * from the SqoopOptions. + */ + public ClassWriter(final SqoopOptions opts, final ConnManager connMgr, + final String table, final CompilationManager compMgr) { + this.options = opts; + this.connManager = connMgr; + this.tableName = table; + this.compileManager = compMgr; + } + + /** + * Given some character that can't be in an identifier, + * try to map it to a string that can. + * + * @param c a character that can't be in a Java identifier + * @return a string of characters that can, or null if there's + * no good translation. + */ + public static String getIdentifierStrForChar(char c) { + if (Character.isJavaIdentifierPart(c)) { + return "" + c; + } else if (Character.isWhitespace(c)) { + // Eliminate whitespace. + return null; + } else { + // All other characters map to underscore. + return "_"; + } + } + + /** + * @param word a word to test. + * @return true if 'word' is reserved the in Java language. + */ + private static boolean isReservedWord(String word) { + return JAVA_RESERVED_WORDS.contains(word); + } + + /** + * Coerce a candidate name for an identifier into one which is a valid + * Java or Avro identifier. + * + * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]* + * and is not a reserved word. + * + * @param candidate A string we want to use as an identifier + * @return A string naming an identifier which compiles and is + * similar to the candidate. + */ + public static String toIdentifier(String candidate) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (char c : candidate.toCharArray()) { + if (Character.isJavaIdentifierStart(c) && first) { + // Ok for this to be the first character of the identifier. + sb.append(c); + first = false; + } else if (Character.isJavaIdentifierPart(c) && !first) { + // Ok for this character to be in the output identifier. + sb.append(c); + } else { + // We have a character in the original that can't be + // part of this identifier we're building. + // If it's just not allowed to be the first char, add a leading '_'. + // If we have a reasonable translation (e.g., '-' -> '_'), do that. + // Otherwise, drop it. + if (first && Character.isJavaIdentifierPart(c) + && !Character.isJavaIdentifierStart(c)) { + sb.append("_"); + sb.append(c); + first = false; + } else { + // Try to map this to a different character or string. + // If we can't just give up. + String translated = getIdentifierStrForChar(c); + if (null != translated) { + sb.append(translated); + first = false; + } + } + } + } + return sb.toString(); + } + + /** + * Coerce a candidate name for an identifier into one which will + * definitely compile. + * + * Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]* + * and is not a reserved word. + * + * @param candidate A string we want to use as an identifier + * @return A string naming an identifier which compiles and is + * similar to the candidate. 
+ */ + public static String toJavaIdentifier(String candidate) { + String output = toIdentifier(candidate); + if (isReservedWord(output)) { + // e.g., 'class' -> '_class'; + return "_" + output; + } + + return output; + } + + private String toJavaType(String columnName, int sqlType) { + Properties mapping = options.getMapColumnJava(); + + if(mapping.containsKey(columnName)) { + String type = mapping.getProperty(columnName); + if(LOG.isDebugEnabled()) { + LOG.info("Overriding type of column " + columnName + " to " + type); + } + return type; + } + + return connManager.toJavaType(sqlType); + } + + /** + * @param javaType + * @return the name of the method of JdbcWritableBridge to read an entry + * with a given java type. + */ + private String dbGetterForType(String javaType) { + // All Class-based types (e.g., java.math.BigDecimal) are handled with + // "readBar" where some.package.foo.Bar is the canonical class name. Turn + // the javaType string into the getter type string. + + String [] parts = javaType.split("\\."); + if (parts.length == 0) { + LOG.error("No ResultSet method for Java type " + javaType); + return null; + } + + String lastPart = parts[parts.length - 1]; + try { + String getter = "read" + Character.toUpperCase(lastPart.charAt(0)) + + lastPart.substring(1); + return getter; + } catch (StringIndexOutOfBoundsException oob) { + // lastPart.*() doesn't work on empty strings. + LOG.error("Could not infer JdbcWritableBridge getter for Java type " + + javaType); + return null; + } + } + + /** + * @param javaType + * @return the name of the method of JdbcWritableBridge to write an entry + * with a given java type. + */ + private String dbSetterForType(String javaType) { + // TODO(aaron): Lots of unit tests needed here. + // See dbGetterForType() for the logic used here; it's basically the same. + + String [] parts = javaType.split("\\."); + if (parts.length == 0) { + LOG.error("No PreparedStatement Set method for Java type " + javaType); + return null; + } + + String lastPart = parts[parts.length - 1]; + try { + String setter = "write" + Character.toUpperCase(lastPart.charAt(0)) + + lastPart.substring(1); + return setter; + } catch (StringIndexOutOfBoundsException oob) { + // lastPart.*() doesn't work on empty strings. + LOG.error("Could not infer PreparedStatement setter for Java type " + + javaType); + return null; + } + } + + private String stringifierForType(String javaType, String colName) { + if (javaType.equals("String")) { + // Check if it is null, and write the null representation in such case + String r = colName + "==null?\"" + this.options.getNullStringValue() + + "\":" + colName; + return r; + } else { + // This is an object type -- just call its toString() in a null-safe way. + // Also check if it is null, and instead write the null representation + // in such case + String r = colName + "==null?\"" + this.options.getNullNonStringValue() + + "\":" + "\"\" + " + colName; + return r; + } + } + + /** + * @param javaType the type to read + * @param inputObj the name of the DataInput to read from + * @param colName the column name to read + * @return the line of code involving a DataInput object to read an entry + * with a given java type. + */ + private String rpcGetterForType(String javaType, String inputObj, + String colName) { + if (javaType.equals("Integer")) { + return " this." + colName + " = Integer.valueOf(" + inputObj + + ".readInt());\n"; + } else if (javaType.equals("Long")) { + return " this." 
+ colName + " = Long.valueOf(" + inputObj + + ".readLong());\n"; + } else if (javaType.equals("Float")) { + return " this." + colName + " = Float.valueOf(" + inputObj + + ".readFloat());\n"; + } else if (javaType.equals("Double")) { + return " this." + colName + " = Double.valueOf(" + inputObj + + ".readDouble());\n"; + } else if (javaType.equals("Boolean")) { + return " this." + colName + " = Boolean.valueOf(" + inputObj + + ".readBoolean());\n"; + } else if (javaType.equals("String")) { + return " this." + colName + " = Text.readString(" + inputObj + ");\n"; + } else if (javaType.equals("java.sql.Date")) { + return " this." + colName + " = new Date(" + inputObj + + ".readLong());\n"; + } else if (javaType.equals("java.sql.Time")) { + return " this." + colName + " = new Time(" + inputObj + + ".readLong());\n"; + } else if (javaType.equals("java.sql.Timestamp")) { + return " this." + colName + " = new Timestamp(" + inputObj + + ".readLong());\n" + " this." + colName + ".setNanos(" + + inputObj + ".readInt());\n"; + } else if (javaType.equals("java.math.BigDecimal")) { + return " this." + colName + " = " + + BigDecimalSerializer.class.getCanonicalName() + + ".readFields(" + inputObj + ");\n"; + } else if (javaType.equals(ClobRef.class.getName())) { + return " this." + colName + " = " + + LobSerializer.class.getCanonicalName() + + ".readClobFields(" + inputObj + ");\n"; + } else if (javaType.equals(BlobRef.class.getName())) { + return " this." + colName + " = " + + LobSerializer.class.getCanonicalName() + + ".readBlobFields(" + inputObj + ");\n"; + } else if (javaType.equals(BytesWritable.class.getName())) { + return " this." + colName + " = new BytesWritable();\n" + + " this." + colName + ".readFields(" + inputObj + ");\n"; + } else { + LOG.error("No ResultSet method for Java type " + javaType); + return null; + } + } + + /** + * Deserialize a possibly-null value from the DataInput stream. + * @param javaType name of the type to deserialize if it's not null. + * @param inputObj name of the DataInput to read from + * @param colName the column name to read. + * @return + */ + private String rpcGetterForMaybeNull(String javaType, String inputObj, + String colName) { + return " if (" + inputObj + ".readBoolean()) { \n" + + " this." + colName + " = null;\n" + + " } else {\n" + + rpcGetterForType(javaType, inputObj, colName) + + " }\n"; + } + + /** + * @param javaType the type to write + * @param inputObj the name of the DataOutput to write to + * @param colName the column name to write + * @return the line of code involving a DataOutput object to write an entry + * with a given java type. + */ + private String rpcSetterForType(String javaType, String outputObj, + String colName) { + if (javaType.equals("Integer")) { + return " " + outputObj + ".writeInt(this." + colName + ");\n"; + } else if (javaType.equals("Long")) { + return " " + outputObj + ".writeLong(this." + colName + ");\n"; + } else if (javaType.equals("Boolean")) { + return " " + outputObj + ".writeBoolean(this." + colName + ");\n"; + } else if (javaType.equals("Float")) { + return " " + outputObj + ".writeFloat(this." + colName + ");\n"; + } else if (javaType.equals("Double")) { + return " " + outputObj + ".writeDouble(this." + colName + ");\n"; + } else if (javaType.equals("String")) { + return " Text.writeString(" + outputObj + ", " + colName + ");\n"; + } else if (javaType.equals("java.sql.Date")) { + return " " + outputObj + ".writeLong(this." 
+ colName + + ".getTime());\n"; + } else if (javaType.equals("java.sql.Time")) { + return " " + outputObj + ".writeLong(this." + colName + + ".getTime());\n"; + } else if (javaType.equals("java.sql.Timestamp")) { + return " " + outputObj + ".writeLong(this." + colName + + ".getTime());\n" + " " + outputObj + ".writeInt(this." + colName + + ".getNanos());\n"; + } else if (javaType.equals(BytesWritable.class.getName())) { + return " this." + colName + ".write(" + outputObj + ");\n"; + } else if (javaType.equals("java.math.BigDecimal")) { + return " " + BigDecimalSerializer.class.getCanonicalName() + + ".write(this." + colName + ", " + outputObj + ");\n"; + } else if (javaType.equals(ClobRef.class.getName())) { + return " " + LobSerializer.class.getCanonicalName() + + ".writeClob(this." + colName + ", " + outputObj + ");\n"; + } else if (javaType.equals(BlobRef.class.getName())) { + return " " + LobSerializer.class.getCanonicalName() + + ".writeBlob(this." + colName + ", " + outputObj + ");\n"; + } else { + LOG.error("No ResultSet method for Java type " + javaType); + return null; + } + } + + /** + * Serialize a possibly-null value to the DataOutput stream. First a boolean + * isNull is written, followed by the contents itself (if not null). + * @param javaType name of the type to deserialize if it's not null. + * @param inputObj name of the DataInput to read from + * @param colName the column name to read. + * @return + */ + private String rpcSetterForMaybeNull(String javaType, String outputObj, + String colName) { + return " if (null == this." + colName + ") { \n" + + " " + outputObj + ".writeBoolean(true);\n" + + " } else {\n" + + " " + outputObj + ".writeBoolean(false);\n" + + rpcSetterForType(javaType, outputObj, colName) + + " }\n"; + } + + /** + * Generate a member field, getter, setter and with method for each column. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table + * @param className - name of the generated class + * @param sb - StringBuilder to append code to + */ + private void generateFields(Map columnTypes, + String [] colNames, String className, StringBuilder sb) { + + for (String col : colNames) { + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("Cannot resolve SQL type " + sqlType); + continue; + } + + sb.append(" private " + javaType + " " + col + ";\n"); + sb.append(" public " + javaType + " get_" + col + "() {\n"); + sb.append(" return " + col + ";\n"); + sb.append(" }\n"); + sb.append(" public void set_" + col + "(" + javaType + " " + col + + ") {\n"); + sb.append(" this." + col + " = " + col + ";\n"); + sb.append(" }\n"); + sb.append(" public " + className + " with_" + col + "(" + javaType + " " + + col + ") {\n"); + sb.append(" this." + col + " = " + col + ";\n"); + sb.append(" return this;\n"); + sb.append(" }\n"); + } + } + + /** + * Generate an equals method that compares the fields for each column. 
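// For concreteness, a sketch of what generateFields() above emits for a single
// column named "id" mapped to Integer, in a generated class called, say,
// EmployeeRecord (both names hypothetical):
//
//   private Integer id;
//   public Integer get_id() { return id; }
//   public void set_id(Integer id) { this.id = id; }
//   public EmployeeRecord with_id(Integer id) { this.id = id; return this; }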
+ * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table + * @param className - name of the generated class + * @param sb - StringBuilder to append code to + */ + private void generateEquals(Map columnTypes, + String [] colNames, String className, StringBuilder sb) { + + sb.append(" public boolean equals(Object o) {\n"); + sb.append(" if (this == o) {\n"); + sb.append(" return true;\n"); + sb.append(" }\n"); + sb.append(" if (!(o instanceof " + className + ")) {\n"); + sb.append(" return false;\n"); + sb.append(" }\n"); + sb.append(" " + className + " that = (" + className + ") o;\n"); + sb.append(" boolean equal = true;\n"); + for (String col : colNames) { + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("Cannot resolve SQL type " + sqlType); + continue; + } + sb.append(" equal = equal && (this." + col + " == null ? that." + col + + " == null : this." + col + ".equals(that." + col + "));\n"); + } + sb.append(" return equal;\n"); + sb.append(" }\n"); + } + + /** + * Generate the readFields() method used by the database. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateDbRead(Map columnTypes, + String [] colNames, StringBuilder sb) { + + sb.append(" public void readFields(ResultSet __dbResults) "); + sb.append("throws SQLException {\n"); + + // Save ResultSet object cursor for use in LargeObjectLoader + // if necessary. + sb.append(" this.__cur_result_set = __dbResults;\n"); + + int fieldNum = 0; + + for (String col : colNames) { + fieldNum++; + + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("No Java type for SQL type " + sqlType + + " for column " + col); + continue; + } + + String getterMethod = dbGetterForType(javaType); + if (null == getterMethod) { + LOG.error("No db getter method for Java type " + javaType); + continue; + } + + sb.append(" this." + col + " = JdbcWritableBridge." + getterMethod + + "(" + fieldNum + ", __dbResults);\n"); + } + + sb.append(" }\n"); + } + + /** + * Generate the loadLargeObjects() method called by the mapper to load + * delayed objects (that require the Context from the mapper). + */ + private void generateLoadLargeObjects(Map columnTypes, + String [] colNames, StringBuilder sb) { + + // This method relies on the __cur_result_set field being set by + // readFields() method generated by generateDbRead(). + + sb.append(" public void loadLargeObjects(LargeObjectLoader __loader)\n"); + sb.append(" throws SQLException, IOException, "); + sb.append("InterruptedException {\n"); + + int fieldNum = 0; + + for (String col : colNames) { + fieldNum++; + + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("No Java type for SQL type " + sqlType + + " for column " + col); + continue; + } + + String getterMethod = dbGetterForType(javaType); + if ("readClobRef".equals(getterMethod) + || "readBlobRef".equals(getterMethod)) { + // This field is a blob/clob field with delayed loading. Call the + // appropriate LargeObjectLoader method (which has the same name as a + // JdbcWritableBridge method). + sb.append(" this." + col + " = __loader." 
+ getterMethod + + "(" + fieldNum + ", this.__cur_result_set);\n"); + } + } + sb.append(" }\n"); + } + + + /** + * Generate the write() method used by the database. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateDbWrite(Map columnTypes, + String [] colNames, StringBuilder sb) { + + sb.append(" public void write(PreparedStatement __dbStmt) " + + "throws SQLException {\n"); + sb.append(" write(__dbStmt, 0);\n"); + sb.append(" }\n\n"); + + sb.append(" public int write(PreparedStatement __dbStmt, int __off) " + + "throws SQLException {\n"); + + int fieldNum = 0; + + for (String col : colNames) { + fieldNum++; + + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("No Java type for SQL type " + sqlType + + " for column " + col); + continue; + } + + String setterMethod = dbSetterForType(javaType); + if (null == setterMethod) { + LOG.error("No db setter method for Java type " + javaType); + continue; + } + + sb.append(" JdbcWritableBridge." + setterMethod + "(" + col + ", " + + fieldNum + " + __off, " + sqlType + ", __dbStmt);\n"); + } + + sb.append(" return " + fieldNum + ";\n"); + sb.append(" }\n"); + } + + + /** + * Generate the readFields() method used by the Hadoop RPC system. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateHadoopRead(Map columnTypes, + String [] colNames, StringBuilder sb) { + + sb.append(" public void readFields(DataInput __dataIn) " + + "throws IOException {\n"); + + for (String col : colNames) { + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("No Java type for SQL type " + sqlType + + " for column " + col); + continue; + } + + String getterMethod = rpcGetterForMaybeNull(javaType, "__dataIn", col); + if (null == getterMethod) { + LOG.error("No RPC getter method for Java type " + javaType); + continue; + } + + sb.append(getterMethod); + } + + sb.append(" }\n"); + } + + /** + * Generate the clone() method. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateCloneMethod(Map columnTypes, + String [] colNames, StringBuilder sb) { + + TableClassName tableNameInfo = new TableClassName(options); + String className = tableNameInfo.getShortClassForTable(tableName); + + sb.append(" public Object clone() throws CloneNotSupportedException {\n"); + sb.append(" " + className + " o = (" + className + ") super.clone();\n"); + + // For each field that is mutable, we need to perform the deep copy. + for (String colName : colNames) { + int sqlType = columnTypes.get(colName); + String javaType = toJavaType(colName, sqlType); + if (null == javaType) { + continue; + } else if (javaType.equals("java.sql.Date") + || javaType.equals("java.sql.Time") + || javaType.equals("java.sql.Timestamp") + || javaType.equals(ClobRef.class.getName()) + || javaType.equals(BlobRef.class.getName())) { + sb.append(" o." + colName + " = (o." + colName + " != null) ? (" + + javaType + ") o." + colName + ".clone() : null;\n"); + } else if (javaType.equals(BytesWritable.class.getName())) { + sb.append(" o." 
+ colName + " = new BytesWritable(" + + "Arrays.copyOf(" + colName + ".getBytes(), " + + colName + ".getLength()));\n"); + } + } + + sb.append(" return o;\n"); + sb.append(" }\n\n"); + } + + /** + * Generate the setField() method. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateSetField(Map columnTypes, + String [] colNames, StringBuilder sb) { + sb.append(" public void setField(String __fieldName, Object __fieldVal) " + + "{\n"); + boolean first = true; + for (String colName : colNames) { + int sqlType = columnTypes.get(colName); + String javaType = toJavaType(colName, sqlType); + if (null == javaType) { + continue; + } else { + if (!first) { + sb.append(" else"); + } + + sb.append(" if (\"" + colName + "\".equals(__fieldName)) {\n"); + sb.append(" this." + colName + " = (" + javaType + + ") __fieldVal;\n"); + sb.append(" }\n"); + first = false; + } + } + sb.append(" else {\n"); + sb.append(" throw new RuntimeException("); + sb.append("\"No such field: \" + __fieldName);\n"); + sb.append(" }\n"); + sb.append(" }\n"); + } + + /** + * Generate the getFieldMap() method. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateGetFieldMap(Map columnTypes, + String [] colNames, StringBuilder sb) { + sb.append(" public Map getFieldMap() {\n"); + sb.append(" Map __sqoop$field_map = " + + "new TreeMap();\n"); + for (String colName : colNames) { + sb.append(" __sqoop$field_map.put(\"" + colName + "\", this." + + colName + ");\n"); + } + sb.append(" return __sqoop$field_map;\n"); + sb.append(" }\n\n"); + } + + /** + * Generate the toString() method. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateToString(Map columnTypes, + String [] colNames, StringBuilder sb) { + + // Save the delimiters to the class. + sb.append(" private final DelimiterSet __outputDelimiters = "); + sb.append(options.getOutputDelimiters().formatConstructor() + ";\n"); + + // The default toString() method itself follows. This just calls + // the delimiter-specific toString() with the default delimiters. + // Also appends an end-of-record delimiter to the line. + sb.append(" public String toString() {\n"); + sb.append(" return toString(__outputDelimiters, true);\n"); + sb.append(" }\n"); + + // This toString() variant, though, accepts delimiters as arguments. + sb.append(" public String toString(DelimiterSet delimiters) {\n"); + sb.append(" return toString(delimiters, true);\n"); + sb.append(" }\n"); + + // This variant allows the user to specify whether or not an end-of-record + // delimiter should be appended. + sb.append(" public String toString(boolean useRecordDelim) {\n"); + sb.append(" return toString(__outputDelimiters, useRecordDelim);\n"); + sb.append(" }\n"); + + + // This toString() variant allows the user to specify delimiters, as well + // as whether or not the end-of-record delimiter should be added to the + // string. Use 'false' to do reasonable things with TextOutputFormat, + // which appends its own newline. 
+ sb.append(" public String toString(DelimiterSet delimiters, "); + sb.append("boolean useRecordDelim) {\n"); + sb.append(" StringBuilder __sb = new StringBuilder();\n"); + sb.append(" char fieldDelim = delimiters.getFieldsTerminatedBy();\n"); + + boolean first = true; + for (String col : colNames) { + int sqlType = columnTypes.get(col); + String javaType = toJavaType(col, sqlType); + if (null == javaType) { + LOG.error("No Java type for SQL type " + sqlType + + " for column " + col); + continue; + } + + if (!first) { + // print inter-field tokens. + sb.append(" __sb.append(fieldDelim);\n"); + } + + first = false; + + String stringExpr = stringifierForType(javaType, col); + if (null == stringExpr) { + LOG.error("No toString method for Java type " + javaType); + continue; + } + + if (javaType.equals("String") && options.doHiveDropDelims()) { + sb.append(" // special case for strings hive, dropping" + + "delimiters \\n,\\r,\\01 from strings\n"); + sb.append(" __sb.append(FieldFormatter.hiveStringDropDelims(" + + stringExpr + ", delimiters));\n"); + } else if (javaType.equals("String") + && options.getHiveDelimsReplacement() != null) { + sb.append(" // special case for strings hive, replacing " + + "delimiters \\n,\\r,\\01 with '" + + options.getHiveDelimsReplacement() + "' from strings\n"); + sb.append(" __sb.append(FieldFormatter.hiveStringReplaceDelims(" + + stringExpr + ", \"" + options.getHiveDelimsReplacement() + "\", " + + "delimiters));\n"); + } else { + sb.append(" __sb.append(FieldFormatter.escapeAndEnclose(" + + stringExpr + ", delimiters));\n"); + } + } + + sb.append(" if (useRecordDelim) {\n"); + sb.append(" __sb.append(delimiters.getLinesTerminatedBy());\n"); + sb.append(" }\n"); + sb.append(" return __sb.toString();\n"); + sb.append(" }\n"); + } + + /** + * Helper method for generateParser(). Writes out the parse() method for one + * particular type we support as an input string-ish type. + */ + private void generateParseMethod(String typ, StringBuilder sb) { + sb.append(" public void parse(" + typ + " __record) " + + "throws RecordParser.ParseError {\n"); + sb.append(" if (null == this.__parser) {\n"); + sb.append(" this.__parser = new RecordParser(__inputDelimiters);\n"); + sb.append(" }\n"); + sb.append(" List __fields = " + + "this.__parser.parseRecord(__record);\n"); + sb.append(" __loadFromFields(__fields);\n"); + sb.append(" }\n\n"); + } + + /** + * Helper method for parseColumn(). Interpret the string null representation + * for a particular column. + */ + private void parseNullVal(String javaType, String colName, StringBuilder sb) { + if (javaType.equals("String")) { + sb.append(" if (__cur_str.equals(\"" + + this.options.getInNullStringValue() + "\")) { this."); + sb.append(colName); + sb.append(" = null; } else {\n"); + } else { + sb.append(" if (__cur_str.equals(\"" + + this.options.getInNullNonStringValue()); + sb.append("\") || __cur_str.length() == 0) { this."); + sb.append(colName); + sb.append(" = null; } else {\n"); + } + } + + /** + * Helper method for generateParser(). Generates the code that loads one + * field of a specified name and type from the next element of the field + * strings list. + */ + private void parseColumn(String colName, int colType, StringBuilder sb) { + // assume that we have __it and __cur_str vars, based on + // __loadFromFields() code. 
+ sb.append(" __cur_str = __it.next();\n"); + String javaType = toJavaType(colName, colType); + + parseNullVal(javaType, colName, sb); + if (javaType.equals("String")) { + // TODO(aaron): Distinguish between 'null' and null. Currently they both + // set the actual object to null. + sb.append(" this." + colName + " = __cur_str;\n"); + } else if (javaType.equals("Integer")) { + sb.append(" this." + colName + " = Integer.valueOf(__cur_str);\n"); + } else if (javaType.equals("Long")) { + sb.append(" this." + colName + " = Long.valueOf(__cur_str);\n"); + } else if (javaType.equals("Float")) { + sb.append(" this." + colName + " = Float.valueOf(__cur_str);\n"); + } else if (javaType.equals("Double")) { + sb.append(" this." + colName + " = Double.valueOf(__cur_str);\n"); + } else if (javaType.equals("Boolean")) { + sb.append(" this." + colName + + " = BooleanParser.valueOf(__cur_str);\n"); + } else if (javaType.equals("java.sql.Date")) { + sb.append(" this." + colName + + " = java.sql.Date.valueOf(__cur_str);\n"); + } else if (javaType.equals("java.sql.Time")) { + sb.append(" this." + colName + + " = java.sql.Time.valueOf(__cur_str);\n"); + } else if (javaType.equals("java.sql.Timestamp")) { + sb.append(" this." + colName + + " = java.sql.Timestamp.valueOf(__cur_str);\n"); + } else if (javaType.equals("java.math.BigDecimal")) { + sb.append(" this." + colName + + " = new java.math.BigDecimal(__cur_str);\n"); + } else if (javaType.equals(ClobRef.class.getName())) { + sb.append(" this." + colName + " = ClobRef.parse(__cur_str);\n"); + } else if (javaType.equals(BlobRef.class.getName())) { + sb.append(" this." + colName + " = BlobRef.parse(__cur_str);\n"); + } else { + LOG.error("No parser available for Java type " + javaType); + } + + sb.append(" }\n\n"); // the closing '{' based on code in parseNullVal(); + } + + /** + * Generate the parse() method. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param sb - StringBuilder to append code to + */ + private void generateParser(Map columnTypes, + String [] colNames, StringBuilder sb) { + + // Embed into the class the delimiter characters to use when parsing input + // records. Note that these can differ from the delims to use as output + // via toString(), if the user wants to use this class to convert one + // format to another. + sb.append(" private final DelimiterSet __inputDelimiters = "); + sb.append(options.getInputDelimiters().formatConstructor() + ";\n"); + + // The parser object which will do the heavy lifting for field splitting. + sb.append(" private RecordParser __parser;\n"); + + // Generate wrapper methods which will invoke the parser. + generateParseMethod("Text", sb); + generateParseMethod("CharSequence", sb); + generateParseMethod("byte []", sb); + generateParseMethod("char []", sb); + generateParseMethod("ByteBuffer", sb); + generateParseMethod("CharBuffer", sb); + + // The wrapper methods call __loadFromFields() to actually interpret the + // raw field data as string, int, boolean, etc. The generation of this + // method is type-dependent for the fields. + sb.append(" private void __loadFromFields(List fields) {\n"); + sb.append(" Iterator __it = fields.listIterator();\n"); + sb.append(" String __cur_str;\n"); + for (String colName : colNames) { + int colType = columnTypes.get(colName); + parseColumn(colName, colType, sb); + } + sb.append(" }\n\n"); + } + + /** + * Generate the write() method used by the Hadoop RPC system. 
+   * @param columnTypes - mapping from column names to sql types
+   * @param colNames - ordered list of column names for table.
+   * @param sb - StringBuilder to append code to
+   */
+  private void generateHadoopWrite(Map<String, Integer> columnTypes,
+      String [] colNames, StringBuilder sb) {
+
+    sb.append("  public void write(DataOutput __dataOut) "
+        + "throws IOException {\n");
+
+    for (String col : colNames) {
+      int sqlType = columnTypes.get(col);
+      String javaType = toJavaType(col, sqlType);
+      if (null == javaType) {
+        LOG.error("No Java type for SQL type " + sqlType
+            + " for column " + col);
+        continue;
+      }
+
+      String setterMethod = rpcSetterForMaybeNull(javaType, "__dataOut", col);
+      if (null == setterMethod) {
+        LOG.error("No RPC setter method for Java type " + javaType);
+        continue;
+      }
+
+      sb.append(setterMethod);
+    }
+
+    sb.append("  }\n");
+  }
+
+  /**
+   * Create a list of identifiers to use based on the true column names
+   * of the table.
+   * @param colNames the actual column names of the table.
+   * @return a list of column names in the same order which are
+   * cleaned up to be used as identifiers in the generated Java class.
+   */
+  private String [] cleanColNames(String [] colNames) {
+    String [] cleanedColNames = new String[colNames.length];
+    for (int i = 0; i < colNames.length; i++) {
+      String col = colNames[i];
+      String identifier = toJavaIdentifier(col);
+      cleanedColNames[i] = identifier;
+    }
+
+    return cleanedColNames;
+  }
+
+
+  /**
+   * Generate the ORM code for the class.
+   */
+  public void generate() throws IOException {
+    Map<String, Integer> columnTypes = getColumnTypes();
+
+    String[] colNames = getColumnNames(columnTypes);
+
+    // Translate all the column names into names that are safe to
+    // use as identifiers.
+    String [] cleanedColNames = cleanColNames(colNames);
+    Set<String> uniqColNames = new HashSet<String>();
+    for (int i = 0; i < colNames.length; i++) {
+      String identifier = cleanedColNames[i];
+
+      // Name can't be blank.
+      if (identifier.isEmpty()) {
+        throw new IllegalArgumentException("Found a column without a "
+            + "name. Please verify that you have specified all column names "
+            + "in your query if you are using a free-form query import "
+            + "(consider adding an AS clause if you are using column "
+            + "transformations)");
+      }
+
+      // Guarantee unique column identifiers.
+      if (uniqColNames.contains(identifier)) {
+        throw new IllegalArgumentException("Duplicate column identifier "
+            + "specified: '" + identifier + "'");
+      }
+      uniqColNames.add(identifier);
+
+      // Make sure the col->type mapping holds for the
+      // new identifier name, too.
+      String col = colNames[i];
+      Integer type = columnTypes.get(col);
+      if (type == null) {
+        // The column has no type mapping, which means the column name
+        // is invalid.
+        throw new IllegalArgumentException("Column name '" + col
+            + "' not in table");
+      }
+      columnTypes.put(identifier, type);
+    }
+
+    // Check that all explicitly mapped columns are present in the result set.
+    Properties mapping = options.getMapColumnJava();
+    if (mapping != null && !mapping.isEmpty()) {
+      for (Object column : mapping.keySet()) {
+        if (!uniqColNames.contains((String) column)) {
+          throw new IllegalArgumentException("No column by the name " + column
+              + " found while importing data");
+        }
+      }
+    }
+
+    // The db write() method may use column names in a different
+    // order. If this is set in the options, pull it out here and
+    // make sure we format the column names to identifiers in the same way
+    // as we do for the ordinary column list.
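+    // For instance, an update-mode export typically writes the updated value
+    // columns first and the update-key columns last, to match the parameter
+    // order of the generated "UPDATE ... SET ... WHERE ..." statement.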
+ String [] dbWriteColNames = options.getDbOutputColumns(); + String [] cleanedDbWriteColNames = null; + if (null == dbWriteColNames) { + cleanedDbWriteColNames = cleanedColNames; + } else { + cleanedDbWriteColNames = cleanColNames(dbWriteColNames); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("selected columns:"); + for (String col : cleanedColNames) { + LOG.debug(" " + col); + } + + if (cleanedDbWriteColNames != cleanedColNames) { + // dbWrite() has a different set of columns than the rest of the + // generators. + LOG.debug("db write column order:"); + for (String dbCol : cleanedDbWriteColNames) { + LOG.debug(" " + dbCol); + } + } + } + + // Generate the Java code. + StringBuilder sb = generateClassForColumns(columnTypes, + cleanedColNames, cleanedDbWriteColNames); + + // Write this out to a file in the jar output directory. + // We'll move it to the user-visible CodeOutputDir after compiling. + String codeOutDir = options.getJarOutputDir(); + + // Get the class name to generate, which includes package components. + String className = new TableClassName(options).getClassForTable(tableName); + // Convert the '.' characters to '/' characters. + String sourceFilename = className.replace('.', File.separatorChar) + + ".java"; + String filename = codeOutDir + sourceFilename; + + if (LOG.isDebugEnabled()) { + LOG.debug("Writing source file: " + filename); + LOG.debug("Table name: " + tableName); + StringBuilder sbColTypes = new StringBuilder(); + for (String col : colNames) { + Integer colType = columnTypes.get(col); + sbColTypes.append(col + ":" + colType + ", "); + } + String colTypeStr = sbColTypes.toString(); + LOG.debug("Columns: " + colTypeStr); + LOG.debug("sourceFilename is " + sourceFilename); + } + + compileManager.addSourceFile(sourceFilename); + + // Create any missing parent directories. + File file = new File(filename); + File dir = file.getParentFile(); + if (null != dir && !dir.exists()) { + boolean mkdirSuccess = dir.mkdirs(); + if (!mkdirSuccess) { + LOG.debug("Could not create directory tree for " + dir); + } + } + + OutputStream ostream = null; + Writer writer = null; + try { + ostream = new FileOutputStream(filename); + writer = new OutputStreamWriter(ostream); + writer.append(sb.toString()); + } finally { + if (null != writer) { + try { + writer.close(); + } catch (IOException ioe) { + // ignored because we're closing. + } + } + + if (null != ostream) { + try { + ostream.close(); + } catch (IOException ioe) { + // ignored because we're closing. + } + } + } + } + + protected String[] getColumnNames(Map columnTypes) { + String [] colNames = options.getColumns(); + if (null == colNames) { + if (null != tableName) { + // Table-based import. Read column names from table. + colNames = connManager.getColumnNames(tableName); + } else { + // Infer/assign column names for arbitrary query. + colNames = connManager.getColumnNamesForQuery( + this.options.getSqlQuery()); + } + } else { + // These column names were provided by the user. They may not be in + // the same case as the keys in the columnTypes map. So make sure + // we add the appropriate aliases in that map. + for (String userColName : colNames) { + for (Map.Entry typeEntry : columnTypes.entrySet()) { + String typeColName = typeEntry.getKey(); + if (typeColName.equalsIgnoreCase(userColName) + && !typeColName.equals(userColName)) { + // We found the correct-case equivalent. + columnTypes.put(userColName, typeEntry.getValue()); + // No need to continue iteration; only one could match. 
+ // Also, the use of put() just invalidated the iterator. + break; + } + } + } + } + return colNames; + } + + protected Map getColumnTypes() throws IOException { + return connManager.getColumnTypes(tableName, options.getSqlQuery()); + } + + /** + * Generate the ORM code for a table object containing the named columns. + * @param columnTypes - mapping from column names to sql types + * @param colNames - ordered list of column names for table. + * @param dbWriteColNames - ordered list of column names for the db + * write() method of the class. + * @return - A StringBuilder that contains the text of the class code. + */ + private StringBuilder generateClassForColumns( + Map columnTypes, + String [] colNames, String [] dbWriteColNames) { + if (colNames.length ==0) { + throw new IllegalArgumentException("Attempted to generate class with " + + "no columns!"); + } + StringBuilder sb = new StringBuilder(); + sb.append("// ORM class for " + tableName + "\n"); + sb.append("// WARNING: This class is AUTO-GENERATED. " + + "Modify at your own risk.\n"); + + TableClassName tableNameInfo = new TableClassName(options); + + String packageName = tableNameInfo.getPackageForTable(); + if (null != packageName) { + sb.append("package "); + sb.append(packageName); + sb.append(";\n"); + } + + sb.append("import org.apache.hadoop.io.BytesWritable;\n"); + sb.append("import org.apache.hadoop.io.Text;\n"); + sb.append("import org.apache.hadoop.io.Writable;\n"); + sb.append("import org.apache.hadoop.mapred.lib.db.DBWritable;\n"); + sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n"); + sb.append("import " + DelimiterSet.class.getCanonicalName() + ";\n"); + sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n"); + sb.append("import " + RecordParser.class.getCanonicalName() + ";\n"); + sb.append("import " + BooleanParser.class.getCanonicalName() + ";\n"); + sb.append("import " + BlobRef.class.getCanonicalName() + ";\n"); + sb.append("import " + ClobRef.class.getCanonicalName() + ";\n"); + sb.append("import " + LargeObjectLoader.class.getCanonicalName() + ";\n"); + sb.append("import " + SqoopRecord.class.getCanonicalName() + ";\n"); + sb.append("import java.sql.PreparedStatement;\n"); + sb.append("import java.sql.ResultSet;\n"); + sb.append("import java.sql.SQLException;\n"); + sb.append("import java.io.DataInput;\n"); + sb.append("import java.io.DataOutput;\n"); + sb.append("import java.io.IOException;\n"); + sb.append("import java.nio.ByteBuffer;\n"); + sb.append("import java.nio.CharBuffer;\n"); + sb.append("import java.sql.Date;\n"); + sb.append("import java.sql.Time;\n"); + sb.append("import java.sql.Timestamp;\n"); + sb.append("import java.util.Arrays;\n"); + sb.append("import java.util.Iterator;\n"); + sb.append("import java.util.List;\n"); + sb.append("import java.util.Map;\n"); + sb.append("import java.util.TreeMap;\n"); + sb.append("\n"); + + String className = tableNameInfo.getShortClassForTable(tableName); + sb.append("public class " + className + " extends SqoopRecord " + + " implements DBWritable, Writable {\n"); + sb.append(" private final int PROTOCOL_VERSION = " + + CLASS_WRITER_VERSION + ";\n"); + sb.append( + " public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n"); + sb.append(" protected ResultSet __cur_result_set;\n"); + generateFields(columnTypes, colNames, className, sb); + generateEquals(columnTypes, colNames, className, sb); + generateDbRead(columnTypes, colNames, sb); + generateLoadLargeObjects(columnTypes, colNames, sb); + 
generateDbWrite(columnTypes, dbWriteColNames, sb); + generateHadoopRead(columnTypes, colNames, sb); + generateHadoopWrite(columnTypes, colNames, sb); + generateToString(columnTypes, colNames, sb); + generateParser(columnTypes, colNames, sb); + generateCloneMethod(columnTypes, colNames, sb); + generateGetFieldMap(columnTypes, colNames, sb); + generateSetField(columnTypes, colNames, sb); + + // TODO(aaron): Generate hashCode(), compareTo(), equals() so it can be a + // WritableComparable + + sb.append("}\n"); + + return sb; + } +} diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java new file mode 100644 index 00000000..b5edf654 --- /dev/null +++ b/src/java/org/apache/sqoop/orm/CompilationManager.java @@ -0,0 +1,373 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.orm; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.jar.JarOutputStream; +import java.util.zip.ZipEntry; + +import javax.tools.JavaCompiler; +import javax.tools.JavaFileObject; +import javax.tools.StandardJavaFileManager; +import javax.tools.ToolProvider; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.mapred.JobConf; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.util.FileListing; +import com.cloudera.sqoop.util.Jars; + +/** + * Manages the compilation of a bunch of .java files into .class files + * and eventually a jar. + * + * Also embeds this program's jar into the lib/ directory inside the compiled + * jar to ensure that the job runs correctly. + */ +public class CompilationManager { + + /** If we cannot infer a jar name from a table name, etc., use this. */ + public static final String DEFAULT_CODEGEN_JAR_NAME = + "sqoop-codegen-created.jar"; + + public static final Log LOG = LogFactory.getLog( + CompilationManager.class.getName()); + + private SqoopOptions options; + private List sources; + + public CompilationManager(final SqoopOptions opts) { + options = opts; + sources = new ArrayList(); + } + + public void addSourceFile(String sourceName) { + sources.add(sourceName); + } + + /** + * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home. + * If that doesn't work, check our classpath. + * @return the filename of the hadoop-*-core.jar file. 
+ */ + private String findHadoopCoreJar() { + String hadoopHome = options.getHadoopHome(); + + if (null == hadoopHome) { + LOG.info("$HADOOP_HOME is not set"); + return Jars.getJarPathForClass(JobConf.class); + } + + if (!hadoopHome.endsWith(File.separator)) { + hadoopHome = hadoopHome + File.separator; + } + + File hadoopHomeFile = new File(hadoopHome); + LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath()); + File [] entries = hadoopHomeFile.listFiles(); + + if (null == entries) { + LOG.warn("HADOOP_HOME appears empty or missing"); + return Jars.getJarPathForClass(JobConf.class); + } + + for (File f : entries) { + if (f.getName().startsWith("hadoop-") + && f.getName().endsWith("-core.jar")) { + LOG.info("Found hadoop core jar at: " + f.getAbsolutePath()); + return f.getAbsolutePath(); + } + } + + return Jars.getJarPathForClass(JobConf.class); + } + + /** + * Compile the .java files into .class files via embedded javac call. + * On success, move .java files to the code output dir. + */ + public void compile() throws IOException { + List args = new ArrayList(); + + // ensure that the jar output dir exists. + String jarOutDir = options.getJarOutputDir(); + File jarOutDirObj = new File(jarOutDir); + if (!jarOutDirObj.exists()) { + boolean mkdirSuccess = jarOutDirObj.mkdirs(); + if (!mkdirSuccess) { + LOG.debug("Warning: Could not make directories for " + jarOutDir); + } + } else if (LOG.isDebugEnabled()) { + LOG.debug("Found existing " + jarOutDir); + } + + // Make sure jarOutDir ends with a '/'. + if (!jarOutDir.endsWith(File.separator)) { + jarOutDir = jarOutDir + File.separator; + } + + // find hadoop-*-core.jar for classpath. + String coreJar = findHadoopCoreJar(); + if (null == coreJar) { + // Couldn't find a core jar to insert into the CP for compilation. If, + // however, we're running this from a unit test, then the path to the + // .class files might be set via the hadoop.alt.classpath property + // instead. Check there first. + String coreClassesPath = System.getProperty("hadoop.alt.classpath"); + if (null == coreClassesPath) { + // no -- we're out of options. Fail. 
+ throw new IOException("Could not find hadoop core jar!"); + } else { + coreJar = coreClassesPath; + } + } + + // find sqoop jar for compilation classpath + String sqoopJar = Jars.getSqoopJarPath(); + if (null != sqoopJar) { + sqoopJar = File.pathSeparator + sqoopJar; + } else { + LOG.warn("Could not find sqoop jar; child compilation may fail"); + sqoopJar = ""; + } + + String curClasspath = System.getProperty("java.class.path"); + + args.add("-sourcepath"); + args.add(jarOutDir); + + args.add("-d"); + args.add(jarOutDir); + + args.add("-classpath"); + args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar); + + JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); + if (null == compiler) { + LOG.error("It seems as though you are running sqoop with a JRE."); + LOG.error("Sqoop requires a JDK that can compile Java code."); + LOG.error("Please install a JDK and set $JAVA_HOME to use it."); + throw new IOException("Could not start Java compiler."); + } + StandardJavaFileManager fileManager = + compiler.getStandardFileManager(null, null, null); + + ArrayList srcFileNames = new ArrayList(); + for (String srcfile : sources) { + srcFileNames.add(jarOutDir + srcfile); + LOG.debug("Adding source file: " + jarOutDir + srcfile); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Invoking javac with args:"); + for (String arg : args) { + LOG.debug(" " + arg); + } + } + + Iterable srcFileObjs = + fileManager.getJavaFileObjectsFromStrings(srcFileNames); + JavaCompiler.CompilationTask task = compiler.getTask( + null, // Write to stderr + fileManager, + null, // No special diagnostic handling + args, + null, // Compile all classes in the source compilation units + srcFileObjs); + + boolean result = task.call(); + if (!result) { + throw new IOException("Error returned by javac"); + } + + // Where we should move source files after compilation. + String srcOutDir = new File(options.getCodeOutputDir()).getAbsolutePath(); + if (!srcOutDir.endsWith(File.separator)) { + srcOutDir = srcOutDir + File.separator; + } + + // Move these files to the srcOutDir. + for (String srcFileName : sources) { + String orig = jarOutDir + srcFileName; + String dest = srcOutDir + srcFileName; + File fOrig = new File(orig); + File fDest = new File(dest); + File fDestParent = fDest.getParentFile(); + if (null != fDestParent && !fDestParent.exists()) { + if (!fDestParent.mkdirs()) { + LOG.error("Could not make directory: " + fDestParent); + } + } + try { + FileUtils.moveFile(fOrig, fDest); + } catch (IOException e) { + LOG.error("Could not rename " + orig + " to " + dest, e); + } + } + } + + /** + * @return the complete filename of the .jar file to generate. 
*/ + public String getJarFilename() { + String jarOutDir = options.getJarOutputDir(); + String tableName = options.getTableName(); + String specificClassName = options.getClassName(); + + if (specificClassName != null && specificClassName.length() > 0) { + return jarOutDir + specificClassName + ".jar"; + } else if (null != tableName && tableName.length() > 0) { + return jarOutDir + tableName + ".jar"; + } else if (this.sources.size() == 1) { + // if we only have one source file, find it's base name, + // turn "foo.java" into "foo", and then return jarDir + "foo" + ".jar" + String srcFileName = this.sources.get(0); + String basename = new File(srcFileName).getName(); + String [] parts = basename.split("\\."); + String preExtPart = parts[0]; + return jarOutDir + preExtPart + ".jar"; + } else { + return jarOutDir + DEFAULT_CODEGEN_JAR_NAME; + } + } + + /** + * Searches through a directory and its children for .class + * files to add to a jar. + * + * @param dir - The root directory to scan with this algorithm. + * @param jstream - The JarOutputStream to write .class files to. + */ + private void addClassFilesFromDir(File dir, JarOutputStream jstream) + throws IOException { + LOG.debug("Scanning for .class files in directory: " + dir); + List dirEntries = FileListing.getFileListing(dir); + String baseDirName = dir.getAbsolutePath(); + if (!baseDirName.endsWith(File.separator)) { + baseDirName = baseDirName + File.separator; + } + + // For each input class file, create a zipfile entry for it, + // read the file into a buffer, and write it to the jar file. + for (File entry : dirEntries) { + if (!entry.isDirectory()) { + // Chomp off the portion of the full path that is shared + // with the base directory where class files were put; + // we only record the subdir parts in the zip entry. + String fullPath = entry.getAbsolutePath(); + String chompedPath = fullPath.substring(baseDirName.length()); + + boolean include = chompedPath.endsWith(".class") + && sources.contains( + chompedPath.substring(0, chompedPath.length() - ".class".length()) + + ".java"); + + if (include) { + // include this file. + LOG.debug("Got classfile: " + entry.getPath() + " -> " + chompedPath); + ZipEntry ze = new ZipEntry(chompedPath); + jstream.putNextEntry(ze); + copyFileToStream(entry, jstream); + jstream.closeEntry(); + } + } + } + } + + /** + * Create an output jar file to use when executing MapReduce jobs. 
+ */ + public void jar() throws IOException { + String jarOutDir = options.getJarOutputDir(); + + String jarFilename = getJarFilename(); + + LOG.info("Writing jar file: " + jarFilename); + + File jarFileObj = new File(jarFilename); + if (jarFileObj.exists()) { + LOG.debug("Found existing jar (" + jarFilename + "); removing."); + if (!jarFileObj.delete()) { + LOG.warn("Could not remove existing jar file: " + jarFilename); + } + } + + FileOutputStream fstream = null; + JarOutputStream jstream = null; + try { + fstream = new FileOutputStream(jarFilename); + jstream = new JarOutputStream(fstream); + + addClassFilesFromDir(new File(jarOutDir), jstream); + jstream.finish(); + } finally { + if (null != jstream) { + try { + jstream.close(); + } catch (IOException ioe) { + LOG.warn("IOException closing jar stream: " + ioe.toString()); + } + } + + if (null != fstream) { + try { + fstream.close(); + } catch (IOException ioe) { + LOG.warn("IOException closing file stream: " + ioe.toString()); + } + } + } + + LOG.debug("Finished writing jar file " + jarFilename); + } + + private static final int BUFFER_SZ = 4096; + + /** + * Utility method to copy a .class file into the jar stream. + * @param f + * @param ostream + * @throws IOException + */ + private void copyFileToStream(File f, OutputStream ostream) + throws IOException { + FileInputStream fis = new FileInputStream(f); + byte [] buffer = new byte[BUFFER_SZ]; + try { + while (true) { + int bytesReceived = fis.read(buffer); + if (bytesReceived < 1) { + break; + } + + ostream.write(buffer, 0, bytesReceived); + } + } finally { + fis.close(); + } + } +} diff --git a/src/java/org/apache/sqoop/orm/TableClassName.java b/src/java/org/apache/sqoop/orm/TableClassName.java new file mode 100644 index 00000000..88ab6226 --- /dev/null +++ b/src/java/org/apache/sqoop/orm/TableClassName.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.orm; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.SqoopOptions; + +/** + * Reconciles the table name being imported with the class naming information + * specified in SqoopOptions to determine the actual package and class name to + * use for a table. + */ +public class TableClassName { + + public static final Log LOG = LogFactory.getLog( + TableClassName.class.getName()); + + private final SqoopOptions options; + + public TableClassName(final SqoopOptions opts) { + if (null == opts) { + throw new NullPointerException( + "Cannot instantiate a TableClassName on null options."); + } else { + this.options = opts; + } + } + + /** + * Taking into account --class-name and --package-name, return the actual + * package-part which will be used for a class. 
The actual table name being + * generated-for is irrelevant; so not an argument. + * + * @return the package where generated ORM classes go. Will be null for + * top-level. + */ + public String getPackageForTable() { + String predefinedClass = options.getClassName(); + if (null != predefinedClass) { + // If the predefined classname contains a package-part, return that. + int lastDot = predefinedClass.lastIndexOf('.'); + if (-1 == lastDot) { + // No package part. + return null; + } else { + // Return the string up to but not including the last dot. + return predefinedClass.substring(0, lastDot); + } + } else { + // If the user has specified a package name, return it. + // This will be null if the user hasn't specified one -- as we expect. + return options.getPackageName(); + } + } + + /** + * @param tableName the name of the table being imported. + * @return the full name of the class to generate/use to import a table. + */ + public String getClassForTable(String tableName) { + String predefinedClass = options.getClassName(); + if (predefinedClass != null) { + // The user's chosen a specific class name for this job. + return predefinedClass; + } + + String queryName = tableName; + if (null == queryName) { + queryName = "QueryResult"; + } + + String packageName = options.getPackageName(); + if (null != packageName) { + // return packageName.queryName. + return packageName + "." + queryName; + } + + // no specific class; no specific package. + // Just make sure it's a legal identifier. + return ClassWriter.toJavaIdentifier(queryName); + } + + /** + * @return just the last segment of the class name -- all package info + * stripped. + */ + public String getShortClassForTable(String tableName) { + String fullClass = getClassForTable(tableName); + if (null == fullClass) { + return null; + } + + int lastDot = fullClass.lastIndexOf('.'); + if (-1 == lastDot) { + return fullClass; + } else { + return fullClass.substring(lastDot + 1, fullClass.length()); + } + } +} diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java new file mode 100644 index 00000000..44491e9f --- /dev/null +++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java @@ -0,0 +1,971 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.tool; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Properties; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Category; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import com.cloudera.sqoop.ConnFactory; +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.lib.DelimiterSet; +import com.cloudera.sqoop.manager.ConnManager; +import com.cloudera.sqoop.metastore.JobData; + +/** + * Layer on top of SqoopTool that provides some basic common code + * that most SqoopTool implementations will use. + * + * Subclasses should call init() at the top of their run() method, + * and call destroy() at the end in a finally block. + */ +public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool { + + public static final Log LOG = LogFactory.getLog( + BaseSqoopTool.class.getName()); + + public static final String HELP_STR = "\nTry --help for usage instructions."; + + // Here are all the arguments that are used by the standard sqoop tools. + // Their names are recorded here so that tools can share them and their + // use consistently. The argument parser applies the leading '--' to each + // string. 
+ public static final String CONNECT_STRING_ARG = "connect"; + public static final String CONN_MANAGER_CLASS_NAME = + "connection-manager"; + public static final String CONNECT_PARAM_FILE = "connection-param-file"; + public static final String DRIVER_ARG = "driver"; + public static final String USERNAME_ARG = "username"; + public static final String PASSWORD_ARG = "password"; + public static final String PASSWORD_PROMPT_ARG = "P"; + public static final String DIRECT_ARG = "direct"; + public static final String BATCH_ARG = "batch"; + public static final String TABLE_ARG = "table"; + public static final String STAGING_TABLE_ARG = "staging-table"; + public static final String CLEAR_STAGING_TABLE_ARG = "clear-staging-table"; + public static final String COLUMNS_ARG = "columns"; + public static final String SPLIT_BY_ARG = "split-by"; + public static final String WHERE_ARG = "where"; + public static final String HADOOP_HOME_ARG = "hadoop-home"; + public static final String HIVE_HOME_ARG = "hive-home"; + public static final String WAREHOUSE_DIR_ARG = "warehouse-dir"; + public static final String TARGET_DIR_ARG = "target-dir"; + public static final String APPEND_ARG = "append"; + public static final String NULL_STRING = "null-string"; + public static final String INPUT_NULL_STRING = "input-null-string"; + public static final String NULL_NON_STRING = "null-non-string"; + public static final String INPUT_NULL_NON_STRING = "input-null-non-string"; + public static final String MAP_COLUMN_JAVA = "map-column-java"; + public static final String MAP_COLUMN_HIVE = "map-column-hive"; + + public static final String FMT_SEQUENCEFILE_ARG = "as-sequencefile"; + public static final String FMT_TEXTFILE_ARG = "as-textfile"; + public static final String FMT_AVRODATAFILE_ARG = "as-avrodatafile"; + public static final String HIVE_IMPORT_ARG = "hive-import"; + public static final String HIVE_TABLE_ARG = "hive-table"; + public static final String HIVE_OVERWRITE_ARG = "hive-overwrite"; + public static final String HIVE_DROP_DELIMS_ARG = "hive-drop-import-delims"; + public static final String HIVE_DELIMS_REPLACEMENT_ARG = + "hive-delims-replacement"; + public static final String HIVE_PARTITION_KEY_ARG = "hive-partition-key"; + public static final String HIVE_PARTITION_VALUE_ARG = "hive-partition-value"; + public static final String CREATE_HIVE_TABLE_ARG = + "create-hive-table"; + public static final String NUM_MAPPERS_ARG = "num-mappers"; + public static final String NUM_MAPPERS_SHORT_ARG = "m"; + public static final String COMPRESS_ARG = "compress"; + public static final String COMPRESSION_CODEC_ARG = "compression-codec"; + public static final String COMPRESS_SHORT_ARG = "z"; + public static final String DIRECT_SPLIT_SIZE_ARG = "direct-split-size"; + public static final String INLINE_LOB_LIMIT_ARG = "inline-lob-limit"; + public static final String FETCH_SIZE_ARG = "fetch-size"; + public static final String EXPORT_PATH_ARG = "export-dir"; + public static final String FIELDS_TERMINATED_BY_ARG = "fields-terminated-by"; + public static final String LINES_TERMINATED_BY_ARG = "lines-terminated-by"; + public static final String OPTIONALLY_ENCLOSED_BY_ARG = + "optionally-enclosed-by"; + public static final String ENCLOSED_BY_ARG = "enclosed-by"; + public static final String ESCAPED_BY_ARG = "escaped-by"; + public static final String MYSQL_DELIMITERS_ARG = "mysql-delimiters"; + public static final String INPUT_FIELDS_TERMINATED_BY_ARG = + "input-fields-terminated-by"; + public static final String INPUT_LINES_TERMINATED_BY_ARG = 
+ "input-lines-terminated-by"; + public static final String INPUT_OPTIONALLY_ENCLOSED_BY_ARG = + "input-optionally-enclosed-by"; + public static final String INPUT_ENCLOSED_BY_ARG = "input-enclosed-by"; + public static final String INPUT_ESCAPED_BY_ARG = "input-escaped-by"; + public static final String CODE_OUT_DIR_ARG = "outdir"; + public static final String BIN_OUT_DIR_ARG = "bindir"; + public static final String PACKAGE_NAME_ARG = "package-name"; + public static final String CLASS_NAME_ARG = "class-name"; + public static final String JAR_FILE_NAME_ARG = "jar-file"; + public static final String SQL_QUERY_ARG = "query"; + public static final String SQL_QUERY_BOUNDARY = "boundary-query"; + public static final String SQL_QUERY_SHORT_ARG = "e"; + public static final String VERBOSE_ARG = "verbose"; + public static final String HELP_ARG = "help"; + public static final String UPDATE_KEY_ARG = "update-key"; + public static final String UPDATE_MODE_ARG = "update-mode"; + + // Arguments for incremental imports. + public static final String INCREMENT_TYPE_ARG = "incremental"; + public static final String INCREMENT_COL_ARG = "check-column"; + public static final String INCREMENT_LAST_VAL_ARG = "last-value"; + + // HBase arguments. + public static final String HBASE_TABLE_ARG = "hbase-table"; + public static final String HBASE_COL_FAM_ARG = "column-family"; + public static final String HBASE_ROW_KEY_ARG = "hbase-row-key"; + public static final String HBASE_CREATE_TABLE_ARG = "hbase-create-table"; + + + // Arguments for the saved job management system. + public static final String STORAGE_METASTORE_ARG = "meta-connect"; + public static final String JOB_CMD_CREATE_ARG = "create"; + public static final String JOB_CMD_DELETE_ARG = "delete"; + public static final String JOB_CMD_EXEC_ARG = "exec"; + public static final String JOB_CMD_LIST_ARG = "list"; + public static final String JOB_CMD_SHOW_ARG = "show"; + + // Arguments for the metastore. + public static final String METASTORE_SHUTDOWN_ARG = "shutdown"; + + + // Arguments for merging datasets. + public static final String NEW_DATASET_ARG = "new-data"; + public static final String OLD_DATASET_ARG = "onto"; + public static final String MERGE_KEY_ARG = "merge-key"; + + public BaseSqoopTool() { + } + + public BaseSqoopTool(String toolName) { + super(toolName); + } + + protected ConnManager manager; + + public ConnManager getManager() { + return manager; + } + + public void setManager(ConnManager mgr) { + this.manager = mgr; + } + + /** + * Should be called at the beginning of the run() method to initialize + * the connection manager, etc. If this succeeds (returns true), it should + * be paired with a call to destroy(). + * @return true on success, false on failure. + */ + protected boolean init(SqoopOptions sqoopOpts) { + // Get the connection to the database. + try { + JobData data = new JobData(sqoopOpts, this); + this.manager = new ConnFactory(sqoopOpts.getConf()).getManager(data); + return true; + } catch (Exception e) { + LOG.error("Got error creating database manager: " + + StringUtils.stringifyException(e)); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(e); + } + } + + return false; + } + + /** + * Should be called in a 'finally' block at the end of the run() method. 
+ */ + protected void destroy(SqoopOptions sqoopOpts) { + if (null != manager) { + try { + manager.close(); + } catch (SQLException sqlE) { + LOG.warn("Error while closing connection: " + sqlE); + } + } + } + + /** + * Examines a subset of the arrray presented, and determines if it + * contains any non-empty arguments. If so, logs the arguments + * and returns true. + * + * @param argv an array of strings to check. + * @param offset the first element of the array to check + * @param len the number of elements to check + * @return true if there are any non-null, non-empty argument strings + * present. + */ + protected boolean hasUnrecognizedArgs(String [] argv, int offset, int len) { + if (argv == null) { + return false; + } + + boolean unrecognized = false; + boolean printedBanner = false; + for (int i = offset; i < Math.min(argv.length, offset + len); i++) { + if (argv[i] != null && argv[i].length() > 0) { + if (!printedBanner) { + LOG.error("Error parsing arguments for " + getToolName() + ":"); + printedBanner = true; + } + LOG.error("Unrecognized argument: " + argv[i]); + unrecognized = true; + } + } + + return unrecognized; + } + + protected boolean hasUnrecognizedArgs(String [] argv) { + if (null == argv) { + return false; + } + return hasUnrecognizedArgs(argv, 0, argv.length); + } + + + /** + * If argv contains an entry "--", return an array containing all elements + * after the "--" separator. Otherwise, return null. + * @param argv a set of arguments to scan for the subcommand arguments. + */ + protected String [] getSubcommandArgs(String [] argv) { + if (null == argv) { + return null; + } + + for (int i = 0; i < argv.length; i++) { + if (argv[i].equals("--")) { + return Arrays.copyOfRange(argv, i + 1, argv.length); + } + } + + return null; + } + + /** + * @return RelatedOptions used by job management tools. + */ + protected RelatedOptions getJobOptions() { + RelatedOptions relatedOpts = new RelatedOptions( + "Job management arguments"); + relatedOpts.addOption(OptionBuilder.withArgName("jdbc-uri") + .hasArg() + .withDescription("Specify JDBC connect string for the metastore") + .withLongOpt(STORAGE_METASTORE_ARG) + .create()); + + // Create an option-group surrounding the operations a user + // can perform on jobs. + OptionGroup group = new OptionGroup(); + group.addOption(OptionBuilder.withArgName("job-id") + .hasArg() + .withDescription("Create a new saved job") + .withLongOpt(JOB_CMD_CREATE_ARG) + .create()); + group.addOption(OptionBuilder.withArgName("job-id") + .hasArg() + .withDescription("Delete a saved job") + .withLongOpt(JOB_CMD_DELETE_ARG) + .create()); + group.addOption(OptionBuilder.withArgName("job-id") + .hasArg() + .withDescription("Show the parameters for a saved job") + .withLongOpt(JOB_CMD_SHOW_ARG) + .create()); + + Option execOption = OptionBuilder.withArgName("job-id") + .hasArg() + .withDescription("Run a saved job") + .withLongOpt(JOB_CMD_EXEC_ARG) + .create(); + group.addOption(execOption); + + group.addOption(OptionBuilder + .withDescription("List saved jobs") + .withLongOpt(JOB_CMD_LIST_ARG) + .create()); + + relatedOpts.addOptionGroup(group); + + // Since the "common" options aren't used in the job tool, + // add these settings here. 
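+    // (Without these, a command such as "sqoop job --list --verbose" would
+    // not recognize --verbose, since getCommonOptions() is not applied to
+    // the job tool.)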
+ relatedOpts.addOption(OptionBuilder + .withDescription("Print more information while working") + .withLongOpt(VERBOSE_ARG) + .create()); + relatedOpts.addOption(OptionBuilder + .withDescription("Print usage instructions") + .withLongOpt(HELP_ARG) + .create()); + + return relatedOpts; + } + + /** + * @return RelatedOptions used by most/all Sqoop tools. + */ + protected RelatedOptions getCommonOptions() { + // Connection args (common) + RelatedOptions commonOpts = new RelatedOptions("Common arguments"); + commonOpts.addOption(OptionBuilder.withArgName("jdbc-uri") + .hasArg().withDescription("Specify JDBC connect string") + .withLongOpt(CONNECT_STRING_ARG) + .create()); + commonOpts.addOption(OptionBuilder.withArgName("class-name") + .hasArg().withDescription("Specify connection manager class name") + .withLongOpt(CONN_MANAGER_CLASS_NAME) + .create()); + commonOpts.addOption(OptionBuilder.withArgName("properties-file") + .hasArg().withDescription("Specify connection parameters file") + .withLongOpt(CONNECT_PARAM_FILE) + .create()); + commonOpts.addOption(OptionBuilder.withArgName("class-name") + .hasArg().withDescription("Manually specify JDBC driver class to use") + .withLongOpt(DRIVER_ARG) + .create()); + commonOpts.addOption(OptionBuilder.withArgName("username") + .hasArg().withDescription("Set authentication username") + .withLongOpt(USERNAME_ARG) + .create()); + commonOpts.addOption(OptionBuilder.withArgName("password") + .hasArg().withDescription("Set authentication password") + .withLongOpt(PASSWORD_ARG) + .create()); + commonOpts.addOption(OptionBuilder + .withDescription("Read password from console") + .create(PASSWORD_PROMPT_ARG)); + + commonOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg().withDescription("Override $HADOOP_HOME") + .withLongOpt(HADOOP_HOME_ARG) + .create()); + + // misc (common) + commonOpts.addOption(OptionBuilder + .withDescription("Print more information while working") + .withLongOpt(VERBOSE_ARG) + .create()); + commonOpts.addOption(OptionBuilder + .withDescription("Print usage instructions") + .withLongOpt(HELP_ARG) + .create()); + + return commonOpts; + } + + /** + * @param explicitHiveImport true if the user has an explicit --hive-import + * available, or false if this is implied by the tool. 
+ * @return options governing interaction with Hive + */ + protected RelatedOptions getHiveOptions(boolean explicitHiveImport) { + RelatedOptions hiveOpts = new RelatedOptions("Hive arguments"); + if (explicitHiveImport) { + hiveOpts.addOption(OptionBuilder + .withDescription("Import tables into Hive " + + "(Uses Hive's default delimiters if none are set.)") + .withLongOpt(HIVE_IMPORT_ARG) + .create()); + } + + hiveOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg().withDescription("Override $HIVE_HOME") + .withLongOpt(HIVE_HOME_ARG) + .create()); + hiveOpts.addOption(OptionBuilder + .withDescription("Overwrite existing data in the Hive table") + .withLongOpt(HIVE_OVERWRITE_ARG) + .create()); + hiveOpts.addOption(OptionBuilder + .withDescription("Fail if the target hive table exists") + .withLongOpt(CREATE_HIVE_TABLE_ARG) + .create()); + hiveOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg() + .withDescription("Sets the table name to use when importing to hive") + .withLongOpt(HIVE_TABLE_ARG) + .create()); + hiveOpts.addOption(OptionBuilder + .withDescription("Drop Hive record \\0x01 and row delimiters " + + "(\\n\\r) from imported string fields") + .withLongOpt(HIVE_DROP_DELIMS_ARG) + .create()); + hiveOpts.addOption(OptionBuilder + .hasArg() + .withDescription("Replace Hive record \\0x01 and row delimiters " + + "(\\n\\r) from imported string fields with user-defined string") + .withLongOpt(HIVE_DELIMS_REPLACEMENT_ARG) + .create()); + hiveOpts.addOption(OptionBuilder.withArgName("partition-key") + .hasArg() + .withDescription("Sets the partition key to use when importing to hive") + .withLongOpt(HIVE_PARTITION_KEY_ARG) + .create()); + hiveOpts.addOption(OptionBuilder.withArgName("partition-value") + .hasArg() + .withDescription("Sets the partition value to use when importing " + + "to hive") + .withLongOpt(HIVE_PARTITION_VALUE_ARG) + .create()); + hiveOpts.addOption(OptionBuilder + .hasArg() + .withDescription("Override mapping for specific column to hive" + + " types.") + .withLongOpt(MAP_COLUMN_HIVE) + .create()); + + return hiveOpts; + } + + /** + * @return options governing output format delimiters + */ + protected RelatedOptions getOutputFormatOptions() { + RelatedOptions formatOpts = new RelatedOptions( + "Output line formatting arguments"); + formatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the field separator character") + .withLongOpt(FIELDS_TERMINATED_BY_ARG) + .create()); + formatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the end-of-line character") + .withLongOpt(LINES_TERMINATED_BY_ARG) + .create()); + formatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets a field enclosing character") + .withLongOpt(OPTIONALLY_ENCLOSED_BY_ARG) + .create()); + formatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets a required field enclosing character") + .withLongOpt(ENCLOSED_BY_ARG) + .create()); + formatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the escape character") + .withLongOpt(ESCAPED_BY_ARG) + .create()); + formatOpts.addOption(OptionBuilder + .withDescription("Uses MySQL's default delimiter set: " + + "fields: , lines: \\n escaped-by: \\ optionally-enclosed-by: '") + .withLongOpt(MYSQL_DELIMITERS_ARG) + .create()); + + return formatOpts; + } + + /** + * @return options governing input format delimiters. 
+ */ + protected RelatedOptions getInputFormatOptions() { + RelatedOptions inputFormatOpts = + new RelatedOptions("Input parsing arguments"); + inputFormatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the input field separator") + .withLongOpt(INPUT_FIELDS_TERMINATED_BY_ARG) + .create()); + inputFormatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the input end-of-line char") + .withLongOpt(INPUT_LINES_TERMINATED_BY_ARG) + .create()); + inputFormatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets a field enclosing character") + .withLongOpt(INPUT_OPTIONALLY_ENCLOSED_BY_ARG) + .create()); + inputFormatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets a required field encloser") + .withLongOpt(INPUT_ENCLOSED_BY_ARG) + .create()); + inputFormatOpts.addOption(OptionBuilder.withArgName("char") + .hasArg() + .withDescription("Sets the input escape character") + .withLongOpt(INPUT_ESCAPED_BY_ARG) + .create()); + + return inputFormatOpts; + } + + /** + * @param multiTable true if these options will be used for bulk code-gen. + * @return options related to code generation. + */ + protected RelatedOptions getCodeGenOpts(boolean multiTable) { + RelatedOptions codeGenOpts = + new RelatedOptions("Code generation arguments"); + codeGenOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg() + .withDescription("Output directory for generated code") + .withLongOpt(CODE_OUT_DIR_ARG) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg() + .withDescription("Output directory for compiled objects") + .withLongOpt(BIN_OUT_DIR_ARG) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("name") + .hasArg() + .withDescription("Put auto-generated classes in this package") + .withLongOpt(PACKAGE_NAME_ARG) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-str") + .hasArg() + .withDescription("Null string representation") + .withLongOpt(NULL_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-str") + .hasArg() + .withDescription("Input null string representation") + .withLongOpt(INPUT_NULL_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-str") + .hasArg() + .withDescription("Null non-string representation") + .withLongOpt(NULL_NON_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder.withArgName("null-str") + .hasArg() + .withDescription("Input null non-string representation") + .withLongOpt(INPUT_NULL_NON_STRING) + .create()); + codeGenOpts.addOption(OptionBuilder + .hasArg() + .withDescription("Override mapping for specific columns to java types") + .withLongOpt(MAP_COLUMN_JAVA) + .create()); + + if (!multiTable) { + codeGenOpts.addOption(OptionBuilder.withArgName("name") + .hasArg() + .withDescription("Sets the generated class name. " + + "This overrides --" + PACKAGE_NAME_ARG + ". When combined " + + "with --" + JAR_FILE_NAME_ARG + ", sets the input class.") + .withLongOpt(CLASS_NAME_ARG) + .create()); + } + return codeGenOpts; + } + + protected RelatedOptions getHBaseOptions() { + RelatedOptions hbaseOpts = + new RelatedOptions("HBase arguments"); + hbaseOpts.addOption(OptionBuilder.withArgName("table") + .hasArg() + .withDescription("Import to
in HBase") + .withLongOpt(HBASE_TABLE_ARG) + .create()); + hbaseOpts.addOption(OptionBuilder.withArgName("family") + .hasArg() + .withDescription("Sets the target column family for the import") + .withLongOpt(HBASE_COL_FAM_ARG) + .create()); + hbaseOpts.addOption(OptionBuilder.withArgName("col") + .hasArg() + .withDescription("Specifies which input column to use as the row key") + .withLongOpt(HBASE_ROW_KEY_ARG) + .create()); + hbaseOpts.addOption(OptionBuilder + .withDescription("If specified, create missing HBase tables") + .withLongOpt(HBASE_CREATE_TABLE_ARG) + .create()); + + return hbaseOpts; + } + + + + /** + * Apply common command-line to the state. + */ + protected void applyCommonOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + // common options. + if (in.hasOption(VERBOSE_ARG)) { + // Immediately switch into DEBUG logging. + Category sqoopLogger = Logger.getLogger( + Sqoop.class.getName()).getParent(); + sqoopLogger.setLevel(Level.DEBUG); + LOG.debug("Enabled debug logging."); + } + + if (in.hasOption(HELP_ARG)) { + ToolOptions toolOpts = new ToolOptions(); + configureOptions(toolOpts); + printHelp(toolOpts); + throw new InvalidOptionsException(""); + } + + if (in.hasOption(CONNECT_STRING_ARG)) { + out.setConnectString(in.getOptionValue(CONNECT_STRING_ARG)); + } + + if (in.hasOption(CONN_MANAGER_CLASS_NAME)) { + out.setConnManagerClassName(in.getOptionValue(CONN_MANAGER_CLASS_NAME)); + } + + if (in.hasOption(CONNECT_PARAM_FILE)) { + File paramFile = new File(in.getOptionValue(CONNECT_PARAM_FILE)); + if (!paramFile.exists()) { + throw new InvalidOptionsException( + "Specified connection parameter file not found: " + paramFile); + } + InputStream inStream = null; + Properties connectionParams = new Properties(); + try { + inStream = new FileInputStream( + new File(in.getOptionValue(CONNECT_PARAM_FILE))); + connectionParams.load(inStream); + } catch (IOException ex) { + LOG.warn("Failed to load connection parameter file", ex); + throw new InvalidOptionsException( + "Error while loading connection parameter file: " + + ex.getMessage()); + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException ex) { + LOG.warn("Failed to close input stream", ex); + } + } + } + LOG.debug("Loaded connection parameters: " + connectionParams); + out.setConnectionParams(connectionParams); + } + + if (in.hasOption(NULL_STRING)) { + out.setNullStringValue(in.getOptionValue(NULL_STRING)); + } + + if (in.hasOption(INPUT_NULL_STRING)) { + out.setInNullStringValue(in.getOptionValue(INPUT_NULL_STRING)); + } + + if (in.hasOption(NULL_NON_STRING)) { + out.setNullNonStringValue(in.getOptionValue(NULL_NON_STRING)); + } + + if (in.hasOption(INPUT_NULL_NON_STRING)) { + out.setInNullNonStringValue(in.getOptionValue(INPUT_NULL_NON_STRING)); + } + + if (in.hasOption(DRIVER_ARG)) { + out.setDriverClassName(in.getOptionValue(DRIVER_ARG)); + } + + if (in.hasOption(USERNAME_ARG)) { + out.setUsername(in.getOptionValue(USERNAME_ARG)); + if (null == out.getPassword()) { + // Set password to empty if the username is set first, + // to ensure that they're either both null or neither is. + out.setPassword(""); + } + } + + if (in.hasOption(PASSWORD_ARG)) { + LOG.warn("Setting your password on the command-line is insecure. 
" + + "Consider using -" + PASSWORD_PROMPT_ARG + " instead."); + out.setPassword(in.getOptionValue(PASSWORD_ARG)); + } + + if (in.hasOption(PASSWORD_PROMPT_ARG)) { + out.setPasswordFromConsole(); + } + + if (in.hasOption(HADOOP_HOME_ARG)) { + out.setHadoopHome(in.getOptionValue(HADOOP_HOME_ARG)); + } + + } + + protected void applyHiveOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + if (in.hasOption(HIVE_HOME_ARG)) { + out.setHiveHome(in.getOptionValue(HIVE_HOME_ARG)); + } + + if (in.hasOption(HIVE_IMPORT_ARG)) { + out.setHiveImport(true); + } + + if (in.hasOption(HIVE_OVERWRITE_ARG)) { + out.setOverwriteHiveTable(true); + } + + if (in.hasOption(CREATE_HIVE_TABLE_ARG)) { + out.setFailIfHiveTableExists(true); + } + + if (in.hasOption(HIVE_TABLE_ARG)) { + out.setHiveTableName(in.getOptionValue(HIVE_TABLE_ARG)); + } + + if (in.hasOption(HIVE_DROP_DELIMS_ARG)) { + out.setHiveDropDelims(true); + } + + if (in.hasOption(HIVE_DELIMS_REPLACEMENT_ARG)) { + out.setHiveDelimsReplacement( + in.getOptionValue(HIVE_DELIMS_REPLACEMENT_ARG)); + } + + if (in.hasOption(HIVE_PARTITION_KEY_ARG)) { + out.setHivePartitionKey(in.getOptionValue(HIVE_PARTITION_KEY_ARG)); + } + + if (in.hasOption(HIVE_PARTITION_VALUE_ARG)) { + out.setHivePartitionValue(in.getOptionValue(HIVE_PARTITION_VALUE_ARG)); + } + + if (in.hasOption(MAP_COLUMN_HIVE)) { + out.setMapColumnHive(in.getOptionValue(MAP_COLUMN_HIVE)); + } + } + + protected void applyOutputFormatOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + if (in.hasOption(FIELDS_TERMINATED_BY_ARG)) { + out.setFieldsTerminatedBy(SqoopOptions.toChar( + in.getOptionValue(FIELDS_TERMINATED_BY_ARG))); + out.setExplicitDelims(true); + } + + if (in.hasOption(LINES_TERMINATED_BY_ARG)) { + out.setLinesTerminatedBy(SqoopOptions.toChar( + in.getOptionValue(LINES_TERMINATED_BY_ARG))); + out.setExplicitDelims(true); + } + + if (in.hasOption(OPTIONALLY_ENCLOSED_BY_ARG)) { + out.setEnclosedBy(SqoopOptions.toChar( + in.getOptionValue(OPTIONALLY_ENCLOSED_BY_ARG))); + out.setOutputEncloseRequired(false); + out.setExplicitDelims(true); + } + + if (in.hasOption(ENCLOSED_BY_ARG)) { + out.setEnclosedBy(SqoopOptions.toChar( + in.getOptionValue(ENCLOSED_BY_ARG))); + out.setOutputEncloseRequired(true); + out.setExplicitDelims(true); + } + + if (in.hasOption(ESCAPED_BY_ARG)) { + out.setEscapedBy(SqoopOptions.toChar( + in.getOptionValue(ESCAPED_BY_ARG))); + out.setExplicitDelims(true); + } + + if (in.hasOption(MYSQL_DELIMITERS_ARG)) { + out.setOutputEncloseRequired(false); + out.setFieldsTerminatedBy(','); + out.setLinesTerminatedBy('\n'); + out.setEscapedBy('\\'); + out.setEnclosedBy('\''); + out.setExplicitDelims(true); + } + } + + protected void applyInputFormatOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + if (in.hasOption(INPUT_FIELDS_TERMINATED_BY_ARG)) { + out.setInputFieldsTerminatedBy(SqoopOptions.toChar( + in.getOptionValue(INPUT_FIELDS_TERMINATED_BY_ARG))); + } + + if (in.hasOption(INPUT_LINES_TERMINATED_BY_ARG)) { + out.setInputLinesTerminatedBy(SqoopOptions.toChar( + in.getOptionValue(INPUT_LINES_TERMINATED_BY_ARG))); + } + + if (in.hasOption(INPUT_OPTIONALLY_ENCLOSED_BY_ARG)) { + out.setInputEnclosedBy(SqoopOptions.toChar( + in.getOptionValue(INPUT_OPTIONALLY_ENCLOSED_BY_ARG))); + out.setInputEncloseRequired(false); + } + + if (in.hasOption(INPUT_ENCLOSED_BY_ARG)) { + out.setInputEnclosedBy(SqoopOptions.toChar( + in.getOptionValue(INPUT_ENCLOSED_BY_ARG))); + out.setInputEncloseRequired(true); + } + + 
if (in.hasOption(INPUT_ESCAPED_BY_ARG)) { + out.setInputEscapedBy(SqoopOptions.toChar( + in.getOptionValue(INPUT_ESCAPED_BY_ARG))); + } + } + + protected void applyCodeGenOptions(CommandLine in, SqoopOptions out, + boolean multiTable) throws InvalidOptionsException { + if (in.hasOption(CODE_OUT_DIR_ARG)) { + out.setCodeOutputDir(in.getOptionValue(CODE_OUT_DIR_ARG)); + } + + if (in.hasOption(BIN_OUT_DIR_ARG)) { + out.setJarOutputDir(in.getOptionValue(BIN_OUT_DIR_ARG)); + } + + if (in.hasOption(PACKAGE_NAME_ARG)) { + out.setPackageName(in.getOptionValue(PACKAGE_NAME_ARG)); + } + + if (in.hasOption(MAP_COLUMN_JAVA)) { + out.setMapColumn(in.getOptionValue(MAP_COLUMN_JAVA)); + } + + if (!multiTable && in.hasOption(CLASS_NAME_ARG)) { + out.setClassName(in.getOptionValue(CLASS_NAME_ARG)); + } + } + + protected void applyHBaseOptions(CommandLine in, SqoopOptions out) { + if (in.hasOption(HBASE_TABLE_ARG)) { + out.setHBaseTable(in.getOptionValue(HBASE_TABLE_ARG)); + } + + if (in.hasOption(HBASE_COL_FAM_ARG)) { + out.setHBaseColFamily(in.getOptionValue(HBASE_COL_FAM_ARG)); + } + + if (in.hasOption(HBASE_ROW_KEY_ARG)) { + out.setHBaseRowKeyColumn(in.getOptionValue(HBASE_ROW_KEY_ARG)); + } + + if (in.hasOption(HBASE_CREATE_TABLE_ARG)) { + out.setCreateHBaseTable(true); + } + } + + protected void validateCommonOptions(SqoopOptions options) + throws InvalidOptionsException { + if (options.getConnectString() == null) { + throw new InvalidOptionsException( + "Error: Required argument --connect is missing." + + HELP_STR); + } + } + + protected void validateCodeGenOptions(SqoopOptions options) + throws InvalidOptionsException { + if (options.getClassName() != null && options.getPackageName() != null) { + throw new InvalidOptionsException( + "--class-name overrides --package-name. You cannot use both." + + HELP_STR); + } + } + + protected void validateOutputFormatOptions(SqoopOptions options) + throws InvalidOptionsException { + if (options.doHiveImport()) { + if (!options.explicitDelims()) { + // user hasn't manually specified delimiters, and wants to import + // straight to Hive. Use Hive-style delimiters. + LOG.info("Using Hive-specific delimiters for output. You can override"); + LOG.info("delimiters with --fields-terminated-by, etc."); + options.setOutputDelimiters(DelimiterSet.HIVE_DELIMITERS); + } + + if (options.getOutputEscapedBy() != DelimiterSet.NULL_CHAR) { + LOG.warn("Hive does not support escape characters in fields;"); + LOG.warn("parse errors in Hive may result from using --escaped-by."); + } + + if (options.getOutputEnclosedBy() != DelimiterSet.NULL_CHAR) { + LOG.warn("Hive does not support quoted strings; parse errors"); + LOG.warn("in Hive may result from using --enclosed-by."); + } + } + } + + protected void validateHiveOptions(SqoopOptions options) + throws InvalidOptionsException { + // Empty; this method is present to maintain API consistency, and + // is reserved for future constraints on Hive options. + if (options.getHiveDelimsReplacement() != null + && options.doHiveDropDelims()) { + throw new InvalidOptionsException("The " + HIVE_DROP_DELIMS_ARG + + " option conflicts with the " + HIVE_DELIMS_REPLACEMENT_ARG + + " option." 
+ HELP_STR); + } + } + + protected void validateHBaseOptions(SqoopOptions options) + throws InvalidOptionsException { + if ((options.getHBaseColFamily() != null && options.getHBaseTable() == null) + || (options.getHBaseColFamily() == null + && options.getHBaseTable() != null)) { + throw new InvalidOptionsException( + "Both --hbase-table and --column-family must be set together." + + HELP_STR); + } + } + + /** + * Given an array of extra arguments (usually populated via + * this.extraArguments), determine the offset of the first '--' + * argument in the list. Return 'extra.length' if there is none. + */ + protected int getDashPosition(String [] extra) { + int dashPos = extra.length; + for (int i = 0; i < extra.length; i++) { + if (extra[i].equals("--")) { + dashPos = i; + break; + } + } + + return dashPos; + } +} + diff --git a/src/java/org/apache/sqoop/tool/CodeGenTool.java b/src/java/org/apache/sqoop/tool/CodeGenTool.java new file mode 100644 index 00000000..e828e4c9 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/CodeGenTool.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.hive.HiveImport; +import com.cloudera.sqoop.orm.ClassWriter; +import com.cloudera.sqoop.orm.CompilationManager; + +/** + * Tool that generates code from a database schema. + */ +public class CodeGenTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog(CodeGenTool.class.getName()); + + private List generatedJarFiles; + + public CodeGenTool() { + super("codegen"); + generatedJarFiles = new ArrayList(); + } + + /** + * @return a list of jar files generated as part of this import process + */ + public List getGeneratedJarFiles() { + ArrayList out = new ArrayList(generatedJarFiles); + return out; + } + + /** + * Generate the .class and .jar files. + * @return the filename of the emitted jar file. 
+ * @throws IOException + */ + public String generateORM(SqoopOptions options, String tableName) + throws IOException { + String existingJar = options.getExistingJarName(); + if (existingJar != null) { + // This code generator is being invoked as part of an import or export + // process, and the user has pre-specified a jar and class to use. + // Don't generate. + LOG.info("Using existing jar: " + existingJar); + return existingJar; + } + + LOG.info("Beginning code generation"); + CompilationManager compileMgr = new CompilationManager(options); + ClassWriter classWriter = new ClassWriter(options, manager, tableName, + compileMgr); + classWriter.generate(); + compileMgr.compile(); + compileMgr.jar(); + String jarFile = compileMgr.getJarFilename(); + this.generatedJarFiles.add(jarFile); + return jarFile; + } + + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + if (!init(options)) { + return 1; + } + + try { + generateORM(options, options.getTableName()); + + // If the user has also specified Hive import code generation, + // use a HiveImport to generate the DDL statements and write + // them to files (but don't actually perform the import -- thus + // the generateOnly=true in the constructor). + if (options.doHiveImport()) { + HiveImport hiveImport = new HiveImport(options, manager, + options.getConf(), true); + hiveImport.importTable(options.getTableName(), + options.getHiveTableName(), true); + } + + } catch (IOException ioe) { + LOG.error("Encountered IOException running codegen job: " + + StringUtils.stringifyException(ioe)); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } finally { + destroy(options); + } + + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + + toolOptions.addUniqueOptions(getCommonOptions()); + + RelatedOptions codeGenOpts = getCodeGenOpts(false); + codeGenOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg() + .withDescription("Table to generate code for") + .withLongOpt(TABLE_ARG) + .create()); + toolOptions.addUniqueOptions(codeGenOpts); + + toolOptions.addUniqueOptions(getOutputFormatOptions()); + toolOptions.addUniqueOptions(getInputFormatOptions()); + toolOptions.addUniqueOptions(getHiveOptions(true)); + } + + @Override + /** {@inheritDoc} */ + public void printHelp(ToolOptions toolOptions) { + super.printHelp(toolOptions); + System.out.println(""); + System.out.println( + "At minimum, you must specify --connect and --table"); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + if (in.hasOption(TABLE_ARG)) { + out.setTableName(in.getOptionValue(TABLE_ARG)); + } + + applyCommonOptions(in, out); + applyOutputFormatOptions(in, out); + applyInputFormatOptions(in, out); + applyCodeGenOptions(in, out, false); + applyHiveOptions(in, out); + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (hasUnrecognizedArgs(extraArguments)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateCommonOptions(options); + validateCodeGenOptions(options); + validateOutputFormatOptions(options); + validateHiveOptions(options); + + if (options.getTableName() == null) { + throw new InvalidOptionsException( + "--table is required for code generation." 
+ HELP_STR); + } + } +} + diff --git a/src/java/org/apache/sqoop/tool/CreateHiveTableTool.java b/src/java/org/apache/sqoop/tool/CreateHiveTableTool.java new file mode 100644 index 00000000..427376d9 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/CreateHiveTableTool.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.IOException; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.hive.HiveImport; + +/** + * Tool that creates a Hive table definition. + */ +public class CreateHiveTableTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog( + CreateHiveTableTool.class.getName()); + + public CreateHiveTableTool() { + super("create-hive-table"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + if (!init(options)) { + return 1; + } + + try { + HiveImport hiveImport = new HiveImport(options, manager, + options.getConf(), false); + hiveImport.importTable(options.getTableName(), + options.getHiveTableName(), true); + } catch (IOException ioe) { + LOG.error("Encountered IOException running create table job: " + + StringUtils.stringifyException(ioe)); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } finally { + destroy(options); + } + + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + + toolOptions.addUniqueOptions(getCommonOptions()); + + RelatedOptions hiveOpts = getHiveOptions(false); + hiveOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg() + .withDescription("The db table to read the definition from") + .withLongOpt(TABLE_ARG) + .create()); + toolOptions.addUniqueOptions(hiveOpts); + + toolOptions.addUniqueOptions(getOutputFormatOptions()); + } + + @Override + /** {@inheritDoc} */ + public void printHelp(ToolOptions toolOptions) { + super.printHelp(toolOptions); + System.out.println(""); + System.out.println( + "At minimum, you must specify --connect and --table"); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + if (in.hasOption(TABLE_ARG)) { + 
out.setTableName(in.getOptionValue(TABLE_ARG)); + } + + out.setHiveImport(true); + + applyCommonOptions(in, out); + applyHiveOptions(in, out); + applyOutputFormatOptions(in, out); + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (hasUnrecognizedArgs(extraArguments)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateCommonOptions(options); + validateOutputFormatOptions(options); + validateHiveOptions(options); + + if (options.getTableName() == null) { + throw new InvalidOptionsException( + "--table is required for table definition importing." + HELP_STR); + } + } +} + diff --git a/src/java/org/apache/sqoop/tool/EvalSqlTool.java b/src/java/org/apache/sqoop/tool/EvalSqlTool.java new file mode 100644 index 00000000..413aa3d8 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/EvalSqlTool.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.IOException; +import java.io.PrintWriter; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.util.StringUtils; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.util.ResultSetPrinter; + +/** + * Tool that evaluates a SQL statement and displays the results. + */ +public class EvalSqlTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog(EvalSqlTool.class.getName()); + + public EvalSqlTool() { + super("eval"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + if (!init(options)) { + return 1; + } + + PreparedStatement stmt = null; + ResultSet rs = null; + PrintWriter pw = null; + try { + Connection c = manager.getConnection(); + String query = options.getSqlQuery(); + LOG.debug("SQL query: " + query); + stmt = c.prepareStatement(query); + boolean resultType = stmt.execute(); + // Iterate over all the results from this statement. + while (true) { + LOG.debug("resultType=" + resultType); + if (!resultType) { + // This result was an update count. + int updateCount = stmt.getUpdateCount(); + LOG.debug("updateCount=" + updateCount); + if (updateCount == -1) { + // We are done iterating over results from this statement. 
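EvalSqlTool.run() above walks everything a single JDBC Statement can produce, alternating between update counts and result sets until getUpdateCount() returns -1 and getMoreResults() is exhausted. The same loop in self-contained form, callable with any already-open Statement; the printing is illustrative only.

    import java.io.PrintWriter;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public final class MultiResultSketch {
      /**
       * Executes 'sql' and walks every result it produces, whether a
       * result set or an update count.
       */
      public static void drainResults(Statement stmt, String sql, PrintWriter out)
          throws SQLException {
        // execute() returns true when the first result is a ResultSet,
        // false when it is an update count (or there is no result at all).
        boolean isResultSet = stmt.execute(sql);
        while (true) {
          if (isResultSet) {
            ResultSet rs = stmt.getResultSet();
            try {
              while (rs.next()) {
                out.println("row: " + rs.getObject(1));
              }
            } finally {
              rs.close();
            }
          } else {
            int updateCount = stmt.getUpdateCount();
            if (updateCount == -1) {
              break; // No more results of either kind; the iteration is done.
            }
            out.println(updateCount + " row(s) updated.");
          }
          // Advance to the next result, if the statement produced several.
          isResultSet = stmt.getMoreResults();
        }
      }
    }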
+ c.commit(); + break; + } else { + LOG.info(updateCount + " row(s) updated."); + } + } else { + // This yields a ResultSet. + rs = stmt.getResultSet(); + pw = new PrintWriter(System.out, true); + new ResultSetPrinter().printResultSet(pw, rs); + pw.close(); + pw = null; + } + + resultType = stmt.getMoreResults(); + } + } catch (IOException ioe) { + LOG.warn("IOException formatting results: " + + StringUtils.stringifyException(ioe)); + return 1; + } catch (SQLException sqlE) { + LOG.warn("SQL exception executing statement: " + + StringUtils.stringifyException(sqlE)); + return 1; + } finally { + if (null != pw) { + pw.close(); + } + if (null != rs) { + try { + rs.close(); + } catch (SQLException sqlE) { + LOG.warn("SQL exception closing ResultSet: " + + StringUtils.stringifyException(sqlE)); + } + } + if (null != stmt) { + try { + stmt.close(); + } catch (SQLException sqlE) { + LOG.warn("SQL exception closing statement: " + + StringUtils.stringifyException(sqlE)); + } + } + destroy(options); + } + + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + toolOptions.addUniqueOptions(getCommonOptions()); + + RelatedOptions evalOpts = new RelatedOptions("SQL evaluation arguments"); + evalOpts.addOption(OptionBuilder.withArgName("statement") + .hasArg() + .withDescription("Execute 'statement' in SQL and exit") + .withLongOpt(SQL_QUERY_ARG) + .create(SQL_QUERY_SHORT_ARG)); + + toolOptions.addUniqueOptions(evalOpts); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + applyCommonOptions(in, out); + if (in.hasOption(SQL_QUERY_ARG)) { + out.setSqlQuery(in.getOptionValue(SQL_QUERY_ARG)); + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (hasUnrecognizedArgs(extraArguments)) { + throw new InvalidOptionsException(HELP_STR); + } + + String sqlCmd = options.getSqlQuery(); + if (null == sqlCmd || sqlCmd.length() == 0) { + throw new InvalidOptionsException( + "This command requires the " + SQL_QUERY_ARG + " argument." + + HELP_STR); + } + + validateCommonOptions(options); + } +} + diff --git a/src/java/org/apache/sqoop/tool/ExportTool.java b/src/java/org/apache/sqoop/tool/ExportTool.java new file mode 100644 index 00000000..288cd908 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ExportTool.java @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.tool; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.SqoopOptions.UpdateMode; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.manager.ExportJobContext; +import com.cloudera.sqoop.util.ExportException; + +/** + * Tool that performs HDFS exports to databases. + */ +public class ExportTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog(ExportTool.class.getName()); + + private CodeGenTool codeGenerator; + + public ExportTool() { + super("export"); + this.codeGenerator = new CodeGenTool(); + } + + /** + * @return a list of jar files generated as part of this im/export process + */ + public List getGeneratedJarFiles() { + return codeGenerator.getGeneratedJarFiles(); + } + + private void exportTable(SqoopOptions options, String tableName) + throws ExportException, IOException { + String jarFile = null; + + // Generate the ORM code for the tables. + jarFile = codeGenerator.generateORM(options, tableName); + + ExportJobContext context = new ExportJobContext(tableName, jarFile, + options); + if (options.getUpdateKeyCol() != null) { + if (options.getUpdateMode() == UpdateMode.UpdateOnly) { + // UPDATE-based export. + manager.updateTable(context); + } else { + // Mixed update/insert export + manager.upsertTable(context); + } + } else { + // INSERT-based export. + manager.exportTable(context); + } + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + + if (!init(options)) { + return 1; + } + + codeGenerator.setManager(manager); + + if (options.getUpdateKeyCol() != null) { + manager.configureDbOutputColumns(options); + } + + try { + exportTable(options, options.getTableName()); + } catch (IOException ioe) { + LOG.error("Encountered IOException running export job: " + + ioe.toString()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } catch (ExportException ee) { + LOG.error("Error during export: " + ee.toString()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ee); + } else { + return 1; + } + } finally { + destroy(options); + } + + return 0; + } + + /** + * Construct the set of options that control exports. + * @return the RelatedOptions that can be used to parse the export + * arguments. 
+ */ + protected RelatedOptions getExportOptions() { + RelatedOptions exportOpts = new RelatedOptions("Export control arguments"); + + exportOpts.addOption(OptionBuilder + .withDescription("Use direct export fast path") + .withLongOpt(DIRECT_ARG) + .create()); + exportOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg().withDescription("Table to populate") + .withLongOpt(TABLE_ARG) + .create()); + exportOpts.addOption(OptionBuilder.withArgName("n") + .hasArg().withDescription("Use 'n' map tasks to export in parallel") + .withLongOpt(NUM_MAPPERS_ARG) + .create(NUM_MAPPERS_SHORT_ARG)); + exportOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg() + .withDescription("HDFS source path for the export") + .withLongOpt(EXPORT_PATH_ARG) + .create()); + exportOpts.addOption(OptionBuilder.withArgName("key") + .hasArg() + .withDescription("Update records by specified key column") + .withLongOpt(UPDATE_KEY_ARG) + .create()); + exportOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg().withDescription("Intermediate staging table") + .withLongOpt(STAGING_TABLE_ARG) + .create()); + exportOpts.addOption(OptionBuilder + .withDescription("Indicates that any data in " + + "staging table can be deleted") + .withLongOpt(CLEAR_STAGING_TABLE_ARG) + .create()); + exportOpts.addOption(OptionBuilder + .withDescription("Indicates underlying statements " + + "to be executed in batch mode") + .withLongOpt(BATCH_ARG) + .create()); + exportOpts.addOption(OptionBuilder + .withArgName("mode") + .hasArg() + .withDescription("Specifies how updates are performed when " + + "new rows are found with non-matching keys in database") + .withLongOpt(UPDATE_MODE_ARG) + .create()); + + return exportOpts; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + + toolOptions.addUniqueOptions(getCommonOptions()); + toolOptions.addUniqueOptions(getExportOptions()); + + // Input parsing delimiters + toolOptions.addUniqueOptions(getInputFormatOptions()); + + // Used when sending data to a direct-mode export. + toolOptions.addUniqueOptions(getOutputFormatOptions()); + + // get common codegen opts. 
+ RelatedOptions codeGenOpts = getCodeGenOpts(false); + + // add export-specific codegen opts: + codeGenOpts.addOption(OptionBuilder.withArgName("file") + .hasArg() + .withDescription("Disable code generation; use specified jar") + .withLongOpt(JAR_FILE_NAME_ARG) + .create()); + + toolOptions.addUniqueOptions(codeGenOpts); + } + + @Override + /** {@inheritDoc} */ + public void printHelp(ToolOptions toolOptions) { + super.printHelp(toolOptions); + System.out.println(""); + System.out.println( + "At minimum, you must specify --connect, --export-dir, and --table"); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + try { + applyCommonOptions(in, out); + + if (in.hasOption(DIRECT_ARG)) { + out.setDirectMode(true); + } + + if (in.hasOption(BATCH_ARG)) { + out.setBatchMode(true); + } + + if (in.hasOption(TABLE_ARG)) { + out.setTableName(in.getOptionValue(TABLE_ARG)); + } + + if (in.hasOption(NUM_MAPPERS_ARG)) { + out.setNumMappers(Integer.parseInt(in.getOptionValue(NUM_MAPPERS_ARG))); + } + + if (in.hasOption(EXPORT_PATH_ARG)) { + out.setExportDir(in.getOptionValue(EXPORT_PATH_ARG)); + } + + if (in.hasOption(JAR_FILE_NAME_ARG)) { + out.setExistingJarName(in.getOptionValue(JAR_FILE_NAME_ARG)); + } + + if (in.hasOption(UPDATE_KEY_ARG)) { + out.setUpdateKeyCol(in.getOptionValue(UPDATE_KEY_ARG)); + } + + if (in.hasOption(STAGING_TABLE_ARG)) { + out.setStagingTableName(in.getOptionValue(STAGING_TABLE_ARG)); + } + + if (in.hasOption(CLEAR_STAGING_TABLE_ARG)) { + out.setClearStagingTable(true); + } + + applyNewUpdateOptions(in, out); + applyInputFormatOptions(in, out); + applyOutputFormatOptions(in, out); + applyOutputFormatOptions(in, out); + applyCodeGenOptions(in, out, false); + } catch (NumberFormatException nfe) { + throw new InvalidOptionsException("Error: expected numeric argument.\n" + + "Try --help for usage."); + } + } + + /** + * Validate export-specific arguments. + * @param options the configured SqoopOptions to check + */ + protected void validateExportOptions(SqoopOptions options) + throws InvalidOptionsException { + if (options.getTableName() == null) { + throw new InvalidOptionsException("Export requires a --table argument." + + HELP_STR); + } else if (options.getExportDir() == null) { + throw new InvalidOptionsException( + "Export requires an --export-dir argument." + + HELP_STR); + } else if (options.getExistingJarName() != null + && options.getClassName() == null) { + throw new InvalidOptionsException("Jar specified with --jar-file, but no " + + "class specified with --class-name." + HELP_STR); + } else if (options.getExistingJarName() != null + && options.getUpdateKeyCol() != null) { + // We need to regenerate the class with the output column order set + // correctly for the update-based export. So we can't use a premade + // class. 
+ throw new InvalidOptionsException("Jar cannot be specified with " + + "--jar-file when export is running in update mode."); + } else if (options.getStagingTableName() != null + && options.getUpdateKeyCol() != null) { + // Staging table may not be used when export is running in update mode + throw new InvalidOptionsException("Staging table cannot be used when " + + "export is running in update mode."); + } else if (options.getStagingTableName() != null + && options.getStagingTableName().equalsIgnoreCase( + options.getTableName())) { + // Name of staging table and destination table cannot be the same + throw new InvalidOptionsException("Staging table cannot be the same as " + + "the destination table. Name comparison used is case-insensitive."); + } else if (options.doClearStagingTable() + && options.getStagingTableName() == null) { + // Option to clear staging table specified but not the staging table name + throw new InvalidOptionsException("Option to clear the staging table is " + + "specified but the staging table name is not."); + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + // If extraArguments is full, check for '--' followed by args for + // mysqldump or other commands we rely on. + options.setExtraArgs(getSubcommandArgs(extraArguments)); + int dashPos = extraArguments.length; + for (int i = 0; i < extraArguments.length; i++) { + if (extraArguments[i].equals("--")) { + dashPos = i; + break; + } + } + + if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateExportOptions(options); + validateOutputFormatOptions(options); + validateCommonOptions(options); + validateCodeGenOptions(options); + } + + private void applyNewUpdateOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + if (in.hasOption(UPDATE_MODE_ARG)) { + String updateTypeStr = in.getOptionValue(UPDATE_MODE_ARG); + if ("updateonly".equals(updateTypeStr)) { + out.setUpdateMode(UpdateMode.UpdateOnly); + } else if ("allowinsert".equals(updateTypeStr)) { + out.setUpdateMode(UpdateMode.AllowInsert); + } else { + throw new InvalidOptionsException("Unknown new update mode: " + + updateTypeStr + ". Use 'updateonly' or 'allowinsert'." + + HELP_STR); + } + } + } +} + diff --git a/src/java/org/apache/sqoop/tool/HelpTool.java b/src/java/org/apache/sqoop/tool/HelpTool.java new file mode 100644 index 00000000..bc1d7e94 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/HelpTool.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.tool; + +import java.util.Set; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.cli.ToolOptions; + +/** + * Tool that explains the usage of Sqoop. + */ +public class HelpTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public HelpTool() { + super("help"); + } + + /** + * @param str the string to right-side pad + * @param num the minimum number of characters to return + * @return 'str' with enough right padding to make it num characters long. + */ + private static String padRight(String str, int num) { + StringBuilder sb = new StringBuilder(); + sb.append(str); + for (int count = str.length(); count < num; count++) { + sb.append(" "); + } + + return sb.toString(); + } + + /** + * Print out a list of all SqoopTool implementations and their + * descriptions. + */ + private void printAvailableTools() { + System.out.println("usage: sqoop COMMAND [ARGS]"); + System.out.println(""); + System.out.println("Available commands:"); + + Set toolNames = getToolNames(); + + int maxWidth = 0; + for (String tool : toolNames) { + maxWidth = Math.max(maxWidth, tool.length()); + } + + for (String tool : toolNames) { + System.out.println(" " + padRight(tool, maxWidth+2) + + getToolDescription(tool)); + } + + System.out.println(""); + System.out.println( + "See 'sqoop help COMMAND' for information on a specific command."); + } + + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + + if (this.extraArguments != null && this.extraArguments.length > 0) { + if (hasUnrecognizedArgs(extraArguments, 1, extraArguments.length)) { + return 1; + } + + SqoopTool subTool = SqoopTool.getTool(extraArguments[0]); + if (null == subTool) { + System.out.println("No such tool: " + extraArguments[0]); + System.out.println( + "Try 'sqoop help' for a list of available commands."); + return 1; + } else { + ToolOptions toolOpts = new ToolOptions(); + subTool.configureOptions(toolOpts); + subTool.printHelp(toolOpts); + return 0; + } + } else { + printAvailableTools(); + } + + return 0; + } + + @Override + public void printHelp(ToolOptions opts) { + System.out.println("usage: sqoop " + getToolName() + " [COMMAND]"); + } +} + diff --git a/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java b/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java new file mode 100644 index 00000000..158a3f18 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ImportAllTablesTool.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.tool; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.hive.HiveImport; +import com.cloudera.sqoop.util.ImportException; + +/** + * Tool that performs database imports of all tables in a database to HDFS. + */ +public class ImportAllTablesTool extends com.cloudera.sqoop.tool.ImportTool { + + public static final Log LOG = LogFactory.getLog( + ImportAllTablesTool.class.getName()); + + public ImportAllTablesTool() { + super("import-all-tables", true); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + HiveImport hiveImport = null; + + if (!init(options)) { + return 1; + } + + try { + if (options.doHiveImport()) { + hiveImport = new HiveImport(options, manager, options.getConf(), false); + } + + String [] tables = manager.listTables(); + if (null == tables) { + System.err.println("Could not retrieve tables list from server"); + LOG.error("manager.listTables() returned null"); + return 1; + } else { + for (String tableName : tables) { + importTable(options, tableName, hiveImport); + } + } + } catch (IOException ioe) { + LOG.error("Encountered IOException running import job: " + + ioe.toString()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } catch (ImportException ie) { + LOG.error("Error during import: " + ie.toString()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ie); + } else { + return 1; + } + } finally { + destroy(options); + } + + return 0; + } + +} + diff --git a/src/java/org/apache/sqoop/tool/ImportTool.java b/src/java/org/apache/sqoop/tool/ImportTool.java new file mode 100644 index 00000000..d5a04e75 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ImportTool.java @@ -0,0 +1,897 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.sqoop.tool; + +import java.io.IOException; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.List; +import java.util.Map; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.util.StringUtils; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.FileLayout; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.hive.HiveImport; +import com.cloudera.sqoop.manager.ImportJobContext; + +import com.cloudera.sqoop.metastore.JobData; +import com.cloudera.sqoop.metastore.JobStorage; +import com.cloudera.sqoop.metastore.JobStorageFactory; +import com.cloudera.sqoop.util.AppendUtils; +import com.cloudera.sqoop.util.ImportException; +import org.apache.hadoop.fs.Path; + +/** + * Tool that performs database imports to HDFS. + */ +public class ImportTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog(ImportTool.class.getName()); + + private CodeGenTool codeGenerator; + + // true if this is an all-tables import. Set by a subclass which + // overrides the run() method of this tool (which can only do + // a single table). + private boolean allTables; + + // store check column type for incremental option + private int checkColumnType; + + public ImportTool() { + this("import", false); + } + + public ImportTool(String toolName, boolean allTables) { + super(toolName); + this.codeGenerator = new CodeGenTool(); + this.allTables = allTables; + } + + @Override + protected boolean init(SqoopOptions sqoopOpts) { + boolean ret = super.init(sqoopOpts); + codeGenerator.setManager(manager); + return ret; + } + + /** + * @return a list of jar files generated as part of this import process + */ + public List getGeneratedJarFiles() { + return this.codeGenerator.getGeneratedJarFiles(); + } + + /** + * @return true if the supplied options specify an incremental import. + */ + private boolean isIncremental(SqoopOptions options) { + return !options.getIncrementalMode().equals( + SqoopOptions.IncrementalMode.None); + } + + /** + * If this is an incremental import, then we should save the + * user's state back to the metastore (if this job was run + * from the metastore). Otherwise, log to the user what data + * they need to supply next time. + */ + private void saveIncrementalState(SqoopOptions options) + throws IOException { + if (!isIncremental(options)) { + return; + } + + Map descriptor = options.getStorageDescriptor(); + String jobName = options.getJobName(); + + if (null != jobName && null != descriptor) { + // Actually save it back to the metastore. + LOG.info("Saving incremental import state to the metastore"); + JobStorageFactory ssf = new JobStorageFactory(options.getConf()); + JobStorage storage = ssf.getJobStorage(descriptor); + storage.open(descriptor); + try { + // Save the 'parent' SqoopOptions; this does not contain the mutations + // to the SqoopOptions state that occurred over the course of this + // execution, except for the one we specifically want to memorize: + // the latest value of the check column. 
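saveIncrementalState() above hands the updated check-column value back to the metastore through JobStorage when the import was launched as a saved job. As a toy stand-in for that idea only, and not Sqoop's metastore API, persisting a per-job last value could look like the sketch below (class name, key format, and file layout are all invented).

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.util.Properties;

    public final class LastValueStoreSketch {
      private final File stateFile;

      public LastValueStoreSketch(File stateFile) {
        this.stateFile = stateFile;
      }

      /** Remembers the newest value seen in the check column for 'jobName'. */
      public void update(String jobName, String lastValue) throws IOException {
        Properties props = load();
        props.setProperty(jobName + ".last.value", lastValue);
        OutputStream out = new FileOutputStream(stateFile);
        try {
          props.store(out, "incremental import state");
        } finally {
          out.close();
        }
      }

      /** @return the saved lower bound for the next run, or null on the first run. */
      public String lastValue(String jobName) throws IOException {
        return load().getProperty(jobName + ".last.value");
      }

      private Properties load() throws IOException {
        Properties props = new Properties();
        if (stateFile.exists()) {
          InputStream in = new FileInputStream(stateFile);
          try {
            props.load(in);
          } finally {
            in.close();
          }
        }
        return props;
      }
    }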
+ JobData data = new JobData(options.getParent(), this); + storage.update(jobName, data); + LOG.info("Updated data for job: " + jobName); + } finally { + storage.close(); + } + } else { + // If there wasn't a parent SqoopOptions, then the incremental + // state data was stored in the current SqoopOptions. + LOG.info("Incremental import complete! To run another incremental " + + "import of all data following this import, supply the " + + "following arguments:"); + SqoopOptions.IncrementalMode incrementalMode = + options.getIncrementalMode(); + switch (incrementalMode) { + case AppendRows: + LOG.info(" --incremental append"); + break; + case DateLastModified: + LOG.info(" --incremental lastmodified"); + break; + default: + LOG.warn("Undefined incremental mode: " + incrementalMode); + break; + } + LOG.info(" --check-column " + options.getIncrementalTestColumn()); + LOG.info(" --last-value " + options.getIncrementalLastValue()); + LOG.info("(Consider saving this with 'sqoop job --create')"); + } + } + + /** + * Return the max value in the incremental-import test column. This + * value must be numeric. + */ + private Object getMaxColumnId(SqoopOptions options) throws SQLException { + StringBuilder sb = new StringBuilder(); + sb.append("SELECT MAX("); + sb.append(options.getIncrementalTestColumn()); + sb.append(") FROM "); + sb.append(options.getTableName()); + + String where = options.getWhereClause(); + if (null != where) { + sb.append(" WHERE "); + sb.append(where); + } + + Connection conn = manager.getConnection(); + Statement s = null; + ResultSet rs = null; + try { + s = conn.createStatement(); + rs = s.executeQuery(sb.toString()); + if (!rs.next()) { + // This probably means the table is empty. + LOG.warn("Unexpected: empty results for max value query?"); + return null; + } + + ResultSetMetaData rsmd = rs.getMetaData(); + checkColumnType = rsmd.getColumnType(1); + if (checkColumnType == Types.TIMESTAMP) { + return rs.getTimestamp(1); + } else if (checkColumnType == Types.DATE) { + return rs.getDate(1); + } else if (checkColumnType == Types.TIME) { + return rs.getTime(1); + } else { + return rs.getObject(1); + } + } finally { + try { + if (null != rs) { + rs.close(); + } + } catch (SQLException sqlE) { + LOG.warn("SQL Exception closing resultset: " + sqlE); + } + + try { + if (null != s) { + s.close(); + } + } catch (SQLException sqlE) { + LOG.warn("SQL Exception closing statement: " + sqlE); + } + } + } + + /** + * Determine if a column is date/time. + * @return true if column type is TIMESTAMP, DATE, or TIME. + */ + private boolean isDateTimeColumn(int columnType) { + return (columnType == Types.TIMESTAMP) + || (columnType == Types.DATE) + || (columnType == Types.TIME); + } + + /** + * Initialize the constraints which set the incremental import range. + * @return false if an import is not necessary, because the dataset has not + * changed. + */ + private boolean initIncrementalConstraints(SqoopOptions options, + ImportJobContext context) throws ImportException, IOException { + + // If this is an incremental import, determine the constraints + // to inject in the WHERE clause or $CONDITIONS for a query. + // Also modify the 'last value' field of the SqoopOptions to + // specify the current job start time / start row. 
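getMaxColumnId() above computes the upper bound for an append-mode import with a single SELECT MAX(...) query and reads the value back in a type-aware way so date/time bounds can be quoted later. A standalone JDBC sketch of that query shape follows; the table and column names come from trusted options in the real code, which is why plain string concatenation is acceptable there and in this sketch.

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.sql.Types;

    public final class MaxValueSketch {
      /**
       * Returns the largest value currently in 'checkColumn', or null when the
       * table is empty (MAX over zero rows yields a single NULL).
       */
      public static Object maxCheckColumnValue(Connection conn, String table,
          String checkColumn) throws SQLException {
        String sql = "SELECT MAX(" + checkColumn + ") FROM " + table;
        Statement s = conn.createStatement();
        try {
          ResultSet rs = s.executeQuery(sql);
          try {
            if (!rs.next()) {
              return null;
            }
            int columnType = rs.getMetaData().getColumnType(1);
            if (columnType == Types.TIMESTAMP) {
              return rs.getTimestamp(1);
            } else if (columnType == Types.DATE) {
              return rs.getDate(1);
            }
            return rs.getObject(1);
          } finally {
            rs.close();
          }
        } finally {
          s.close();
        }
      }
    }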
+ + if (!isIncremental(options)) { + return true; + } + + SqoopOptions.IncrementalMode incrementalMode = options.getIncrementalMode(); + String nextIncrementalValue = null; + + Object nextVal; + switch (incrementalMode) { + case AppendRows: + try { + nextVal = getMaxColumnId(options); + if (isDateTimeColumn(checkColumnType)) { + nextIncrementalValue = (nextVal == null) ? null + : manager.datetimeToQueryString(nextVal.toString(), + checkColumnType); + } else { + nextIncrementalValue = (nextVal == null) ? null : nextVal.toString(); + } + } catch (SQLException sqlE) { + throw new IOException(sqlE); + } + break; + case DateLastModified: + checkColumnType = Types.TIMESTAMP; + nextVal = manager.getCurrentDbTimestamp(); + if (null == nextVal) { + throw new IOException("Could not get current time from database"); + } + nextIncrementalValue = manager.datetimeToQueryString(nextVal.toString(), + checkColumnType); + break; + default: + throw new ImportException("Undefined incremental import type: " + + incrementalMode); + } + + // Build the WHERE clause components that are used to import + // only this incremental section. + StringBuilder sb = new StringBuilder(); + String prevEndpoint = options.getIncrementalLastValue(); + + if (isDateTimeColumn(checkColumnType) && null != prevEndpoint + && !prevEndpoint.startsWith("\'") && !prevEndpoint.endsWith("\'")) { + // Incremental imports based on date/time should be 'quoted' in + // ANSI SQL. If the user didn't specify single-quotes, put them + // around, here. + prevEndpoint = manager.datetimeToQueryString(prevEndpoint, + checkColumnType); + } + + String checkColName = manager.escapeColName( + options.getIncrementalTestColumn()); + LOG.info("Incremental import based on column " + checkColName); + if (null != prevEndpoint) { + if (prevEndpoint.equals(nextIncrementalValue)) { + LOG.info("No new rows detected since last import."); + return false; + } + LOG.info("Lower bound value: " + prevEndpoint); + sb.append(checkColName); + switch (incrementalMode) { + case AppendRows: + sb.append(" > "); + break; + case DateLastModified: + sb.append(" >= "); + break; + default: + throw new ImportException("Undefined comparison"); + } + sb.append(prevEndpoint); + sb.append(" AND "); + } + + if (null != nextIncrementalValue) { + sb.append(checkColName); + switch (incrementalMode) { + case AppendRows: + sb.append(" <= "); + break; + case DateLastModified: + sb.append(" < "); + break; + default: + throw new ImportException("Undefined comparison"); + } + sb.append(nextIncrementalValue); + } else { + sb.append(checkColName); + sb.append(" IS NULL "); + } + + LOG.info("Upper bound value: " + nextIncrementalValue); + + String prevWhereClause = options.getWhereClause(); + if (null != prevWhereClause) { + sb.append(" AND ("); + sb.append(prevWhereClause); + sb.append(")"); + } + + String newConstraints = sb.toString(); + options.setWhereClause(newConstraints); + + // Save this state for next time. + SqoopOptions recordOptions = options.getParent(); + if (null == recordOptions) { + recordOptions = options; + } + recordOptions.setIncrementalLastValue( + (nextVal == null) ? null : nextVal.toString()); + + return true; + } + + /** + * Import a table or query. + * @return true if an import was performed, false otherwise. + */ + protected boolean importTable(SqoopOptions options, String tableName, + HiveImport hiveImport) throws IOException, ImportException { + String jarFile = null; + + // Generate the ORM code for the tables. 
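The constraint assembled above becomes the effective WHERE clause of the incremental import: strictly greater-than for append mode, greater-or-equal and strictly-less-than for lastmodified, with any user-supplied --where clause ANDed on. The following worked illustration mirrors the shape of that logic with made-up column names and bounds; it is a standalone sketch, not the method itself.

    public final class IncrementalWhereSketch {

      enum Mode { APPEND_ROWS, DATE_LAST_MODIFIED }

      /** Builds the same shape of constraint the method above injects. */
      static String buildConstraint(Mode mode, String checkCol,
          String lowerBound, String upperBound, String userWhere) {
        StringBuilder sb = new StringBuilder();
        if (lowerBound != null) {
          sb.append(checkCol)
            .append(mode == Mode.APPEND_ROWS ? " > " : " >= ")
            .append(lowerBound)
            .append(" AND ");
        }
        if (upperBound != null) {
          sb.append(checkCol)
            .append(mode == Mode.APPEND_ROWS ? " <= " : " < ")
            .append(upperBound);
        } else {
          sb.append(checkCol).append(" IS NULL ");
        }
        if (userWhere != null) {
          sb.append(" AND (").append(userWhere).append(")");
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        // --incremental append --check-column id --last-value 100, new MAX(id) = 250
        System.out.println(
            buildConstraint(Mode.APPEND_ROWS, "id", "100", "250", null));
        // prints: id > 100 AND id <= 250

        // --incremental lastmodified --check-column updated_at, quoted bounds,
        // with an existing --where "active = 1"
        System.out.println(buildConstraint(Mode.DATE_LAST_MODIFIED, "updated_at",
            "'2011-09-01 00:00:00'", "'2011-10-01 12:30:00'", "active = 1"));
        // prints: updated_at >= '2011-09-01 00:00:00' AND
        //         updated_at < '2011-10-01 12:30:00' AND (active = 1)
      }
    }

Note that the real method also returns false (skipping the import entirely) when the previous last-value equals the newly computed upper bound, since no new rows exist.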
+ jarFile = codeGenerator.generateORM(options, tableName); + + // Do the actual import. + ImportJobContext context = new ImportJobContext(tableName, jarFile, + options, getOutputPath(options, tableName)); + + // If we're doing an incremental import, set up the + // filtering conditions used to get the latest records. + if (!initIncrementalConstraints(options, context)) { + return false; + } + + if (null != tableName) { + manager.importTable(context); + } else { + manager.importQuery(context); + } + + if (options.isAppendMode()) { + AppendUtils app = new AppendUtils(context); + app.append(); + } + + // If the user wants this table to be in Hive, perform that post-load. + if (options.doHiveImport()) { + hiveImport.importTable(tableName, options.getHiveTableName(), false); + } + + saveIncrementalState(options); + + return true; + } + + /** + * @return the output path for the imported files; + * in append mode this will point to a temporary folder. + * if importing to hbase, this may return null. + */ + private Path getOutputPath(SqoopOptions options, String tableName) { + // Get output directory + String hdfsWarehouseDir = options.getWarehouseDir(); + String hdfsTargetDir = options.getTargetDir(); + Path outputPath = null; + if (options.isAppendMode()) { + // Use temporary path, later removed when appending + outputPath = AppendUtils.getTempAppendDir(tableName); + LOG.debug("Using temporary folder: " + outputPath.getName()); + } else { + // Try in this order: target-dir or warehouse-dir + if (hdfsTargetDir != null) { + outputPath = new Path(hdfsTargetDir); + } else if (hdfsWarehouseDir != null) { + outputPath = new Path(hdfsWarehouseDir, tableName); + } else if (null != tableName) { + outputPath = new Path(tableName); + } + } + + return outputPath; + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + HiveImport hiveImport = null; + + if (allTables) { + // We got into this method, but we should be in a subclass. + // (This method only handles a single table) + // This should not be reached, but for sanity's sake, test here. + LOG.error("ImportTool.run() can only handle a single table."); + return 1; + } + + if (!init(options)) { + return 1; + } + + codeGenerator.setManager(manager); + + try { + if (options.doHiveImport()) { + hiveImport = new HiveImport(options, manager, options.getConf(), false); + } + + // Import a single table (or query) the user specified. + importTable(options, options.getTableName(), hiveImport); + } catch (IllegalArgumentException iea) { + LOG.error("Imported Failed: " + iea.getMessage()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw iea; + } + return 1; + } catch (IOException ioe) { + LOG.error("Encountered IOException running import job: " + + StringUtils.stringifyException(ioe)); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } catch (ImportException ie) { + LOG.error("Error during import: " + ie.toString()); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ie); + } else { + return 1; + } + } finally { + destroy(options); + } + + return 0; + } + + /** + * Construct the set of options that control imports, either of one + * table or a batch of tables. + * @return the RelatedOptions that can be used to parse the import + * arguments. 
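getOutputPath() above resolves the import destination with a simple precedence: append mode uses a temporary directory, otherwise --target-dir wins over --warehouse-dir/<table>, which in turn wins over a bare path named after the table. A sketch of just that precedence, leaving out the append-mode branch; the sample directories are invented.

    import org.apache.hadoop.fs.Path;

    public final class OutputPathSketch {
      /** Mirrors the precedence used above, ignoring the append-mode temp dir. */
      static Path resolve(String targetDir, String warehouseDir, String tableName) {
        if (targetDir != null) {
          return new Path(targetDir);
        }
        if (warehouseDir != null) {
          return new Path(warehouseDir, tableName);
        }
        return tableName != null ? new Path(tableName) : null;
      }

      public static void main(String[] args) {
        System.out.println(resolve("/data/orders", "/warehouse", "orders")); // /data/orders
        System.out.println(resolve(null, "/warehouse", "orders"));           // /warehouse/orders
        System.out.println(resolve(null, null, "orders"));                   // orders
      }
    }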
+ */ + protected RelatedOptions getImportOptions() { + // Imports + RelatedOptions importOpts = new RelatedOptions("Import control arguments"); + + importOpts.addOption(OptionBuilder + .withDescription("Use direct import fast path") + .withLongOpt(DIRECT_ARG) + .create()); + + if (!allTables) { + importOpts.addOption(OptionBuilder.withArgName("table-name") + .hasArg().withDescription("Table to read") + .withLongOpt(TABLE_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("col,col,col...") + .hasArg().withDescription("Columns to import from table") + .withLongOpt(COLUMNS_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("column-name") + .hasArg() + .withDescription("Column of the table used to split work units") + .withLongOpt(SPLIT_BY_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("where clause") + .hasArg().withDescription("WHERE clause to use during import") + .withLongOpt(WHERE_ARG) + .create()); + importOpts.addOption(OptionBuilder + .withDescription("Imports data in append mode") + .withLongOpt(APPEND_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg().withDescription("HDFS plain table destination") + .withLongOpt(TARGET_DIR_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("statement") + .hasArg() + .withDescription("Import results of SQL 'statement'") + .withLongOpt(SQL_QUERY_ARG) + .create(SQL_QUERY_SHORT_ARG)); + importOpts.addOption(OptionBuilder.withArgName("statement") + .hasArg() + .withDescription("Set boundary query for retrieving max and min" + + " value of the primary key") + .withLongOpt(SQL_QUERY_BOUNDARY) + .create()); + } + + importOpts.addOption(OptionBuilder.withArgName("dir") + .hasArg().withDescription("HDFS parent for table destination") + .withLongOpt(WAREHOUSE_DIR_ARG) + .create()); + importOpts.addOption(OptionBuilder + .withDescription("Imports data to SequenceFiles") + .withLongOpt(FMT_SEQUENCEFILE_ARG) + .create()); + importOpts.addOption(OptionBuilder + .withDescription("Imports data as plain text (default)") + .withLongOpt(FMT_TEXTFILE_ARG) + .create()); + importOpts.addOption(OptionBuilder + .withDescription("Imports data to Avro data files") + .withLongOpt(FMT_AVRODATAFILE_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("n") + .hasArg().withDescription("Use 'n' map tasks to import in parallel") + .withLongOpt(NUM_MAPPERS_ARG) + .create(NUM_MAPPERS_SHORT_ARG)); + importOpts.addOption(OptionBuilder + .withDescription("Enable compression") + .withLongOpt(COMPRESS_ARG) + .create(COMPRESS_SHORT_ARG)); + importOpts.addOption(OptionBuilder.withArgName("codec") + .hasArg() + .withDescription("Compression codec to use for import") + .withLongOpt(COMPRESSION_CODEC_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("n") + .hasArg() + .withDescription("Split the input stream every 'n' bytes " + + "when importing in direct mode") + .withLongOpt(DIRECT_SPLIT_SIZE_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("n") + .hasArg() + .withDescription("Set the maximum size for an inline LOB") + .withLongOpt(INLINE_LOB_LIMIT_ARG) + .create()); + importOpts.addOption(OptionBuilder.withArgName("n") + .hasArg() + .withDescription("Set number 'n' of rows to fetch from the " + + "database when more rows are needed") + .withLongOpt(FETCH_SIZE_ARG) + .create()); + + return importOpts; + } + + /** + * Return options for incremental import. 
+ */ + protected RelatedOptions getIncrementalOptions() { + RelatedOptions incrementalOpts = + new RelatedOptions("Incremental import arguments"); + + incrementalOpts.addOption(OptionBuilder.withArgName("import-type") + .hasArg() + .withDescription( + "Define an incremental import of type 'append' or 'lastmodified'") + .withLongOpt(INCREMENT_TYPE_ARG) + .create()); + incrementalOpts.addOption(OptionBuilder.withArgName("column") + .hasArg() + .withDescription("Source column to check for incremental change") + .withLongOpt(INCREMENT_COL_ARG) + .create()); + incrementalOpts.addOption(OptionBuilder.withArgName("value") + .hasArg() + .withDescription("Last imported value in the incremental check column") + .withLongOpt(INCREMENT_LAST_VAL_ARG) + .create()); + + return incrementalOpts; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + + toolOptions.addUniqueOptions(getCommonOptions()); + toolOptions.addUniqueOptions(getImportOptions()); + if (!allTables) { + toolOptions.addUniqueOptions(getIncrementalOptions()); + } + toolOptions.addUniqueOptions(getOutputFormatOptions()); + toolOptions.addUniqueOptions(getInputFormatOptions()); + toolOptions.addUniqueOptions(getHiveOptions(true)); + toolOptions.addUniqueOptions(getHBaseOptions()); + + // get common codegen opts. + RelatedOptions codeGenOpts = getCodeGenOpts(allTables); + + // add import-specific codegen opts: + codeGenOpts.addOption(OptionBuilder.withArgName("file") + .hasArg() + .withDescription("Disable code generation; use specified jar") + .withLongOpt(JAR_FILE_NAME_ARG) + .create()); + + toolOptions.addUniqueOptions(codeGenOpts); + } + + @Override + /** {@inheritDoc} */ + public void printHelp(ToolOptions toolOptions) { + super.printHelp(toolOptions); + System.out.println(""); + if (allTables) { + System.out.println("At minimum, you must specify --connect"); + } else { + System.out.println( + "At minimum, you must specify --connect and --table"); + } + + System.out.println( + "Arguments to mysqldump and other subprograms may be supplied"); + System.out.println( + "after a '--' on the command line."); + } + + private void applyIncrementalOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + if (in.hasOption(INCREMENT_TYPE_ARG)) { + String incrementalTypeStr = in.getOptionValue(INCREMENT_TYPE_ARG); + if ("append".equals(incrementalTypeStr)) { + out.setIncrementalMode(SqoopOptions.IncrementalMode.AppendRows); + // This argument implies ability to append to the same directory. + out.setAppendMode(true); + } else if ("lastmodified".equals(incrementalTypeStr)) { + out.setIncrementalMode(SqoopOptions.IncrementalMode.DateLastModified); + } else { + throw new InvalidOptionsException("Unknown incremental import mode: " + + incrementalTypeStr + ". Use 'append' or 'lastmodified'." 
+ + HELP_STR); + } + } + + if (in.hasOption(INCREMENT_COL_ARG)) { + out.setIncrementalTestColumn(in.getOptionValue(INCREMENT_COL_ARG)); + } + + if (in.hasOption(INCREMENT_LAST_VAL_ARG)) { + out.setIncrementalLastValue(in.getOptionValue(INCREMENT_LAST_VAL_ARG)); + } + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + try { + applyCommonOptions(in, out); + + if (in.hasOption(DIRECT_ARG)) { + out.setDirectMode(true); + } + + if (!allTables) { + if (in.hasOption(TABLE_ARG)) { + out.setTableName(in.getOptionValue(TABLE_ARG)); + } + + if (in.hasOption(COLUMNS_ARG)) { + String[] cols= in.getOptionValue(COLUMNS_ARG).split(","); + for (int i=0; i 1 + && options.getSplitByCol() == null) { + throw new InvalidOptionsException( + "When importing query results in parallel, you must specify --" + + SPLIT_BY_ARG + "." + HELP_STR); + } else if (options.isDirect() + && options.getFileLayout() != SqoopOptions.FileLayout.TextFile + && options.getConnectString().contains("jdbc:mysql://")) { + throw new InvalidOptionsException( + "MySQL direct export currently supports only text output format." + + "Parameters --as-sequencefile and --as-avrodatafile are not " + + "supported with --direct params in MySQL case."); + } else if (!options.getMapColumnJava().isEmpty() + && options.getFileLayout() == FileLayout.AvroDataFile) { + throw new InvalidOptionsException( + "Overriding column types is currently not supported with avro."); + } + } + + /** + * Validate the incremental import options. + */ + private void validateIncrementalOptions(SqoopOptions options) + throws InvalidOptionsException { + if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None + && options.getIncrementalTestColumn() == null) { + throw new InvalidOptionsException( + "For an incremental import, the check column must be specified " + + "with --" + INCREMENT_COL_ARG + ". " + HELP_STR); + } + + if (options.getIncrementalMode() == SqoopOptions.IncrementalMode.None + && options.getIncrementalTestColumn() != null) { + throw new InvalidOptionsException( + "You must specify an incremental import mode with --" + + INCREMENT_TYPE_ARG + ". " + HELP_STR); + } + + if (options.getIncrementalMode() != SqoopOptions.IncrementalMode.None + && options.getTableName() == null) { + throw new InvalidOptionsException("Incremental imports require a table." + + HELP_STR); + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + // If extraArguments is full, check for '--' followed by args for + // mysqldump or other commands we rely on. + options.setExtraArgs(getSubcommandArgs(extraArguments)); + int dashPos = getDashPosition(extraArguments); + + if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateImportOptions(options); + validateIncrementalOptions(options); + validateCommonOptions(options); + validateCodeGenOptions(options); + validateOutputFormatOptions(options); + validateHBaseOptions(options); + validateHiveOptions(options); + } +} + diff --git a/src/java/org/apache/sqoop/tool/JobTool.java b/src/java/org/apache/sqoop/tool/JobTool.java new file mode 100644 index 00000000..dcf94f11 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/JobTool.java @@ -0,0 +1,406 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.IOException; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.ToolRunner; + +import org.apache.log4j.Category; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.metastore.hsqldb.HsqldbJobStorage; +import com.cloudera.sqoop.metastore.JobData; +import com.cloudera.sqoop.metastore.JobStorage; +import com.cloudera.sqoop.metastore.JobStorageFactory; + +/** + * Tool that creates and executes saved jobs. + */ +public class JobTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog( + JobTool.class.getName()); + + private enum JobOp { + JobCreate, + JobDelete, + JobExecute, + JobList, + JobShow, + }; + + private Map storageDescriptor; + private String jobName; + private JobOp operation; + private JobStorage storage; + + public JobTool() { + super("job"); + } + + /** + * Given an array of strings, return all elements of this + * array up to (but not including) the first instance of "--". + */ + private String [] getElementsUpToDoubleDash(String [] array) { + String [] parseableChildArgv = null; + for (int i = 0; i < array.length; i++) { + if ("--".equals(array[i])) { + parseableChildArgv = Arrays.copyOfRange(array, 0, i); + break; + } + } + + if (parseableChildArgv == null) { + // Didn't find any nested '--'. + parseableChildArgv = array; + } + + return parseableChildArgv; + } + + /** + * Given an array of strings, return the first instance + * of "--" and all following elements. + * If no "--" exists, return null. + */ + private String [] getElementsAfterDoubleDash(String [] array) { + String [] extraChildArgv = null; + for (int i = 0; i < array.length; i++) { + if ("--".equals(array[i])) { + extraChildArgv = Arrays.copyOfRange(array, i, array.length); + break; + } + } + + return extraChildArgv; + } + + private int configureChildTool(SqoopOptions childOptions, + SqoopTool childTool, String [] childArgv) { + // Within the child arguments there may be a '--' followed by + // dependent args. Stash them off to the side. + + // Everything up to the '--'. + String [] parseableChildArgv = getElementsUpToDoubleDash(childArgv); + + // The '--' and any subsequent args. 
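+ // (Editorial example, values hypothetical: for childArgv of
+ // {"--table", "foo", "--", "--extra-subprogram-arg"}, the parseable portion
+ // is {"--table", "foo"} and the extra portion is
+ // {"--", "--extra-subprogram-arg"}; getElementsAfterDoubleDash() keeps the
+ // "--" itself.)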
+ String [] extraChildArgv = getElementsAfterDoubleDash(childArgv); + + // Now feed the arguments into the tool itself. + try { + childOptions = childTool.parseArguments(parseableChildArgv, + null, childOptions, false); + childTool.appendArgs(extraChildArgv); + childTool.validateOptions(childOptions); + } catch (ParseException pe) { + LOG.error("Error parsing arguments to the job-specific tool."); + LOG.error("See 'sqoop help ' for usage."); + return 1; + } catch (SqoopOptions.InvalidOptionsException e) { + System.err.println(e.getMessage()); + return 1; + } + + return 0; // Success. + } + + private int createJob(SqoopOptions options) throws IOException { + // In our extraArguments array, we should have a '--' followed by + // a tool name, and any tool-specific arguments. + // Create an instance of the named tool and then configure it to + // get a SqoopOptions out which we will serialize into a job. + int dashPos = getDashPosition(extraArguments); + int toolArgPos = dashPos + 1; + if (null == extraArguments || toolArgPos < 0 + || toolArgPos >= extraArguments.length) { + LOG.error("No tool specified; cannot create a job."); + LOG.error("Use: sqoop job --create " + + "-- [tool-args]"); + return 1; + } + + String jobToolName = extraArguments[toolArgPos]; + SqoopTool jobTool = SqoopTool.getTool(jobToolName); + if (null == jobTool) { + LOG.error("No such tool available: " + jobToolName); + return 1; + } + + // Create a SqoopOptions and Configuration based on the current one, + // but deep-copied. This will be populated within the job. + SqoopOptions jobOptions = new SqoopOptions(); + jobOptions.setConf(new Configuration(options.getConf())); + + // Get the arguments to feed to the child tool. + String [] childArgs = Arrays.copyOfRange(extraArguments, toolArgPos + 1, + extraArguments.length); + + int confRet = configureChildTool(jobOptions, jobTool, childArgs); + if (0 != confRet) { + // Error. + return confRet; + } + + // Now that the tool is fully configured, materialize the job. + //TODO(jarcec): Remove the cast when JobData will be moved to apache package + JobData jobData = new JobData(jobOptions, + (com.cloudera.sqoop.tool.SqoopTool)jobTool); + this.storage.create(jobName, jobData); + return 0; // Success. + } + + private int listJobs(SqoopOptions opts) throws IOException { + List jobNames = storage.list(); + System.out.println("Available jobs:"); + for (String name : jobNames) { + System.out.println(" " + name); + } + return 0; + } + + private int deleteJob(SqoopOptions opts) throws IOException { + this.storage.delete(jobName); + return 0; + } + + private int execJob(SqoopOptions opts) throws IOException { + JobData data = this.storage.read(jobName); + if (null == data) { + LOG.error("No such job: " + jobName); + return 1; + } + + SqoopOptions childOpts = data.getSqoopOptions(); + SqoopTool childTool = data.getSqoopTool(); + + // Don't overwrite the original SqoopOptions with the + // arguments; make a child options. + + SqoopOptions clonedOpts = (SqoopOptions) childOpts.clone(); + clonedOpts.setParent(childOpts); + + int dashPos = getDashPosition(extraArguments); + String [] childArgv; + if (dashPos >= extraArguments.length) { + childArgv = new String[0]; + } else { + childArgv = Arrays.copyOfRange(extraArguments, dashPos + 1, + extraArguments.length); + } + + int confRet = configureChildTool(clonedOpts, childTool, childArgv); + if (0 != confRet) { + // Error. 
+ return confRet; + } + + return childTool.run(clonedOpts); + } + + private int showJob(SqoopOptions opts) throws IOException { + JobData data = this.storage.read(jobName); + if (null == data) { + LOG.error("No such job: " + jobName); + return 1; + } + + SqoopOptions childOpts = data.getSqoopOptions(); + SqoopTool childTool = data.getSqoopTool(); + + System.out.println("Job: " + jobName); + System.out.println("Tool: " + childTool.getToolName()); + + System.out.println("Options:"); + System.out.println("----------------------------"); + Properties props = childOpts.writeProperties(); + for (Map.Entry entry : props.entrySet()) { + System.out.println(entry.getKey().toString() + " = " + entry.getValue()); + } + + // TODO: This does not show entries in the Configuration + // (SqoopOptions.getConf()) which were stored as different from the + // default. + + return 0; + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + // Get a JobStorage instance to use to materialize this job. + JobStorageFactory ssf = new JobStorageFactory(options.getConf()); + this.storage = ssf.getJobStorage(storageDescriptor); + if (null == this.storage) { + LOG.error("There is no JobStorage implementation available"); + LOG.error("that can read your specified storage descriptor."); + LOG.error("Don't know where to save this job info! You may"); + LOG.error("need to specify the connect string with --meta-connect."); + return 1; + } + + try { + // Open the storage layer. + this.storage.open(this.storageDescriptor); + + // And now determine what operation to perform with it. + switch (operation) { + case JobCreate: + return createJob(options); + case JobDelete: + return deleteJob(options); + case JobExecute: + return execJob(options); + case JobList: + return listJobs(options); + case JobShow: + return showJob(options); + default: + LOG.error("Undefined job operation: " + operation); + return 1; + } + } catch (IOException ioe) { + LOG.error("I/O error performing job operation: " + + StringUtils.stringifyException(ioe)); + return 1; + } finally { + if (null != this.storage) { + try { + storage.close(); + } catch (IOException ioe) { + LOG.warn("IOException closing JobStorage: " + + StringUtils.stringifyException(ioe)); + } + } + } + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + toolOptions.addUniqueOptions(getJobOptions()); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + if (in.hasOption(VERBOSE_ARG)) { + // Immediately switch into DEBUG logging. + Category sqoopLogger = Logger.getLogger( + Sqoop.class.getName()).getParent(); + sqoopLogger.setLevel(Level.DEBUG); + LOG.debug("Enabled debug logging."); + } + + if (in.hasOption(HELP_ARG)) { + ToolOptions toolOpts = new ToolOptions(); + configureOptions(toolOpts); + printHelp(toolOpts); + throw new InvalidOptionsException(""); + } + + this.storageDescriptor = new TreeMap(); + + if (in.hasOption(STORAGE_METASTORE_ARG)) { + this.storageDescriptor.put(HsqldbJobStorage.META_CONNECT_KEY, + in.getOptionValue(STORAGE_METASTORE_ARG)); + } + + // These are generated via an option group; exactly one + // of this exhaustive list will always be selected. 
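+ // (Editorial example, job name hypothetical: 'sqoop job --create myjob --
+ // import ...' selects JobOp.JobCreate and sets jobName to "myjob"; the
+ // JOB_CMD_LIST_ARG form selects JobOp.JobList and needs no job name.)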
+ if (in.hasOption(JOB_CMD_CREATE_ARG)) { + this.operation = JobOp.JobCreate; + this.jobName = in.getOptionValue(JOB_CMD_CREATE_ARG); + } else if (in.hasOption(JOB_CMD_DELETE_ARG)) { + this.operation = JobOp.JobDelete; + this.jobName = in.getOptionValue(JOB_CMD_DELETE_ARG); + } else if (in.hasOption(JOB_CMD_EXEC_ARG)) { + this.operation = JobOp.JobExecute; + this.jobName = in.getOptionValue(JOB_CMD_EXEC_ARG); + } else if (in.hasOption(JOB_CMD_LIST_ARG)) { + this.operation = JobOp.JobList; + } else if (in.hasOption(JOB_CMD_SHOW_ARG)) { + this.operation = JobOp.JobShow; + this.jobName = in.getOptionValue(JOB_CMD_SHOW_ARG); + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (null == operation + || (null == this.jobName && operation != JobOp.JobList)) { + throw new InvalidOptionsException("No job operation specified" + + HELP_STR); + } + + if (operation == JobOp.JobCreate) { + // Check that we have a '--' followed by at least a tool name. + if (extraArguments == null || extraArguments.length == 0) { + throw new InvalidOptionsException( + "Expected: -- [tool-args] " + + HELP_STR); + } + } + + int dashPos = getDashPosition(extraArguments); + if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { + throw new InvalidOptionsException(HELP_STR); + } + } + + @Override + /** {@inheritDoc} */ + public void printHelp(ToolOptions opts) { + System.out.println("usage: sqoop " + getToolName() + + " [GENERIC-ARGS] [JOB-ARGS] [-- [] [TOOL-ARGS]]"); + System.out.println(""); + + opts.printHelp(); + + System.out.println(""); + System.out.println("Generic Hadoop command-line arguments:"); + System.out.println("(must preceed any tool-specific arguments)"); + ToolRunner.printGenericCommandUsage(System.out); + } +} + diff --git a/src/java/org/apache/sqoop/tool/ListDatabasesTool.java b/src/java/org/apache/sqoop/tool/ListDatabasesTool.java new file mode 100644 index 00000000..2dfbfb51 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ListDatabasesTool.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.ToolOptions; + +/** + * Tool that lists available databases on a server. 
+ */ +public class ListDatabasesTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog( + ListDatabasesTool.class.getName()); + + public ListDatabasesTool() { + super("list-databases"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + if (!init(options)) { + return 1; + } + + try { + String [] databases = manager.listDatabases(); + if (null == databases) { + System.err.println("Could not retrieve database list from server"); + LOG.error("manager.listDatabases() returned null"); + return 1; + } else { + for (String db : databases) { + System.out.println(db); + } + } + } finally { + destroy(options); + } + + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + toolOptions.addUniqueOptions(getCommonOptions()); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + applyCommonOptions(in, out); + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (hasUnrecognizedArgs(extraArguments)) { + throw new InvalidOptionsException(HELP_STR); + } + validateCommonOptions(options); + } +} + diff --git a/src/java/org/apache/sqoop/tool/ListTablesTool.java b/src/java/org/apache/sqoop/tool/ListTablesTool.java new file mode 100644 index 00000000..529df37a --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ListTablesTool.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.ToolOptions; + +/** + * Tool that lists available tables in a database. 
+ */ +public class ListTablesTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog( + ListTablesTool.class.getName()); + + public ListTablesTool() { + super("list-tables"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + if (!init(options)) { + return 1; + } + + try { + String [] tables = manager.listTables(); + if (null == tables) { + System.err.println("Could not retrieve tables list from server"); + LOG.error("manager.listTables() returned null"); + return 1; + } else { + for (String tbl : tables) { + System.out.println(tbl); + } + } + } finally { + destroy(options); + } + + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + toolOptions.addUniqueOptions(getCommonOptions()); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + applyCommonOptions(in, out); + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + if (hasUnrecognizedArgs(extraArguments)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateCommonOptions(options); + } +} + diff --git a/src/java/org/apache/sqoop/tool/MergeTool.java b/src/java/org/apache/sqoop/tool/MergeTool.java new file mode 100644 index 00000000..741d430a --- /dev/null +++ b/src/java/org/apache/sqoop/tool/MergeTool.java @@ -0,0 +1,235 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.IOException; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Category; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import com.cloudera.sqoop.Sqoop; +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.mapreduce.MergeJob; + +/** + * Tool that merges a more recent dataset on top of an older one. + */ +public class MergeTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog(MergeTool.class.getName()); + + public MergeTool() { + this("merge"); + } + + public MergeTool(String toolName) { + super(toolName); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + try { + // Configure and execute a MapReduce job to merge these datasets. 
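+ // (Editorial note, not part of the original patch: MergeJob is the
+ // com.cloudera.sqoop.mapreduce.MergeJob imported above. It merges the more
+ // recent dataset on top of the older one, consuming the new/old dataset
+ // paths, merge key, target directory and record class that
+ // validateMergeOptions() below requires to be set on SqoopOptions.)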
+ MergeJob mergeJob = new MergeJob(options); + if (!mergeJob.runMergeJob()) { + LOG.error("MapReduce job failed!"); + return 1; + } + } catch (IOException ioe) { + LOG.error("Encountered IOException running import job: " + + StringUtils.stringifyException(ioe)); + if (System.getProperty(Sqoop.SQOOP_RETHROW_PROPERTY) != null) { + throw new RuntimeException(ioe); + } else { + return 1; + } + } + + return 0; + } + + /** + * Construct the set of options that control imports, either of one + * table or a batch of tables. + * @return the RelatedOptions that can be used to parse the import + * arguments. + */ + protected RelatedOptions getMergeOptions() { + // Imports + RelatedOptions mergeOpts = new RelatedOptions("Merge arguments"); + + mergeOpts.addOption(OptionBuilder.withArgName("file") + .hasArg().withDescription("Load class from specified jar file") + .withLongOpt(JAR_FILE_NAME_ARG) + .create()); + + mergeOpts.addOption(OptionBuilder.withArgName("name") + .hasArg().withDescription("Specify record class name to load") + .withLongOpt(CLASS_NAME_ARG) + .create()); + + mergeOpts.addOption(OptionBuilder.withArgName("path") + .hasArg().withDescription("Path to the more recent data set") + .withLongOpt(NEW_DATASET_ARG) + .create()); + + mergeOpts.addOption(OptionBuilder.withArgName("path") + .hasArg().withDescription("Path to the older data set") + .withLongOpt(OLD_DATASET_ARG) + .create()); + + mergeOpts.addOption(OptionBuilder.withArgName("path") + .hasArg().withDescription("Destination path for merged results") + .withLongOpt(TARGET_DIR_ARG) + .create()); + + mergeOpts.addOption(OptionBuilder.withArgName("column") + .hasArg().withDescription("Key column to use to join results") + .withLongOpt(MERGE_KEY_ARG) + .create()); + + // Since the "common" options aren't used in the merge tool, + // add these settings here. + mergeOpts.addOption(OptionBuilder + .withDescription("Print more information while working") + .withLongOpt(VERBOSE_ARG) + .create()); + mergeOpts.addOption(OptionBuilder + .withDescription("Print usage instructions") + .withLongOpt(HELP_ARG) + .create()); + + return mergeOpts; + } + + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + toolOptions.addUniqueOptions(getMergeOptions()); + } + + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + + if (in.hasOption(VERBOSE_ARG)) { + // Immediately switch into DEBUG logging. + Category sqoopLogger = Logger.getLogger( + Sqoop.class.getName()).getParent(); + sqoopLogger.setLevel(Level.DEBUG); + LOG.debug("Enabled debug logging."); + } + + if (in.hasOption(HELP_ARG)) { + ToolOptions toolOpts = new ToolOptions(); + configureOptions(toolOpts); + printHelp(toolOpts); + throw new InvalidOptionsException(""); + } + + if (in.hasOption(JAR_FILE_NAME_ARG)) { + out.setExistingJarName(in.getOptionValue(JAR_FILE_NAME_ARG)); + } + + if (in.hasOption(CLASS_NAME_ARG)) { + out.setClassName(in.getOptionValue(CLASS_NAME_ARG)); + } + + if (in.hasOption(NEW_DATASET_ARG)) { + out.setMergeNewPath(in.getOptionValue(NEW_DATASET_ARG)); + } + + if (in.hasOption(OLD_DATASET_ARG)) { + out.setMergeOldPath(in.getOptionValue(OLD_DATASET_ARG)); + } + + if (in.hasOption(TARGET_DIR_ARG)) { + out.setTargetDir(in.getOptionValue(TARGET_DIR_ARG)); + } + + if (in.hasOption(MERGE_KEY_ARG)) { + out.setMergeKeyCol(in.getOptionValue(MERGE_KEY_ARG)); + } + } + + /** + * Validate merge-specific arguments. 
+ * @param options the configured SqoopOptions to check + */ + protected void validateMergeOptions(SqoopOptions options) + throws InvalidOptionsException { + + if (options.getMergeNewPath() == null) { + throw new InvalidOptionsException("Must set the new dataset path with --" + + NEW_DATASET_ARG + "." + HELP_STR); + } + + if (options.getMergeOldPath() == null) { + throw new InvalidOptionsException("Must set the old dataset path with --" + + OLD_DATASET_ARG + "." + HELP_STR); + } + + if (options.getMergeKeyCol() == null) { + throw new InvalidOptionsException("Must set the merge key column with --" + + MERGE_KEY_ARG + "." + HELP_STR); + } + + if (options.getTargetDir() == null) { + throw new InvalidOptionsException("Must set the target directory with --" + + TARGET_DIR_ARG + "." + HELP_STR); + } + + if (options.getClassName() == null) { + throw new InvalidOptionsException("Must set the SqoopRecord class " + + "implementation to use with --" + CLASS_NAME_ARG + "." + + HELP_STR); + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + + // If extraArguments is full, check for '--' followed by args for + // mysqldump or other commands we rely on. + options.setExtraArgs(getSubcommandArgs(extraArguments)); + int dashPos = getDashPosition(extraArguments); + + if (hasUnrecognizedArgs(extraArguments, 0, dashPos)) { + throw new InvalidOptionsException(HELP_STR); + } + + validateMergeOptions(options); + } +} + diff --git a/src/java/org/apache/sqoop/tool/MetastoreTool.java b/src/java/org/apache/sqoop/tool/MetastoreTool.java new file mode 100644 index 00000000..53e56f0d --- /dev/null +++ b/src/java/org/apache/sqoop/tool/MetastoreTool.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.RelatedOptions; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.metastore.hsqldb.HsqldbMetaStore; + +/** + * Tool that runs a standalone Sqoop metastore. + */ +public class MetastoreTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public static final Log LOG = LogFactory.getLog( + MetastoreTool.class.getName()); + + private HsqldbMetaStore metastore; + + // If set to true, shut an existing metastore down. 
+ private boolean shutdown = false; + + public MetastoreTool() { + super("metastore"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + metastore = new HsqldbMetaStore(options.getConf()); + if (shutdown) { + LOG.info("Shutting down metastore..."); + metastore.shutdown(); + } else { + metastore.start(); + metastore.waitForServer(); + LOG.info("Server thread has quit."); + } + return 0; + } + + @Override + /** Configure the command-line arguments we expect to receive */ + public void configureOptions(ToolOptions toolOptions) { + RelatedOptions opts = new RelatedOptions("metastore arguments"); + opts.addOption(OptionBuilder + .withDescription("Cleanly shut down a running metastore") + .withLongOpt(METASTORE_SHUTDOWN_ARG) + .create()); + + toolOptions.addUniqueOptions(opts); + } + + @Override + /** {@inheritDoc} */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + if (in.hasOption(METASTORE_SHUTDOWN_ARG)) { + this.shutdown = true; + } + } + + @Override + /** {@inheritDoc} */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + } +} + diff --git a/src/java/org/apache/sqoop/tool/SqoopTool.java b/src/java/org/apache/sqoop/tool/SqoopTool.java new file mode 100644 index 00000000..67b6427e --- /dev/null +++ b/src/java/org/apache/sqoop/tool/SqoopTool.java @@ -0,0 +1,507 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.ParseException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.ToolRunner; +import org.apache.sqoop.util.ClassLoaderStack; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.SqoopOptions.InvalidOptionsException; +import com.cloudera.sqoop.cli.SqoopParser; +import com.cloudera.sqoop.cli.ToolOptions; +import com.cloudera.sqoop.config.ConfigurationHelper; +import com.cloudera.sqoop.tool.ToolDesc; + +/** + * Base class for Sqoop subprograms (e.g., SqoopImport, SqoopExport, etc.) + * Allows subprograms to configure the arguments they accept and + * provides an entry-point to the subprogram. 
+ */ +public abstract class SqoopTool { + + public static final Log LOG = LogFactory.getLog(SqoopTool.class.getName()); + + /** + * Configuration key that specifies the set of ToolPlugin instances to load + * before determining which SqoopTool instance to load. + */ + public static final String TOOL_PLUGINS_KEY = "sqoop.tool.plugins"; + + private static final Map> TOOLS; + private static final Map DESCRIPTIONS; + + static { + // All SqoopTool instances should be registered here so that + // they can be found internally. + TOOLS = new TreeMap>(); + DESCRIPTIONS = new TreeMap(); + + registerTool("codegen", CodeGenTool.class, + "Generate code to interact with database records"); + registerTool("create-hive-table", CreateHiveTableTool.class, + "Import a table definition into Hive"); + registerTool("eval", EvalSqlTool.class, + "Evaluate a SQL statement and display the results"); + registerTool("export", ExportTool.class, + "Export an HDFS directory to a database table"); + registerTool("import", ImportTool.class, + "Import a table from a database to HDFS"); + registerTool("import-all-tables", ImportAllTablesTool.class, + "Import tables from a database to HDFS"); + registerTool("help", HelpTool.class, "List available commands"); + registerTool("list-databases", ListDatabasesTool.class, + "List available databases on a server"); + registerTool("list-tables", ListTablesTool.class, + "List available tables in a database"); + registerTool("merge", MergeTool.class, + "Merge results of incremental imports"); + registerTool("metastore", MetastoreTool.class, + "Run a standalone Sqoop metastore"); + registerTool("job", JobTool.class, + "Work with saved jobs"); + registerTool("version", VersionTool.class, + "Display version information"); + } + + /** + * Add a tool to the available set of SqoopTool instances. + * @param toolName the name the user access the tool through. + * @param cls the class providing the tool. + * @param description a user-friendly description of the tool's function. + */ + private static void registerTool(String toolName, + Class cls, String description) { + Class existing = TOOLS.get(toolName); + if (null != existing) { + // Already have a tool with this name. Refuse to start. + throw new RuntimeException("A plugin is attempting to register a tool " + + "with name " + toolName + ", but this tool already exists (" + + existing.getName() + ")"); + } + + TOOLS.put(toolName, cls); + DESCRIPTIONS.put(toolName, description); + } + + /** + * Add tool to available set of SqoopTool instances using the ToolDesc + * struct as the sole argument. + */ + private static void registerTool(ToolDesc toolDescription) { + registerTool(toolDescription.getName(), toolDescription.getToolClass(), + toolDescription.getDesc()); + } + + /** + * Load plugins referenced in sqoop-site.xml or other config (e.g., tools.d/), + * to allow external tool definitions. + * + * @return the Configuration used to load the plugins. 
+ */ + public static Configuration loadPlugins(Configuration conf) { + conf = loadPluginsFromConfDir(conf); + List plugins = conf.getInstances(TOOL_PLUGINS_KEY, + ToolPlugin.class); + for (ToolPlugin plugin : plugins) { + LOG.debug("Loading plugin: " + plugin.getClass().getName()); + List descriptions = plugin.getTools(); + for (ToolDesc desc : descriptions) { + LOG.debug(" Adding tool: " + desc.getName() + + " -> " + desc.getToolClass().getName()); + registerTool(desc); + } + } + + return conf; + } + + /** + * If $SQOOP_CONF_DIR/tools.d/ exists and sqoop.tool.plugins is not set, + * then we look through the files in that directory; they should contain + * lines of the form 'plugin.class.name[=/path/to/containing.jar]'. + * + *

Put all plugin.class.names into the Configuration, and load any + * specified jars into the ClassLoader.
+ * + * @param conf the current configuration to populate with class names. + * @return conf again, after possibly populating sqoop.tool.plugins. + */ + private static Configuration loadPluginsFromConfDir(Configuration conf) { + if (conf.get(TOOL_PLUGINS_KEY) != null) { + LOG.debug(TOOL_PLUGINS_KEY + " is set; ignoring tools.d"); + return conf; + } + + String confDirName = System.getenv("SQOOP_CONF_DIR"); + if (null == confDirName) { + LOG.warn("$SQOOP_CONF_DIR has not been set in the environment. " + + "Cannot check for additional configuration."); + return conf; + } + + File confDir = new File(confDirName); + File toolsDir = new File(confDir, "tools.d"); + + if (toolsDir.exists() && toolsDir.isDirectory()) { + // We have a tools.d subdirectory. Get the file list, sort it, + // and process them in order. + String [] fileNames = toolsDir.list(); + Arrays.sort(fileNames); + + for (String fileName : fileNames) { + File f = new File(toolsDir, fileName); + if (f.isFile()) { + loadPluginsFromFile(conf, f); + } + } + } + + // Set the classloader in this configuration so that it will use + // the jars we just loaded in. + conf.setClassLoader(Thread.currentThread().getContextClassLoader()); + return conf; + } + + /** + * Read the specified file and extract any ToolPlugin implementation + * names from there. + * @param conf the configuration to populate. + * @param f the file containing the configuration data to add. + */ + private static void loadPluginsFromFile(Configuration conf, File f) { + Reader r = null; + try { + // The file format is actually Java properties-file syntax. + r = new InputStreamReader(new FileInputStream(f)); + Properties props = new Properties(); + props.load(r); + + for (Map.Entry entry : props.entrySet()) { + // Each key is a ToolPlugin class name. + // Each value, if set, is the jar that contains it. + String plugin = entry.getKey().toString(); + addPlugin(conf, plugin); + + String jarName = entry.getValue().toString(); + if (jarName.length() > 0) { + ClassLoaderStack.addJarFile(jarName, plugin); + LOG.debug("Added plugin " + plugin + " in jar " + jarName + + " specified by " + f); + } else if (LOG.isDebugEnabled()) { + LOG.debug("Added plugin " + plugin + " specified by " + f); + } + } + } catch (IOException ioe) { + LOG.error("Error loading ToolPlugin information from file " + + f + ": " + StringUtils.stringifyException(ioe)); + } finally { + if (null != r) { + try { + r.close(); + } catch (IOException ioe) { + LOG.warn("Error closing file " + f + ": " + ioe); + } + } + } + } + + /** + * Add the specified plugin class name to the configuration string + * listing plugin classes. + */ + private static void addPlugin(Configuration conf, String pluginName) { + String existingPlugins = conf.get(TOOL_PLUGINS_KEY); + String newPlugins = null; + if (null == existingPlugins || existingPlugins.length() == 0) { + newPlugins = pluginName; + } else { + newPlugins = existingPlugins + "," + pluginName; + } + + conf.set(TOOL_PLUGINS_KEY, newPlugins); + } + + /** + * @return the list of available tools. + */ + public static Set getToolNames() { + return TOOLS.keySet(); + } + + /** + * @return the SqoopTool instance with the provided name, or null + * if no such tool exists. 
+ */ + public static SqoopTool getTool(String toolName) { + Class cls = TOOLS.get(toolName); + try { + if (null != cls) { + SqoopTool tool = cls.newInstance(); + tool.setToolName(toolName); + return tool; + } + } catch (Exception e) { + LOG.error(StringUtils.stringifyException(e)); + return null; + } + + return null; + } + + /** + * @return the user-friendly description for a tool, or null if the tool + * cannot be found. + */ + public static String getToolDescription(String toolName) { + return DESCRIPTIONS.get(toolName); + } + + /** The name of the current tool. */ + private String toolName; + + /** Arguments that remained unparsed after parseArguments. */ + protected String [] extraArguments; + + public SqoopTool() { + this.toolName = "<" + this.getClass().getName() + ">"; + } + + public SqoopTool(String name) { + this.toolName = name; + } + + public String getToolName() { + return this.toolName; + } + + protected void setToolName(String name) { + this.toolName = name; + } + + /** + * Main body of code to run the tool. + * @param options the SqoopOptions configured via + * configureOptions()/applyOptions(). + * @return an integer return code for external programs to consume. 0 + * represents success; nonzero means failure. + */ + public abstract int run(SqoopOptions options); + + /** + * Configure the command-line arguments we expect to receive. + * @param opts a ToolOptions that should be populated with sets of + * RelatedOptions for the tool. + */ + public void configureOptions(ToolOptions opts) { + // Default implementation does nothing. + } + + /** + * Print the help message for this tool. + * @param opts the configured tool options + */ + public void printHelp(ToolOptions opts) { + System.out.println("usage: sqoop " + getToolName() + + " [GENERIC-ARGS] [TOOL-ARGS]"); + System.out.println(""); + + opts.printHelp(); + + System.out.println(""); + System.out.println("Generic Hadoop command-line arguments:"); + System.out.println("(must preceed any tool-specific arguments)"); + ToolRunner.printGenericCommandUsage(System.out); + } + + /** Generate the SqoopOptions containing actual argument values from + * the extracted CommandLine arguments. + * @param in the CLI CommandLine that contain the user's set Options. + * @param out the SqoopOptions with all fields applied. + * @throws InvalidOptionsException if there's a problem. + */ + public void applyOptions(CommandLine in, SqoopOptions out) + throws InvalidOptionsException { + // Default implementation does nothing. + } + + /** + * Validates options and ensures that any required options are + * present and that any mutually-exclusive options are not selected. + * @throws InvalidOptionsException if there's a problem. + */ + public void validateOptions(SqoopOptions options) + throws InvalidOptionsException { + // Default implementation does nothing. + } + + /** + * Configures a SqoopOptions according to the specified arguments. + * Reads a set of arguments and uses them to configure a SqoopOptions + * and its embedded configuration (i.e., through GenericOptionsParser.) + * Stores any unparsed arguments in the extraArguments field. + * + * @param args the arguments to parse. + * @param conf if non-null, set as the configuration for the returned + * SqoopOptions. + * @param in a (perhaps partially-configured) SqoopOptions. If null, + * then a new SqoopOptions will be used. If this has a null configuration + * and conf is null, then a new Configuration will be inserted in this. 
+ * @param useGenericOptions if true, will also parse generic Hadoop + * options into the Configuration. + * @return a SqoopOptions that is fully configured by a given tool. + */ + public SqoopOptions parseArguments(String [] args, + Configuration conf, SqoopOptions in, boolean useGenericOptions) + throws ParseException, SqoopOptions.InvalidOptionsException { + SqoopOptions out = in; + + if (null == out) { + out = new SqoopOptions(); + } + + if (null != conf) { + // User specified a configuration; use it and override any conf + // that may have been in the SqoopOptions. + out.setConf(conf); + } else if (null == out.getConf()) { + // User did not specify a configuration, but neither did the + // SqoopOptions. Fabricate a new one. + out.setConf(new Configuration()); + } + + // This tool is the "active" tool; bind it in the SqoopOptions. + //TODO(jarcec): Remove the cast when SqoopOptions will be moved + // to apache package + out.setActiveSqoopTool((com.cloudera.sqoop.tool.SqoopTool)this); + + String [] toolArgs = args; // args after generic parser is done. + if (useGenericOptions) { + try { + toolArgs = ConfigurationHelper.parseGenericOptions( + out.getConf(), args); + } catch (IOException ioe) { + ParseException pe = new ParseException( + "Could not parse generic arguments"); + pe.initCause(ioe); + throw pe; + } + } + + // Parse tool-specific arguments. + ToolOptions toolOptions = new ToolOptions(); + configureOptions(toolOptions); + CommandLineParser parser = new SqoopParser(); + CommandLine cmdLine = parser.parse(toolOptions.merge(), toolArgs, true); + applyOptions(cmdLine, out); + this.extraArguments = cmdLine.getArgs(); + return out; + } + + /** + * Append 'extra' to extraArguments. + */ + public void appendArgs(String [] extra) { + int existingLen = + (this.extraArguments == null) ? 0 : this.extraArguments.length; + int newLen = (extra == null) ? 0 : extra.length; + String [] newExtra = new String[existingLen + newLen]; + + if (null != this.extraArguments) { + System.arraycopy(this.extraArguments, 0, newExtra, 0, existingLen); + } + + if (null != extra) { + System.arraycopy(extra, 0, newExtra, existingLen, newLen); + } + + this.extraArguments = newExtra; + } + + /** + * Allow a tool to specify a set of dependency jar filenames. This is used + * to allow tools to bundle arbitrary dependency jars necessary for a + * MapReduce job executed by Sqoop. The jar containing the SqoopTool + * instance itself will already be handled by Sqoop. + * + *

Called by JobBase.cacheJars(). + * + * This does not load the jars into the current VM; they are assumed to be + * already on the classpath if they are needed on the client side (or + * otherwise classloaded by the tool itself). This is purely to specify jars + * necessary to be added to the distributed cache. The tool itself can + * classload these jars by running loadDependencyJars(). + * + * See also: c.c.s.util.Jars.getJarPathForClass()
+ */ + public List getDependencyJars() { + // Default behavior: no additional dependencies. + return Collections.emptyList(); + } + + /** + * Loads dependency jars specified by getDependencyJars() into the current + * classloader stack. May optionally be called by a [third-party] tool + * before doing work, to ensure that all of its dependencies get classloaded + * properly. Note that dependencies will not be available until after the + * tool is already constructed. + */ + protected void loadDependencyJars(SqoopOptions options) throws IOException { + List deps = getDependencyJars(); + if (null == deps) { + return; + } + + for (String depFilename : deps) { + LOG.debug("Loading dependency: " + depFilename); + ClassLoaderStack.addJarFile(depFilename, null); + } + + options.getConf().setClassLoader( + Thread.currentThread().getContextClassLoader()); + } + + @Override + public String toString() { + return getToolName(); + } +} + diff --git a/src/java/org/apache/sqoop/tool/ToolDesc.java b/src/java/org/apache/sqoop/tool/ToolDesc.java new file mode 100644 index 00000000..e90e6015 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ToolDesc.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +/** + * Describes a SqoopTool. + * This class should be final + */ +public class ToolDesc { + private final String toolName; + private final Class toolClass; + private final String description; + + + /** + * Main c'tor; sets all fields that describe a SqoopTool. + */ + public ToolDesc(String name, Class cls, String desc) { + this.toolName = name; + this.toolClass = cls; + this.description = desc; + } + + /** + * @return the name used to invoke the tool (e.g., 'sqoop <foo>') + */ + public String getName() { + return toolName; + } + + /** + * @return a human-readable description of what the tool does. + */ + public String getDesc() { + return description; + } + + /** + * @return the class that implements SqoopTool. + */ + public Class getToolClass() { + return toolClass; + } + +} diff --git a/src/java/org/apache/sqoop/tool/ToolPlugin.java b/src/java/org/apache/sqoop/tool/ToolPlugin.java new file mode 100644 index 00000000..5fa5e6fa --- /dev/null +++ b/src/java/org/apache/sqoop/tool/ToolPlugin.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import java.util.List; + +import com.cloudera.sqoop.tool.ToolDesc; + +/** + * Abstract base class that defines the ToolPlugin API; additional SqoopTool + * implementations may be registered with the system via ToolPlugin classes. + */ +public abstract class ToolPlugin { + /** + * Describes the tools made available by this plugin. + * @return a list of ToolDesc objects containing the tool name, class, + * and description. + */ + public abstract List getTools(); +} diff --git a/src/java/org/apache/sqoop/tool/VersionTool.java b/src/java/org/apache/sqoop/tool/VersionTool.java new file mode 100644 index 00000000..07a61dd4 --- /dev/null +++ b/src/java/org/apache/sqoop/tool/VersionTool.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.sqoop.tool; + +import com.cloudera.sqoop.SqoopOptions; +import com.cloudera.sqoop.cli.ToolOptions; + +/** + * Tool that prints Sqoop's version. + */ +public class VersionTool extends com.cloudera.sqoop.tool.BaseSqoopTool { + + public VersionTool() { + super("version"); + } + + @Override + /** {@inheritDoc} */ + public int run(SqoopOptions options) { + // FIXME with maven buildnumber plugin + System.out.print("FIXME "); + return 0; + } + + @Override + public void printHelp(ToolOptions opts) { + System.out.println("usage: sqoop " + getToolName()); + } +} +
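Editorial appendix (not part of the patch): the plugin mechanism added above (ToolPlugin, ToolDesc, and SqoopTool.loadPlugins()/loadPluginsFromConfDir()) can be exercised with a minimal sketch. Everything named below is hypothetical and only illustrates the wiring, including ExampleToolPlugin, ExampleTool, the "example" tool name, and the jar path; it also assumes the com.cloudera.sqoop.tool.ToolDesc shim keeps the (name, toolClass, description) constructor shown in ToolDesc.java above. The contract is simply a ToolPlugin whose getTools() returns ToolDesc entries.

package org.apache.sqoop.tool; // hypothetical location, chosen for illustration

import java.util.Collections;
import java.util.List;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.tool.ToolDesc;

/** Hypothetical plugin registering one extra tool named "example". */
public class ExampleToolPlugin extends ToolPlugin {

  /** Hypothetical tool; a real one would also override the option hooks. */
  public static class ExampleTool extends SqoopTool {
    public ExampleTool() {
      super("example");
    }

    @Override
    public int run(SqoopOptions options) {
      System.out.println("example tool invoked");
      return 0; // 0 means success, as documented on SqoopTool.run().
    }
  }

  @Override
  public List<ToolDesc> getTools() {
    // ToolDesc(name, toolClass, description), per ToolDesc.java above.
    return Collections.singletonList(
        new ToolDesc("example", ExampleTool.class, "A demonstration tool"));
  }
}

A tools.d entry of the form org.apache.sqoop.tool.ExampleToolPlugin=/path/to/example-plugin.jar (class and path hypothetical, format per loadPluginsFromFile()), or an equivalent sqoop.tool.plugins value with the jar already on the classpath, would then cause loadPlugins() to call registerTool() for the new "example" tool at startup.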