5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-21 11:21:39 +08:00

MAPREDUCE-1148. SQL identifiers are a superset of Java identifiers. Contributed by Aaron Kimball.

From: Thomas White <tomwhite@apache.org>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1149845 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Bayer 2011-07-22 20:03:29 +00:00
parent 12827a1765
commit 4686e0fee7
3 changed files with 202 additions and 14 deletions

View File

@ -33,6 +33,7 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.HashSet;
import java.util.Map;
import org.apache.commons.logging.Log;
@ -40,14 +41,57 @@
/**
* Creates an ORM class to represent a table from a database
*
*
*
*/
public class ClassWriter {
public static final Log LOG = LogFactory.getLog(ClassWriter.class.getName());
// The following are keywords and cannot be used for class, method, or field
// names.
public static final HashSet<String> JAVA_RESERVED_WORDS;
static {
JAVA_RESERVED_WORDS = new HashSet<String>();
JAVA_RESERVED_WORDS.add("abstract");
JAVA_RESERVED_WORDS.add("else");
JAVA_RESERVED_WORDS.add("int");
JAVA_RESERVED_WORDS.add("strictfp");
JAVA_RESERVED_WORDS.add("assert");
JAVA_RESERVED_WORDS.add("enum");
JAVA_RESERVED_WORDS.add("interface");
JAVA_RESERVED_WORDS.add("super");
JAVA_RESERVED_WORDS.add("boolean");
JAVA_RESERVED_WORDS.add("extends");
JAVA_RESERVED_WORDS.add("long");
JAVA_RESERVED_WORDS.add("switch");
JAVA_RESERVED_WORDS.add("break");
JAVA_RESERVED_WORDS.add("false");
JAVA_RESERVED_WORDS.add("native");
JAVA_RESERVED_WORDS.add("synchronized");
JAVA_RESERVED_WORDS.add("byte");
JAVA_RESERVED_WORDS.add("final");
JAVA_RESERVED_WORDS.add("new");
JAVA_RESERVED_WORDS.add("this");
JAVA_RESERVED_WORDS.add("case");
JAVA_RESERVED_WORDS.add("finally");
JAVA_RESERVED_WORDS.add("null");
JAVA_RESERVED_WORDS.add("throw");
JAVA_RESERVED_WORDS.add("catch");
JAVA_RESERVED_WORDS.add("float");
JAVA_RESERVED_WORDS.add("package");
JAVA_RESERVED_WORDS.add("throws");
JAVA_RESERVED_WORDS.add("char");
JAVA_RESERVED_WORDS.add("for");
JAVA_RESERVED_WORDS.add("private");
JAVA_RESERVED_WORDS.add("transient");
JAVA_RESERVED_WORDS.add("class");
JAVA_RESERVED_WORDS.add("goto");
JAVA_RESERVED_WORDS.add("protected");
JAVA_RESERVED_WORDS.add("true");
JAVA_RESERVED_WORDS.add("const");
}
/**
* This version number is injected into all generated Java classes to denote
* which version of the ClassWriter's output format was used to generate the
@ -76,6 +120,87 @@ public ClassWriter(final SqoopOptions opts, final ConnManager connMgr,
this.compileManager = compMgr;
}
/**
* Given some character that can't be in an identifier,
* try to map it to a string that can.
*
* @param c a character that can't be in a Java identifier
* @return a string of characters that can, or null if there's
* no good translation.
*/
static String getIdentifierStrForChar(char c) {
if (Character.isJavaIdentifierPart(c)) {
return "" + c;
} else if (Character.isWhitespace(c)) {
// Eliminate whitespace.
return null;
} else {
// All other characters map to underscore.
return "_";
}
}
/**
* @param word a word to test.
* @return true if 'word' is reserved the in Java language.
*/
private static boolean isReservedWord(String word) {
return JAVA_RESERVED_WORDS.contains(word);
}
/**
* Coerce a candidate name for an identifier into one which will
* definitely compile.
*
* Ensures that the returned identifier matches [A-Za-z_][A-Za-z0-9_]*
* and is not a reserved word.
*
* @param candidate A string we want to use as an identifier
* @return A string naming an identifier which compiles and is
* similar to the candidate.
*/
public static String toIdentifier(String candidate) {
StringBuilder sb = new StringBuilder();
boolean first = true;
for (char c : candidate.toCharArray()) {
if (Character.isJavaIdentifierStart(c) && first) {
// Ok for this to be the first character of the identifier.
sb.append(c);
first = false;
} else if (Character.isJavaIdentifierPart(c) && !first) {
// Ok for this character to be in the output identifier.
sb.append(c);
} else {
// We have a character in the original that can't be
// part of this identifier we're building.
// If it's just not allowed to be the first char, add a leading '_'.
// If we have a reasonable translation (e.g., '-' -> '_'), do that.
// Otherwise, drop it.
if (first && Character.isJavaIdentifierPart(c)
&& !Character.isJavaIdentifierStart(c)) {
sb.append("_");
sb.append(c);
first = false;
} else {
// Try to map this to a different character or string.
// If we can't just give up.
String translated = getIdentifierStrForChar(c);
if (null != translated) {
sb.append(translated);
first = false;
}
}
}
}
String output = sb.toString();
if (isReservedWord(output)) {
// e.g., 'class' -> '_class';
return "_" + output;
}
return output;
}
/**
* @param javaType
@ -593,8 +718,21 @@ public void generate() throws IOException {
colNames = connManager.getColumnNames(tableName);
}
// Translate all the column names into names that are safe to
// use as identifiers.
String [] cleanedColNames = new String[colNames.length];
for (int i = 0; i < colNames.length; i++) {
String col = colNames[i];
String identifier = toIdentifier(col);
cleanedColNames[i] = identifier;
// make sure the col->type mapping holds for the
// new identifier name, too.
columnTypes.put(identifier, columnTypes.get(col));
}
// Generate the Java code
StringBuilder sb = generateClassForColumns(columnTypes, colNames);
StringBuilder sb = generateClassForColumns(columnTypes, cleanedColNames);
// Write this out to a file.
String codeOutDir = options.getCodeOutputDir();
@ -605,16 +743,18 @@ public void generate() throws IOException {
String sourceFilename = className.replace('.', File.separatorChar) + ".java";
String filename = codeOutDir + sourceFilename;
LOG.debug("Writing source file: " + filename);
LOG.debug("Table name: " + tableName);
StringBuilder sbColTypes = new StringBuilder();
for (String col : colNames) {
Integer colType = columnTypes.get(col);
sbColTypes.append(col + ":" + colType + ", ");
if (LOG.isDebugEnabled()) {
LOG.debug("Writing source file: " + filename);
LOG.debug("Table name: " + tableName);
StringBuilder sbColTypes = new StringBuilder();
for (String col : colNames) {
Integer colType = columnTypes.get(col);
sbColTypes.append(col + ":" + colType + ", ");
}
String colTypeStr = sbColTypes.toString();
LOG.debug("Columns: " + colTypeStr);
LOG.debug("sourceFilename is " + sourceFilename);
}
String colTypeStr = sbColTypes.toString();
LOG.debug("Columns: " + colTypeStr);
LOG.debug("sourceFilename is " + sourceFilename);
compileManager.addSourceFile(sourceFilename);

View File

@ -90,7 +90,8 @@ public String getClassForTable(String tableName) {
}
// no specific class; no specific package.
return tableName;
// Just make sure it's a legal identifier.
return ClassWriter.toIdentifier(tableName);
}
/**

View File

@ -21,6 +21,8 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.sql.Connection;
import java.sql.Statement;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.jar.JarEntry;
@ -267,5 +269,50 @@ public void testSetPackageName() {
runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
}
// Test the SQL identifier -> Java identifier conversion.
@Test
public void testIdentifierConversion() {
assertNull(ClassWriter.getIdentifierStrForChar(' '));
assertNull(ClassWriter.getIdentifierStrForChar('\t'));
assertNull(ClassWriter.getIdentifierStrForChar('\r'));
assertNull(ClassWriter.getIdentifierStrForChar('\n'));
assertEquals("x", ClassWriter.getIdentifierStrForChar('x'));
assertEquals("_", ClassWriter.getIdentifierStrForChar('-'));
assertEquals("_", ClassWriter.getIdentifierStrForChar('_'));
assertEquals("foo", ClassWriter.toIdentifier("foo"));
assertEquals("_class", ClassWriter.toIdentifier("class"));
assertEquals("_class", ClassWriter.toIdentifier("cla ss"));
assertEquals("_int", ClassWriter.toIdentifier("int"));
assertEquals("thisismanywords", ClassWriter.toIdentifier("this is many words"));
assertEquals("_9isLegalInSql", ClassWriter.toIdentifier("9isLegalInSql"));
assertEquals("___", ClassWriter.toIdentifier("___"));
}
@Test
public void testWeirdColumnNames() throws SQLException {
// Recreate the table with column names that aren't legal Java identifiers.
String tableName = HsqldbTestServer.getTableName();
Connection connection = testServer.getConnection();
Statement st = connection.createStatement();
st.executeUpdate("DROP TABLE " + tableName + " IF EXISTS");
st.executeUpdate("CREATE TABLE " + tableName + " (class INT, \"9field\" INT)");
st.executeUpdate("INSERT INTO " + tableName + " VALUES(42, 41)");
connection.commit();
connection.close();
String [] argv = {
"--bindir",
JAR_GEN_DIR,
"--outdir",
CODE_GEN_DIR,
"--package-name",
OVERRIDE_PACKAGE_NAME
};
runGenerationTest(argv, OVERRIDE_PACKAGE_NAME + "." + HsqldbTestServer.getTableName());
}
}