mirror of
https://github.com/apache/sqoop.git
synced 2025-05-17 01:11:07 +08:00
Revert "SQOOP-2920: sqoop performance deteriorates significantly on wide datasets; sqoop 100% on cpu"
I mistakenly committed both SQOOP-2920 and SQOOP-2906 together in a single commit, so I'm reverting that commit and will commit the two changes separately.
This commit is contained in:
parent
83f0c1a40b
commit
ac217a032c
@ -114,25 +114,11 @@ public static String toAvroColumn(String column) {
|
|||||||
* Format candidate to avro specifics
|
* Format candidate to avro specifics
|
||||||
*/
|
*/
|
||||||
public static String toAvroIdentifier(String candidate) {
|
public static String toAvroIdentifier(String candidate) {
|
||||||
char[] data = candidate.toCharArray();
|
String formattedCandidate = candidate.replaceAll("\\W+", "_");
|
||||||
boolean skip = false;
|
if (formattedCandidate.substring(0,1).matches("[a-zA-Z_]")) {
|
||||||
int stringIndex = 0;
|
return formattedCandidate;
|
||||||
|
|
||||||
for (char c:data) {
|
|
||||||
if (Character.isLetterOrDigit(c) || c == '_') {
|
|
||||||
data[stringIndex++] = c;
|
|
||||||
skip = false;
|
|
||||||
} else if(!skip) {
|
|
||||||
data[stringIndex++] = '_';
|
|
||||||
skip = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char initial = data[0];
|
|
||||||
if (Character.isLetter(initial) || initial == '_') {
|
|
||||||
return new String(data, 0, stringIndex);
|
|
||||||
} else {
|
} else {
|
||||||
return "AVRO_".concat(new String(data, 0, stringIndex));
|
return "AVRO_" + formattedCandidate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1064,49 +1064,57 @@ private void myGenerateCloneMethod(Map<String, Integer> columnTypes,
|
|||||||
* @param colNames - ordered list of column names for table.
|
* @param colNames - ordered list of column names for table.
|
||||||
* @param sb - StringBuilder to append code to
|
* @param sb - StringBuilder to append code to
|
||||||
*/
|
*/
|
||||||
private void generateSetField(Map<String, Integer> columnTypes, String[] colNames, String[] rawColNames,
|
private void generateSetField(Map<String, Integer> columnTypes,
|
||||||
StringBuilder sb) {
|
String [] colNames, String [] rawColNames, StringBuilder sb) {
|
||||||
String sep = System.getProperty("line.separator");
|
|
||||||
sb.append(" public void setField(String __fieldName, Object __fieldVal) " + "{" + sep);
|
|
||||||
sb.append(" if (!setters.containsKey(__fieldName)) {" + sep);
|
|
||||||
sb.append(" throw new RuntimeException(\"No such field:\"+__fieldName);" + sep);
|
|
||||||
sb.append(" }" + sep);
|
|
||||||
sb.append(" setters.get(__fieldName).setField(__fieldVal);" + sep);
|
|
||||||
sb.append(" }\n" + sep);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void generateConstructorAndInitMethods(Map<String, Integer> colTypes, String[] colNames, String[] rawColNames,
|
int numberOfMethods =
|
||||||
String typeName, StringBuilder sb) {
|
this.getNumberOfMethods(colNames, maxColumnsPerMethod);
|
||||||
String sep = System.getProperty("line.separator");
|
|
||||||
int numberOfMethods = getNumberOfMethods(colNames, maxColumnsPerMethod);
|
sb.append(" public void setField(String __fieldName, Object __fieldVal) "
|
||||||
for (int methodNumber = 0; methodNumber < numberOfMethods; ++methodNumber) {
|
+ "{\n");
|
||||||
sb.append(" private void init" + methodNumber + "() {" + sep);
|
if (numberOfMethods > 1) {
|
||||||
for (int i = methodNumber * maxColumnsPerMethod; i < topBoundary(colNames, methodNumber,
|
boolean first = true;
|
||||||
maxColumnsPerMethod); ++i) {
|
for (int i = 0; i < numberOfMethods; ++i) {
|
||||||
String colName = colNames[i];
|
if (!first) {
|
||||||
String rawColName = rawColNames[i];
|
sb.append(" else");
|
||||||
int sqlType = colTypes.get(colName);
|
}
|
||||||
String javaType = toJavaType(colName, sqlType);
|
sb.append(" if (this.setField" + i
|
||||||
|
+ "(__fieldName, __fieldVal)) {\n");
|
||||||
|
sb.append(" return;\n");
|
||||||
|
sb.append(" }\n");
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
boolean first = true;
|
||||||
|
for (int i = 0; i < colNames.length; i++) {
|
||||||
|
int sqlType = columnTypes.get(colNames[i]);
|
||||||
|
String javaType = toJavaType(colNames[i], sqlType);
|
||||||
if (null == javaType) {
|
if (null == javaType) {
|
||||||
LOG.error("Cannot resolve SQL type " + sqlType);
|
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
sb.append(" setters.put(\"" + serializeRawColName(rawColName) + "\", new FieldSetterCommand() {" + sep);
|
if (!first) {
|
||||||
sb.append(" @Override" + sep);
|
sb.append(" else");
|
||||||
sb.append(" public void setField(Object value) {" + sep);
|
}
|
||||||
sb.append(" " + colName + " = (" + javaType + ")value;" + sep);
|
|
||||||
sb.append(" }" + sep);
|
sb.append(" if (\"" + serializeRawColName(rawColNames[i]) + "\".equals(__fieldName)) {\n");
|
||||||
sb.append(" });" + sep);
|
sb.append(" this." + colNames[i] + " = (" + javaType
|
||||||
|
+ ") __fieldVal;\n");
|
||||||
|
sb.append(" }\n");
|
||||||
|
first = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sb.append(" }" + sep);
|
|
||||||
}
|
}
|
||||||
sb.append(" public " + typeName + "() {" + sep);
|
sb.append(" else {\n");
|
||||||
|
sb.append(" throw new RuntimeException(");
|
||||||
|
sb.append("\"No such field: \" + __fieldName);\n");
|
||||||
|
sb.append(" }\n");
|
||||||
|
sb.append(" }\n");
|
||||||
|
|
||||||
for (int i = 0; i < numberOfMethods; ++i) {
|
for (int i = 0; i < numberOfMethods; ++i) {
|
||||||
sb.append(" init" + i + "();" + sep);
|
myGenerateSetField(columnTypes, colNames, rawColNames, sb, i, maxColumnsPerMethod);
|
||||||
}
|
}
|
||||||
sb.append(" }" + sep);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The raw column name is the column name as it was created in the database, and we need to serialize it between
|
* The raw column name is the column name as it was created in the database, and we need to serialize it between
|
||||||
* double quotes into a Java class that will then be compiled with javac. Various databases support
|
* double quotes into a Java class that will then be compiled with javac. Various databases support
|
||||||
@ -1176,7 +1184,7 @@ private void generateGetFieldMap(Map<String, Integer> columnTypes,
|
|||||||
|
|
||||||
sb.append(" public Map<String, Object> getFieldMap() {\n");
|
sb.append(" public Map<String, Object> getFieldMap() {\n");
|
||||||
sb.append(" Map<String, Object> __sqoop$field_map = "
|
sb.append(" Map<String, Object> __sqoop$field_map = "
|
||||||
+ "new HashMap<String, Object>();\n");
|
+ "new TreeMap<String, Object>();\n");
|
||||||
if (numberOfMethods > 1) {
|
if (numberOfMethods > 1) {
|
||||||
for (int i = 0; i < numberOfMethods; ++i) {
|
for (int i = 0; i < numberOfMethods; ++i) {
|
||||||
sb.append(" this.getFieldMap" + i + "(__sqoop$field_map);\n");
|
sb.append(" this.getFieldMap" + i + "(__sqoop$field_map);\n");
|
||||||
@ -1926,7 +1934,7 @@ private StringBuilder generateClassForColumns(
|
|||||||
sb.append("import java.util.Iterator;\n");
|
sb.append("import java.util.Iterator;\n");
|
||||||
sb.append("import java.util.List;\n");
|
sb.append("import java.util.List;\n");
|
||||||
sb.append("import java.util.Map;\n");
|
sb.append("import java.util.Map;\n");
|
||||||
sb.append("import java.util.HashMap;\n");
|
sb.append("import java.util.TreeMap;\n");
|
||||||
sb.append("\n");
|
sb.append("\n");
|
||||||
|
|
||||||
String className = tableNameInfo.getShortClassForTable(tableName);
|
String className = tableNameInfo.getShortClassForTable(tableName);
|
||||||
@ -1936,12 +1944,7 @@ private StringBuilder generateClassForColumns(
|
|||||||
+ CLASS_WRITER_VERSION + ";\n");
|
+ CLASS_WRITER_VERSION + ";\n");
|
||||||
sb.append(
|
sb.append(
|
||||||
" public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
|
" public int getClassFormatVersion() { return PROTOCOL_VERSION; }\n");
|
||||||
sb.append(" public static interface FieldSetterCommand {");
|
|
||||||
sb.append(" void setField(Object value);");
|
|
||||||
sb.append(" }");
|
|
||||||
sb.append(" protected ResultSet __cur_result_set;\n");
|
sb.append(" protected ResultSet __cur_result_set;\n");
|
||||||
sb.append(" private Map<String, FieldSetterCommand> setters = new HashMap<String, FieldSetterCommand>();\n");
|
|
||||||
generateConstructorAndInitMethods(columnTypes, colNames, rawColNames, className, sb);
|
|
||||||
generateFields(columnTypes, colNames, className, sb);
|
generateFields(columnTypes, colNames, className, sb);
|
||||||
generateEquals(columnTypes, colNames, className, sb);
|
generateEquals(columnTypes, colNames, className, sb);
|
||||||
generateDbRead(columnTypes, colNames, sb);
|
generateDbRead(columnTypes, colNames, sb);
|
||||||
|
@ -296,15 +296,11 @@ private void addClassFilesFromDir(File dir, JarOutputStream jstream)
|
|||||||
// we only record the subdir parts in the zip entry.
|
// we only record the subdir parts in the zip entry.
|
||||||
String fullPath = entry.getAbsolutePath();
|
String fullPath = entry.getAbsolutePath();
|
||||||
String chompedPath = fullPath.substring(baseDirName.length());
|
String chompedPath = fullPath.substring(baseDirName.length());
|
||||||
int indexOfDollarSign = chompedPath.indexOf("$");
|
|
||||||
String innerTypesChompedPath = chompedPath
|
|
||||||
.substring(0, indexOfDollarSign == -1 ? chompedPath.length() : indexOfDollarSign);
|
|
||||||
|
|
||||||
boolean include = chompedPath.endsWith(".class")
|
boolean include = chompedPath.endsWith(".class")
|
||||||
&& (sources.contains(
|
&& sources.contains(
|
||||||
chompedPath.substring(0, chompedPath.length() - ".class".length())
|
chompedPath.substring(0, chompedPath.length() - ".class".length())
|
||||||
+ ".java")
|
+ ".java");
|
||||||
|| sources.contains(innerTypesChompedPath + ".java"));
|
|
||||||
|
|
||||||
if (include) {
|
if (include) {
|
||||||
// include this file.
|
// include this file.
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.Random;
|
|
||||||
import java.util.jar.JarEntry;
|
import java.util.jar.JarEntry;
|
||||||
import java.util.jar.JarInputStream;
|
import java.util.jar.JarInputStream;
|
||||||
|
|
||||||
@ -58,9 +57,6 @@ public class TestClassWriter extends TestCase {
|
|||||||
|
|
||||||
public static final Log LOG =
|
public static final Log LOG =
|
||||||
LogFactory.getLog(TestClassWriter.class.getName());
|
LogFactory.getLog(TestClassWriter.class.getName());
|
||||||
private static final String WIDE_TABLE_NAME = "WIDETABLE";
|
|
||||||
private static final int WIDE_TABLE_COLUMN_COUNT = 800;
|
|
||||||
private static final int WIDE_TABLE_ROW_COUNT = 20_000;
|
|
||||||
|
|
||||||
// instance variables populated during setUp, used during tests
|
// instance variables populated during setUp, used during tests
|
||||||
private HsqldbTestServer testServer;
|
private HsqldbTestServer testServer;
|
||||||
@ -126,16 +122,12 @@ public void tearDown() {
|
|||||||
static final String JAR_GEN_DIR = ImportJobTestCase.TEMP_BASE_DIR
|
static final String JAR_GEN_DIR = ImportJobTestCase.TEMP_BASE_DIR
|
||||||
+ "sqoop/test/jargen";
|
+ "sqoop/test/jargen";
|
||||||
|
|
||||||
private File runGenerationTest(String[] argv, String classNameToCheck) {
|
|
||||||
return runGenerationTest(argv, classNameToCheck, HsqldbTestServer.getTableName());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run a test to verify that we can generate code and it emits the output
|
* Run a test to verify that we can generate code and it emits the output
|
||||||
* files where we expect them.
|
* files where we expect them.
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private File runGenerationTest(String[] argv, String classNameToCheck, String tableName) {
|
private File runGenerationTest(String [] argv, String classNameToCheck) {
|
||||||
File codeGenDirFile = new File(CODE_GEN_DIR);
|
File codeGenDirFile = new File(CODE_GEN_DIR);
|
||||||
File classGenDirFile = new File(JAR_GEN_DIR);
|
File classGenDirFile = new File(JAR_GEN_DIR);
|
||||||
|
|
||||||
@ -148,7 +140,7 @@ private File runGenerationTest(String[] argv, String classNameToCheck, String ta
|
|||||||
|
|
||||||
CompilationManager compileMgr = new CompilationManager(options);
|
CompilationManager compileMgr = new CompilationManager(options);
|
||||||
ClassWriter writer = new ClassWriter(options, manager,
|
ClassWriter writer = new ClassWriter(options, manager,
|
||||||
tableName, compileMgr);
|
HsqldbTestServer.getTableName(), compileMgr);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
writer.generate();
|
writer.generate();
|
||||||
@ -683,55 +675,4 @@ public void testNoClassGeneration() throws Exception {
|
|||||||
};
|
};
|
||||||
runFailedGenerationTest(argv, HsqldbTestServer.getTableName());
|
runFailedGenerationTest(argv, HsqldbTestServer.getTableName());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 10000)
|
|
||||||
public void testWideTableClassGeneration() throws Exception {
|
|
||||||
createWideTable();
|
|
||||||
options = new SqoopOptions(HsqldbTestServer.getDbUrl(), WIDE_TABLE_NAME);
|
|
||||||
|
|
||||||
// Set the option strings in an "argv" to redirect our srcdir and bindir.
|
|
||||||
String [] argv = {
|
|
||||||
"--bindir",
|
|
||||||
JAR_GEN_DIR,
|
|
||||||
"--outdir",
|
|
||||||
CODE_GEN_DIR,
|
|
||||||
};
|
|
||||||
|
|
||||||
File ormJarFile = runGenerationTest(argv, WIDE_TABLE_NAME, WIDE_TABLE_NAME);
|
|
||||||
|
|
||||||
ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(ormJarFile.getCanonicalPath(),
|
|
||||||
WIDE_TABLE_NAME);
|
|
||||||
Class tableClass = Class.forName(WIDE_TABLE_NAME, true,
|
|
||||||
Thread.currentThread().getContextClassLoader());
|
|
||||||
|
|
||||||
Object instance = tableClass.newInstance();
|
|
||||||
Method setterMethod = tableClass.getMethod("setField", String.class, Object.class);
|
|
||||||
Random random = new Random(0);
|
|
||||||
for (int j = 0; j < WIDE_TABLE_ROW_COUNT; ++j) {
|
|
||||||
for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
|
|
||||||
setterMethod.invoke(instance, "INTFIELD" + i, random.nextInt());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (null != prevClassLoader) {
|
|
||||||
ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createWideTable() throws Exception {
|
|
||||||
try (Connection conn = testServer.getConnection(); Statement stmt = conn.createStatement();) {
|
|
||||||
stmt.executeUpdate("DROP TABLE \"" + WIDE_TABLE_NAME + "\" IF EXISTS");
|
|
||||||
StringBuilder sb = new StringBuilder("CREATE TABLE \"" + WIDE_TABLE_NAME + "\" (");
|
|
||||||
for (int i = 0; i < WIDE_TABLE_COLUMN_COUNT; ++i) {
|
|
||||||
sb.append("intField" + i + " INT");
|
|
||||||
if (i < WIDE_TABLE_COLUMN_COUNT - 1) {
|
|
||||||
sb.append(",");
|
|
||||||
} else {
|
|
||||||
sb.append(")");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stmt.executeUpdate(sb.toString());
|
|
||||||
conn.commit();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user