diff --git a/src/java/org/apache/sqoop/avro/AvroUtil.java b/src/java/org/apache/sqoop/avro/AvroUtil.java index 8d901302..ee29f140 100644 --- a/src/java/org/apache/sqoop/avro/AvroUtil.java +++ b/src/java/org/apache/sqoop/avro/AvroUtil.java @@ -28,7 +28,6 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.mapred.FsInput; -import org.apache.commons.lang.StringEscapeUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -107,10 +106,7 @@ public static Object toAvro(Object o, Schema.Field field, boolean bigDecimalForm * Convert Column name into Avro column name. */ public static String toAvroColumn(String column) { - // We're unescaping identifiers to get the real Unicode characters - // back, and not the escaped versions. - String candidate = StringEscapeUtils.unescapeJava( - ClassWriter.toJavaIdentifier(column)); + String candidate = ClassWriter.toJavaIdentifier(column); return toAvroIdentifier(candidate); } diff --git a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java index 5b1c7457..3c31c43a 100644 --- a/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java +++ b/src/java/org/apache/sqoop/orm/AvroSchemaGenerator.java @@ -29,7 +29,6 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; -import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -89,9 +88,7 @@ public Schema generate(String schemaNameOverride) throws IOException { List fields = new ArrayList(); for (String columnName : columnNames) { - // We're unescaping identifiers to get the real Unicode characters - // back, and not the escaped versions. - String cleanedCol = AvroUtil.toAvroIdentifier(StringEscapeUtils.unescapeJava(ClassWriter.toJavaIdentifier(columnName))); + String cleanedCol = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(columnName)); List columnInfoList = columnInfo.get(columnName); int sqlType = columnInfoList.get(0); Integer precision = columnInfoList.get(1); diff --git a/src/java/org/apache/sqoop/orm/ClassWriter.java b/src/java/org/apache/sqoop/orm/ClassWriter.java index 0c8d86d0..c18a36f3 100644 --- a/src/java/org/apache/sqoop/orm/ClassWriter.java +++ b/src/java/org/apache/sqoop/orm/ClassWriter.java @@ -24,6 +24,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.Date; import java.util.HashSet; import java.util.List; @@ -284,16 +285,7 @@ public static String toJavaIdentifier(String candidate) { return "_" + output; } - // Calling StringEscapeUtils#escapeJava is required because we'd like to - // support Unicode characters in identifiers even if the locale of the host - // system is not supporting UTF-8, or by any reason the locale is different - // from that. Good example: if a column name would contain a \uC3A1 char - // in it's name, though the locale would not support Unicode characters - // then the generated java file would contain unrecognizable characters - // for the compiler, and javac would fail with a compile error. If the name - // of the column would be Alm\uC3A1a then it would be Alm\uC3A1a after the - // escaping, and this every places where it's used/ - return StringEscapeUtils.escapeJava(output); + return output; } private String toJavaType(String columnName, int sqlType) { @@ -1796,7 +1788,7 @@ public void generate() throws IOException { Writer writer = null; try { ostream = new FileOutputStream(filename); - writer = new OutputStreamWriter(ostream); + writer = new OutputStreamWriter(ostream, StandardCharsets.UTF_8); writer.append(sb.toString()); } finally { if (null != writer) { diff --git a/src/java/org/apache/sqoop/orm/CompilationManager.java b/src/java/org/apache/sqoop/orm/CompilationManager.java index 0a2a87f2..c1a656b7 100644 --- a/src/java/org/apache/sqoop/orm/CompilationManager.java +++ b/src/java/org/apache/sqoop/orm/CompilationManager.java @@ -23,6 +23,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -170,6 +171,9 @@ public void compile() throws IOException { String curClasspath = System.getProperty("java.class.path"); LOG.debug("Current sqoop classpath = " + curClasspath); + args.add("-encoding"); + args.add(StandardCharsets.UTF_8.toString()); + args.add("-sourcepath"); args.add(jarOutDir);