diff --git a/src/java/com/cloudera/sqoop/io/CodecMap.java b/src/java/com/cloudera/sqoop/io/CodecMap.java
index d94f541a..85641644 100644
--- a/src/java/com/cloudera/sqoop/io/CodecMap.java
+++ b/src/java/com/cloudera/sqoop/io/CodecMap.java
@@ -18,21 +18,25 @@
package com.cloudera.sqoop.io;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
- * This is used by LobFile.
*/
public final class CodecMap {
// Supported codec map values
+ // Note: do not add more values here, since codecs are discovered using the
+ // standard Hadoop mechanism (io.compression.codecs). See
+ // CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
@@ -45,6 +49,18 @@ public final class CodecMap {
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
+
+ // add more from Hadoop CompressionCodecFactory
+ for (Class extends CompressionCodec> cls
+ : CompressionCodecFactory.getCodecClasses(new Configuration())) {
+ String simpleName = cls.getSimpleName();
+ String codecName = simpleName;
+ if (simpleName.endsWith("Codec")) {
+ codecName = simpleName.substring(0, simpleName.length()
+ - "Codec".length());
+ }
+ codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
+ }
}
private CodecMap() {
@@ -73,6 +89,12 @@ public static String getCodecClassName(String codecName)
*/
public static CompressionCodec getCodec(String codecName,
Configuration conf) throws UnsupportedCodecException {
+ // Try standard Hadoop mechanism first
+ CompressionCodec codec = getCodecByName(codecName, conf);
+ if (codec != null) {
+ return codec;
+ }
+ // Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
@@ -90,6 +112,52 @@ public static CompressionCodec getCodec(String codecName,
}
}
+ /**
+ * Find the relevant compression codec for the codec's canonical class name
+ * or by codec alias.
+ *
+ * Codec aliases are case insensitive.
+ *
+ * The code alias is the short class name (without the package name).
+ * If the short class name ends with 'Codec', then there are two aliases for
+ * the codec, the complete short class name and the short class name without
+ * the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ * alias are 'gzip' and 'gzipcodec'.
+ *
+ * Note: When HADOOP-7323 is available this method can be replaced with a call
+ * to CompressionCodecFactory.
+ * @param classname the canonical class name of the codec or the codec alias
+ * @return the codec object or null if none matching the name were found
+ */
+ private static CompressionCodec getCodecByName(String codecName,
+ Configuration conf) {
+ List> codecs =
+ CompressionCodecFactory.getCodecClasses(conf);
+ for (Class extends CompressionCodec> cls : codecs) {
+ if (codecMatches(cls, codecName)) {
+ return ReflectionUtils.newInstance(cls, conf);
+ }
+ }
+ return null;
+ }
+
+ private static boolean codecMatches(Class extends CompressionCodec> cls,
+ String codecName) {
+ String simpleName = cls.getSimpleName();
+ if (cls.getName().equals(codecName)
+ || simpleName.equalsIgnoreCase(codecName)) {
+ return true;
+ }
+ if (simpleName.endsWith("Codec")) {
+ String prefix = simpleName.substring(0, simpleName.length()
+ - "Codec".length());
+ if (prefix.equalsIgnoreCase(codecName)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
/**
* Return the set of available codec names.
*/
diff --git a/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java b/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
index 2df6ad39..799c388f 100644
--- a/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
+++ b/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
@@ -41,6 +41,7 @@
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.PerfCounters;
import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.manager.ImportJobContext;
/**
@@ -93,10 +94,7 @@ protected void configureOutputFormat(Job job, String tableName,
codecClass = GzipCodec.class;
} else {
Configuration conf = job.getConfiguration();
- @SuppressWarnings("unchecked")
- Class extends CompressionCodec> c =
- (Class extends CompressionCodec>) conf.getClassByName(codecName);
- codecClass = c;
+ codecClass = CodecMap.getCodec(codecName, conf).getClass();
}
FileOutputFormat.setOutputCompressorClass(job, codecClass);
diff --git a/src/java/com/cloudera/sqoop/util/DirectImportUtils.java b/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
index 307376e9..ebb5d572 100644
--- a/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
+++ b/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
@@ -32,10 +32,10 @@
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.conf.Configuration;
import com.cloudera.sqoop.SqoopOptions;
+import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.SplittingOutputStream;
import com.cloudera.sqoop.io.SplittableBufferedWriter;
-import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Shell;
import com.cloudera.sqoop.manager.ImportJobContext;
@@ -96,15 +96,7 @@ private static CompressionCodec getCodec(Configuration conf,
if (options.getCompressionCodec() == null) {
return new GzipCodec();
} else {
- try {
- @SuppressWarnings("unchecked")
- Class extends CompressionCodec> c =
- (Class extends CompressionCodec>)
- conf.getClassByName(options.getCompressionCodec());
- return ReflectionUtils.newInstance(c, conf);
- } catch (ClassNotFoundException e) {
- throw new IOException(e);
- }
+ return CodecMap.getCodec(options.getCompressionCodec(), conf);
}
}
return null;
diff --git a/src/test/com/cloudera/sqoop/io/TestCodecMap.java b/src/test/com/cloudera/sqoop/io/TestCodecMap.java
new file mode 100644
index 00000000..83569ea7
--- /dev/null
+++ b/src/test/com/cloudera/sqoop/io/TestCodecMap.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to Cloudera, Inc. under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Cloudera, Inc. licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.cloudera.sqoop.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+
+import junit.framework.TestCase;
+
+/**
+ * Test looking up codecs by name.
+ */
+public class TestCodecMap extends TestCase {
+
+ private void verifyCodec(Class> c, String codecName)
+ throws UnsupportedCodecException {
+ CompressionCodec codec = CodecMap.getCodec(codecName, new Configuration());
+ assertEquals(codec.getClass(), c);
+ }
+
+ public void testGetCodecNames() {
+ // gzip is picked up from Hadoop defaults
+ assertTrue(CodecMap.getCodecNames().contains("gzip"));
+ }
+
+ public void testGetCodec() throws IOException {
+ verifyCodec(GzipCodec.class, "gzip");
+ verifyCodec(GzipCodec.class, "Gzip");
+ verifyCodec(GzipCodec.class, "GZIP");
+ verifyCodec(GzipCodec.class, "gzipcodec");
+ verifyCodec(GzipCodec.class, "GzipCodec");
+ verifyCodec(GzipCodec.class, "GZIPCODEC");
+ verifyCodec(GzipCodec.class, "org.apache.hadoop.io.compress.GzipCodec");
+ }
+
+ public void testUnrecognizedCodec() {
+ try {
+ CodecMap.getCodec("bogus", new Configuration());
+ fail("'bogus' codec should throw exception");
+ } catch (UnsupportedCodecException e) {
+ // expected
+ }
+ }
+}