diff --git a/src/java/com/cloudera/sqoop/io/CodecMap.java b/src/java/com/cloudera/sqoop/io/CodecMap.java
index d94f541a..85641644 100644
--- a/src/java/com/cloudera/sqoop/io/CodecMap.java
+++ b/src/java/com/cloudera/sqoop/io/CodecMap.java
@@ -18,21 +18,25 @@
 
 package com.cloudera.sqoop.io;
 
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
 import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * Provides a mapping from codec names to concrete implementation class names.
- * This is used by LobFile.
  */
 public final class CodecMap {
 
   // Supported codec map values
+  // Note: do not add more values here, since codecs are discovered using the
+  // standard Hadoop mechanism (io.compression.codecs). See
+  // CompressionCodecFactory.
   public static final String NONE = "none";
   public static final String DEFLATE = "deflate";
   public static final String LZO = "lzo";
@@ -45,6 +49,18 @@ public final class CodecMap {
     codecNames.put(NONE, null);
     codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
     codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
+
+    // add more from Hadoop CompressionCodecFactory
+    for (Class<? extends CompressionCodec> cls
+        : CompressionCodecFactory.getCodecClasses(new Configuration())) {
+      String simpleName = cls.getSimpleName();
+      String codecName = simpleName;
+      if (simpleName.endsWith("Codec")) {
+        codecName = simpleName.substring(0, simpleName.length()
+            - "Codec".length());
+      }
+      codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
+    }
   }
 
   private CodecMap() {
@@ -73,6 +89,12 @@ public static String getCodecClassName(String codecName)
    */
   public static CompressionCodec getCodec(String codecName,
       Configuration conf) throws UnsupportedCodecException {
+    // Try standard Hadoop mechanism first
+    CompressionCodec codec = getCodecByName(codecName, conf);
+    if (codec != null) {
+      return codec;
+    }
+    // Fall back to Sqoop mechanism
     String codecClassName = null;
     try {
       codecClassName = getCodecClassName(codecName);
@@ -90,6 +112,52 @@ public static CompressionCodec getCodec(String codecName,
     }
   }
 
+  /**
+   * Find the relevant compression codec for the codec's canonical class name
+   * or by codec alias.
+   * <p>
+   * Codec aliases are case insensitive.
+   * <p>
+   * The codec alias is the short class name (without the package name).
+   * If the short class name ends with 'Codec', then there are two aliases for
+   * the codec, the complete short class name and the short class name without
+   * the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+   * aliases are 'gzip' and 'gzipcodec'.
+   * <p>
+   * Note: When HADOOP-7323 is available this method can be replaced with a call
+   * to CompressionCodecFactory.
+   * @param codecName the canonical class name of the codec or the codec alias
+   * @return the codec object or null if none matching the name were found
+   */
+  private static CompressionCodec getCodecByName(String codecName,
+      Configuration conf) {
+    List<Class<? extends CompressionCodec>> codecs =
+      CompressionCodecFactory.getCodecClasses(conf);
+    for (Class<? extends CompressionCodec> cls : codecs) {
+      if (codecMatches(cls, codecName)) {
+        return ReflectionUtils.newInstance(cls, conf);
+      }
+    }
+    return null;
+  }
+
+  private static boolean codecMatches(Class<? extends CompressionCodec> cls,
+      String codecName) {
+    String simpleName = cls.getSimpleName();
+    if (cls.getName().equals(codecName)
+        || simpleName.equalsIgnoreCase(codecName)) {
+      return true;
+    }
+    if (simpleName.endsWith("Codec")) {
+      String prefix = simpleName.substring(0, simpleName.length()
+          - "Codec".length());
+      if (prefix.equalsIgnoreCase(codecName)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /**
    * Return the set of available codec names.
    */
diff --git a/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java b/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
index 2df6ad39..799c388f 100644
--- a/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
+++ b/src/java/com/cloudera/sqoop/mapreduce/ImportJobBase.java
@@ -41,6 +41,7 @@
 import com.cloudera.sqoop.util.ImportException;
 import com.cloudera.sqoop.util.PerfCounters;
 import com.cloudera.sqoop.config.ConfigurationHelper;
+import com.cloudera.sqoop.io.CodecMap;
 import com.cloudera.sqoop.manager.ImportJobContext;
 
 /**
@@ -93,10 +94,7 @@ protected void configureOutputFormat(Job job, String tableName,
           codecClass = GzipCodec.class;
         } else {
           Configuration conf = job.getConfiguration();
-          @SuppressWarnings("unchecked")
-          Class<? extends CompressionCodec> c =
-              (Class<? extends CompressionCodec>) conf.getClassByName(codecName);
-          codecClass = c;
+          codecClass = CodecMap.getCodec(codecName, conf).getClass();
         }
         FileOutputFormat.setOutputCompressorClass(job, codecClass);
 
diff --git a/src/java/com/cloudera/sqoop/util/DirectImportUtils.java b/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
index 307376e9..ebb5d572 100644
--- a/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
+++ b/src/java/com/cloudera/sqoop/util/DirectImportUtils.java
@@ -32,10 +32,10 @@
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.conf.Configuration;
 import com.cloudera.sqoop.SqoopOptions;
+import com.cloudera.sqoop.io.CodecMap;
 import com.cloudera.sqoop.io.SplittingOutputStream;
 import com.cloudera.sqoop.io.SplittableBufferedWriter;
 
-import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Shell;
 
 import com.cloudera.sqoop.manager.ImportJobContext;
@@ -96,15 +96,7 @@ private static CompressionCodec getCodec(Configuration conf,
       if (options.getCompressionCodec() == null) {
         return new GzipCodec();
       } else {
-        try {
-          @SuppressWarnings("unchecked")
-          Class<? extends CompressionCodec> c =
-              (Class<? extends CompressionCodec>)
-              conf.getClassByName(options.getCompressionCodec());
-          return ReflectionUtils.newInstance(c, conf);
-        } catch (ClassNotFoundException e) {
-          throw new IOException(e);
-        }
+        return CodecMap.getCodec(options.getCompressionCodec(), conf);
       }
     }
     return null;
diff --git a/src/test/com/cloudera/sqoop/io/TestCodecMap.java b/src/test/com/cloudera/sqoop/io/TestCodecMap.java
new file mode 100644
index 00000000..83569ea7
--- /dev/null
+++ b/src/test/com/cloudera/sqoop/io/TestCodecMap.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to Cloudera, Inc. under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Cloudera, Inc. licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.cloudera.sqoop.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+
+import junit.framework.TestCase;
+
+/**
+ * Test looking up codecs by name.
+ */
+public class TestCodecMap extends TestCase {
+
+  private void verifyCodec(Class<?> c, String codecName)
+      throws UnsupportedCodecException {
+    CompressionCodec codec = CodecMap.getCodec(codecName, new Configuration());
+    assertEquals(codec.getClass(), c);
+  }
+
+  public void testGetCodecNames() {
+    // gzip is picked up from Hadoop defaults
+    assertTrue(CodecMap.getCodecNames().contains("gzip"));
+  }
+
+  public void testGetCodec() throws IOException {
+    verifyCodec(GzipCodec.class, "gzip");
+    verifyCodec(GzipCodec.class, "Gzip");
+    verifyCodec(GzipCodec.class, "GZIP");
+    verifyCodec(GzipCodec.class, "gzipcodec");
+    verifyCodec(GzipCodec.class, "GzipCodec");
+    verifyCodec(GzipCodec.class, "GZIPCODEC");
+    verifyCodec(GzipCodec.class, "org.apache.hadoop.io.compress.GzipCodec");
+  }
+
+  public void testUnrecognizedCodec() {
+    try {
+      CodecMap.getCodec("bogus", new Configuration());
+      fail("'bogus' codec should throw exception");
+    } catch (UnsupportedCodecException e) {
+      // expected
+    }
+  }
+}
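
Usage sketch (not part of the patch): the snippet below illustrates how the alias resolution added to CodecMap.getCodec() is expected to behave once this change is applied, mirroring what TestCodecMap exercises. The class name CodecLookupExample is hypothetical; it assumes UnsupportedCodecException lives in com.cloudera.sqoop.io alongside CodecMap, as the unqualified use in the test above suggests.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;

import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.UnsupportedCodecException;

public class CodecLookupExample {
  public static void main(String[] args) throws UnsupportedCodecException {
    Configuration conf = new Configuration();
    // Case-insensitive short alias, with or without the 'Codec' suffix.
    CompressionCodec byAlias = CodecMap.getCodec("gzip", conf);
    CompressionCodec bySuffixedAlias = CodecMap.getCodec("GzipCodec", conf);
    // The canonical class name resolves to the same codec.
    CompressionCodec byClassName =
        CodecMap.getCodec("org.apache.hadoop.io.compress.GzipCodec", conf);
    System.out.println(byAlias.getClass().getName());
    System.out.println(bySuffixedAlias.getClass().getName());
    System.out.println(byClassName.getClass().getName());
  }
}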