
SQOOP-215. Support for codec aliases.

(Tom White via Arvind Prabhakar)

From: Arvind Prabhakar <arvind@cloudera.com>

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1150039 13f79535-47bb-0310-9956-ffa450edef68
Andrew Bayer 2011-07-22 20:04:37 +00:00
parent fe4f54a413
commit b84aaf3ab0
4 changed files with 136 additions and 15 deletions
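
In practice, the change lets a codec be looked up through CodecMap by a short, case-insensitive alias instead of its fully qualified class name. A minimal sketch of the new lookup path, using only classes that appear in the diff below (the wrapper class and main method are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;

import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.UnsupportedCodecException;

public class CodecAliasExample {
  public static void main(String[] args) throws UnsupportedCodecException {
    Configuration conf = new Configuration();
    // "gzip", "gzipcodec", and "org.apache.hadoop.io.compress.GzipCodec"
    // should all resolve to the same codec after this change.
    CompressionCodec codec = CodecMap.getCodec("gzip", conf);
    System.out.println(codec.getClass().getName());
  }
}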

View File

@@ -18,21 +18,25 @@
package com.cloudera.sqoop.io;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
* This is used by LobFile.
*/
public final class CodecMap {
// Supported codec map values
// Note: do not add more values here, since codecs are discovered using the
// standard Hadoop mechanism (io.compression.codecs). See
// CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
@@ -45,6 +49,18 @@ public final class CodecMap {
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
// add more from Hadoop CompressionCodecFactory
for (Class<? extends CompressionCodec> cls
: CompressionCodecFactory.getCodecClasses(new Configuration())) {
String simpleName = cls.getSimpleName();
String codecName = simpleName;
if (simpleName.endsWith("Codec")) {
codecName = simpleName.substring(0, simpleName.length()
- "Codec".length());
}
codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
}
}
private CodecMap() {
@@ -73,6 +89,12 @@ public static String getCodecClassName(String codecName)
*/
public static CompressionCodec getCodec(String codecName,
Configuration conf) throws UnsupportedCodecException {
// Try standard Hadoop mechanism first
CompressionCodec codec = getCodecByName(codecName, conf);
if (codec != null) {
return codec;
}
// Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
@@ -90,6 +112,52 @@ public static CompressionCodec getCodec(String codecName,
}
}
/**
* Find the relevant compression codec by its canonical class name
* or by a codec alias.
* <p>
* Codec aliases are case insensitive.
* <p>
* The codec alias is the short class name (without the package name).
* If the short class name ends with 'Codec', then there are two aliases for
* the codec: the complete short class name and the short class name without
* the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
* aliases are 'gzip' and 'gzipcodec'.
* <p>
* Note: When HADOOP-7323 is available this method can be replaced with a call
* to CompressionCodecFactory.
* @param codecName the canonical class name of the codec or the codec alias
* @param conf the Hadoop configuration used to instantiate the codec
* @return the codec object, or null if no codec matching the name was found
*/
private static CompressionCodec getCodecByName(String codecName,
Configuration conf) {
List<Class<? extends CompressionCodec>> codecs =
CompressionCodecFactory.getCodecClasses(conf);
for (Class<? extends CompressionCodec> cls : codecs) {
if (codecMatches(cls, codecName)) {
return ReflectionUtils.newInstance(cls, conf);
}
}
return null;
}
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0, simpleName.length()
- "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
}
/**
* Return the set of available codec names.
*/

View File

@@ -41,6 +41,7 @@
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.PerfCounters;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.manager.ImportJobContext;
/**
@@ -93,10 +94,7 @@ protected void configureOutputFormat(Job job, String tableName,
codecClass = GzipCodec.class;
} else {
Configuration conf = job.getConfiguration();
@SuppressWarnings("unchecked")
Class<? extends CompressionCodec> c =
(Class<? extends CompressionCodec>) conf.getClassByName(codecName);
codecClass = c;
codecClass = CodecMap.getCodec(codecName, conf).getClass();
}
FileOutputFormat.setOutputCompressorClass(job, codecClass);

View File

@@ -32,10 +32,10 @@
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.conf.Configuration;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.SplittingOutputStream;
import com.cloudera.sqoop.io.SplittableBufferedWriter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Shell;
import com.cloudera.sqoop.manager.ImportJobContext;
@@ -96,15 +96,7 @@ private static CompressionCodec getCodec(Configuration conf,
if (options.getCompressionCodec() == null) {
return new GzipCodec();
} else {
try {
@SuppressWarnings("unchecked")
Class<? extends CompressionCodec> c =
(Class<? extends CompressionCodec>)
conf.getClassByName(options.getCompressionCodec());
return ReflectionUtils.newInstance(c, conf);
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
return CodecMap.getCodec(options.getCompressionCodec(), conf);
}
}
return null;

View File

@@ -0,0 +1,63 @@
/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import junit.framework.TestCase;
/**
* Test looking up codecs by name.
*/
public class TestCodecMap extends TestCase {
private void verifyCodec(Class<?> c, String codecName)
throws UnsupportedCodecException {
CompressionCodec codec = CodecMap.getCodec(codecName, new Configuration());
assertEquals(codec.getClass(), c);
}
public void testGetCodecNames() {
// gzip is picked up from Hadoop defaults
assertTrue(CodecMap.getCodecNames().contains("gzip"));
}
public void testGetCodec() throws IOException {
verifyCodec(GzipCodec.class, "gzip");
verifyCodec(GzipCodec.class, "Gzip");
verifyCodec(GzipCodec.class, "GZIP");
verifyCodec(GzipCodec.class, "gzipcodec");
verifyCodec(GzipCodec.class, "GzipCodec");
verifyCodec(GzipCodec.class, "GZIPCODEC");
verifyCodec(GzipCodec.class, "org.apache.hadoop.io.compress.GzipCodec");
}
public void testUnrecognizedCodec() {
try {
CodecMap.getCodec("bogus", new Configuration());
fail("'bogus' codec should throw exception");
} catch (UnsupportedCodecException e) {
// expected
}
}
}