SQOOP-215. Support for codec aliases.
(Tom White via Arvind Prabhakar)

From: Arvind Prabhakar <arvind@cloudera.com>
git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1150039 13f79535-47bb-0310-9956-ffa450edef68
Commit: b84aaf3ab0
Parent: fe4f54a413
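As a quick illustration of the feature this commit adds, the sketch below resolves a compression codec from an alias through the CodecMap API shown in the diff. The driver class name (CodecAliasExample) is hypothetical, and the lookups assume GzipCodec is registered via Hadoop's default io.compression.codecs setting, as the new unit test also assumes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;

import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.UnsupportedCodecException;

// Hypothetical driver showing alias-based codec lookup via CodecMap.getCodec().
public class CodecAliasExample {
  public static void main(String[] args) throws UnsupportedCodecException {
    Configuration conf = new Configuration();

    // Each of these names should resolve to GzipCodec: the short class name,
    // the name without the "Codec" suffix (in any case), or the fully
    // qualified class name.
    for (String name : new String[] { "gzip", "GZIP", "GzipCodec",
        "org.apache.hadoop.io.compress.GzipCodec" }) {
      CompressionCodec codec = CodecMap.getCodec(name, conf);
      System.out.println(name + " -> " + codec.getClass().getName());
    }

    // An unknown name raises UnsupportedCodecException rather than
    // returning null.
    try {
      CodecMap.getCodec("bogus", conf);
    } catch (UnsupportedCodecException e) {
      System.out.println("'bogus' is not a known codec or alias");
    }
  }
}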
@@ -18,21 +18,25 @@
package com.cloudera.sqoop.io;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Provides a mapping from codec names to concrete implementation class names.
 * This is used by LobFile.
 */
public final class CodecMap {

  // Supported codec map values.
  // Note: do not add more values here, since codecs are discovered using the
  // standard Hadoop mechanism (io.compression.codecs). See
  // CompressionCodecFactory.
  public static final String NONE = "none";
  public static final String DEFLATE = "deflate";
  public static final String LZO = "lzo";

@@ -45,6 +49,18 @@ public final class CodecMap {
    codecNames.put(NONE, null);
    codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
    codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");

    // Add more from Hadoop CompressionCodecFactory.
    for (Class<? extends CompressionCodec> cls
        : CompressionCodecFactory.getCodecClasses(new Configuration())) {
      String simpleName = cls.getSimpleName();
      String codecName = simpleName;
      if (simpleName.endsWith("Codec")) {
        codecName = simpleName.substring(0, simpleName.length()
            - "Codec".length());
      }
      codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
    }
  }

  private CodecMap() {

@@ -73,6 +89,12 @@ public static String getCodecClassName(String codecName)
   */
  public static CompressionCodec getCodec(String codecName,
      Configuration conf) throws UnsupportedCodecException {
    // Try the standard Hadoop mechanism first.
    CompressionCodec codec = getCodecByName(codecName, conf);
    if (codec != null) {
      return codec;
    }
    // Fall back to the Sqoop mechanism.
    String codecClassName = null;
    try {
      codecClassName = getCodecClassName(codecName);

@@ -90,6 +112,52 @@ public static CompressionCodec getCodec(String codecName,
    }
  }

  /**
   * Find the relevant compression codec by the codec's canonical class name
   * or by codec alias.
   * <p>
   * Codec aliases are case insensitive.
   * <p>
   * The codec alias is the short class name (without the package name).
   * If the short class name ends with 'Codec', then there are two aliases for
   * the codec: the complete short class name and the short class name without
   * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
   * aliases are 'gzip' and 'gzipcodec'.
   * <p>
   * Note: when HADOOP-7323 is available this method can be replaced with a
   * call to CompressionCodecFactory.
   * @param codecName the canonical class name of the codec or the codec alias
   * @return the codec object, or null if no codec matching the name was found
   */
  private static CompressionCodec getCodecByName(String codecName,
      Configuration conf) {
    List<Class<? extends CompressionCodec>> codecs =
        CompressionCodecFactory.getCodecClasses(conf);
    for (Class<? extends CompressionCodec> cls : codecs) {
      if (codecMatches(cls, codecName)) {
        return ReflectionUtils.newInstance(cls, conf);
      }
    }
    return null;
  }

  private static boolean codecMatches(Class<? extends CompressionCodec> cls,
      String codecName) {
    String simpleName = cls.getSimpleName();
    if (cls.getName().equals(codecName)
        || simpleName.equalsIgnoreCase(codecName)) {
      return true;
    }
    if (simpleName.endsWith("Codec")) {
      String prefix = simpleName.substring(0, simpleName.length()
          - "Codec".length());
      if (prefix.equalsIgnoreCase(codecName)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Return the set of available codec names.
   */
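The constructor loop and getCodecByName() above both lean on Hadoop's CompressionCodecFactory to discover codecs and derive lowercase aliases by dropping a trailing "Codec". The standalone sketch below mirrors that derivation; the class name ListCodecAliases and the sample output are illustrative only, since the actual set of codecs depends on the io.compression.codecs setting of the installation.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

// Standalone sketch of the discovery loop: derive a lowercase alias for each
// codec class that Hadoop advertises through io.compression.codecs.
public class ListCodecAliases {
  public static void main(String[] args) {
    List<Class<? extends CompressionCodec>> codecs =
        CompressionCodecFactory.getCodecClasses(new Configuration());
    List<String> aliases = new ArrayList<String>();
    for (Class<? extends CompressionCodec> cls : codecs) {
      String simpleName = cls.getSimpleName();        // e.g. "GzipCodec"
      String alias = simpleName.endsWith("Codec")
          ? simpleName.substring(0, simpleName.length() - "Codec".length())
          : simpleName;
      aliases.add(alias.toLowerCase());               // e.g. "gzip"
    }
    // Typically prints something like [default, gzip, bzip2] on a stock
    // Hadoop configuration, but the exact set varies by installation.
    System.out.println(aliases);
  }
}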
@@ -41,6 +41,7 @@
import com.cloudera.sqoop.util.ImportException;
import com.cloudera.sqoop.util.PerfCounters;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.manager.ImportJobContext;

/**

@@ -93,10 +94,7 @@ protected void configureOutputFormat(Job job, String tableName,
      codecClass = GzipCodec.class;
    } else {
      Configuration conf = job.getConfiguration();
      @SuppressWarnings("unchecked")
      Class<? extends CompressionCodec> c =
          (Class<? extends CompressionCodec>) conf.getClassByName(codecName);
      codecClass = c;
      codecClass = CodecMap.getCodec(codecName, conf).getClass();
    }
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
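In the configureOutputFormat() hunk above, codec resolution now goes through CodecMap, so import jobs accept aliases as well as class names. A minimal sketch of that wiring is shown below; the helper class name (CompressionSetup) is hypothetical, the setCompressOutput() call is added here for completeness and is not part of the hunk, and the new-API org.apache.hadoop.mapreduce classes are assumed.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.UnsupportedCodecException;

// Hypothetical helper: resolve a user-supplied codec name (alias or class
// name) through CodecMap, then hand the concrete class to FileOutputFormat.
public final class CompressionSetup {
  private CompressionSetup() { }

  public static void enableCompression(Job job, String codecName)
      throws IOException, UnsupportedCodecException {
    Configuration conf = job.getConfiguration();
    Class<? extends CompressionCodec> codecClass =
        CodecMap.getCodec(codecName, conf).getClass();
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
  }
}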
@@ -32,10 +32,10 @@
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.conf.Configuration;
import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.SplittingOutputStream;
import com.cloudera.sqoop.io.SplittableBufferedWriter;

import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Shell;
import com.cloudera.sqoop.manager.ImportJobContext;

@@ -96,15 +96,7 @@ private static CompressionCodec getCodec(Configuration conf,
    if (options.getCompressionCodec() == null) {
      return new GzipCodec();
    } else {
      try {
        @SuppressWarnings("unchecked")
        Class<? extends CompressionCodec> c =
            (Class<? extends CompressionCodec>)
            conf.getClassByName(options.getCompressionCodec());
        return ReflectionUtils.newInstance(c, conf);
      } catch (ClassNotFoundException e) {
        throw new IOException(e);
      }
      return CodecMap.getCodec(options.getCompressionCodec(), conf);
    }
  }
  return null;
src/test/com/cloudera/sqoop/io/TestCodecMap.java (new file, 63 lines)
@@ -0,0 +1,63 @@
/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.io;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;

import junit.framework.TestCase;

/**
 * Test looking up codecs by name.
 */
public class TestCodecMap extends TestCase {

  private void verifyCodec(Class<?> c, String codecName)
      throws UnsupportedCodecException {
    CompressionCodec codec = CodecMap.getCodec(codecName, new Configuration());
    assertEquals(codec.getClass(), c);
  }

  public void testGetCodecNames() {
    // gzip is picked up from Hadoop defaults
    assertTrue(CodecMap.getCodecNames().contains("gzip"));
  }

  public void testGetCodec() throws IOException {
    verifyCodec(GzipCodec.class, "gzip");
    verifyCodec(GzipCodec.class, "Gzip");
    verifyCodec(GzipCodec.class, "GZIP");
    verifyCodec(GzipCodec.class, "gzipcodec");
    verifyCodec(GzipCodec.class, "GzipCodec");
    verifyCodec(GzipCodec.class, "GZIPCODEC");
    verifyCodec(GzipCodec.class, "org.apache.hadoop.io.compress.GzipCodec");
  }

  public void testUnrecognizedCodec() {
    try {
      CodecMap.getCodec("bogus", new Configuration());
      fail("'bogus' codec should throw exception");
    } catch (UnsupportedCodecException e) {
      // expected
    }
  }
}