
SQOOP-397. Make Sqoop work with Hadoop 0.23

(Bilung Lee via Arvind Prabhakar)


git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1211583 13f79535-47bb-0310-9956-ffa450edef68
Arvind Prabhakar 2011-12-07 18:33:08 +00:00
parent c6eca91af0
commit 6c1ddf5f7c
9 changed files with 118 additions and 29 deletions


@@ -55,9 +55,9 @@ If you want to build everything (including the documentation), type
+ant package+. This will appear in the
+build/sqoop-(version)/+ directory.
Sqoop is built against the latest Hadoop distribution available from Cloudera.
These dependencies are obtained via IVY which downloads the necessary binaries
from Cloudera maven repository.
This version of Sqoop is built against Hadoop 0.23 available from Apache
maven repository by default. These dependencies are obtained via IVY which
downloads the necessary binaries.
== Testing Sqoop
@@ -274,3 +274,12 @@ will allow you to edit Sqoop sources in Eclipse with all the library
dependencies correctly resolved. To compile the jars, you should still
use ant.
== Using a specific version of Hadoop
Now Sqoop defaults to use Hadoop 0.23 available from Apache maven repository.
To switch back to the previous version of Hadoop 0.20, for example, run:
++++
ant test -Dhadoopversion=20
++++
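A usage sketch (not part of the committed file): since the hadoopversion property is set globally in build.xml, any target that resolves Hadoop through Ivy should honor the same switch, e.g. a package build against Hadoop 0.20:

++++
ant package -Dhadoopversion=20
++++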


@@ -24,6 +24,13 @@
xmlns:artifact="urn:maven-artifact-ant"
xmlns:ivy="antlib:org.apache.ivy.ant">
<!-- load ant-contrib tasks to get the "if" task. -->
<taskdef resource="net/sf/antcontrib/antcontrib.properties">
<classpath>
<pathelement location="${basedir}/lib/ant-contrib-1.0b3.jar"/>
</classpath>
</taskdef>
<!-- Load system-wide and project-wide default properties set by
the user, to avoid needing to override with -D. -->
<property file="${user.home}/build.properties" />
@@ -150,10 +157,22 @@
-->
<property name="sqoop.thirdparty.lib.dir" value="" />
<!-- Set default Hadoop version if not set -->
<if>
<isset property="hadoopversion" />
<then>
<echo message="Use Hadoop 0.${hadoopversion}" />
</then>
<else>
<echo message="Use Hadoop 0.23 by default" />
<property name="hadoopversion" value="23" />
</else>
</if>
<!-- Ivy-based dependency resolution -->
<property name="ivy.dir" location="${basedir}/ivy" />
<property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
<property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
<property name="ivy.jar" location="${lib.dir}/ivy-${ivy.version}.jar"/>
<property name="ivy_repo_url"
value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
@@ -184,13 +203,6 @@
<property name="sqoop.test.sqlserver.connectstring.host_url"
value="jdbc:sqlserver://sqlserverhost:1433"/>
<!-- load ant-contrib tasks to get the "if" task. -->
<taskdef resource="net/sf/antcontrib/antcontrib.properties">
<classpath>
<pathelement location="${basedir}/lib/ant-contrib-1.0b3.jar"/>
</classpath>
</taskdef>
<!-- The classpath for compiling and running Sqoop -->
<if>
<isset property="hadoop.home" />
@@ -1062,20 +1074,20 @@
<!-- retrieve ivy-managed artifacts from the Hadoop distribution -->
<target name="ivy-resolve-hadoop" depends="ivy-init"
unless="hadoop.is.local">
<ivy:resolve settingsRef="${name}.ivy.settings" conf="cloudera" />
<ivy:resolve settingsRef="${name}.ivy.settings" conf="hadoop${hadoopversion}" />
</target>
<target name="ivy-retrieve-hadoop" depends="ivy-init,ivy-resolve-hadoop">
<!-- retrieve hadoop refs normally. -->
<ivy:retrieve settingsRef="${name}.ivy.settings"
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
sync="true" />
<ivy:cachepath pathid="${name}.hadoop.classpath" conf="cloudera" />
<ivy:cachepath pathid="${name}.hadoop.classpath" conf="hadoop${hadoopversion}" />
</target>
<!-- retrieve ivy-managed test artifacts from the Hadoop distribution -->
<target name="ivy-resolve-hadoop-test" depends="ivy-init"
unless="hadoop.is.local">
<ivy:resolve settingsRef="${name}.ivy.settings" conf="clouderatest" />
<ivy:resolve settingsRef="${name}.ivy.settings" conf="hadoop${hadoopversion}test" />
</target>
<target name="ivy-retrieve-hadoop-test"
depends="ivy-init,ivy-resolve-hadoop-test">
@@ -1083,7 +1095,7 @@
pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
sync="true" />
<ivy:cachepath pathid="${name}.hadooptest.classpath"
conf="clouderatest" />
conf="hadoop${hadoopversion}test" />
</target>
<!-- retrieve ivy-managed artifacts for checkstyle -->

ivy.xml

@@ -36,14 +36,13 @@ under the License.
<conf name="common" visibility="private"
extends="runtime"
description="artifacts needed to compile/test the application"/>
<conf name="cloudera" visibility="private"
extends="common,runtime"
description="artifacts from Cloudera for compile/test" />
<conf name="hbase" visibility="private" />
<conf name="hadoop23" visibility="private" extends="common,runtime,hbase" />
<conf name="hadoop20" visibility="private" extends="common,runtime,hbase" />
<conf name="test" visibility="private" extends="common,runtime"/>
<conf name="clouderatest" visibility="private"
extends="test,cloudera"
description="artifacts from Cloudera for testing" />
<conf name="hadoop23test" visibility="private" extends="test,hadoop23" />
<conf name="hadoop20test" visibility="private" extends="test,hadoop20" />
<!-- We don't redistribute everything we depend on (e.g., Hadoop itself);
anything which Hadoop itself also depends on, we do not ship.
@@ -67,11 +66,28 @@ under the License.
<artifact conf="master"/>
</publications>
<dependencies>
<!-- Dependencies for Cloudera's Distribution for Hadoop -->
<!-- Dependencies for Hadoop 0.23 -->
<dependency org="org.apache.hadoop" name="hadoop-common"
rev="${hadoop-common.version}" conf="hadoop23->default">
<artifact name="hadoop-common" type="jar" />
<artifact name="hadoop-common" type="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-hdfs"
rev="${hadoop-common.version}" conf="hadoop23->default">
<artifact name="hadoop-hdfs" type="jar" />
<artifact name="hadoop-hdfs" type="jar" m:classifier="tests"/>
</dependency>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common"
rev="${hadoop-common.version}" conf="hadoop23->default"/>
<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
rev="${hadoop-common.version}" conf="hadoop23->default"/>
<!-- Dependencies for Hadoop 0.20 -->
<dependency org="org.apache.hadoop" name="hadoop-core"
rev="${hadoop-core.cloudera.version}" conf="cloudera->default"/>
rev="${hadoop-core.version}" conf="hadoop20->default"/>
<dependency org="org.apache.hadoop" name="hadoop-test"
rev="${hadoop-core.cloudera.version}" conf="clouderatest->default"/>
rev="${hadoop-core.version}" conf="hadoop20test->default"/>
<!-- Common dependencies for Sqoop -->
<dependency org="commons-cli" name="commons-cli"
@@ -113,7 +129,7 @@ under the License.
rev="${commons-collections.version}" conf="releaseaudit->default"/>
<dependency org="org.apache.hbase" name="hbase"
rev="${hbase.version}" conf="cloudera->default">
rev="${hbase.version}" conf="hbase->default">
<artifact name="hbase" type="jar"/>
<artifact name="hbase" type="test-jar" ext="jar" m:classifier="tests"/>
<exclude org="com.sun.jersey" module="jersey-core"/>
@@ -121,9 +137,12 @@ under the License.
<exclude org="com.sun.jersey" module="jersey-server"/>
<exclude org="org.apache.thrift" module="thrift"/>
<exclude org="log4j" module="log4j"/>
<exclude org="org.apache.hadoop" module="hadoop-core" />
<exclude org="com.cloudera.cdh" module="zookeeper-ant" />
</dependency>
<exclude org="org.apache.hadoop" module="avro"/>
<exclude org="commons-daemon" module="commons-daemon" />
</dependencies>
</ivy-module>


@@ -28,15 +28,14 @@ commons-io.version=1.4
commons-lang.version=2.4
commons-logging.version=1.0.4
# Cloudera Distribution dependency version
hadoop-core.cloudera.version=0.20.2-cdh3u1
hadoop-core.version=0.20.2-cdh3u1
hadoop-common.version=0.23.0-SNAPSHOT
hbase.version=0.90.3-cdh3u1
zookeeper.version=3.3.3-cdh3u1
hsqldb.version=1.8.0.10
ivy.version=2.0.0-rc2
ivy.version=2.1.0
junit.version=4.5
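A sketch under the assumption that these entries behave like ordinary Ant properties loaded via loadproperties in build.xml, so a command-line -D override takes precedence over the file; resolving against a different (hypothetical) Hadoop 0.23 snapshot might then look like:

++++
# 0.23.1-SNAPSHOT is an illustrative version string, not one set by this commit
ant package -Dhadoopversion=23 -Dhadoop-common.version=0.23.1-SNAPSHOT
++++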


@@ -163,6 +163,7 @@ public void runTextCompressionTest(CompressionCodec codec, int expectedNum)
if (codec == null) {
codec = new GzipCodec();
ReflectionUtils.setConf(codec, getConf());
}
Path p = new Path(getDataFilePath().toString()
+ codec.getDefaultExtension());


@@ -34,6 +34,7 @@
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
import org.junit.After;
import org.junit.Before;
@@ -120,6 +121,9 @@ private void startMaster() throws Exception {
@Override
@Before
public void setUp() {
if (!isHadoop20()) {
return;
}
HBaseTestCase.recordTestBuildDataProperty();
try {
startMaster();
@@ -143,6 +147,9 @@ public void shutdown() throws Exception {
@Override
@After
public void tearDown() {
if (!isHadoop20()) {
return;
}
try {
shutdown();
} catch (Exception e) {
@@ -173,4 +180,8 @@ protected void verifyHBaseCell(String tableName, String rowKey,
table.close();
}
}
protected boolean isHadoop20() {
return VersionInfo.getVersion().startsWith("0.20");
}
}


@@ -29,6 +29,9 @@ public class TestHBaseImport extends HBaseTestCase {
@Test
public void testBasicUsage() throws IOException {
if (!isHadoop20()) {
return;
}
// Create the HBase table in Sqoop as we run the job.
String [] argv = getArgv(true, "BasicUsage", "BasicColFam", true, null);
String [] types = { "INT", "INT" };
@@ -40,6 +43,9 @@ public void testBasicUsage() throws IOException {
@Test
public void testMissingTableFails() throws IOException {
if (!isHadoop20()) {
return;
}
// Test that if the table doesn't exist, we fail unless we
// explicitly create the table.
String [] argv = getArgv(true, "MissingTable", "MissingFam", false, null);
@@ -56,6 +62,9 @@ public void testMissingTableFails() throws IOException {
@Test
public void testOverwriteSucceeds() throws IOException {
if (!isHadoop20()) {
return;
}
// Test that we can create a table and then import immediately
// back on top of it without problem.
String [] argv = getArgv(true, "OverwriteT", "OverwriteF", true, null);
@@ -71,6 +80,9 @@ public void testOverwriteSucceeds() throws IOException {
@Test
public void testStrings() throws IOException {
if (!isHadoop20()) {
return;
}
String [] argv = getArgv(true, "stringT", "stringF", true, null);
String [] types = { "INT", "VARCHAR(32)" };
String [] vals = { "0", "'abc'" };
@@ -81,6 +93,9 @@ public void testStrings() throws IOException {
@Test
public void testNulls() throws IOException {
if (!isHadoop20()) {
return;
}
String [] argv = getArgv(true, "nullT", "nullF", true, null);
String [] types = { "INT", "INT", "INT" };
String [] vals = { "0", "42", "null" };
@@ -96,6 +111,9 @@ public void testNulls() throws IOException {
@Test
public void testExitFailure() throws IOException {
if (!isHadoop20()) {
return;
}
String [] types = { "INT", "INT", "INT" };
String [] vals = { "0", "42", "43" };
createTableWithColTypes(types, vals);


@@ -29,6 +29,9 @@ public class TestHBaseQueryImport extends HBaseTestCase {
@Test
public void testImportFromQuery() throws IOException {
if (!isHadoop20()) {
return;
}
String [] types = { "INT", "INT", "INT" };
String [] vals = { "0", "42", "43" };
createTableWithColTypes(types, vals);
@@ -47,6 +50,9 @@ public void testImportFromQuery() throws IOException {
@Test
public void testExitFailure() throws IOException {
if (!isHadoop20()) {
return;
}
String [] types = { "INT", "INT", "INT" };
String [] vals = { "0", "42", "43" };
createTableWithColTypes(types, vals);


@@ -35,6 +35,7 @@
import org.apache.hadoop.mapreduce.lib.db.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
/**
* Test aspects of DataDrivenDBInputFormat.
@@ -82,11 +83,17 @@ private void initialize(String driverClassName, String url)
}
public void setUp() throws Exception {
if (!isHadoop20()) {
return;
}
initialize(DRIVER_CLASS, DB_URL);
super.setUp();
}
public void tearDown() throws Exception {
if (!isHadoop20()) {
return;
}
super.tearDown();
shutdown();
}
@@ -165,6 +172,9 @@ public void map(Object k, Object v, Context c)
}
public void testDateSplits() throws Exception {
if (!isHadoop20()) {
return;
}
Statement s = connection.createStatement();
final String DATE_TABLE = "datetable";
final String COL = "foo";
@@ -219,4 +229,8 @@ public void testDateSplits() throws Exception {
s.close();
}
}
protected boolean isHadoop20() {
return VersionInfo.getVersion().startsWith("0.20");
}
}