
SQOOP-397. Make Sqoop work with Hadoop 0.23

(Bilung Lee via Arvind Prabhakar)


git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1211583 13f79535-47bb-0310-9956-ffa450edef68
Arvind Prabhakar 2011-12-07 18:33:08 +00:00
parent c6eca91af0
commit 6c1ddf5f7c
9 changed files with 118 additions and 29 deletions


@@ -55,9 +55,9 @@ If you want to build everything (including the documentation), type
 +ant package+. This will appear in the
 +build/sqoop-(version)/+ directory.
 
-Sqoop is built against the latest Hadoop distribution available from Cloudera.
-These dependencies are obtained via IVY which downloads the necessary binaries
-from Cloudera maven repository.
+This version of Sqoop is built against Hadoop 0.23 available from Apache
+maven repository by default. These dependencies are obtained via IVY which
+downloads the necessary binaries.
 
 == Testing Sqoop

@@ -274,3 +274,12 @@ will allow you to edit Sqoop sources in Eclipse with all the library
 dependencies correctly resolved. To compile the jars, you should still
 use ant.
+
+== Using a specific version of Hadoop
+
+Now Sqoop defaults to use Hadoop 0.23 available from Apache maven repository.
+To switch back to the previous version of Hadoop 0.20, for example, run:
+
+++++
+ant test -Dhadoopversion=20
+++++
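Since the hadoopversion property is defined globally in build.xml (see its diff below) rather than only for the test target, other targets should honor the same switch. A sketch of an invocation this patch implies but does not document, assuming a package build against Hadoop 0.20:

++++
ant package -Dhadoopversion=20
++++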


@@ -24,6 +24,13 @@
     xmlns:artifact="urn:maven-artifact-ant"
     xmlns:ivy="antlib:org.apache.ivy.ant">
 
+  <!-- load ant-contrib tasks to get the "if" task. -->
+  <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+    <classpath>
+      <pathelement location="${basedir}/lib/ant-contrib-1.0b3.jar"/>
+    </classpath>
+  </taskdef>
+
   <!-- Load system-wide and project-wide default properties set by
        the user, to avoid needing to override with -D. -->
   <property file="${user.home}/build.properties" />
@@ -150,10 +157,22 @@
   -->
   <property name="sqoop.thirdparty.lib.dir" value="" />
 
+  <!-- Set default Hadoop version if not set -->
+  <if>
+    <isset property="hadoopversion" />
+    <then>
+      <echo message="Use Hadoop 0.${hadoopversion}" />
+    </then>
+    <else>
+      <echo message="Use Hadoop 0.23 by default" />
+      <property name="hadoopversion" value="23" />
+    </else>
+  </if>
+
   <!-- Ivy-based dependency resolution -->
   <property name="ivy.dir" location="${basedir}/ivy" />
-  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
   <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
   <property name="ivy.jar" location="${lib.dir}/ivy-${ivy.version}.jar"/>
   <property name="ivy_repo_url"
       value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
@@ -184,13 +203,6 @@
   <property name="sqoop.test.sqlserver.connectstring.host_url"
             value="jdbc:sqlserver://sqlserverhost:1433"/>
 
-  <!-- load ant-contrib tasks to get the "if" task. -->
-  <taskdef resource="net/sf/antcontrib/antcontrib.properties">
-    <classpath>
-      <pathelement location="${basedir}/lib/ant-contrib-1.0b3.jar"/>
-    </classpath>
-  </taskdef>
-
   <!-- The classpath for compiling and running Sqoop -->
   <if>
     <isset property="hadoop.home" />

@@ -1062,20 +1074,20 @@
   <!-- retrieve ivy-managed artifacts from the Hadoop distribution -->
   <target name="ivy-resolve-hadoop" depends="ivy-init"
       unless="hadoop.is.local">
-    <ivy:resolve settingsRef="${name}.ivy.settings" conf="cloudera" />
+    <ivy:resolve settingsRef="${name}.ivy.settings" conf="hadoop${hadoopversion}" />
   </target>
   <target name="ivy-retrieve-hadoop" depends="ivy-init,ivy-resolve-hadoop">
     <!-- retrieve hadoop refs normally. -->
     <ivy:retrieve settingsRef="${name}.ivy.settings"
       pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
       sync="true" />
-    <ivy:cachepath pathid="${name}.hadoop.classpath" conf="cloudera" />
+    <ivy:cachepath pathid="${name}.hadoop.classpath" conf="hadoop${hadoopversion}" />
   </target>
 
   <!-- retrieve ivy-managed test artifacts from the Hadoop distribution -->
   <target name="ivy-resolve-hadoop-test" depends="ivy-init"
       unless="hadoop.is.local">
-    <ivy:resolve settingsRef="${name}.ivy.settings" conf="clouderatest" />
+    <ivy:resolve settingsRef="${name}.ivy.settings" conf="hadoop${hadoopversion}test" />
   </target>
   <target name="ivy-retrieve-hadoop-test"
       depends="ivy-init,ivy-resolve-hadoop-test">

@@ -1083,7 +1095,7 @@
       pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
       sync="true" />
     <ivy:cachepath pathid="${name}.hadooptest.classpath"
-        conf="clouderatest" />
+        conf="hadoop${hadoopversion}test" />
   </target>
 
   <!-- retrieve ivy-managed artifacts for checkstyle -->
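Note that build.xml loads ${user.home}/build.properties well before the hadoopversion default above is applied, so the Hadoop profile can presumably also be pinned there instead of passing -D on every run. A minimal sketch, with a hypothetical entry:

    # ~/build.properties
    # Seen by the <isset property="hadoopversion"/> check, so the <else> default of 23 is skipped.
    hadoopversion=20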

ivy.xml

@@ -36,14 +36,13 @@ under the License.
   <conf name="common" visibility="private"
     extends="runtime"
     description="artifacts needed to compile/test the application"/>
-  <conf name="cloudera" visibility="private"
-    extends="common,runtime"
-    description="artifacts from Cloudera for compile/test" />
+  <conf name="hbase" visibility="private" />
+  <conf name="hadoop23" visibility="private" extends="common,runtime,hbase" />
+  <conf name="hadoop20" visibility="private" extends="common,runtime,hbase" />
   <conf name="test" visibility="private" extends="common,runtime"/>
-  <conf name="clouderatest" visibility="private"
-    extends="test,cloudera"
-    description="artifacts from Cloudera for testing" />
+  <conf name="hadoop23test" visibility="private" extends="test,hadoop23" />
+  <conf name="hadoop20test" visibility="private" extends="test,hadoop20" />
 
   <!-- We don't redistribute everything we depend on (e.g., Hadoop itself);
        anything which Hadoop itself also depends on, we do not ship.
@@ -67,11 +66,28 @@ under the License.
     <artifact conf="master"/>
   </publications>
   <dependencies>
-    <!-- Dependencies for Cloudera's Distribution for Hadoop -->
+    <!-- Dependencies for Hadoop 0.23 -->
+    <dependency org="org.apache.hadoop" name="hadoop-common"
+      rev="${hadoop-common.version}" conf="hadoop23->default">
+      <artifact name="hadoop-common" type="jar" />
+      <artifact name="hadoop-common" type="jar" m:classifier="tests"/>
+    </dependency>
+    <dependency org="org.apache.hadoop" name="hadoop-hdfs"
+      rev="${hadoop-common.version}" conf="hadoop23->default">
+      <artifact name="hadoop-hdfs" type="jar" />
+      <artifact name="hadoop-hdfs" type="jar" m:classifier="tests"/>
+    </dependency>
+    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common"
+      rev="${hadoop-common.version}" conf="hadoop23->default"/>
+    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core"
+      rev="${hadoop-common.version}" conf="hadoop23->default"/>
+
+    <!-- Dependencies for Hadoop 0.20 -->
     <dependency org="org.apache.hadoop" name="hadoop-core"
-      rev="${hadoop-core.cloudera.version}" conf="cloudera->default"/>
+      rev="${hadoop-core.version}" conf="hadoop20->default"/>
     <dependency org="org.apache.hadoop" name="hadoop-test"
-      rev="${hadoop-core.cloudera.version}" conf="clouderatest->default"/>
+      rev="${hadoop-core.version}" conf="hadoop20test->default"/>
 
     <!-- Common dependencies for Sqoop -->
     <dependency org="commons-cli" name="commons-cli"
@@ -113,7 +129,7 @@ under the License.
       rev="${commons-collections.version}" conf="releaseaudit->default"/>
     <dependency org="org.apache.hbase" name="hbase"
-      rev="${hbase.version}" conf="cloudera->default">
+      rev="${hbase.version}" conf="hbase->default">
       <artifact name="hbase" type="jar"/>
       <artifact name="hbase" type="test-jar" ext="jar" m:classifier="tests"/>
       <exclude org="com.sun.jersey" module="jersey-core"/>

@@ -121,9 +137,12 @@ under the License.
       <exclude org="com.sun.jersey" module="jersey-server"/>
       <exclude org="org.apache.thrift" module="thrift"/>
       <exclude org="log4j" module="log4j"/>
+      <exclude org="org.apache.hadoop" module="hadoop-core" />
+      <exclude org="com.cloudera.cdh" module="zookeeper-ant" />
     </dependency>
     <exclude org="org.apache.hadoop" module="avro"/>
+    <exclude org="commons-daemon" module="commons-daemon" />
   </dependencies>
 </ivy-module>
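In the conf mappings above, the part after "->" names the configuration pulled from the remote module (its default conf), while the left side is the local conf that the artifact lands in. A dependency required under both Hadoop profiles could presumably be declared once with a combined mapping; a hypothetical sketch only (this module is not part of the real ivy.xml):

    <dependency org="org.example" name="shared-helper" rev="1.0"
        conf="hadoop20->default;hadoop23->default"/>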


@@ -28,15 +28,14 @@ commons-io.version=1.4
 commons-lang.version=2.4
 commons-logging.version=1.0.4
 
-# Cloudera Distribution dependency version
-hadoop-core.cloudera.version=0.20.2-cdh3u1
+hadoop-core.version=0.20.2-cdh3u1
+hadoop-common.version=0.23.0-SNAPSHOT
 
 hbase.version=0.90.3-cdh3u1
 zookeeper.version=3.3.3-cdh3u1
 
 hsqldb.version=1.8.0.10
 
-ivy.version=2.0.0-rc2
+ivy.version=2.1.0
 junit.version=4.5


@@ -163,6 +163,7 @@ public void runTextCompressionTest(CompressionCodec codec, int expectedNum)
     if (codec == null) {
       codec = new GzipCodec();
+      ReflectionUtils.setConf(codec, getConf());
     }
     Path p = new Path(getDataFilePath().toString()
         + codec.getDefaultExtension());
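The new ReflectionUtils.setConf call hands the directly-constructed GzipCodec a Configuration, which it apparently needs before compressors can be created on Hadoop 0.23. An equivalent construction, shown only as a sketch, would let Hadoop inject the configuration at instantiation time instead:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.GzipCodec;
    import org.apache.hadoop.util.ReflectionUtils;

    // newInstance() constructs the codec and calls setConf() on it in one step,
    // so no separate ReflectionUtils.setConf() call is needed afterwards.
    Configuration conf = new Configuration();
    CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);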


@@ -34,6 +34,7 @@
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.VersionInfo;
 
 import org.junit.After;
 import org.junit.Before;

@@ -120,6 +121,9 @@ private void startMaster() throws Exception {
   @Override
   @Before
   public void setUp() {
+    if (!isHadoop20()) {
+      return;
+    }
     HBaseTestCase.recordTestBuildDataProperty();
     try {
       startMaster();

@@ -143,6 +147,9 @@ public void shutdown() throws Exception {
   @Override
   @After
   public void tearDown() {
+    if (!isHadoop20()) {
+      return;
+    }
     try {
       shutdown();
     } catch (Exception e) {

@@ -173,4 +180,8 @@ protected void verifyHBaseCell(String tableName, String rowKey,
       table.close();
     }
   }
+
+  protected boolean isHadoop20() {
+    return VersionInfo.getVersion().startsWith("0.20");
+  }
 }


@@ -29,6 +29,9 @@ public class TestHBaseImport extends HBaseTestCase {
   @Test
   public void testBasicUsage() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     // Create the HBase table in Sqoop as we run the job.
     String [] argv = getArgv(true, "BasicUsage", "BasicColFam", true, null);
     String [] types = { "INT", "INT" };

@@ -40,6 +43,9 @@ public void testBasicUsage() throws IOException {
   @Test
   public void testMissingTableFails() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     // Test that if the table doesn't exist, we fail unless we
     // explicitly create the table.
     String [] argv = getArgv(true, "MissingTable", "MissingFam", false, null);

@@ -56,6 +62,9 @@ public void testMissingTableFails() throws IOException {
   @Test
   public void testOverwriteSucceeds() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     // Test that we can create a table and then import immediately
     // back on top of it without problem.
     String [] argv = getArgv(true, "OverwriteT", "OverwriteF", true, null);

@@ -71,6 +80,9 @@ public void testOverwriteSucceeds() throws IOException {
   @Test
   public void testStrings() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     String [] argv = getArgv(true, "stringT", "stringF", true, null);
     String [] types = { "INT", "VARCHAR(32)" };
     String [] vals = { "0", "'abc'" };

@@ -81,6 +93,9 @@ public void testStrings() throws IOException {
   @Test
   public void testNulls() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     String [] argv = getArgv(true, "nullT", "nullF", true, null);
     String [] types = { "INT", "INT", "INT" };
     String [] vals = { "0", "42", "null" };

@@ -96,6 +111,9 @@ public void testNulls() throws IOException {
   @Test
   public void testExitFailure() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     String [] types = { "INT", "INT", "INT" };
     String [] vals = { "0", "42", "43" };
     createTableWithColTypes(types, vals);


@@ -29,6 +29,9 @@ public class TestHBaseQueryImport extends HBaseTestCase {
   @Test
   public void testImportFromQuery() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     String [] types = { "INT", "INT", "INT" };
     String [] vals = { "0", "42", "43" };
     createTableWithColTypes(types, vals);

@@ -47,6 +50,9 @@ public void testImportFromQuery() throws IOException {
   @Test
   public void testExitFailure() throws IOException {
+    if (!isHadoop20()) {
+      return;
+    }
     String [] types = { "INT", "INT", "INT" };
     String [] vals = { "0", "42", "43" };
     createTableWithColTypes(types, vals);


@@ -35,6 +35,7 @@
 import org.apache.hadoop.mapreduce.lib.db.*;
 import org.apache.hadoop.mapreduce.lib.output.*;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.VersionInfo;
 
 /**
  * Test aspects of DataDrivenDBInputFormat.

@@ -82,11 +83,17 @@ private void initialize(String driverClassName, String url)
   }
 
   public void setUp() throws Exception {
+    if (!isHadoop20()) {
+      return;
+    }
     initialize(DRIVER_CLASS, DB_URL);
     super.setUp();
   }
 
   public void tearDown() throws Exception {
+    if (!isHadoop20()) {
+      return;
+    }
     super.tearDown();
     shutdown();
   }

@@ -165,6 +172,9 @@ public void map(Object k, Object v, Context c)
   }
 
   public void testDateSplits() throws Exception {
+    if (!isHadoop20()) {
+      return;
+    }
     Statement s = connection.createStatement();
     final String DATE_TABLE = "datetable";
     final String COL = "foo";

@@ -219,4 +229,8 @@ public void testDateSplits() throws Exception {
       s.close();
     }
   }
+
+  protected boolean isHadoop20() {
+    return VersionInfo.getVersion().startsWith("0.20");
+  }
 }