mirror of https://github.com/apache/sqoop.git synced 2025-05-03 04:11:44 +08:00

1. Integrate pluggable external services; so far the Sqoop API service is integrated.
2. Add test cases.
3. The sqoop launch script now supports switching between Hadoop clusters.
This commit is contained in:
chenqixu 2019-04-23 13:48:40 +08:00
parent 5b503ea89c
commit 912fbc1c9c
12 changed files with 1262 additions and 559 deletions

View File

@@ -98,4 +98,25 @@ bin=`dirname ${prgm}`
bin=`cd ${bin} && pwd`
source ${bin}/configure-sqoop "${bin}"
#exec ${HADOOP_COMMON_HOME}/bin/hadoop org.apache.sqoop.Sqoop "$@"
if [ "--config" = "$1" ]
then
shift
if [[ $1 =~ "--" ]]
then
echo "you need input hadoop-config values."
exit -1
elif [[ $1 = "codegen" ]] || [[ $1 = "create-hive-table" ]] || [[ $1 = "eval" ]] || [[ $1 = "export" ]] || [[ $1 = "help" ]] || [[ $1 = "import" ]] || [[ $1 = "import-all-tables" ]] || [[ $1 = "import-mainframe" ]] || [[ $1 = "job" ]] || [[ $1 = "list-databases" ]] || [[ $1 = "list-tables" ]] || [[ $1 = "merge" ]] || [[ $1 = "metastore" ]] || [[ $1 = "version" ]]
then
echo "you need input hadoop-config values."
exit -1
else
hadoopconfig=$1
shift
fi
fi
if [ ! -n "$hadoopconfig" ] ;then
exec ${HADOOP_COMMON_HOME}/bin/hadoop org.apache.sqoop.Sqoop "$@" exec ${HADOOP_COMMON_HOME}/bin/hadoop org.apache.sqoop.Sqoop "$@"
else
exec ${HADOOP_COMMON_HOME}/bin/hadoop --config "$hadoopconfig" org.apache.sqoop.Sqoop "$@"
fi
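
With this change the wrapper can be pointed at an alternate Hadoop cluster by passing a configuration directory ahead of the tool name. A minimal usage sketch (the JDBC URL, table, and configuration paths below are made up for illustration):

# Run against the default cluster configured under $HADOOP_COMMON_HOME
sqoop import --connect jdbc:mysql://db1:3306/sales --table orders --target-dir /tmp/orders

# Run against a second cluster: --config must be the first argument,
# followed by that cluster's Hadoop configuration directory
sqoop --config /etc/hadoop/conf.cluster2 import --connect jdbc:mysql://db1:3306/sales \
  --table orders --target-dir /tmp/orders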

View File

@@ -186,5 +186,10 @@ under the License.
</description>
</property>
-->
<property>
<name>com.newland.component.FujianBI.service.list</name>
<value>com.newland.component.FujianBI.service.impl.KerberosLoginService</value>
<description>service list</description>
</property>
</configuration>

File diff suppressed because it is too large.

View File

@@ -46,7 +46,7 @@ public class DataDrivenDBInputFormat<T extends DBWritable>
*
* @deprecated use org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat.
*   DataDrivenDBInputSplit instead.
* @see org.apache.sqoop.mapreduce.db.DataDrivenDBInputFormat
*   DataDrivenDBInputSplit
*/
public static class DataDrivenDBInputSplit extends

View File

@@ -20,6 +20,7 @@
import java.util.Arrays;
import com.newland.component.FujianBI.service.ServiceTool;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -221,6 +222,14 @@ public static int runTool(String [] args, Configuration conf) {
}
String toolName = expandedArgs[0];
// Only the import and export tools trigger the external service processing.
if (!isWindow() && (toolName.equals("import") || toolName.equals("export"))) {
ServiceTool serviceTool = ServiceTool.builder(conf, args);
serviceTool.initServices();
serviceTool.startServices();
}
Configuration pluginConf = SqoopTool.loadPlugins(conf);
SqoopTool tool = SqoopTool.getTool(toolName);
if (null == tool) {
@@ -243,6 +252,20 @@ public static int runTool(String [] args) {
return runTool(args, new Configuration());
}
/**
 * Whether this is a local test run (i.e. the JVM is running on Windows).
 *
 * @return true if the current operating system is Windows
 */
public static boolean isWindow() {
String systemType = System.getProperty("os.name");
return systemType.toUpperCase().startsWith("WINDOWS");
}
public static void main(String [] args) {
if (args.length == 0) {
System.err.println("Try 'sqoop help' for usage.");

View File

@@ -0,0 +1,6 @@
package org.apache.sqoop;
public class SqoopVersion {
public static final String VERSION = "1.4.7";
// public static final String GIT_HASH = "";
}

View File

@@ -0,0 +1,31 @@
package org.apache.sqoop;
import org.junit.Before;
import org.junit.Test;
public class SqoopTest {
private String[] args;
@Before
public void setUp() throws Exception {
// String params = "import --connect jdbc:oracle:thin:@10.1.0.242:1521:ywxx --username bishow --password bishow -m 4 --split-by 'product_id' --query 'select sum_date,product_name,product_id from cqx_test2 where $CONDITIONS' --target-dir '/cqx/hivetable/cqx_test2/' --fields-terminated-by '|' --as-textfile --delete-target-dir --null-string '' --null-non-string ''";
// args = params.split(" ", -1);
String[] arg = {"import", "--connect", "jdbc:oracle:thin:@10.1.0.242:1521:ywxx",
"--username", "bishow", "--password", "C%MuhN#q$4", "-m", "4", "--split-by", "product_id", "--query",
"select sum_date,product_name,product_id from cqx_test2 where $CONDITIONS",
"--target-dir", "/cqx/hivetable/cqx_test2/", "--fields-terminated-by", "|", "--as-textfile",
"--delete-target-dir", "--null-string", "", "--null-non-string", ""};
args = arg;
System.out.println("args");
for (String p : args) {
System.out.print(p+" ");
}
}
@Test
public void run() {
int ret = Sqoop.runTool(args);
System.out.println("ret" + ret);
}
}

View File

@@ -0,0 +1,126 @@
package org.apache.sqoop.mapreduce.db;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import java.sql.Types;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
public class DateSplitterTest {
private OracleDateSplitter dateSplitter;
private SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
@Test
public void split() throws Exception {
dateSplitter = new OracleDateSplitter();
String colName = "checkTime";
final long MS_IN_SEC = 1000L;
long minVal;
long maxVal;
int sqlDataType = Types.TIMESTAMP;
minVal = df.parse("2019-04-22 11:28:30").getTime();
maxVal = df.parse("2019-04-22 16:28:30").getTime();
String lowClausePrefix = colName + " >= ";
String highClausePrefix = colName + " < ";
int numSplits = 2;
if (numSplits < 1) {
numSplits = 1;
}
if (minVal == Long.MIN_VALUE && maxVal == Long.MIN_VALUE) {
// The range of acceptable dates is NULL to NULL. Just create a single
// split.
List<InputSplit> splits = new ArrayList<InputSplit>();
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " IS NULL", colName + " IS NULL"));
return;
}
// For split size we are using seconds. So we need to convert to milliseconds.
long splitLimit = 3600 * MS_IN_SEC;
// Gather the split point integers
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);
List<InputSplit> splits = new ArrayList<InputSplit>();
// Turn the split points into a set of intervals.
long start = splitPoints.get(0);
Date startDate = longToDate(start, sqlDataType);
if (sqlDataType == Types.TIMESTAMP) {
// The lower bound's nanos value needs to match the actual lower-bound
// nanos.
try {
((java.sql.Timestamp) startDate).setNanos(0);
} catch (NullPointerException npe) {
// If the lower bound was NULL, we'll get an NPE; just ignore it and
// don't set nanos.
}
}
for (int i = 1; i < splitPoints.size(); i++) {
long end = splitPoints.get(i);
Date endDate = longToDate(end, sqlDataType);
if (i == splitPoints.size() - 1) {
if (sqlDataType == Types.TIMESTAMP) {
// The upper bound's nanos value needs to match the actual
// upper-bound nanos.
try {
((java.sql.Timestamp) endDate).setNanos(0);
} catch (NullPointerException npe) {
// If the upper bound was NULL, we'll get an NPE; just ignore it
// and don't set nanos.
}
}
// This is the last one; use a closed interval.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + dateSplitter.dateToString(startDate),
colName + " <= " + dateSplitter.dateToString(endDate)));
} else {
// Normal open-interval case.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + dateSplitter.dateToString(startDate),
highClausePrefix + dateSplitter.dateToString(endDate)));
}
start = end;
startDate = endDate;
}
if (minVal == Long.MIN_VALUE || maxVal == Long.MIN_VALUE) {
// Add an extra split to handle the null case that we saw.
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " IS NULL", colName + " IS NULL"));
}
printList(splits);
}
private <E> void printList(List<E> list) {
for (E e : list) {
System.out.println(e.toString());
}
}
private Date longToDate(long val, int sqlDataType) {
switch (sqlDataType) {
case Types.DATE:
return new java.sql.Date(val);
case Types.TIME:
return new java.sql.Time(val);
case Types.TIMESTAMP:
return new java.sql.Timestamp(val);
default: // Shouldn't ever hit this case.
return null;
}
}
}

View File

@@ -19,14 +19,21 @@
package org.apache.sqoop.mapreduce.db;
import java.math.BigDecimal;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import com.cloudera.sqoop.mapreduce.db.BigDecimalSplitter;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class TestBigDecimalSplitter {
private org.apache.sqoop.mapreduce.db.BigDecimalSplitter bigDecimalSplitter = new org.apache.sqoop.mapreduce.db.BigDecimalSplitter();
/* Test if the decimal split sizes are generated as expected */
@Test
public void testDecimalTryDivide() {
@@ -60,4 +67,54 @@ public void testRecurringTryDivide() {
assertEquals(expected, out);
}
@Test
public void testSplit() throws SQLException {
String colName = "cur_lac";
BigDecimal minVal = new BigDecimal(6591);
BigDecimal maxVal = new BigDecimal(24996);
String lowClausePrefix = colName + " >= ";
String highClausePrefix = colName + " < ";
BigDecimal numSplits = new BigDecimal(2000);
if (minVal == null && maxVal == null) {
// Range is null to null. Return a null split accordingly.
List<InputSplit> splits = new ArrayList<InputSplit>();
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " IS NULL", colName + " IS NULL"));
return;
}
if (minVal == null || maxVal == null) {
// Don't know what is a reasonable min/max value for interpolation. Fail.
System.out.println("Cannot find a range for NUMERIC or DECIMAL "
+ "fields with one end NULL.");
return;
}
// Get all the split points together.
List<BigDecimal> splitPoints = bigDecimalSplitter.split(numSplits, minVal, maxVal);
List<InputSplit> splits = new ArrayList<InputSplit>();
// Turn the split points into a set of intervals.
BigDecimal start = splitPoints.get(0);
for (int i = 1; i < splitPoints.size(); i++) {
BigDecimal end = splitPoints.get(i);
if (i == splitPoints.size() - 1) {
// This is the last one; use a closed interval.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + start.toString(),
colName + " <= " + end.toString()));
} else {
// Normal open-interval case.
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + start.toString(),
highClausePrefix + end.toString()));
}
start = end;
}
}
}

View File

@@ -0,0 +1,32 @@
package org.apache.sqoop.mapreduce.db;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
public class TestBooleanSplitter {
@Test
public void split() {
List<InputSplit> splits = new ArrayList<>();
String colName = "isCheck";
boolean minVal = false;
boolean maxVal = true;
// Use one or two splits.
if (!minVal) {
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " = FALSE", colName + " = FALSE"));
}
if (maxVal) {
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " = TRUE", colName + " = TRUE"));
}
System.out.println(splits);
}
}

View File

@@ -18,9 +18,13 @@
package org.apache.sqoop.mapreduce.db;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import com.cloudera.sqoop.mapreduce.db.IntegerSplitter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
@@ -143,6 +147,7 @@ public void testEvenSplitsWithLimit() throws SQLException {
@Test
public void testOddSplitsWithLimit() throws SQLException {
List<Long> splits = new IntegerSplitter().split(5, 10, 0, 95);
System.out.println(splits);
long [] expected = { 0, 10, 20, 30, 40, 50, 59, 68, 77, 86, 95};
assertLongArrayEquals(expected, toLongArray(splits));
}
@@ -150,6 +155,7 @@ public void testOddSplitsWithLimit() throws SQLException {
@Test
public void testSplitWithBiggerLimit() throws SQLException {
List<Long> splits = new IntegerSplitter().split(10, 15, 0, 100);
System.out.println(splits);
long [] expected = {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100};
assertLongArrayEquals(expected, toLongArray(splits));
}
@@ -157,7 +163,56 @@ public void testSplitWithBiggerLimit() throws SQLException {
@Test
public void testFractionalSplitWithLimit() throws SQLException {
List<Long> splits = new IntegerSplitter().split(5, 1, 1, 10);
System.out.println(splits);
long [] expected = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10};
assertLongArrayEquals(expected, toLongArray(splits));
}
@Test
public void testSplit() throws Exception {
org.apache.sqoop.mapreduce.db.IntegerSplitter integerSplitter = new org.apache.sqoop.mapreduce.db.IntegerSplitter();
String colName = "cnt";
long minVal = 1;
long maxVal = 100;
String lowClausePrefix = colName + " >= ";
String highClausePrefix = colName + " < ";
int numSplits = 3;
if (numSplits < 1) {
numSplits = 1;
}
long splitLimit = -1;
// Get all the split points together.
List<Long> splitPoints = integerSplitter.split(numSplits, splitLimit, minVal, maxVal);
System.out.println(String.format("Splits: [%,28d to %,28d] into %d parts",
minVal, maxVal, numSplits));
for (int i = 0; i < splitPoints.size(); i++) {
System.out.println(String.format("%,28d", splitPoints.get(i)));
}
List<InputSplit> splits = new ArrayList<InputSplit>();
// Turn the split points into a set of intervals.
long start = splitPoints.get(0);
for (int i = 1; i < splitPoints.size(); i++) {
long end = splitPoints.get(i);
if (i == splitPoints.size() - 1) {
// This is the last one; use a closed interval.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + Long.toString(start),
colName + " <= " + Long.toString(end)));
} else {
// Normal open-interval case.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + Long.toString(start),
highClausePrefix + Long.toString(end)));
}
start = end;
}
System.out.println(splits);
}
}

View File

@@ -19,9 +19,13 @@
import java.math.BigDecimal;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import com.cloudera.sqoop.config.ConfigurationHelper;
import com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat;
import com.cloudera.sqoop.mapreduce.db.TextSplitter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.sqoop.validation.ValidationException;
import org.junit.Test;
@@ -173,4 +177,93 @@ public void testNChar() throws SQLException {
assertEquals(false, splitter2.isUseNCharStrings());
}
@Test
public void testSplit() throws Exception {
System.out.println("Generating splits for a textual index column.");
System.out.println("If your database sorts in a case-insensitive order, "
+ "this may result in a partial import or duplicate records.");
System.out.println("You are strongly encouraged to choose an integral split column.");
org.apache.sqoop.mapreduce.db.TextSplitter textSplitter = new org.apache.sqoop.mapreduce.db.TextSplitter();
boolean useNCharStrings = false;
String colName = "produce_name";
String minString = "1231";
String maxString = "12324";
boolean minIsNull = false;
// If the min value is null, switch it to an empty string instead for
// purposes of interpolation. Then add [null, null] as a special case
// split.
if (null == minString) {
minString = "";
minIsNull = true;
}
if (null == maxString) {
// If the max string is null, then the min string has to be null too.
// Just return a special split for this case.
List<InputSplit> splits = new ArrayList<InputSplit>();
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " IS NULL", colName + " IS NULL"));
return;
}
// Use this as a hint. May need an extra task if the size doesn't
// divide cleanly.
// Locally this would be 1;
// the remote default is 2.
int numSplits = 3;
String lowClausePrefix = colName + " >= " + (useNCharStrings ? "N'" : "'");
String highClausePrefix = colName + " < " + (useNCharStrings ? "N'" : "'");
// If there is a common prefix between minString and maxString, establish
// it and pull it out of minString and maxString.
int maxPrefixLen = Math.min(minString.length(), maxString.length());
int sharedLen;
for (sharedLen = 0; sharedLen < maxPrefixLen; sharedLen++) {
char c1 = minString.charAt(sharedLen);
char c2 = maxString.charAt(sharedLen);
if (c1 != c2) {
break;
}
}
// The common prefix has length 'sharedLen'. Extract it from both.
String commonPrefix = minString.substring(0, sharedLen);
minString = minString.substring(sharedLen);
maxString = maxString.substring(sharedLen);
List<String> splitStrings = textSplitter.split(numSplits, minString, maxString,
commonPrefix);
List<InputSplit> splits = new ArrayList<InputSplit>();
// Convert the list of split point strings into an actual set of
// InputSplits.
String start = splitStrings.get(0);
for (int i = 1; i < splitStrings.size(); i++) {
String end = splitStrings.get(i);
if (i == splitStrings.size() - 1) {
// This is the last one; use a closed interval.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + start + "'", colName
+ " <= " + (useNCharStrings ? "N'" : "'") + end + "'"));
} else {
// Normal open-interval case.
splits.add(new com.cloudera.sqoop.mapreduce.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit(
lowClausePrefix + start + "'", highClausePrefix + end + "'"));
}
start = end;
}
if (minIsNull) {
// Add the special null split at the end.
splits.add(new DataDrivenDBInputFormat.DataDrivenDBInputSplit(
colName + " IS NULL", colName + " IS NULL"));
}
System.out.println(splits);
}
}