5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-02 20:39:58 +08:00

Export-从HDFS导到DBMS,支持配置读取文件编码,使用方法:--fileencoding gbk

This commit is contained in:
chenqixu 2019-11-19 08:51:23 +08:00
parent 912fbc1c9c
commit e0538e2bb1
4 changed files with 40 additions and 5 deletions

View File

@ -31,6 +31,8 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE;
/**
* Converts an input record from a string representation to a parsed Sqoop
* record and emits that DBWritable to the OutputFormat for writeback to the
@ -44,6 +46,7 @@ public class TextExportMapper
public static final Log LOG =
LogFactory.getLog(TextExportMapper.class.getName());
private String encoding;
private SqoopRecord recordImpl;
boolean enableDataDumpOnError;
@ -80,13 +83,21 @@ protected void setup(Context context)
}
enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
encoding = conf.get(ENCODE);
}
public void map(LongWritable key, Text val, Context context)
throws IOException, InterruptedException {
try {
recordImpl.parse(val);
// 据说转码比较消耗性能
if (encoding != null) {
String newValue = new String(val.getBytes(), 0, val.getLength(), encoding);
recordImpl.parse(newValue);
} else {
recordImpl.parse(val);
}
context.write(recordImpl, NullWritable.get());
} catch (Exception e) {
// Something bad has happened

View File

@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
public static final String THROW_ON_ERROR_ARG = "throw-on-error";
public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
// Long name of the command-line option that selects the file encoding
// (used on the command line as --fileencoding <value>).
public static final String FILE_ENCODING = "fileencoding";
// Configuration key under which the value of the file-encoding option is
// stored; the text export mapper reads this key during setup.
public static final String ENCODE = "sqoop.mapreduce.export.encode";
// Arguments for validation.
public static final String VALIDATE_ARG = "validate";

View File

@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) {
toolOptions.addUniqueOptions(codeGenOpts);
toolOptions.addUniqueOptions(getHCatalogOptions());
toolOptions.addUniqueOptions(getFileencodingOptions());
}
/**
 * Builds the option group that lets the user choose the character encoding
 * of the files being exported.
 *
 * <p>The group contains a single long option, {@code --fileencoding}, which
 * takes exactly one argument (for example {@code --fileencoding gbk}). The
 * option is registered under the {@link #FILE_ENCODING} constant so that it
 * always matches the name {@code applyOptions} looks up when it copies the
 * value into the job configuration.
 *
 * @return the {@link RelatedOptions} group holding the file-encoding option
 */
protected RelatedOptions getFileencodingOptions() {
  RelatedOptions fileencodingOptions =
      new RelatedOptions("fileencoding arguments");
  fileencodingOptions.addOption(OptionBuilder
      .withArgName("charset")
      .hasArg()
      .withDescription("Character encoding used to decode the exported "
          + "input files, e.g. gbk")
      // Use the shared constant rather than a literal so the registered
      // name cannot drift from the name checked in applyOptions().
      .withLongOpt(FILE_ENCODING)
      .create());
  return fileencodingOptions;
}
@Override
@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out)
out.setCall(in.getOptionValue(CALL_ARG));
}
//设置文件编码
if (in.hasOption(FILE_ENCODING)) {
out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING));
}
applyValidationOptions(in, out);
applyNewUpdateOptions(in, out);
applyInputFormatOptions(in, out);

View File

@ -24,13 +24,13 @@ public void split() throws Exception {
long maxVal;
int sqlDataType = Types.TIMESTAMP;
minVal = df.parse("2019-04-22 11:28:30").getTime();
maxVal = df.parse("2019-04-22 16:28:30").getTime();
minVal = df.parse("2019-04-22 00:00:00").getTime();
maxVal = df.parse("2019-04-22 23:59:59").getTime();
String lowClausePrefix = colName + " >= ";
String highClausePrefix = colName + " < ";
int numSplits = 2;
int numSplits = 1440;
if (numSplits < 1) {
numSplits = 1;
}
@ -45,7 +45,7 @@ public void split() throws Exception {
}
// For split size we are using seconds. So we need to convert to milliseconds.
long splitLimit = 3600 * MS_IN_SEC;
long splitLimit = -1 * MS_IN_SEC;
// Gather the split point integers
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);