5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-04 01:00:46 +08:00

Export-从HDFS导到DBMS,支持配置读取文件编码,使用方法:--fileencoding gbk

This commit is contained in:
chenqixu 2019-11-19 08:51:23 +08:00
parent 912fbc1c9c
commit e0538e2bb1
4 changed files with 40 additions and 5 deletions

View File

@ -31,6 +31,8 @@
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE;
/** /**
* Converts an input record from a string representation to a parsed Sqoop * Converts an input record from a string representation to a parsed Sqoop
* record and emits that DBWritable to the OutputFormat for writeback to the * record and emits that DBWritable to the OutputFormat for writeback to the
@ -44,6 +46,7 @@ public class TextExportMapper
public static final Log LOG = public static final Log LOG =
LogFactory.getLog(TextExportMapper.class.getName()); LogFactory.getLog(TextExportMapper.class.getName());
private String encoding;
private SqoopRecord recordImpl; private SqoopRecord recordImpl;
boolean enableDataDumpOnError; boolean enableDataDumpOnError;
@ -80,13 +83,21 @@ protected void setup(Context context)
} }
enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false); enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
encoding = conf.get(ENCODE);
} }
public void map(LongWritable key, Text val, Context context) public void map(LongWritable key, Text val, Context context)
throws IOException, InterruptedException { throws IOException, InterruptedException {
try { try {
// Transcoding every record is reportedly expensive, so it is only done when an encoding is configured.
if (encoding != null) {
String newValue = new String(val.getBytes(), 0, val.getLength(), encoding);
recordImpl.parse(newValue);
} else {
recordImpl.parse(val); recordImpl.parse(val);
}
context.write(recordImpl, NullWritable.get()); context.write(recordImpl, NullWritable.get());
} catch (Exception e) { } catch (Exception e) {
// Something bad has happened // Something bad has happened

View File

@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
public static final String THROW_ON_ERROR_ARG = "throw-on-error"; public static final String THROW_ON_ERROR_ARG = "throw-on-error";
public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled"; public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names"; public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
// Name of the command-line option that carries the file encoding.
public static final String FILE_ENCODING = "fileencoding";
// Configuration key under which the value of the file encoding option is stored.
public static final String ENCODE = "sqoop.mapreduce.export.encode";
// Arguments for validation. // Arguments for validation.
public static final String VALIDATE_ARG = "validate"; public static final String VALIDATE_ARG = "validate";

View File

@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) {
toolOptions.addUniqueOptions(codeGenOpts); toolOptions.addUniqueOptions(codeGenOpts);
toolOptions.addUniqueOptions(getHCatalogOptions()); toolOptions.addUniqueOptions(getHCatalogOptions());
toolOptions.addUniqueOptions(getFileencodingOptions());
}
/**
* 文件编码
*
* @return
*/
protected RelatedOptions getFileencodingOptions() {
RelatedOptions fileencodingOptions = new RelatedOptions("fileencoding arguments");
fileencodingOptions.addOption(OptionBuilder
.hasArg()
.withDescription("fileencoding")
.withLongOpt("fileencoding")
.create());
return fileencodingOptions;
} }
@Override @Override
@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out)
out.setCall(in.getOptionValue(CALL_ARG)); out.setCall(in.getOptionValue(CALL_ARG));
} }
// Store the requested file encoding in the job configuration.
if (in.hasOption(FILE_ENCODING)) {
out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING));
}
applyValidationOptions(in, out); applyValidationOptions(in, out);
applyNewUpdateOptions(in, out); applyNewUpdateOptions(in, out);
applyInputFormatOptions(in, out); applyInputFormatOptions(in, out);

View File

@ -24,13 +24,13 @@ public void split() throws Exception {
long maxVal; long maxVal;
int sqlDataType = Types.TIMESTAMP; int sqlDataType = Types.TIMESTAMP;
minVal = df.parse("2019-04-22 11:28:30").getTime(); minVal = df.parse("2019-04-22 00:00:00").getTime();
maxVal = df.parse("2019-04-22 16:28:30").getTime(); maxVal = df.parse("2019-04-22 23:59:59").getTime();
String lowClausePrefix = colName + " >= "; String lowClausePrefix = colName + " >= ";
String highClausePrefix = colName + " < "; String highClausePrefix = colName + " < ";
int numSplits = 2; int numSplits = 1440;
if (numSplits < 1) { if (numSplits < 1) {
numSplits = 1; numSplits = 1;
} }
@ -45,7 +45,7 @@ public void split() throws Exception {
} }
// For split size we are using seconds. So we need to convert to milliseconds. // For split size we are using seconds. So we need to convert to milliseconds.
long splitLimit = 3600 * MS_IN_SEC; long splitLimit = -1 * MS_IN_SEC;
// Gather the split point integers // Gather the split point integers
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal); List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);