mirror of
https://github.com/apache/sqoop.git
synced 2025-05-02 20:39:58 +08:00
Export-从HDFS导到DBMS,支持配置读取文件编码,使用方法:--fileencoding gbk
This commit is contained in:
parent
912fbc1c9c
commit
e0538e2bb1
@ -31,6 +31,8 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE;
|
||||
|
||||
/**
|
||||
* Converts an input record from a string representation to a parsed Sqoop
|
||||
* record and emits that DBWritable to the OutputFormat for writeback to the
|
||||
@ -44,6 +46,7 @@ public class TextExportMapper
|
||||
public static final Log LOG =
|
||||
LogFactory.getLog(TextExportMapper.class.getName());
|
||||
|
||||
private String encoding;
|
||||
private SqoopRecord recordImpl;
|
||||
|
||||
boolean enableDataDumpOnError;
|
||||
@ -80,13 +83,21 @@ protected void setup(Context context)
|
||||
}
|
||||
|
||||
enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
|
||||
|
||||
encoding = conf.get(ENCODE);
|
||||
}
|
||||
|
||||
|
||||
public void map(LongWritable key, Text val, Context context)
|
||||
throws IOException, InterruptedException {
|
||||
try {
|
||||
recordImpl.parse(val);
|
||||
// Transcoding is said to be relatively expensive, so only do it when an encoding is configured.
|
||||
if (encoding != null) {
|
||||
String newValue = new String(val.getBytes(), 0, val.getLength(), encoding);
|
||||
recordImpl.parse(newValue);
|
||||
} else {
|
||||
recordImpl.parse(val);
|
||||
}
|
||||
context.write(recordImpl, NullWritable.get());
|
||||
} catch (Exception e) {
|
||||
// Something bad has happened
|
||||
|
@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
|
||||
public static final String THROW_ON_ERROR_ARG = "throw-on-error";
|
||||
public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
|
||||
public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
|
||||
public static final String FILE_ENCODING = "fileencoding";// command-line option name for the file encoding
|
||||
// Configuration key under which the value of the file-encoding option is stored.
public static final String ENCODE = "sqoop.mapreduce.export.encode";
|
||||
|
||||
// Arguments for validation.
|
||||
public static final String VALIDATE_ARG = "validate";
|
||||
|
@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) {
|
||||
|
||||
toolOptions.addUniqueOptions(codeGenOpts);
|
||||
toolOptions.addUniqueOptions(getHCatalogOptions());
|
||||
|
||||
toolOptions.addUniqueOptions(getFileencodingOptions());
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the option group for the file encoding: a single long option
* named "fileencoding" that takes one argument.
|
||||
*
* NOTE(review): the long-option name is written as a literal here; it
* presumably has to stay equal to FILE_ENCODING, which is what
* applyOptions looks up -- confirm and consider using the constant.
|
||||
* @return the RelatedOptions group containing the fileencoding option
|
||||
*/
|
||||
protected RelatedOptions getFileencodingOptions() {
|
||||
RelatedOptions fileencodingOptions = new RelatedOptions("fileencoding arguments");
|
||||
fileencodingOptions.addOption(OptionBuilder
|
||||
.hasArg()
|
||||
.withDescription("fileencoding")
|
||||
.withLongOpt("fileencoding")
|
||||
.create());
|
||||
return fileencodingOptions;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out)
|
||||
out.setCall(in.getOptionValue(CALL_ARG));
|
||||
}
|
||||
|
||||
// Set the file encoding
|
||||
if (in.hasOption(FILE_ENCODING)) {
|
||||
out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING));
|
||||
}
|
||||
|
||||
applyValidationOptions(in, out);
|
||||
applyNewUpdateOptions(in, out);
|
||||
applyInputFormatOptions(in, out);
|
||||
|
@ -24,13 +24,13 @@ public void split() throws Exception {
|
||||
long maxVal;
|
||||
|
||||
int sqlDataType = Types.TIMESTAMP;
|
||||
minVal = df.parse("2019-04-22 11:28:30").getTime();
|
||||
maxVal = df.parse("2019-04-22 16:28:30").getTime();
|
||||
minVal = df.parse("2019-04-22 00:00:00").getTime();
|
||||
maxVal = df.parse("2019-04-22 23:59:59").getTime();
|
||||
|
||||
String lowClausePrefix = colName + " >= ";
|
||||
String highClausePrefix = colName + " < ";
|
||||
|
||||
int numSplits = 2;
|
||||
int numSplits = 1440;
|
||||
if (numSplits < 1) {
|
||||
numSplits = 1;
|
||||
}
|
||||
@ -45,7 +45,7 @@ public void split() throws Exception {
|
||||
}
|
||||
|
||||
// For split size we are using seconds. So we need to convert to milliseconds.
|
||||
long splitLimit = 3600 * MS_IN_SEC;
|
||||
long splitLimit = -1 * MS_IN_SEC;
|
||||
|
||||
// Gather the split point integers
|
||||
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);
|
||||
|
Loading…
Reference in New Issue
Block a user