mirror of
https://github.com/apache/sqoop.git
synced 2025-05-04 01:00:46 +08:00
Export (HDFS to DBMS): support configuring the encoding used to read the input files. Usage: --fileencoding gbk
This commit is contained in:
parent
912fbc1c9c
commit
e0538e2bb1
@ -31,6 +31,8 @@
|
|||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts an input record from a string representation to a parsed Sqoop
|
* Converts an input record from a string representation to a parsed Sqoop
|
||||||
* record and emits that DBWritable to the OutputFormat for writeback to the
|
* record and emits that DBWritable to the OutputFormat for writeback to the
|
||||||
@ -44,6 +46,7 @@ public class TextExportMapper
|
|||||||
public static final Log LOG =
|
public static final Log LOG =
|
||||||
LogFactory.getLog(TextExportMapper.class.getName());
|
LogFactory.getLog(TextExportMapper.class.getName());
|
||||||
|
|
||||||
|
private String encoding;
|
||||||
private SqoopRecord recordImpl;
|
private SqoopRecord recordImpl;
|
||||||
|
|
||||||
boolean enableDataDumpOnError;
|
boolean enableDataDumpOnError;
|
||||||
@ -80,13 +83,21 @@ protected void setup(Context context)
|
|||||||
}
|
}
|
||||||
|
|
||||||
enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
|
enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
|
||||||
|
|
||||||
|
encoding = conf.get(ENCODE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void map(LongWritable key, Text val, Context context)
|
public void map(LongWritable key, Text val, Context context)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
try {
|
try {
|
||||||
|
// Transcoding is said to be relatively expensive, so re-decode only when an encoding is configured.
|
||||||
|
if (encoding != null) {
|
||||||
|
String newValue = new String(val.getBytes(), 0, val.getLength(), encoding);
|
||||||
|
recordImpl.parse(newValue);
|
||||||
|
} else {
|
||||||
recordImpl.parse(val);
|
recordImpl.parse(val);
|
||||||
|
}
|
||||||
context.write(recordImpl, NullWritable.get());
|
context.write(recordImpl, NullWritable.get());
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// Something bad has happened
|
// Something bad has happened
|
||||||
|
@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
|
|||||||
public static final String THROW_ON_ERROR_ARG = "throw-on-error";
|
public static final String THROW_ON_ERROR_ARG = "throw-on-error";
|
||||||
public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
|
public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
|
||||||
public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
|
public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
|
||||||
|
public static final String FILE_ENCODING = "fileencoding";// long option name for the file encoding
|
||||||
|
public static final String ENCODE = "sqoop.mapreduce.export.encode";
|
||||||
|
|
||||||
// Arguments for validation.
|
// Arguments for validation.
|
||||||
public static final String VALIDATE_ARG = "validate";
|
public static final String VALIDATE_ARG = "validate";
|
||||||
|
@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) {
|
|||||||
|
|
||||||
toolOptions.addUniqueOptions(codeGenOpts);
|
toolOptions.addUniqueOptions(codeGenOpts);
|
||||||
toolOptions.addUniqueOptions(getHCatalogOptions());
|
toolOptions.addUniqueOptions(getHCatalogOptions());
|
||||||
|
|
||||||
|
toolOptions.addUniqueOptions(getFileencodingOptions());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds the option group for the file encoding argument.
|
||||||
|
*
|
||||||
|
* @return the RelatedOptions group containing the "fileencoding" option
|
||||||
|
*/
|
||||||
|
protected RelatedOptions getFileencodingOptions() {
|
||||||
|
RelatedOptions fileencodingOptions = new RelatedOptions("fileencoding arguments");
|
||||||
|
fileencodingOptions.addOption(OptionBuilder
|
||||||
|
.hasArg()
|
||||||
|
.withDescription("fileencoding")
|
||||||
|
.withLongOpt("fileencoding")
|
||||||
|
.create());
|
||||||
|
return fileencodingOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out)
|
|||||||
out.setCall(in.getOptionValue(CALL_ARG));
|
out.setCall(in.getOptionValue(CALL_ARG));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store the requested file encoding in the job configuration.
|
||||||
|
if (in.hasOption(FILE_ENCODING)) {
|
||||||
|
out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING));
|
||||||
|
}
|
||||||
|
|
||||||
applyValidationOptions(in, out);
|
applyValidationOptions(in, out);
|
||||||
applyNewUpdateOptions(in, out);
|
applyNewUpdateOptions(in, out);
|
||||||
applyInputFormatOptions(in, out);
|
applyInputFormatOptions(in, out);
|
||||||
|
@ -24,13 +24,13 @@ public void split() throws Exception {
|
|||||||
long maxVal;
|
long maxVal;
|
||||||
|
|
||||||
int sqlDataType = Types.TIMESTAMP;
|
int sqlDataType = Types.TIMESTAMP;
|
||||||
minVal = df.parse("2019-04-22 11:28:30").getTime();
|
minVal = df.parse("2019-04-22 00:00:00").getTime();
|
||||||
maxVal = df.parse("2019-04-22 16:28:30").getTime();
|
maxVal = df.parse("2019-04-22 23:59:59").getTime();
|
||||||
|
|
||||||
String lowClausePrefix = colName + " >= ";
|
String lowClausePrefix = colName + " >= ";
|
||||||
String highClausePrefix = colName + " < ";
|
String highClausePrefix = colName + " < ";
|
||||||
|
|
||||||
int numSplits = 2;
|
int numSplits = 1440;
|
||||||
if (numSplits < 1) {
|
if (numSplits < 1) {
|
||||||
numSplits = 1;
|
numSplits = 1;
|
||||||
}
|
}
|
||||||
@ -45,7 +45,7 @@ public void split() throws Exception {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// For split size we are using seconds. So we need to convert to milliseconds.
|
// For split size we are using seconds. So we need to convert to milliseconds.
|
||||||
long splitLimit = 3600 * MS_IN_SEC;
|
long splitLimit = -1 * MS_IN_SEC;
|
||||||
|
|
||||||
// Gather the split point integers
|
// Gather the split point integers
|
||||||
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);
|
List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);
|
||||||
|
Loading…
Reference in New Issue
Block a user