Export-从HDFS导到DBMS，支持配置读取文件编码，使用方法：--fileencoding gbk

2025-05-04 01:00:46 +08:00 · 2019-11-19 08:51:23 +08:00 · 2019-11-19 08:51:23 +08:00 · e0538e2bb1
commit e0538e2bb1
parent 912fbc1c9c
4 changed files with 40 additions and 5 deletions
--- a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
@ -31,6 +31,8 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import static org.apache.sqoop.tool.BaseSqoopTool.ENCODE;
 /**
 * Converts an input record from a string representation to a parsed Sqoop
 * record and emits that DBWritable to the OutputFormat for writeback to the
@ -44,6 +46,7 @@ public class TextExportMapper
  public static final Log LOG =
    LogFactory.getLog(TextExportMapper.class.getName());
  private String encoding;
  private SqoopRecord recordImpl;
  boolean enableDataDumpOnError;
@ -80,13 +83,21 @@ protected void setup(Context context)
    }
    enableDataDumpOnError = conf.getBoolean(DUMP_DATA_ON_ERROR_KEY, false);
    encoding = conf.get(ENCODE);
  }
  public void map(LongWritable key, Text val, Context context)
      throws IOException, InterruptedException {
    try {
      // Transcoding is reportedly expensive, so decode only when an encoding is configured.
      if (encoding != null) {
        String newValue = new String(val.getBytes(), 0, val.getLength(), encoding);
        recordImpl.parse(newValue);
      } else {
        recordImpl.parse(val);
      }
      context.write(recordImpl, NullWritable.get());
    } catch (Exception e) {
      // Something bad has happened
--- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
+++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
@ -174,6 +174,8 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
  public static final String THROW_ON_ERROR_ARG = "throw-on-error";
  public static final String ORACLE_ESCAPING_DISABLED = "oracle-escaping-disabled";
  public static final String ESCAPE_MAPPING_COLUMN_NAMES_ENABLED = "escape-mapping-column-names";
  // Long name of the export command-line option that selects the input file encoding.
  public static final String FILE_ENCODING = "fileencoding";
  // Configuration key for the file encoding: ExportTool stores the option value under it
  // and TextExportMapper reads it back during setup.
  public static final String ENCODE = "sqoop.mapreduce.export.encode";
  // Arguments for validation.
  public static final String VALIDATE_ARG = "validate";
--- a/src/java/org/apache/sqoop/tool/ExportTool.java
+++ b/src/java/org/apache/sqoop/tool/ExportTool.java
@ -208,6 +208,23 @@ public void configureOptions(ToolOptions toolOptions) {
    toolOptions.addUniqueOptions(codeGenOpts);
    toolOptions.addUniqueOptions(getHCatalogOptions());
    toolOptions.addUniqueOptions(getFileencodingOptions());
  }
  /**
   * Builds the option group that exposes the {@code --fileencoding} export argument.
   *
   * <p>The option takes exactly one value, the name of the character encoding of the
   * files being exported (for example {@code gbk}). Its long name comes from
   * {@link #FILE_ENCODING} so that registration here and the lookup in
   * {@code applyOptions} cannot drift apart.
   *
   * @return a {@link RelatedOptions} group containing the single file-encoding option
   */
  protected RelatedOptions getFileencodingOptions() {
    RelatedOptions fileencodingOptions = new RelatedOptions("fileencoding arguments");
    fileencodingOptions.addOption(OptionBuilder
            // Shown as the value placeholder in the generated help text.
            .withArgName("charset")
            .hasArg()
            .withDescription("Character encoding of the files to export, e.g. gbk")
            // Reuse the shared constant instead of repeating the literal option name.
            .withLongOpt(FILE_ENCODING)
            .create());
    return fileencodingOptions;
  }
  @Override
@ -279,6 +296,11 @@ public void applyOptions(CommandLine in, SqoopOptions out)
          out.setCall(in.getOptionValue(CALL_ARG));
      }
      // Pass the requested file encoding to the export job through the configuration.
      if (in.hasOption(FILE_ENCODING)) {
          out.getConf().set(ENCODE, in.getOptionValue(FILE_ENCODING));
      }
      applyValidationOptions(in, out);
      applyNewUpdateOptions(in, out);
      applyInputFormatOptions(in, out);
--- a/src/test/org/apache/sqoop/mapreduce/db/DateSplitterTest.java
+++ b/src/test/org/apache/sqoop/mapreduce/db/DateSplitterTest.java
@ -24,13 +24,13 @@ public void split() throws Exception {
        long maxVal;
        int sqlDataType = Types.TIMESTAMP;
-        minVal = df.parse("2019-04-22 11:28:30").getTime();
+        minVal = df.parse("2019-04-22 00:00:00").getTime();
-        maxVal = df.parse("2019-04-22 16:28:30").getTime();
+        maxVal = df.parse("2019-04-22 23:59:59").getTime();
        String lowClausePrefix = colName + " >= ";
        String highClausePrefix = colName + " < ";
-        int numSplits = 2;
+        int numSplits = 1440;
        if (numSplits < 1) {
            numSplits = 1;
        }
@ -45,7 +45,7 @@ public void split() throws Exception {
        }
        // For split size we are using seconds. So we need to convert to milliseconds.
-        long splitLimit = 3600 * MS_IN_SEC;
+        long splitLimit = -1 * MS_IN_SEC;
        // Gather the split point integers
        List<Long> splitPoints = dateSplitter.split(numSplits, splitLimit, minVal, maxVal);