From 3b9c790f2bb8b04f271b6989aba72f534827c299 Mon Sep 17 00:00:00 2001 From: DeleiGuo Date: Tue, 7 May 2024 14:25:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0skipEmptyDir=E5=8F=82?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 读取空目录时,默认为 true,即跳过不抛出异常。 --- hdfsreader/doc/hdfsreader.md | 7 +++++++ .../datax/plugin/reader/hdfsreader/DFSUtil.java | 7 +++++++ .../plugin/reader/hdfsreader/HdfsReader.java | 15 +++++++++++++-- .../datax/plugin/reader/hdfsreader/Key.java | 5 +++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hdfsreader/doc/hdfsreader.md b/hdfsreader/doc/hdfsreader.md index ca9a021f..5e944b8b 100644 --- a/hdfsreader/doc/hdfsreader.md +++ b/hdfsreader/doc/hdfsreader.md @@ -278,6 +278,13 @@ HdfsReader实现了从Hadoop分布式文件系统Hdfs中读取文件数据并转 * 默认值:无
+* **skipEmptyDir** + + * 描述:path 下未找到任何待读取文件(空目录)时的处理方式。为 true 时仅输出警告日志,任务以返回码 0 正常退出;为 false 时抛出异常。
+ + * 必选:否
+ + * 默认值:true
常见配置: diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java index 720f8bf6..f3ad97b9 100644 --- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java +++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/DFSUtil.java @@ -1,5 +1,6 @@ package com.alibaba.datax.plugin.reader.hdfsreader; +import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.element.*; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; @@ -186,6 +187,12 @@ public class DFSUtil { // 获取要读取的文件的根目录 Path listFiles = new Path(path); + if(!hdfs.exists(listFiles)){ + String message = String.format("[%s] 路径目录不存在!",path); + LOG.warn(message); + System.exit(0); + } + // If the network disconnected, this method will retry 45 times // each time the retry interval for 20 seconds // 获取要读取的文件的根目录的所有二级子文件目录 diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/HdfsReader.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/HdfsReader.java index 08c630fc..0e313da5 100644 --- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/HdfsReader.java +++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/HdfsReader.java @@ -43,6 +43,7 @@ public class HdfsReader extends Reader { private List path = null; private boolean skipEmptyOrcFile = false; private Integer orcFileEmptySize = null; + private Boolean skipEmptyDir=null; @Override public void init() { @@ -58,6 +59,7 @@ public class HdfsReader extends Reader { public void validate(){ this.readerOriginConfig.getNecessaryValue(Key.DEFAULT_FS, HdfsReaderErrorCode.DEFAULT_FS_NOT_FIND_ERROR); + skipEmptyDir = this.readerOriginConfig.getBool(Key.SKIP_EMPTY_DIR,true); // path check String pathInString = 
this.readerOriginConfig.getNecessaryValue(Key.PATH, HdfsReaderErrorCode.REQUIRED_VALUE); @@ -193,9 +195,18 @@ public class HdfsReader extends Reader { // warn:每个slice拖且仅拖一个文件, // int splitNumber = adviceNumber; int splitNumber = this.sourceFiles.size(); + LOG.info("split number:" + splitNumber); + if (0 == splitNumber) { - throw DataXException.asDataXException(HdfsReaderErrorCode.EMPTY_DIR_EXCEPTION, - String.format("未能找到待读取的文件,请确认您的配置项path: %s", this.readerOriginConfig.getString(Key.PATH))); + String message = String.format("未能找到待读取的文件,请确认您的配置项path: %s", + this.readerOriginConfig.getString(Key.PATH)); + if(skipEmptyDir){ + LOG.warn(message); + LOG.info("Task exited with return code 0"); + System.exit(0); + }else { + throw DataXException.asDataXException(HdfsReaderErrorCode.EMPTY_DIR_EXCEPTION,message); + } } List> splitedSourceFiles = this.splitSourceFiles(new ArrayList(this.sourceFiles), splitNumber); diff --git a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Key.java b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Key.java index 7f9b3a0a..55785301 100644 --- a/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Key.java +++ b/hdfsreader/src/main/java/com/alibaba/datax/plugin/reader/hdfsreader/Key.java @@ -63,4 +63,9 @@ public final class Key { public static final String CDH_3_X_HIVE_VERSION = "3.1.3-cdh"; public static final String SUPPORT_ADD_MIDDLE_COLUMN = "supportAddMiddleColumn"; + + /** + * 是否跳过空目录异常 + */ + public static final String SKIP_EMPTY_DIR="skipEmptyDir"; }