diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java index 993659b0..e2900f7c 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsHelper.java @@ -637,7 +637,14 @@ public class HdfsHelper { MessageType messageType = null; ParquetFileProccessor proccessor = null; Path outputPath = new Path(fileName); - String schema = config.getString(Key.PARQUET_SCHEMA); + String schema = config.getString(Key.PARQUET_SCHEMA, null); + if (schema == null) { + List columns = config.getListConfiguration(Key.COLUMN); + if (columns == null || columns.isEmpty()) { + throw DataXException.asDataXException("parquetSchema or column can't be blank!"); + } + schema = HdfsHelper.generateParquetSchemaFromColumnAndType(columns); + } try { messageType = MessageTypeParser.parseMessageType(schema); } catch (Exception e) { diff --git a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java index e7707461..7535687c 100644 --- a/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java +++ b/hdfswriter/src/main/java/com/alibaba/datax/plugin/writer/hdfswriter/HdfsWriter.java @@ -228,6 +228,12 @@ public class HdfsWriter extends Writer { String endFullFileName = null; fileSuffix = UUID.randomUUID().toString().replace('-', '_'); + if (fileType.equalsIgnoreCase("PARQUET")) { + if (StringUtils.isNotBlank(this.compress)) { + fileSuffix += "." + this.compress.toLowerCase(); + } + fileSuffix += ".parquet"; + } fullFileName = String.format("%s%s%s__%s", defaultFS, storePath, filePrefix, fileSuffix); endFullFileName = String.format("%s%s%s__%s", defaultFS, endStorePath, filePrefix, fileSuffix);