Generate a default Parquet schema from the column config when parquetSchema is not set

This commit is contained in:
liaoguangwen 2024-10-26 01:26:47 +08:00
parent 3614c2633e
commit 7bb69f6c85
2 changed files with 14 additions and 1 deletion

View File

@@ -630,7 +630,14 @@ public class HdfsHelper {
MessageType messageType = null;
ParquetFileProccessor proccessor = null;
Path outputPath = new Path(fileName);
String schema = config.getString(Key.PARQUET_SCHEMA);
String schema = config.getString(Key.PARQUET_SCHEMA, null);
if (schema == null) {
List<Configuration> columns = config.getListConfiguration(Key.COLUMN);
if (columns == null || columns.isEmpty()) {
throw DataXException.asDataXException("parquetSchema or column can't be blank!");
}
schema = HdfsHelper.generateParquetSchemaFromColumnAndType(columns);
}
try {
messageType = MessageTypeParser.parseMessageType(schema);
} catch (Exception e) {

View File

@@ -228,6 +228,12 @@ public class HdfsWriter extends Writer {
String endFullFileName = null;
fileSuffix = UUID.randomUUID().toString().replace('-', '_');
if (fileType.equalsIgnoreCase("PARQUET")) {
if (StringUtils.isNotBlank(this.compress)) {
fileSuffix += "." + this.compress.toLowerCase();
}
fileSuffix += ".parquet";
}
fullFileName = String.format("%s%s%s__%s", defaultFS, storePath, filePrefix, fileSuffix);
endFullFileName = String.format("%s%s%s__%s", defaultFS, endStorePath, filePrefix, fileSuffix);