mirror of https://github.com/alibaba/DataX.git (synced 2025-05-02 15:12:22 +08:00)

commit 8fadb0c11e
Merge remote-tracking branch 'origin/master' into mergeUpstream

# Conflicts:
#	tdenginereader/pom.xml
#	tdenginewriter/pom.xml
#	tdenginewriter/src/main/java/com/alibaba/datax/plugin/writer/tdenginewriter/DefaultDataHandler.java
@@ -25,7 +25,7 @@ DataX itself, as a data synchronization framework, abstracts synchronization across different data sources into reading from a source
# Quick Start

-##### Download [DataX download link](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/20220530/datax.tar.gz)
+##### Download [DataX download link](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202209/datax.tar.gz)

##### Please click: [Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md)
@@ -44,6 +44,8 @@ DataX already has a fairly comprehensive plugin ecosystem: mainstream RDBMS databases, N
| | SQLServer | √ | √ |[read](https://github.com/alibaba/DataX/blob/master/sqlserverreader/doc/sqlserverreader.md) , [write](https://github.com/alibaba/DataX/blob/master/sqlserverwriter/doc/sqlserverwriter.md)|
| | PostgreSQL | √ | √ |[read](https://github.com/alibaba/DataX/blob/master/postgresqlreader/doc/postgresqlreader.md) , [write](https://github.com/alibaba/DataX/blob/master/postgresqlwriter/doc/postgresqlwriter.md)|
| | DRDS | √ | √ |[read](https://github.com/alibaba/DataX/blob/master/drdsreader/doc/drdsreader.md) , [write](https://github.com/alibaba/DataX/blob/master/drdswriter/doc/drdswriter.md)|
+| | Apache Doris | | √ |[write](https://github.com/alibaba/DataX/blob/master/doriswriter/doc/doriswriter.md)|
+| | StarRocks | | √ |[write](https://github.com/alibaba/DataX/blob/master/starrockswriter/doc/starrockswriter.md)|
| | Generic RDBMS (supports all relational databases) | √ | √ |[read](https://github.com/alibaba/DataX/blob/master/rdbmsreader/doc/rdbmsreader.md) , [write](https://github.com/alibaba/DataX/blob/master/rdbmswriter/doc/rdbmswriter.md)|
| Alibaba Cloud data warehouse storage | ODPS | √ | √ |[read](https://github.com/alibaba/DataX/blob/master/odpsreader/doc/odpsreader.md) , [write](https://github.com/alibaba/DataX/blob/master/odpswriter/doc/odpswriter.md)|
| | ADS | | √ |[write](https://github.com/alibaba/DataX/blob/master/adswriter/doc/adswriter.md)|
@@ -95,6 +97,9 @@ DataX already has a fairly comprehensive plugin ecosystem: mainstream RDBMS databases, N

DataX plans to iterate with monthly releases going forward, and interested contributors are welcome to submit pull requests. Monthly release contents are summarized below.

+- [datax_v202209](https://github.com/alibaba/DataX/releases/tag/datax_v202209)
+  - Includes channel capability updates (MaxCompute, Datahub, SLS, etc.), security vulnerability fixes, and general packaging updates
+
- [datax_v202205](https://github.com/alibaba/DataX/releases/tag/datax_v202205)
  - Includes channel capability updates (MaxCompute, Hologres, OSS, Tdengine, etc.), security vulnerability fixes, and general packaging updates

@@ -70,7 +70,7 @@ public class DataType {
        } else if ("datetime".equals(type)) {
            return DATETIME;
        } else {
-            throw new IllegalArgumentException("unkown type: " + type);
+            throw new IllegalArgumentException("unknown type: " + type);
        }
    }

@@ -68,7 +68,7 @@ public class ClickhouseWriter extends Writer {

        this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE) {
            @Override
-            protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, Column column) throws SQLException {
+            protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException {
                try {
                    if (column.getRawData() == null) {
                        preparedStatement.setNull(columnIndex + 1, columnSqltype);
@@ -2,5 +2,5 @@
    "name": "clickhousewriter",
    "class": "com.alibaba.datax.plugin.writer.clickhousewriter.ClickhouseWriter",
    "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql.",
-    "developer": "jiye.tjy"
+    "developer": "alibaba"
}
@@ -411,6 +411,15 @@ public class Configuration {
        return list;
    }

+    public <T> List<T> getListWithJson(final String path, Class<T> t) {
+        Object object = this.get(path, List.class);
+        if (null == object) {
+            return null;
+        }
+
+        return JSON.parseArray(JSON.toJSONString(object), t);
+    }
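As context for the new helper, a minimal usage sketch (standalone, not part of the commit; the "connection" path and the Conn POJO are hypothetical):

import com.alibaba.datax.common.util.Configuration;

import java.util.List;

public class GetListWithJsonDemo {
    // hypothetical POJO; fastjson binds the "jdbcUrl" property of each list element
    public static class Conn {
        private String jdbcUrl;
        public String getJdbcUrl() { return jdbcUrl; }
        public void setJdbcUrl(String jdbcUrl) { this.jdbcUrl = jdbcUrl; }
    }

    public static void main(String[] args) {
        Configuration conf = Configuration.from(
                "{\"connection\":[{\"jdbcUrl\":\"jdbc:mysql://host:3306/db\"}]}");
        // round-trips the list through JSON so each element lands in a typed POJO
        List<Conn> conns = conf.getListWithJson("connection", Conn.class);
        System.out.println(conns.get(0).getJdbcUrl());
    }
}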

    /**
     * Locate a List object by the user-provided JSON path; returns null if the object does not exist.
     */
@@ -3,6 +3,8 @@ package com.alibaba.datax.common.util;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;
@@ -82,4 +84,20 @@ public class StrUtil {
        return s.substring(0, headLength) + "..." + s.substring(s.length() - tailLength);
    }

+    public static String getMd5(String plainText) {
+        try {
+            StringBuilder builder = new StringBuilder();
+            for (byte b : MessageDigest.getInstance("MD5").digest(plainText.getBytes())) {
+                int i = b & 0xff;
+                if (i < 0x10) {
+                    builder.append('0');
+                }
+                builder.append(Integer.toHexString(i));
+            }
+            return builder.toString();
+        } catch (NoSuchAlgorithmException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
}
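A quick sketch of what the new helper returns, a 32-character lowercase hex digest (standalone example, not part of the commit):

import com.alibaba.datax.common.util.StrUtil;

public class Md5Demo {
    public static void main(String[] args) {
        // the zero-padding branch above guarantees two hex chars per byte,
        // so the result is always 32 lowercase hex characters
        String digest = StrUtil.getMd5("DataX");
        System.out.println(digest.length()); // 32
    }
}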
@@ -41,7 +41,7 @@
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
-            <version>4.5</version>
+            <version>4.5.13</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
@ -0,0 +1,87 @@
|
||||
package com.alibaba.datax.core.transport.transformer;
|
||||
|
||||
import com.alibaba.datax.common.element.Column;
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.element.StringColumn;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.transformer.Transformer;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* no comments.
|
||||
*
|
||||
* @author XuDaojie
|
||||
* @since 2021-08-16
|
||||
*/
|
||||
public class DigestTransformer extends Transformer {
|
||||
|
||||
private static final String MD5 = "md5";
|
||||
private static final String SHA1 = "sha1";
|
||||
private static final String TO_UPPER_CASE = "toUpperCase";
|
||||
private static final String TO_LOWER_CASE = "toLowerCase";
|
||||
|
||||
public DigestTransformer() {
|
||||
setTransformerName("dx_digest");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Record evaluate(Record record, Object... paras) {
|
||||
|
||||
int columnIndex;
|
||||
String type;
|
||||
String charType;
|
||||
|
||||
try {
|
||||
if (paras.length != 3) {
|
||||
throw new RuntimeException("dx_digest paras length must be 3");
|
||||
}
|
||||
|
||||
columnIndex = (Integer) paras[0];
|
||||
type = (String) paras[1];
|
||||
charType = (String) paras[2];
|
||||
|
||||
if (!StringUtils.equalsIgnoreCase(MD5, type) && !StringUtils.equalsIgnoreCase(SHA1, type)) {
|
||||
throw new RuntimeException("dx_digest paras index 1 must be md5 or sha1");
|
||||
}
|
||||
if (!StringUtils.equalsIgnoreCase(TO_UPPER_CASE, charType) && !StringUtils.equalsIgnoreCase(TO_LOWER_CASE, charType)) {
|
||||
throw new RuntimeException("dx_digest paras index 2 must be toUpperCase or toLowerCase");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_ILLEGAL_PARAMETER, "paras:" + Arrays.asList(paras) + " => " + e.getMessage());
|
||||
}
|
||||
|
||||
Column column = record.getColumn(columnIndex);
|
||||
|
||||
try {
|
||||
String oriValue = column.asString();
|
||||
|
||||
// 如果字段为空,作为空字符串处理
|
||||
if (oriValue == null) {
|
||||
oriValue = "";
|
||||
}
|
||||
String newValue;
|
||||
if (MD5.equals(type)) {
|
||||
newValue = DigestUtils.md5Hex(oriValue);
|
||||
} else {
|
||||
newValue = DigestUtils.sha1Hex(oriValue);
|
||||
}
|
||||
|
||||
if (TO_UPPER_CASE.equals(charType)) {
|
||||
newValue = newValue.toUpperCase();
|
||||
} else {
|
||||
newValue = newValue.toLowerCase();
|
||||
}
|
||||
|
||||
record.setColumn(columnIndex, new StringColumn(newValue));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
|
||||
}
|
||||
return record;
|
||||
}
|
||||
|
||||
}
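For illustration, a minimal direct invocation of the new transformer; in a real job it is referenced by name as dx_digest from the job JSON, so this harness (assuming core's DefaultRecord) is only a sketch, not part of the commit:

import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.core.transport.record.DefaultRecord;
import com.alibaba.datax.core.transport.transformer.DigestTransformer;

public class DigestDemo {
    public static void main(String[] args) {
        Record record = new DefaultRecord();
        record.addColumn(new StringColumn("hello"));
        // paras: column index, digest type (md5 | sha1), case (toUpperCase | toLowerCase)
        new DigestTransformer().evaluate(record, 0, "md5", "toUpperCase");
        System.out.println(record.getColumn(0).asString()); // uppercase MD5 hex of "hello"
    }
}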
@@ -61,7 +61,7 @@ public class FilterTransformer extends Transformer {
        } else if (code.equalsIgnoreCase("<=")) {
            return doLess(record, value, column, true);
        } else {
-            throw new RuntimeException("dx_filter can't suport code:" + code);
+            throw new RuntimeException("dx_filter can't support code:" + code);
        }
    } catch (Exception e) {
        throw DataXException.asDataXException(TransformerErrorCode.TRANSFORMER_RUN_EXCEPTION, e.getMessage(), e);
@@ -1,10 +1,18 @@
package com.alibaba.datax.core.transport.transformer;

import org.apache.commons.codec.digest.DigestUtils;

/**
 * Helper class for GroovyTransformer, used from Groovy code; all methods must be static.
 * Created by liqiang on 16/3/4.
 */
public class GroovyTransformerStaticUtil {

    public static String md5(final String data) {
        return DigestUtils.md5Hex(data);
    }

    public static String sha1(final String data) {
        return DigestUtils.sha1Hex(data);
    }
}
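Per the class comment, these statics are meant to be called from user Groovy code in a dx_groovy transformer; a standalone sketch of the equivalent direct Java calls (not part of the commit):

import com.alibaba.datax.core.transport.transformer.GroovyTransformerStaticUtil;

public class GroovyStaticDemo {
    public static void main(String[] args) {
        // inside a dx_groovy script this would read simply: md5(record.getColumn(0).asString())
        System.out.println(GroovyTransformerStaticUtil.md5("hello"));
        System.out.println(GroovyTransformerStaticUtil.sha1("hello"));
    }
}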
@@ -36,6 +36,7 @@ public class TransformerRegistry {
        registTransformer(new ReplaceTransformer());
        registTransformer(new FilterTransformer());
        registTransformer(new GroovyTransformer());
+        registTransformer(new DigestTransformer());
    }

    public static void loadTransformerFromLocalStorage() {
@@ -2,7 +2,7 @@
    "job": {
        "setting": {
            "speed": {
-                "byte":10485760
+                "channel":1
            },
            "errorLimit": {
                "record": 0,
datahubreader/pom.xml (new file, 79 lines)
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>datax-all</artifactId>
        <groupId>com.alibaba.datax</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>datahubreader</artifactId>

    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>datax-common</artifactId>
            <version>${datax-project-version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
        </dependency>
        <dependency>
            <groupId>com.aliyun.datahub</groupId>
            <artifactId>aliyun-sdk-datahub</artifactId>
            <version>2.21.6-public</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compiler plugin -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${jdk-version}</source>
                    <target>${jdk-version}</target>
                    <encoding>${project-sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <!-- assembly plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/package.xml</descriptor>
                    </descriptors>
                    <finalName>datax</finalName>
                </configuration>
                <executions>
                    <execution>
                        <id>dwzip</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
datahubreader/src/main/assembly/package.xml (new file, 34 lines)
@@ -0,0 +1,34 @@
<assembly
    xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
    <id></id>
    <formats>
        <format>dir</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <fileSets>
        <fileSet>
            <directory>src/main/resources</directory>
            <includes>
                <include>plugin.json</include>
            </includes>
            <outputDirectory>plugin/reader/datahubreader</outputDirectory>
        </fileSet>
        <fileSet>
            <directory>target/</directory>
            <includes>
                <include>datahubreader-0.0.1-SNAPSHOT.jar</include>
            </includes>
            <outputDirectory>plugin/reader/datahubreader</outputDirectory>
        </fileSet>
    </fileSets>

    <dependencySets>
        <dependencySet>
            <useProjectArtifact>false</useProjectArtifact>
            <outputDirectory>plugin/reader/datahubreader/libs</outputDirectory>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
</assembly>
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Constant.java (new file)
@@ -0,0 +1,8 @@
package com.alibaba.datax.plugin.reader.datahubreader;

public class Constant {

    public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
    public static String DATE_FORMAT = "yyyyMMdd";

}
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubClientHelper.java (new file)
@@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.Account;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
import org.apache.commons.lang3.StringUtils;

public class DatahubClientHelper {
    public static DatahubClient getDatahubClient(Configuration jobConfig) {
        String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        Account account = new AliyunAccount(accessId, accessKey);
        // whether to enable binary transfer; supported by the server since version 2.12
        boolean enableBinary = jobConfig.getBool("enableBinary", false);
        DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
        // HttpConfig is optional; defaults are used when it is not set
        // enabling LZ4 compression for network transfer is recommended when reading or writing data
        HttpConfig httpConfig = null;
        String httpConfigStr = jobConfig.getString("httpConfig");
        if (StringUtils.isNotBlank(httpConfigStr)) {
            httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
            });
        }

        DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
        if (null != httpConfig) {
            builder.setHttpConfig(httpConfig);
        }
        DatahubClient datahubClient = builder.build();
        return datahubClient;
    }
}
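A minimal sketch of building a client from job configuration with the keys this helper reads (endpoint, accessId, accessKey, plus the optional enableBinary and httpConfig); the endpoint and credentials below are placeholders:

import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.datahubreader.DatahubClientHelper;
import com.aliyun.datahub.client.DatahubClient;

public class ClientDemo {
    public static void main(String[] args) {
        // placeholder endpoint and credentials; enableBinary defaults to false
        Configuration conf = Configuration.from(
                "{\"endpoint\":\"<datahub-endpoint>\"," +
                "\"accessId\":\"<yourAccessId>\",\"accessKey\":\"<yourAccessKey>\"}");
        DatahubClient client = DatahubClientHelper.getDatahubClient(conf);
        System.out.println(client != null);
    }
}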
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReader.java (new file)
@@ -0,0 +1,292 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import com.aliyun.datahub.client.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;

import com.aliyun.datahub.client.DatahubClient;

public class DatahubReader extends Reader {
    public static class Job extends Reader.Job {
        private static final Logger LOG = LoggerFactory.getLogger(Job.class);

        private Configuration originalConfig;

        private Long beginTimestampMillis;
        private Long endTimestampMillis;

        DatahubClient datahubClient;

        @Override
        public void init() {
            LOG.info("datahub reader job init begin ...");
            this.originalConfig = super.getPluginJobConf();
            validateParameter(originalConfig);
            this.datahubClient = DatahubClientHelper.getDatahubClient(this.originalConfig);
            LOG.info("datahub reader job init end.");
        }

        private void validateParameter(Configuration conf) {
            conf.getNecessaryValue(Key.ENDPOINT, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESSKEYID, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESSKEYSECRET, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.COLUMN, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.BEGINDATETIME, DatahubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ENDDATETIME, DatahubReaderErrorCode.REQUIRE_VALUE);

            int batchSize = this.originalConfig.getInt(Key.BATCHSIZE, 1024);
            if (batchSize > 10000) {
                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid batchSize[" + batchSize + "] value (0,10000]!");
            }

            String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
            if (beginDateTime != null) {
                try {
                    beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(beginDateTime);
                } catch (ParseException e) {
                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                            "Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss]!");
                }
            }

            if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
            }

            String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
            if (endDateTime != null) {
                try {
                    endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(endDateTime);
                } catch (ParseException e) {
                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                            "Invalid endDateTime[" + endDateTime + "], format [yyyyMMddHHmmss]!");
                }
            }

            if (endTimestampMillis != null && endTimestampMillis <= 0) {
                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid endTimestampMillis[" + endTimestampMillis + "]!");
            }

            if (beginTimestampMillis != null && endTimestampMillis != null
                    && endTimestampMillis <= beginTimestampMillis) {
                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                        "endTimestampMillis[" + endTimestampMillis + "] must be bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
            }
        }

        @Override
        public void prepare() {
            // create datahub client
            String project = originalConfig.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
            String topic = originalConfig.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
            RecordType recordType = null;
            try {
                DatahubClient client = DatahubClientHelper.getDatahubClient(this.originalConfig);
                GetTopicResult getTopicResult = client.getTopic(project, topic);
                recordType = getTopicResult.getRecordType();
            } catch (Exception e) {
                LOG.warn("get topic type error: {}", e.getMessage());
            }
            if (null != recordType) {
                if (recordType == RecordType.BLOB) {
                    throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                            "DatahubReader only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
                }
            }
        }

        @Override
        public void destroy() {
        }

        @Override
        public List<Configuration> split(int adviceNumber) {
            LOG.info("split() begin...");

            List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();

            String project = this.originalConfig.getString(Key.PROJECT);
            String topic = this.originalConfig.getString(Key.TOPIC);

            List<ShardEntry> shardEntrys = DatahubReaderUtils.getShardsWithRetry(this.datahubClient, project, topic);
            if (shardEntrys == null || shardEntrys.isEmpty()) {
                throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Project [" + project + "] Topic [" + topic + "] has no shards, please check !");
            }

            for (ShardEntry shardEntry : shardEntrys) {
                Configuration splitedConfig = this.originalConfig.clone();
                splitedConfig.set(Key.SHARDID, shardEntry.getShardId());
                readerSplitConfigs.add(splitedConfig);
            }

            LOG.info("split() ok and end...");
            return readerSplitConfigs;
        }

    }

    public static class Task extends Reader.Task {
        private static final Logger LOG = LoggerFactory.getLogger(Task.class);

        private Configuration taskConfig;

        private String accessId;
        private String accessKey;
        private String endpoint;
        private String project;
        private String topic;
        private String shardId;
        private Long beginTimestampMillis;
        private Long endTimestampMillis;
        private int batchSize;
        private List<String> columns;
        private RecordSchema schema;
        private String timeStampUnit;

        DatahubClient datahubClient;

        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();

            this.accessId = this.taskConfig.getString(Key.ACCESSKEYID);
            this.accessKey = this.taskConfig.getString(Key.ACCESSKEYSECRET);
            this.endpoint = this.taskConfig.getString(Key.ENDPOINT);
            this.project = this.taskConfig.getString(Key.PROJECT);
            this.topic = this.taskConfig.getString(Key.TOPIC);
            this.shardId = this.taskConfig.getString(Key.SHARDID);
            this.batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 1024);
            this.timeStampUnit = this.taskConfig.getString(Key.TIMESTAMP_UNIT, "MICROSECOND");
            try {
                this.beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.BEGINDATETIME));
            } catch (ParseException e) {
                // already validated in Job.init(); ignore here
            }

            try {
                this.endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.ENDDATETIME));
            } catch (ParseException e) {
                // already validated in Job.init(); ignore here
            }

            this.columns = this.taskConfig.getList(Key.COLUMN, String.class);

            this.datahubClient = DatahubClientHelper.getDatahubClient(this.taskConfig);

            this.schema = DatahubReaderUtils.getDatahubSchemaWithRetry(this.datahubClient, this.project, topic);

            LOG.info("init datahub reader task finished.project:{} topic:{} batchSize:{}", project, topic, batchSize);
        }

        @Override
        public void destroy() {
        }

        @Override
        public void startRead(RecordSender recordSender) {
            LOG.info("read start");

            String beginCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
                    this.topic, this.shardId, this.beginTimestampMillis);
            String endCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
                    this.topic, this.shardId, this.endTimestampMillis);

            if (beginCursor == null) {
                LOG.info("Shard:{} has no data!", this.shardId);
                return;
            } else if (endCursor == null) {
                endCursor = DatahubReaderUtils.getLatestCursorWithRetry(this.datahubClient, this.project,
                        this.topic, this.shardId);
            }

            String curCursor = beginCursor;

            boolean exit = false;

            while (true) {

                GetRecordsResult result = DatahubReaderUtils.getRecordsResultWithRetry(this.datahubClient, this.project, this.topic,
                        this.shardId, this.batchSize, curCursor, this.schema);

                List<RecordEntry> records = result.getRecords();
                if (records.size() > 0) {
                    for (RecordEntry record : records) {
                        if (record.getSystemTime() >= this.endTimestampMillis) {
                            exit = true;
                            break;
                        }

                        HashMap<String, Column> dataMap = new HashMap<String, Column>();
                        List<Field> fields = ((TupleRecordData) record.getRecordData()).getRecordSchema().getFields();
                        for (int i = 0; i < fields.size(); i++) {
                            Field field = fields.get(i);
                            Column column = DatahubReaderUtils.getColumnFromField(record, field, this.timeStampUnit);
                            dataMap.put(field.getName(), column);
                        }

                        Record dataxRecord = recordSender.createRecord();

                        if (null != this.columns && 1 == this.columns.size()) {
                            String columnsInStr = columns.get(0).toString();
                            if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
                                for (int i = 0; i < fields.size(); i++) {
                                    dataxRecord.addColumn(dataMap.get(fields.get(i).getName()));
                                }

                            } else {
                                if (dataMap.containsKey(columnsInStr)) {
                                    dataxRecord.addColumn(dataMap.get(columnsInStr));
                                } else {
                                    dataxRecord.addColumn(new StringColumn(null));
                                }
                            }
                        } else {
                            for (String col : this.columns) {
                                if (dataMap.containsKey(col)) {
                                    dataxRecord.addColumn(dataMap.get(col));
                                } else {
                                    dataxRecord.addColumn(new StringColumn(null));
                                }
                            }
                        }

                        recordSender.sendToWriter(dataxRecord);
                    }
                } else {
                    break;
                }

                if (exit) {
                    break;
                }

                curCursor = result.getNextCursor();
            }

            LOG.info("end read datahub shard...");
        }

    }

}
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderErrorCode.java (new file)
@@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import com.alibaba.datax.common.spi.ErrorCode;

public enum DatahubReaderErrorCode implements ErrorCode {
    BAD_CONFIG_VALUE("DatahubReader-00", "The value you configured is invalid."),
    LOG_HUB_ERROR("DatahubReader-01", "Datahub exception"),
    REQUIRE_VALUE("DatahubReader-02", "Missing parameters"),
    EMPTY_LOGSTORE_VALUE("DatahubReader-03", "There is no shard under this LogStore");

    private final String code;
    private final String description;

    private DatahubReaderErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubReaderUtils.java (new file)
@@ -0,0 +1,200 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;

import com.alibaba.datax.common.element.*;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;

import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.model.*;

public class DatahubReaderUtils {

    public static long getUnixTimeFromDateTime(String dateTime) throws ParseException {
        try {
            String format = Constant.DATETIME_FORMAT;
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
            return simpleDateFormat.parse(dateTime).getTime();
        } catch (ParseException ignored) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "Invalid DateTime[" + dateTime + "]!");
        }
    }
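A worked example of the parsing above, using the yyyyMMddHHmmss format from Constant.DATETIME_FORMAT (note that SimpleDateFormat applies the JVM's default time zone); a standalone sketch, not part of the commit:

import com.alibaba.datax.plugin.reader.datahubreader.DatahubReaderUtils;

import java.text.ParseException;

public class DateTimeDemo {
    public static void main(String[] args) throws ParseException {
        // "20180913121019" -> epoch millis for 2018-09-13 12:10:19 in the default time zone
        long ts = DatahubReaderUtils.getUnixTimeFromDateTime("20180913121019");
        System.out.println(ts);
    }
}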

    public static List<ShardEntry> getShardsWithRetry(final DatahubClient datahubClient, final String project, final String topic) {

        List<ShardEntry> shards = null;
        try {
            shards = RetryUtil.executeWithRetry(new Callable<List<ShardEntry>>() {
                @Override
                public List<ShardEntry> call() throws Exception {
                    ListShardResult listShardResult = datahubClient.listShard(project, topic);
                    return listShardResult.getShards();
                }
            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));

        } catch (Exception e) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "get Shards error, please check ! detail error message: " + e.toString());
        }
        return shards;
    }

    public static String getCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
            final String shardId, final long timestamp) {

        String cursor;
        try {
            cursor = RetryUtil.executeWithRetry(new Callable<String>() {
                @Override
                public String call() throws Exception {
                    try {
                        return datahubClient.getCursor(project, topic, shardId, CursorType.SYSTEM_TIME, timestamp).getCursor();
                    } catch (InvalidParameterException e) {
                        if (e.getErrorMessage().indexOf("Time in seek request is out of range") >= 0) {
                            return null;
                        } else {
                            throw e;
                        }
                    }
                }
            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));

        } catch (Exception e) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "get Cursor error, please check ! detail error message: " + e.toString());
        }
        return cursor;
    }

    public static String getLatestCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
            final String shardId) {

        String cursor;
        try {
            cursor = RetryUtil.executeWithRetry(new Callable<String>() {
                @Override
                public String call() throws Exception {
                    return datahubClient.getCursor(project, topic, shardId, CursorType.LATEST).getCursor();
                }
            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));

        } catch (Exception e) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "get Cursor error, please check ! detail error message: " + e.toString());
        }
        return cursor;
    }

    public static RecordSchema getDatahubSchemaWithRetry(final DatahubClient datahubClient, final String project, final String topic) {

        RecordSchema schema;
        try {
            schema = RetryUtil.executeWithRetry(new Callable<RecordSchema>() {
                @Override
                public RecordSchema call() throws Exception {
                    return datahubClient.getTopic(project, topic).getRecordSchema();
                }
            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));

        } catch (Exception e) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "get Topic Schema error, please check ! detail error message: " + e.toString());
        }
        return schema;
    }

    public static GetRecordsResult getRecordsResultWithRetry(final DatahubClient datahubClient, final String project,
            final String topic, final String shardId, final int batchSize, final String cursor, final RecordSchema schema) {

        GetRecordsResult result;
        try {
            result = RetryUtil.executeWithRetry(new Callable<GetRecordsResult>() {
                @Override
                public GetRecordsResult call() throws Exception {
                    return datahubClient.getRecords(project, topic, shardId, schema, cursor, batchSize);
                }
            }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));

        } catch (Exception e) {
            throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
                    "get Record Result error, please check ! detail error message: " + e.toString());
        }
        return result;

    }

    public static Column getColumnFromField(RecordEntry record, Field field, String timeStampUnit) {
        Column col = null;
        TupleRecordData o = (TupleRecordData) record.getRecordData();

        switch (field.getType()) {
            case SMALLINT:
                Short shortValue = ((Short) o.getField(field.getName()));
                col = new LongColumn(shortValue == null ? null : shortValue.longValue());
                break;
            case INTEGER:
                col = new LongColumn((Integer) o.getField(field.getName()));
                break;
            case BIGINT: {
                col = new LongColumn((Long) o.getField(field.getName()));
                break;
            }
            case TINYINT: {
                Byte byteValue = ((Byte) o.getField(field.getName()));
                col = new LongColumn(byteValue == null ? null : byteValue.longValue());
                break;
            }
            case BOOLEAN: {
                col = new BoolColumn((Boolean) o.getField(field.getName()));
                break;
            }
            case FLOAT:
                col = new DoubleColumn((Float) o.getField(field.getName()));
                break;
            case DOUBLE: {
                col = new DoubleColumn((Double) o.getField(field.getName()));
                break;
            }
            case STRING: {
                col = new StringColumn((String) o.getField(field.getName()));
                break;
            }
            case DECIMAL: {
                BigDecimal value = (BigDecimal) o.getField(field.getName());
                col = new DoubleColumn(value == null ? null : value.doubleValue());
                break;
            }
            case TIMESTAMP: {
                Long value = (Long) o.getField(field.getName());

                if ("MILLISECOND".equals(timeStampUnit)) {
                    // MILLISECOND: 13-digit precision, pass straight to new Date()
                    col = new DateColumn(value == null ? null : new Date(value));
                }
                else if ("SECOND".equals(timeStampUnit)) {
                    col = new DateColumn(value == null ? null : new Date(value * 1000));
                }
                else {
                    // default is MICROSECOND: 16-digit precision, consistent with the previous logic
                    col = new DateColumn(value == null ? null : new Date(value / 1000));
                }
                break;
            }
            default:
                throw new RuntimeException("Unknown column type: " + field.getType());
        }

        return col;
    }

}
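To make the timeStampUnit branches concrete, a standalone sketch of the same conversion arithmetic (the sample values are illustrative, not from the commit):

import java.util.Date;

public class TimestampUnitDemo {
    public static void main(String[] args) {
        long micros = 1536811819000000L; // 16 digits, MICROSECOND (the default): divide by 1000
        long millis = 1536811819000L;    // 13 digits, MILLISECOND: pass through unchanged
        long seconds = 1536811819L;      // 10 digits, SECOND: multiply by 1000
        System.out.println(new Date(micros / 1000));
        System.out.println(new Date(millis));
        System.out.println(new Date(seconds * 1000));
    }
}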
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/DatahubWriterErrorCode.java (new file)
@@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import com.alibaba.datax.common.spi.ErrorCode;
import com.alibaba.datax.common.util.MessageSource;

public enum DatahubWriterErrorCode implements ErrorCode {
    MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
    INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
    GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
    WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
    SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
    ;

    private final String code;
    private final String description;

    private DatahubWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
datahubreader/src/main/java/com/alibaba/datax/plugin/reader/datahubreader/Key.java (new file)
@@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.reader.datahubreader;

public final class Key {

    /**
     * Configuration keys that the plugin requires its users to provide.
     */
    public static final String ENDPOINT = "endpoint";

    public static final String ACCESSKEYID = "accessId";

    public static final String ACCESSKEYSECRET = "accessKey";

    public static final String PROJECT = "project";

    public static final String TOPIC = "topic";

    public static final String BEGINDATETIME = "beginDateTime";

    public static final String ENDDATETIME = "endDateTime";

    public static final String BATCHSIZE = "batchSize";

    public static final String COLUMN = "column";

    public static final String SHARDID = "shardId";

    public static final String CONFIG_KEY_ENDPOINT = "endpoint";
    public static final String CONFIG_KEY_ACCESS_ID = "accessId";
    public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";

    public static final String TIMESTAMP_UNIT = "timeStampUnit";

}
@@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.
@@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.
datahubreader/src/main/resources/job_config_template.json (new file, 14 lines)
@@ -0,0 +1,14 @@
{
    "name": "datahubreader",
    "parameter": {
        "endpoint": "",
        "accessId": "",
        "accessKey": "",
        "project": "",
        "topic": "",
        "beginDateTime": "20180913121019",
        "endDateTime": "20180913121119",
        "batchSize": 1024,
        "column": []
    }
}
datahubreader/src/main/resources/plugin.json (new file, 6 lines)
@@ -0,0 +1,6 @@
{
    "name": "datahubreader",
    "class": "com.alibaba.datax.plugin.reader.datahubreader.DatahubReader",
    "description": "datahub reader",
    "developer": "alibaba"
}
datahubwriter/pom.xml (new file, 79 lines)
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>datax-all</artifactId>
        <groupId>com.alibaba.datax</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>datahubwriter</artifactId>

    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>datax-common</artifactId>
            <version>${datax-project-version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
        </dependency>
        <dependency>
            <groupId>com.aliyun.datahub</groupId>
            <artifactId>aliyun-sdk-datahub</artifactId>
            <version>2.21.6-public</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compiler plugin -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${jdk-version}</source>
                    <target>${jdk-version}</target>
                    <encoding>${project-sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <!-- assembly plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/package.xml</descriptor>
                    </descriptors>
                    <finalName>datax</finalName>
                </configuration>
                <executions>
                    <execution>
                        <id>dwzip</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
datahubwriter/src/main/assembly/package.xml (new file, 34 lines)
@@ -0,0 +1,34 @@
<assembly
    xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
    <id></id>
    <formats>
        <format>dir</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <fileSets>
        <fileSet>
            <directory>src/main/resources</directory>
            <includes>
                <include>plugin.json</include>
            </includes>
            <outputDirectory>plugin/writer/datahubwriter</outputDirectory>
        </fileSet>
        <fileSet>
            <directory>target/</directory>
            <includes>
                <include>datahubwriter-0.0.1-SNAPSHOT.jar</include>
            </includes>
            <outputDirectory>plugin/writer/datahubwriter</outputDirectory>
        </fileSet>
    </fileSets>

    <dependencySets>
        <dependencySet>
            <useProjectArtifact>false</useProjectArtifact>
            <outputDirectory>plugin/writer/datahubwriter/libs</outputDirectory>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
</assembly>
datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubClientHelper.java (new file)
@@ -0,0 +1,43 @@
package com.alibaba.datax.plugin.writer.datahubwriter;

import org.apache.commons.lang3.StringUtils;

import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.Account;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;

public class DatahubClientHelper {
    public static DatahubClient getDatahubClient(Configuration jobConfig) {
        String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        Account account = new AliyunAccount(accessId, accessKey);
        // whether to enable binary transfer; supported by the server since version 2.12
        boolean enableBinary = jobConfig.getBool("enableBinary", false);
        DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
        // HttpConfig is optional; defaults are used when it is not set
        // enabling LZ4 compression for network transfer is recommended when reading or writing data
        HttpConfig httpConfig = null;
        String httpConfigStr = jobConfig.getString("httpConfig");
        if (StringUtils.isNotBlank(httpConfigStr)) {
            httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
            });
        }

        DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
        if (null != httpConfig) {
            builder.setHttpConfig(httpConfig);
        }
        DatahubClient datahubClient = builder.build();
        return datahubClient;
    }
}
datahubwriter/src/main/java/com/alibaba/datax/plugin/writer/datahubwriter/DatahubWriter.java (new file; listing truncated below)
@@ -0,0 +1,355 @@
package com.alibaba.datax.plugin.writer.datahubwriter;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSON;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.model.FieldType;
import com.aliyun.datahub.client.model.GetTopicResult;
import com.aliyun.datahub.client.model.ListShardResult;
import com.aliyun.datahub.client.model.PutErrorEntry;
import com.aliyun.datahub.client.model.PutRecordsResult;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.RecordSchema;
import com.aliyun.datahub.client.model.RecordType;
import com.aliyun.datahub.client.model.ShardEntry;
import com.aliyun.datahub.client.model.ShardState;
import com.aliyun.datahub.client.model.TupleRecordData;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;

public class DatahubWriter extends Writer {

    /**
     * Methods on Job run only once; methods on Task are executed in parallel by multiple
     * Task threads started by the framework.
     * <p/>
     * The overall Writer flow is:
     * <pre>
     * Job: init --> prepare --> split
     *
     * Task: init --> prepare --> startWrite --> post --> destroy
     * Task: init --> prepare --> startWrite --> post --> destroy
     *
     * Job: post --> destroy
     * </pre>
     */
    public static class Job extends Writer.Job {
        private static final Logger LOG = LoggerFactory
                .getLogger(Job.class);

        private Configuration jobConfig = null;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        }

        @Override
        public void prepare() {
            String project = jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT,
                    DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            String topic = jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC,
                    DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            RecordType recordType = null;
            DatahubClient client = DatahubClientHelper.getDatahubClient(this.jobConfig);
            try {
                GetTopicResult getTopicResult = client.getTopic(project, topic);
                recordType = getTopicResult.getRecordType();
            } catch (Exception e) {
                LOG.warn("get topic type error: {}", e.getMessage());
            }
            if (null != recordType) {
                if (recordType == RecordType.BLOB) {
                    throw DataXException.asDataXException(DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
                            "DatahubWriter only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
                }
            }
        }

        @Override
        public List<Configuration> split(int mandatoryNumber) {
            List<Configuration> configs = new ArrayList<Configuration>();
            for (int i = 0; i < mandatoryNumber; ++i) {
                configs.add(jobConfig.clone());
            }
            return configs;
        }

        @Override
        public void post() {}

        @Override
        public void destroy() {}

    }

    public static class Task extends Writer.Task {
        private static final Logger LOG = LoggerFactory
                .getLogger(Task.class);
        private static final List<String> FATAL_ERRORS_DEFAULT = Arrays.asList(
                "InvalidParameterM",
                "MalformedRecord",
                "INVALID_SHARDID",
                "NoSuchTopic",
                "NoSuchShard"
        );

        private Configuration taskConfig;
        private DatahubClient client;
        private String project;
        private String topic;
        private List<String> shards;
        private int maxCommitSize;
        private int maxRetryCount;
        private RecordSchema schema;
        private long retryInterval;
        private Random random;
        private List<String> column;
        private List<Integer> columnIndex;
        private boolean enableColumnConfig;
        private List<String> fatalErrors;

        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();
            project = taskConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            topic = taskConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
            maxCommitSize = taskConfig.getInt(Key.CONFIG_KEY_MAX_COMMIT_SIZE, 1024 * 1024);
            maxRetryCount = taskConfig.getInt(Key.CONFIG_KEY_MAX_RETRY_COUNT, 500);
            this.retryInterval = taskConfig.getInt(Key.RETRY_INTERVAL, 650);
            this.random = new Random();
            this.column = this.taskConfig.getList(Key.CONFIG_KEY_COLUMN, String.class);
            // ["*"]
            if (null != this.column && 1 == this.column.size()) {
                if (StringUtils.equals("*", this.column.get(0))) {
                    this.column = null;
                }
            }
            this.columnIndex = new ArrayList<Integer>();
            // keep a switch around as a safety valve
            this.enableColumnConfig = this.taskConfig.getBool("enableColumnConfig", true);
            this.fatalErrors = this.taskConfig.getList("fatalErrors", Task.FATAL_ERRORS_DEFAULT, String.class);
            this.client = DatahubClientHelper.getDatahubClient(this.taskConfig);
        }

        @Override
        public void prepare() {
            final String shardIdConfig = this.taskConfig.getString(Key.CONFIG_KEY_SHARD_ID);
            this.shards = new ArrayList<String>();
            try {
                RetryUtil.executeWithRetry(new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        ListShardResult result = client.listShard(project, topic);
                        if (StringUtils.isNotBlank(shardIdConfig)) {
                            shards.add(shardIdConfig);
                        } else {
                            for (ShardEntry shard : result.getShards()) {
                                if (shard.getState() == ShardState.ACTIVE || shard.getState() == ShardState.OPENING) {
                                    shards.add(shard.getShardId());
                                }
                            }
                        }
                        schema = client.getTopic(project, topic).getRecordSchema();
                        return null;
                    }
                }, DataXCaseEnvUtil.getRetryTimes(5), DataXCaseEnvUtil.getRetryInterval(10000L), DataXCaseEnvUtil.getRetryExponential(false));
            } catch (Exception e) {
                throw DataXException.asDataXException(DatahubWriterErrorCode.GET_TOPOIC_INFO_FAIL,
                        "get topic info failed", e);
            }
            LOG.info("datahub topic {} shard to write: {}", this.topic, JSON.toJSONString(this.shards));
            LOG.info("datahub topic {} has schema: {}", this.topic, JSON.toJSONString(this.schema));

            // Compute the write order against the topic schema from the user-configured columns,
            // so that column reordering is supported; all subsequent writes to DataHub use the
            // columnIndex ordering.
            int totalSize = this.schema.getFields().size();
            if (null != this.column && !this.column.isEmpty() && this.enableColumnConfig) {
                for (String eachCol : this.column) {
                    int indexFound = -1;
                    for (int i = 0; i < totalSize; i++) {
                        // warn: case-insensitive match
                        if (StringUtils.equalsIgnoreCase(eachCol, this.schema.getField(i).getName())) {
                            indexFound = i;
                            break;
                        }
                    }
                    if (indexFound >= 0) {
                        this.columnIndex.add(indexFound);
                    } else {
                        throw DataXException.asDataXException(DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
                                String.format("can not find column %s in datahub topic %s", eachCol, this.topic));
                    }
                }
            } else {
                for (int i = 0; i < totalSize; i++) {
                    this.columnIndex.add(i);
                }
            }
        }

        @Override
        public void startWrite(RecordReceiver recordReceiver) {
            Record record;
            List<RecordEntry> records = new ArrayList<RecordEntry>();
            String shardId = null;
            if (1 == this.shards.size()) {
                shardId = shards.get(0);
            } else {
                shardId = shards.get(this.random.nextInt(shards.size()));
            }
            int commitSize = 0;
            try {
                while ((record = recordReceiver.getFromReader()) != null) {
                    RecordEntry dhRecord = convertRecord(record, shardId);
                    if (dhRecord != null) {
                        records.add(dhRecord);
                    }
                    commitSize += record.getByteSize();
                    if (commitSize >= maxCommitSize) {
                        commit(records);
                        records.clear();
                        commitSize = 0;
                        if (1 == this.shards.size()) {
                            shardId = shards.get(0);
                        } else {
                            shardId = shards.get(this.random.nextInt(shards.size()));
                        }
                    }
                }
                if (commitSize > 0) {
                    commit(records);
                }
            } catch (Exception e) {
                throw DataXException.asDataXException(
                        DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, e);
            }
        }

        @Override
        public void post() {}

        @Override
        public void destroy() {}

        private void commit(List<RecordEntry> records) throws InterruptedException {
            PutRecordsResult result = client.putRecords(project, topic, records);
            if (result.getFailedRecordCount() > 0) {
                for (int i = 0; i < maxRetryCount; ++i) {
                    boolean limitExceededMessagePrinted = false;
                    for (PutErrorEntry error : result.getPutErrorEntries()) {
                        // for LimitExceeded, log once per batch rather than once per failed record
                        if (StringUtils.equalsIgnoreCase("LimitExceeded", error.getErrorcode())) {
                            if (!limitExceededMessagePrinted) {
                                LOG.warn("write record error, request id: {}, error code: {}, error message: {}",
                                        result.getRequestId(), error.getErrorcode(), error.getMessage());
                                limitExceededMessagePrinted = true;
                            }
                        } else {
                            LOG.error("write record error, request id: {}, error code: {}, error message: {}",
                                    result.getRequestId(), error.getErrorcode(), error.getMessage());
                        }
                        if (this.fatalErrors.contains(error.getErrorcode())) {
                            throw DataXException.asDataXException(
                                    DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
                                    error.getMessage());
                        }
}
|
||||
|
||||
if (this.retryInterval >= 0) {
|
||||
Thread.sleep(this.retryInterval);
|
||||
} else {
|
||||
Thread.sleep(new Random().nextInt(700) + 300);
|
||||
}
|
||||
|
||||
result = client.putRecords(project, topic, result.getFailedRecords());
|
||||
if (result.getFailedRecordCount() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
|
||||
"write datahub failed");
|
||||
}
|
||||
}
|
||||
|
||||
private RecordEntry convertRecord(Record dxRecord, String shardId) {
|
||||
try {
|
||||
RecordEntry dhRecord = new RecordEntry();
|
||||
dhRecord.setShardId(shardId);
|
||||
TupleRecordData data = new TupleRecordData(this.schema);
|
||||
for (int i = 0; i < this.columnIndex.size(); ++i) {
|
||||
int orderInSchema = this.columnIndex.get(i);
|
||||
FieldType type = this.schema.getField(orderInSchema).getType();
|
||||
Column column = dxRecord.getColumn(i);
|
||||
switch (type) {
|
||||
case BIGINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case DOUBLE:
|
||||
data.setField(orderInSchema, column.asDouble());
|
||||
break;
|
||||
case STRING:
|
||||
data.setField(orderInSchema, column.asString());
|
||||
break;
|
||||
case BOOLEAN:
|
||||
data.setField(orderInSchema, column.asBoolean());
|
||||
break;
|
||||
case TIMESTAMP:
|
||||
if (null == column.asDate()) {
|
||||
data.setField(orderInSchema, null);
|
||||
} else {
|
||||
data.setField(orderInSchema, column.asDate().getTime() * 1000);
|
||||
}
|
||||
break;
|
||||
case DECIMAL:
|
||||
// warn
|
||||
data.setField(orderInSchema, column.asBigDecimal());
|
||||
break;
|
||||
case INTEGER:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case FLOAT:
|
||||
data.setField(orderInSchema, column.asDouble());
|
||||
break;
|
||||
case TINYINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case SMALLINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
default:
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
|
||||
String.format("does not support type: %s", type));
|
||||
}
|
||||
}
|
||||
dhRecord.setRecordData(data);
|
||||
return dhRecord;
|
||||
} catch (Exception e) {
|
||||
super.getTaskPluginCollector().collectDirtyRecord(dxRecord, e, "convert recor failed");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
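The `commit()` method above retries only the failed subset of a batch, sleeping a fixed interval between attempts and giving up after `maxRetryCount` rounds. A minimal, self-contained sketch of that pattern follows; `putRecords` and all names here are toy stand-ins, not the Datahub SDK.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class BoundedRetryDemo {
    // Toy stand-in for a batch API that may transiently reject part of a batch.
    static List<String> putRecords(List<String> batch) {
        List<String> failed = new ArrayList<>();
        for (String r : batch) {
            if (r.startsWith("bad") && Math.random() < 0.5) {
                failed.add(r); // transiently rejected
            }
        }
        return failed;
    }

    public static void main(String[] args) throws InterruptedException {
        List<String> pending = new ArrayList<>(Arrays.asList("a", "bad-1", "b", "bad-2"));
        int maxRetryCount = 500;
        long retryInterval = 650L;

        List<String> failed = putRecords(pending);
        for (int i = 0; i < maxRetryCount && !failed.isEmpty(); i++) {
            Thread.sleep(retryInterval); // back off before resubmitting
            failed = putRecords(failed); // retry only the failed subset
        }
        if (!failed.isEmpty()) {
            throw new RuntimeException("write failed after retries: " + failed);
        }
        System.out.println("all records written");
    }
}
```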
@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.writer.datahubwriter;

import com.alibaba.datax.common.spi.ErrorCode;
import com.alibaba.datax.common.util.MessageSource;

public enum DatahubWriterErrorCode implements ErrorCode {
    MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
    INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
    GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
    WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
    SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
    ;

    private final String code;
    private final String description;

    private DatahubWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
@ -0,0 +1,26 @@
package com.alibaba.datax.plugin.writer.datahubwriter;

public final class Key {

    /**
     * Configuration keys that users of this plugin must provide.
     */
    public static final String CONFIG_KEY_ENDPOINT = "endpoint";
    public static final String CONFIG_KEY_ACCESS_ID = "accessId";
    public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
    public static final String CONFIG_KEY_PROJECT = "project";
    public static final String CONFIG_KEY_TOPIC = "topic";
    public static final String CONFIG_KEY_WRITE_MODE = "mode";
    public static final String CONFIG_KEY_SHARD_ID = "shardId";
    public static final String CONFIG_KEY_MAX_COMMIT_SIZE = "maxCommitSize";
    public static final String CONFIG_KEY_MAX_RETRY_COUNT = "maxRetryCount";

    public static final String CONFIG_VALUE_SEQUENCE_MODE = "sequence";
    public static final String CONFIG_VALUE_RANDOM_MODE = "random";

    public final static String MAX_RETRY_TIME = "maxRetryTime";

    public final static String RETRY_INTERVAL = "retryInterval";

    public final static String CONFIG_KEY_COLUMN = "column";
}
@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
@ -0,0 +1,10 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.
@ -0,0 +1,10 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.
datahubwriter/src/main/resources/job_config_template.json
@ -0,0 +1,14 @@
{
    "name": "datahubwriter",
    "parameter": {
        "endpoint":"",
        "accessId": "",
        "accessKey": "",
        "project": "",
        "topic": "",
        "mode": "random",
        "shardId": "",
        "maxCommitSize": 524288,
        "maxRetryCount": 500
    }
}
datahubwriter/src/main/resources/plugin.json
@ -0,0 +1,6 @@
{
    "name": "datahubwriter",
    "class": "com.alibaba.datax.plugin.writer.datahubwriter.DatahubWriter",
    "description": "datahub writer",
    "developer": "alibaba"
}
doriswriter/doc/doriswriter.md
@ -0,0 +1,181 @@
# DorisWriter Plugin Documentation

## 1 Quick Introduction

DorisWriter supports writing data into Doris in large batches.

## 2 Implementation

DorisWriter imports data through Doris's native Stream Load mechanism: it buffers the rows read by the `reader` in memory, joins them into a text payload (CSV by default, JSON optionally), and then bulk-imports each batch into Doris.
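To make the mechanism concrete, here is a minimal sketch of a single Stream Load request using only the JDK's `HttpURLConnection`. The FE address, database, table, credentials and label are all placeholder assumptions, and a real client (like the plugin below) must also follow the FE's redirect to a BE, re-sending the body there.

```java
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class StreamLoadSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical FE host, database and table.
        URL url = new URL("http://127.0.0.1:8030/api/demo/all_employees_info/_stream_load");
        byte[] body = "[{\"emp_no\":1,\"first_name\":\"a\"}]".getBytes(StandardCharsets.UTF_8);

        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        // FE answers with a redirect to a BE; HttpURLConnection will not replay
        // a PUT body across redirects, so a real client handles this itself.
        conn.setInstanceFollowRedirects(false);
        String auth = Base64.getEncoder().encodeToString("root:".getBytes(StandardCharsets.UTF_8));
        conn.setRequestProperty("Authorization", "Basic " + auth);
        conn.setRequestProperty("Expect", "100-continue");
        conn.setRequestProperty("label", "datax_doris_writer_demo_1"); // unique label prevents duplicate imports
        conn.setRequestProperty("format", "json");
        conn.setRequestProperty("strip_outer_array", "true");
        try (OutputStream os = conn.getOutputStream()) {
            os.write(body);
        }
        System.out.println("HTTP " + conn.getResponseCode());
    }
}
```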
## 3 Features

### 3.1 Sample Configuration

Below is a job file that reads data from MySQL and imports it into Doris.

```
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://localhost:3306/demo"],
                                "table": ["employees_1"]
                            }
                        ],
                        "username": "root",
                        "password": "xxxxx",
                        "where": ""
                    }
                },
                "writer": {
                    "name": "doriswriter",
                    "parameter": {
                        "loadUrl": ["172.16.0.13:8030"],
                        "column": ["emp_no", "birth_date", "first_name","last_name","gender","hire_date"],
                        "username": "root",
                        "password": "xxxxxx",
                        "postSql": ["select count(1) from all_employees_info"],
                        "preSql": [],
                        "flushInterval":30000,
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://172.16.0.13:9030/demo",
                                "selectedDatabase": "demo",
                                "table": ["all_employees_info"]
                            }
                        ],
                        "loadProps": {
                            "format": "json",
                            "strip_outer_array": true
                        }
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
```

### 3.2 Parameter Description

* **jdbcUrl**

  - Description: JDBC connection string for Doris, used to execute `preSql` and `postSql`.
  - Required: yes
  - Default: none

* **loadUrl**

  - Description: target of the Stream Load request, in the form "ip:port", where ip is an FE node address and port is that FE node's http_port. Multiple targets may be given, separated by ASCII semicolons (`;`); doriswriter will access them in round-robin fashion.
  - Required: yes
  - Default: none

* **username**

  - Description: username for accessing the Doris database.
  - Required: yes
  - Default: none

* **password**

  - Description: password for accessing the Doris database.
  - Required: no
  - Default: empty

* **connection.selectedDatabase**
  - Description: name of the Doris database to write to.
  - Required: yes
  - Default: none

* **connection.table**
  - Description: name of the Doris table to write to.
  - Required: yes
  - Default: none

* **column**

  - Description: the fields of the destination table **to be written**; they become the field names of the generated JSON payload. Separate fields with commas, for example: "column": ["id","name","age"].
  - Required: yes
  - Default: none

* **preSql**

  - Description: standard SQL statements executed before data is written to the destination table.
  - Required: no
  - Default: none

* **postSql**

  - Description: standard SQL statements executed after data is written to the destination table.
  - Required: no
  - Default: none


* **maxBatchRows**

  - Description: maximum number of rows per import batch. Together with **batchSize** it controls batch sizing; a batch is imported as soon as either threshold is reached.
  - Required: no
  - Default: 500000

* **batchSize**

  - Description: maximum amount of data per import batch. Together with **maxBatchRows** it controls batch sizing; a batch is imported as soon as either threshold is reached.
  - Required: no
  - Default: 104857600

* **maxRetries**

  - Description: number of retries after an import batch fails.
  - Required: no
  - Default: 0

* **labelPrefix**

  - Description: label prefix for each import batch. The final label is formed as `labelPrefix + UUID`, which is globally unique and guarantees that data is never imported twice.
  - Required: no
  - Default: `datax_doris_writer_`

* **loadProps**

  - Description: request parameters for Stream Load; for details see the Stream Load documentation. [Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)

    This includes the import data format (`format` and related options). The default import format is CSV; JSON is also supported, as described under Type Conversion below and in the official Stream Load documentation.

  - Required: no

  - Default: none

### Type Conversion

By default, all incoming data is converted to strings and joined into a `csv` payload, with `\t` as the column separator and `\n` as the row separator, for the Stream Load import.

CSV is the default import format; to change the column separator, configure `loadProps` accordingly:

```json
"loadProps": {
    "column_separator": "\\x01",
    "row_delimiter": "\\x02"
}
```

To switch the import format to `json`, configure `loadProps` accordingly:
```json
"loadProps": {
    "format": "json",
    "strip_outer_array": true
}
```

For more information, see the Doris documentation: [Stream load - Apache Doris](https://doris.apache.org/zh-CN/docs/data-operate/import/import-way/stream-load-manual)
doriswriter/doc/mysql2doris.json
@ -0,0 +1,46 @@
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": ["k1", "k2", "k3"],
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://192.168.10.10:3306/db1"],
                                "table": ["t1"]
                            }
                        ],
                        "username": "root",
                        "password": "",
                        "where": ""
                    }
                },
                "writer": {
                    "name": "doriswriter",
                    "parameter": {
                        "loadUrl": ["192.168.1.1:8030"],
                        "loadProps": {},
                        "database": "db1",
                        "column": ["k1", "k2", "k3"],
                        "username": "root",
                        "password": "",
                        "postSql": [],
                        "preSql": [],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://192.168.1.1:9030/",
                                "table": ["xxx"],
                                "selectedDatabase": "xxxx"
                            }
                        ]
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
doriswriter/pom.xml
@ -0,0 +1,99 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>datax-all</artifactId>
        <groupId>com.alibaba.datax</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <artifactId>doriswriter</artifactId>
    <name>doriswriter</name>
    <packaging>jar</packaging>
    <dependencies>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>datax-common</artifactId>
            <version>${datax-project-version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
        </dependency>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>plugin-rdbms-util</artifactId>
            <version>${datax-project-version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.driver.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.13</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- compiler plugin -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${jdk-version}</source>
                    <target>${jdk-version}</target>
                    <encoding>${project-sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <!-- assembly plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/package.xml</descriptor>
                    </descriptors>
                    <finalName>datax</finalName>
                </configuration>
                <executions>
                    <execution>
                        <id>dwzip</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
doriswriter/src/main/assembly/package.xml
@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
    <id/>
    <formats>
        <format>dir</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <fileSets>
        <fileSet>
            <directory>src/main/resources</directory>
            <includes>
                <include>plugin.json</include>
                <include>plugin_job_template.json</include>
            </includes>
            <outputDirectory>plugin/writer/doriswriter</outputDirectory>
        </fileSet>
        <fileSet>
            <directory>target/</directory>
            <includes>
                <include>doriswriter-0.0.1-SNAPSHOT.jar</include>
            </includes>
            <outputDirectory>plugin/writer/doriswriter</outputDirectory>
        </fileSet>
    </fileSets>
    <dependencySets>
        <dependencySet>
            <useProjectArtifact>false</useProjectArtifact>
            <outputDirectory>plugin/writer/doriswriter/libs</outputDirectory>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
</assembly>
@ -0,0 +1,54 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.google.common.base.Strings;

import java.io.StringWriter;

public class DelimiterParser {

    private static final String HEX_STRING = "0123456789ABCDEF";

    public static String parse(String sp, String dSp) throws RuntimeException {
        if (Strings.isNullOrEmpty(sp)) {
            return dSp;
        }
        if (!sp.toUpperCase().startsWith("\\X")) {
            return sp;
        }
        String hexStr = sp.substring(2);
        // validate the hex string
        if (hexStr.isEmpty()) {
            throw new RuntimeException("Failed to parse delimiter: `Hex str is empty`");
        }
        if (hexStr.length() % 2 != 0) {
            throw new RuntimeException("Failed to parse delimiter: `Hex str length error`");
        }
        for (char hexChar : hexStr.toUpperCase().toCharArray()) {
            if (HEX_STRING.indexOf(hexChar) == -1) {
                throw new RuntimeException("Failed to parse delimiter: `Hex str format error`");
            }
        }
        // decode the hex bytes into the actual separator characters
        StringWriter writer = new StringWriter();
        for (byte b : hexStrToBytes(hexStr)) {
            writer.append((char) b);
        }
        return writer.toString();
    }

    private static byte[] hexStrToBytes(String hexStr) {
        String upperHexStr = hexStr.toUpperCase();
        int length = upperHexStr.length() / 2;
        char[] hexChars = upperHexStr.toCharArray();
        byte[] bytes = new byte[length];
        for (int i = 0; i < length; i++) {
            int pos = i * 2;
            bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1]));
        }
        return bytes;
    }

    private static byte charToByte(char c) {
        return (byte) HEX_STRING.indexOf(c);
    }
}
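As a quick sanity check on the escape handling above, a throwaway main (hypothetical, assumed to sit in the same package as `DelimiterParser`) behaves like this:

```java
public class DelimiterParserDemo {
    public static void main(String[] args) {
        // plain separators pass through untouched
        System.out.println(DelimiterParser.parse(",", "\t").equals(","));         // true
        // null/empty falls back to the supplied default
        System.out.println(DelimiterParser.parse("", "\t").equals("\t"));         // true
        // the literal text "\x01" decodes to the single byte 0x01
        System.out.println((int) DelimiterParser.parse("\\x01", "\t").charAt(0)); // 1
    }
}
```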
@ -0,0 +1,23 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.element.Column;

public class DorisBaseCodec {
    protected String convertionField(Column col) {
        if (null == col.getRawData() || Column.Type.NULL == col.getType()) {
            return null;
        }
        if (Column.Type.BOOL == col.getType()) {
            return String.valueOf(col.asLong());
        }
        if (Column.Type.BYTES == col.getType()) {
            byte[] bts = (byte[]) col.getRawData();
            // interpret the bytes as an unsigned big-endian integer
            long value = 0;
            for (int i = 0; i < bts.length; i++) {
                value += (bts[bts.length - i - 1] & 0xffL) << (8 * i);
            }
            return String.valueOf(value);
        }
        return col.asString();
    }
}
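A minimal sketch of the BYTES branch, isolated from the codec, showing the big-endian fold on a concrete input:

```java
public class BytesToLongDemo {
    public static void main(String[] args) {
        byte[] bts = {0x01, 0x00}; // big-endian 0x0100
        long value = 0;
        // same accumulation as DorisBaseCodec: least-significant byte first
        for (int i = 0; i < bts.length; i++) {
            value += (bts[bts.length - i - 1] & 0xffL) << (8 * i);
        }
        System.out.println(value); // 256
    }
}
```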
@ -0,0 +1,10 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.element.Record;

import java.io.Serializable;

public interface DorisCodec extends Serializable {

    String codec(Record row);
}
@ -0,0 +1,19 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import java.util.Map;

public class DorisCodecFactory {
    public DorisCodecFactory() {
    }

    public static DorisCodec createCodec(Keys writerOptions) {
        if (Keys.StreamLoadFormat.CSV.equals(writerOptions.getStreamLoadFormat())) {
            Map<String, Object> props = writerOptions.getLoadProps();
            return new DorisCsvCodec(null == props || !props.containsKey("column_separator") ? null : String.valueOf(props.get("column_separator")));
        }
        if (Keys.StreamLoadFormat.JSON.equals(writerOptions.getStreamLoadFormat())) {
            return new DorisJsonCodec(writerOptions.getColumns());
        }
        throw new RuntimeException("Failed to create row serializer, unsupported `format` from stream load properties.");
    }
}
@ -0,0 +1,27 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.element.Record;

public class DorisCsvCodec extends DorisBaseCodec implements DorisCodec {

    private static final long serialVersionUID = 1L;

    private final String columnSeparator;

    public DorisCsvCodec(String sp) {
        this.columnSeparator = DelimiterParser.parse(sp, "\t");
    }

    @Override
    public String codec(Record row) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < row.getColumnNumber(); i++) {
            String value = convertionField(row.getColumn(i));
            sb.append(null == value ? "\\N" : value);
            if (i < row.getColumnNumber() - 1) {
                sb.append(columnSeparator);
            }
        }
        return sb.toString();
    }
}
@ -0,0 +1,33 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.element.Record;
import com.alibaba.fastjson.JSON;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DorisJsonCodec extends DorisBaseCodec implements DorisCodec {

    private static final long serialVersionUID = 1L;

    private final List<String> fieldNames;

    public DorisJsonCodec(List<String> fieldNames) {
        this.fieldNames = fieldNames;
    }

    @Override
    public String codec(Record row) {
        if (null == fieldNames) {
            return "";
        }
        Map<String, Object> rowMap = new HashMap<>(fieldNames.size());
        int idx = 0;
        for (String fieldName : fieldNames) {
            rowMap.put(fieldName, convertionField(row.getColumn(idx)));
            idx++;
        }
        return JSON.toJSONString(rowMap);
    }
}
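The shape of the output is easy to see without constructing a DataX `Record`; this illustrative snippet builds the per-row map by hand, exactly as `codec` does after field conversion (values are always strings):

```java
import com.alibaba.fastjson.JSON;

import java.util.HashMap;
import java.util.Map;

public class JsonCodecShapeDemo {
    public static void main(String[] args) {
        // Mirrors DorisJsonCodec: each converted field is a string keyed by column name.
        Map<String, Object> rowMap = new HashMap<>(2);
        rowMap.put("k1", "a");
        rowMap.put("k2", "1");
        System.out.println(JSON.toJSONString(rowMap)); // {"k1":"a","k2":"1"}
    }
}
```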
@ -0,0 +1,233 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.fastjson.JSON;
import org.apache.commons.codec.binary.Base64;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

public class DorisStreamLoadObserver {
    private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoadObserver.class);

    private Keys options;

    private long pos;
    private static final String RESULT_FAILED = "Fail";
    private static final String RESULT_LABEL_EXISTED = "Label Already Exists";
    private static final String LAEBL_STATE_VISIBLE = "VISIBLE";
    private static final String LAEBL_STATE_COMMITTED = "COMMITTED";
    private static final String RESULT_LABEL_PREPARE = "PREPARE";
    private static final String RESULT_LABEL_ABORTED = "ABORTED";
    private static final String RESULT_LABEL_UNKNOWN = "UNKNOWN";

    public DorisStreamLoadObserver(Keys options) {
        this.options = options;
    }

    public void streamLoad(WriterTuple data) throws Exception {
        String host = getLoadHost();
        if (host == null) {
            throw new IOException("load_url cannot be empty, or the host cannot connect. Please check your configuration.");
        }
        String loadUrl = new StringBuilder(host)
                .append("/api/")
                .append(options.getDatabase())
                .append("/")
                .append(options.getTable())
                .append("/_stream_load")
                .toString();
        LOG.info("Start to join batch data: rows[{}] bytes[{}] label[{}].", data.getRows().size(), data.getBytes(), data.getLabel());
        Map<String, Object> loadResult = put(loadUrl, data.getLabel(), addRows(data.getRows(), data.getBytes().intValue()));
        LOG.info("StreamLoad response: {}", JSON.toJSONString(loadResult));
        final String keyStatus = "Status";
        if (null == loadResult || !loadResult.containsKey(keyStatus)) {
            throw new IOException("Unable to flush data to Doris: unknown result status.");
        }
        LOG.debug("StreamLoad response: {}", JSON.toJSONString(loadResult));
        if (RESULT_FAILED.equals(loadResult.get(keyStatus))) {
            throw new IOException(
                    new StringBuilder("Failed to flush data to Doris.\n").append(JSON.toJSONString(loadResult)).toString()
            );
        } else if (RESULT_LABEL_EXISTED.equals(loadResult.get(keyStatus))) {
            LOG.debug("StreamLoad response: {}", JSON.toJSONString(loadResult));
            checkStreamLoadState(host, data.getLabel());
        }
    }

    private void checkStreamLoadState(String host, String label) throws IOException {
        int idx = 0;
        while (true) {
            try {
                TimeUnit.SECONDS.sleep(Math.min(++idx, 5));
            } catch (InterruptedException ex) {
                break;
            }
            try (CloseableHttpClient httpclient = HttpClients.createDefault()) {
                HttpGet httpGet = new HttpGet(new StringBuilder(host).append("/api/").append(options.getDatabase()).append("/get_load_state?label=").append(label).toString());
                httpGet.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword()));
                httpGet.setHeader("Connection", "close");

                try (CloseableHttpResponse resp = httpclient.execute(httpGet)) {
                    HttpEntity respEntity = getHttpEntity(resp);
                    if (respEntity == null) {
                        throw new IOException(String.format("Failed to flush data to Doris, Error " +
                                "could not get the final state of label[%s].\n", label), null);
                    }
                    Map<String, Object> result = (Map<String, Object>) JSON.parse(EntityUtils.toString(respEntity));
                    String labelState = (String) result.get("state");
                    if (null == labelState) {
                        throw new IOException(String.format("Failed to flush data to Doris, Error " +
                                "could not get the final state of label[%s]. response[%s]\n", label, EntityUtils.toString(respEntity)), null);
                    }
                    LOG.info(String.format("Checking label[%s] state[%s]\n", label, labelState));
                    switch (labelState) {
                        case LAEBL_STATE_VISIBLE:
                        case LAEBL_STATE_COMMITTED:
                            return;
                        case RESULT_LABEL_PREPARE:
                            continue;
                        case RESULT_LABEL_ABORTED:
                            throw new DorisWriterExcetion(String.format("Failed to flush data to Doris, Error " +
                                    "label[%s] state[%s]\n", label, labelState), null, true);
                        case RESULT_LABEL_UNKNOWN:
                        default:
                            throw new IOException(String.format("Failed to flush data to Doris, Error " +
                                    "label[%s] state[%s]\n", label, labelState), null);
                    }
                }
            }
        }
    }

    private byte[] addRows(List<byte[]> rows, int totalBytes) {
        if (Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) {
            Map<String, Object> props = (options.getLoadProps() == null ? new HashMap<>() : options.getLoadProps());
            byte[] lineDelimiter = DelimiterParser.parse((String) props.get("row_delimiter"), "\n").getBytes(StandardCharsets.UTF_8);
            ByteBuffer bos = ByteBuffer.allocate(totalBytes + rows.size() * lineDelimiter.length);
            for (byte[] row : rows) {
                bos.put(row);
                bos.put(lineDelimiter);
            }
            return bos.array();
        }

        if (Keys.StreamLoadFormat.JSON.equals(options.getStreamLoadFormat())) {
            ByteBuffer bos = ByteBuffer.allocate(totalBytes + (rows.isEmpty() ? 2 : rows.size() + 1));
            bos.put("[".getBytes(StandardCharsets.UTF_8));
            byte[] jsonDelimiter = ",".getBytes(StandardCharsets.UTF_8);
            boolean isFirstElement = true;
            for (byte[] row : rows) {
                if (!isFirstElement) {
                    bos.put(jsonDelimiter);
                }
                bos.put(row);
                isFirstElement = false;
            }
            bos.put("]".getBytes(StandardCharsets.UTF_8));
            return bos.array();
        }
        throw new RuntimeException("Failed to join rows data, unsupported `format` from stream load properties:");
    }

    private Map<String, Object> put(String loadUrl, String label, byte[] data) throws IOException {
        LOG.info(String.format("Executing stream load to: '%s', size: '%s'", loadUrl, data.length));
        final HttpClientBuilder httpClientBuilder = HttpClients.custom()
                .setRedirectStrategy(new DefaultRedirectStrategy() {
                    @Override
                    protected boolean isRedirectable(String method) {
                        // allow the PUT to follow the FE -> BE redirect
                        return true;
                    }
                });
        try (CloseableHttpClient httpclient = httpClientBuilder.build()) {
            HttpPut httpPut = new HttpPut(loadUrl);
            List<String> cols = options.getColumns();
            if (null != cols && !cols.isEmpty() && Keys.StreamLoadFormat.CSV.equals(options.getStreamLoadFormat())) {
                httpPut.setHeader("columns", String.join(",", cols.stream().map(f -> String.format("`%s`", f)).collect(Collectors.toList())));
            }
            if (null != options.getLoadProps()) {
                for (Map.Entry<String, Object> entry : options.getLoadProps().entrySet()) {
                    httpPut.setHeader(entry.getKey(), String.valueOf(entry.getValue()));
                }
            }
            httpPut.setHeader("Expect", "100-continue");
            httpPut.setHeader("label", label);
            httpPut.setHeader("Content-Type", "application/x-www-form-urlencoded");
            httpPut.setHeader("Authorization", getBasicAuthHeader(options.getUsername(), options.getPassword()));
            httpPut.setEntity(new ByteArrayEntity(data));
            httpPut.setConfig(RequestConfig.custom().setRedirectsEnabled(true).build());
            try (CloseableHttpResponse resp = httpclient.execute(httpPut)) {
                HttpEntity respEntity = getHttpEntity(resp);
                if (respEntity == null) {
                    return null;
                }
                return (Map<String, Object>) JSON.parse(EntityUtils.toString(respEntity));
            }
        }
    }

    private String getBasicAuthHeader(String username, String password) {
        String auth = username + ":" + password;
        byte[] encodedAuth = Base64.encodeBase64(auth.getBytes(StandardCharsets.UTF_8));
        return new StringBuilder("Basic ").append(new String(encodedAuth)).toString();
    }

    private HttpEntity getHttpEntity(CloseableHttpResponse resp) {
        int code = resp.getStatusLine().getStatusCode();
        if (200 != code) {
            LOG.warn("Request failed with code:{}", code);
            return null;
        }
        HttpEntity respEntity = resp.getEntity();
        if (null == respEntity) {
            LOG.warn("Request failed with empty response.");
            return null;
        }
        return respEntity;
    }

    private String getLoadHost() {
        List<String> hostList = options.getLoadUrlList();
        long tmp = pos + hostList.size();
        for (; pos < tmp; pos++) {
            String host = new StringBuilder("http://").append(hostList.get((int) (pos % hostList.size()))).toString();
            if (checkConnection(host)) {
                return host;
            }
        }
        return null;
    }

    private boolean checkConnection(String host) {
        try {
            URL url = new URL(host);
            HttpURLConnection co = (HttpURLConnection) url.openConnection();
            co.setConnectTimeout(5000);
            co.connect();
            co.disconnect();
            return true;
        } catch (Exception e1) {
            e1.printStackTrace();
            return false;
        }
    }
}
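A note on `getLoadHost()`: because the loop returns before incrementing `pos`, a healthy host is reused across calls, and `pos` only advances past hosts that fail the connectivity check. A minimal sketch of that selection loop, with a toy health check in place of the real TCP connect:

```java
import java.util.Arrays;
import java.util.List;

public class LoadHostSelectionDemo {
    private long pos = 0;
    private final List<String> hosts;

    LoadHostSelectionDemo(List<String> hosts) {
        this.hosts = hosts;
    }

    // Same structure as getLoadHost(): start where the last call left off,
    // try each host at most once, return the first that passes the check.
    String next() {
        long end = pos + hosts.size();
        for (; pos < end; pos++) {
            String host = hosts.get((int) (pos % hosts.size()));
            if (isHealthy(host)) {
                return host; // pos is NOT advanced on success
            }
        }
        return null; // every host failed the check
    }

    private boolean isHealthy(String host) {
        return !host.startsWith("dead"); // stand-in for the real connect attempt
    }

    public static void main(String[] args) {
        LoadHostSelectionDemo sel = new LoadHostSelectionDemo(Arrays.asList("dead-fe:8030", "fe2:8030"));
        System.out.println(sel.next()); // fe2:8030 (skipped the dead host)
        System.out.println(sel.next()); // fe2:8030 again (sticky until it fails)
    }
}
```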
@ -0,0 +1,105 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.util.RdbmsException;
import com.alibaba.datax.plugin.rdbms.writer.Constant;
import com.alibaba.druid.sql.parser.ParserException;
import com.google.common.base.Strings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * JDBC utilities.
 */
public class DorisUtil {
    private static final Logger LOG = LoggerFactory.getLogger(DorisUtil.class);

    private DorisUtil() {}

    public static List<String> getDorisTableColumns(Connection conn, String databaseName, String tableName) {
        String currentSql = String.format("SELECT COLUMN_NAME FROM `information_schema`.`COLUMNS` WHERE `TABLE_SCHEMA` = '%s' AND `TABLE_NAME` = '%s' ORDER BY `ORDINAL_POSITION` ASC;", databaseName, tableName);
        List<String> columns = new ArrayList<>();
        ResultSet rs = null;
        try {
            rs = DBUtil.query(conn, currentSql);
            while (DBUtil.asyncResultSetNext(rs)) {
                String colName = rs.getString("COLUMN_NAME");
                columns.add(colName);
            }
            return columns;
        } catch (Exception e) {
            throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null);
        } finally {
            DBUtil.closeDBResources(rs, null, null);
        }
    }

    public static List<String> renderPreOrPostSqls(List<String> preOrPostSqls, String tableName) {
        if (null == preOrPostSqls) {
            return Collections.emptyList();
        }
        List<String> renderedSqls = new ArrayList<>();
        for (String sql : preOrPostSqls) {
            if (!Strings.isNullOrEmpty(sql)) {
                renderedSqls.add(sql.replace(Constant.TABLE_NAME_PLACEHOLDER, tableName));
            }
        }
        return renderedSqls;
    }

    public static void executeSqls(Connection conn, List<String> sqls) {
        Statement stmt = null;
        String currentSql = null;
        try {
            stmt = conn.createStatement();
            for (String sql : sqls) {
                currentSql = sql;
                DBUtil.executeSqlWithoutResultSet(stmt, sql);
            }
        } catch (Exception e) {
            throw RdbmsException.asQueryException(DataBaseType.MySql, e, currentSql, null, null);
        } finally {
            DBUtil.closeDBResources(null, stmt, null);
        }
    }

    public static void preCheckPrePareSQL(Keys options) {
        String table = options.getTable();
        List<String> preSqls = options.getPreSqlList();
        List<String> renderedPreSqls = DorisUtil.renderPreOrPostSqls(preSqls, table);
        if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
            LOG.info("Begin to preCheck preSqls:[{}].", String.join(";", renderedPreSqls));
            for (String sql : renderedPreSqls) {
                try {
                    DBUtil.sqlValid(sql, DataBaseType.MySql);
                } catch (ParserException e) {
                    throw RdbmsException.asPreSQLParserException(DataBaseType.MySql, e, sql);
                }
            }
        }
    }

    public static void preCheckPostSQL(Keys options) {
        String table = options.getTable();
        List<String> postSqls = options.getPostSqlList();
        List<String> renderedPostSqls = DorisUtil.renderPreOrPostSqls(postSqls, table);
        if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
            LOG.info("Begin to preCheck postSqls:[{}].", String.join(";", renderedPostSqls));
            for (String sql : renderedPostSqls) {
                try {
                    DBUtil.sqlValid(sql, DataBaseType.MySql);
                } catch (ParserException e) {
                    throw RdbmsException.asPostSQLParserException(DataBaseType.MySql, e, sql);
                }
            }
        }
    }
}
@ -0,0 +1,164 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;

/**
 * doris data writer
 */
public class DorisWriter extends Writer {

    public static class Job extends Writer.Job {

        private static final Logger LOG = LoggerFactory.getLogger(Job.class);
        private Configuration originalConfig = null;
        private Keys options;

        @Override
        public void init() {
            this.originalConfig = super.getPluginJobConf();
            options = new Keys(super.getPluginJobConf());
            options.doPretreatment();
        }

        @Override
        public void preCheck() {
            this.init();
            DorisUtil.preCheckPrePareSQL(options);
            DorisUtil.preCheckPostSQL(options);
        }

        @Override
        public void prepare() {
            String username = options.getUsername();
            String password = options.getPassword();
            String jdbcUrl = options.getJdbcUrl();
            List<String> renderedPreSqls = DorisUtil.renderPreOrPostSqls(options.getPreSqlList(), options.getTable());
            if (null != renderedPreSqls && !renderedPreSqls.isEmpty()) {
                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
                LOG.info("Begin to execute preSqls:[{}]. context info:{}.", String.join(";", renderedPreSqls), jdbcUrl);
                DorisUtil.executeSqls(conn, renderedPreSqls);
                DBUtil.closeDBResources(null, null, conn);
            }
        }

        @Override
        public List<Configuration> split(int mandatoryNumber) {
            List<Configuration> configurations = new ArrayList<>(mandatoryNumber);
            for (int i = 0; i < mandatoryNumber; i++) {
                configurations.add(originalConfig);
            }
            return configurations;
        }

        @Override
        public void post() {
            String username = options.getUsername();
            String password = options.getPassword();
            String jdbcUrl = options.getJdbcUrl();
            List<String> renderedPostSqls = DorisUtil.renderPreOrPostSqls(options.getPostSqlList(), options.getTable());
            if (null != renderedPostSqls && !renderedPostSqls.isEmpty()) {
                Connection conn = DBUtil.getConnection(DataBaseType.MySql, jdbcUrl, username, password);
                LOG.info("Start to execute postSqls:[{}]. context info:{}.", String.join(";", renderedPostSqls), jdbcUrl);
                DorisUtil.executeSqls(conn, renderedPostSqls);
                DBUtil.closeDBResources(null, null, conn);
            }
        }

        @Override
        public void destroy() {
        }

    }

    public static class Task extends Writer.Task {
        private DorisWriterManager writerManager;
        private Keys options;
        private DorisCodec rowCodec;

        @Override
        public void init() {
            options = new Keys(super.getPluginJobConf());
            if (options.isWildcardColumn()) {
                Connection conn = DBUtil.getConnection(DataBaseType.MySql, options.getJdbcUrl(), options.getUsername(), options.getPassword());
                List<String> columns = DorisUtil.getDorisTableColumns(conn, options.getDatabase(), options.getTable());
                options.setInfoCchemaColumns(columns);
            }
            writerManager = new DorisWriterManager(options);
            rowCodec = DorisCodecFactory.createCodec(options);
        }

        @Override
        public void prepare() {
        }

        public void startWrite(RecordReceiver recordReceiver) {
            try {
                Record record;
                while ((record = recordReceiver.getFromReader()) != null) {
                    if (record.getColumnNumber() != options.getColumns().size()) {
                        throw DataXException
                                .asDataXException(
                                        DBUtilErrorCode.CONF_ERROR,
                                        String.format(
                                                "There is an error in the column configuration information. " +
                                                        "This is because you have configured a task where the number of fields to be read from the source:%s " +
                                                        "is not equal to the number of fields to be written to the destination table:%s. " +
                                                        "Please check your configuration and make changes.",
                                                record.getColumnNumber(),
                                                options.getColumns().size()));
                    }
                    writerManager.writeRecord(rowCodec.codec(record));
                }
            } catch (Exception e) {
                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
            }
        }

        @Override
        public void post() {
            try {
                writerManager.close();
            } catch (Exception e) {
                throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, e);
            }
        }

        @Override
        public void destroy() {}

        @Override
        public boolean supportFailOver() {
            return false;
        }
    }
}
@ -0,0 +1,29 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import java.io.IOException;
import java.util.Map;

public class DorisWriterExcetion extends IOException {

    private final Map<String, Object> response;
    private boolean reCreateLabel;

    public DorisWriterExcetion(String message, Map<String, Object> response) {
        super(message);
        this.response = response;
    }

    public DorisWriterExcetion(String message, Map<String, Object> response, boolean reCreateLabel) {
        super(message);
        this.response = response;
        this.reCreateLabel = reCreateLabel;
    }

    public Map<String, Object> getFailedResponse() {
        return response;
    }

    public boolean needReCreateLabel() {
        return reCreateLabel;
    }
}
@ -0,0 +1,192 @@
|
||||
package com.alibaba.datax.plugin.writer.doriswriter;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import org.apache.commons.lang3.concurrent.BasicThreadFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class DorisWriterManager {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(DorisWriterManager.class);
|
||||
|
||||
private final DorisStreamLoadObserver visitor;
|
||||
private final Keys options;
|
||||
private final List<byte[]> buffer = new ArrayList<> ();
|
||||
private int batchCount = 0;
|
||||
private long batchSize = 0;
|
||||
private volatile boolean closed = false;
|
||||
private volatile Exception flushException;
|
||||
private final LinkedBlockingDeque< WriterTuple > flushQueue;
|
||||
private ScheduledExecutorService scheduler;
|
||||
private ScheduledFuture<?> scheduledFuture;
|
||||
|
||||
public DorisWriterManager( Keys options) {
|
||||
this.options = options;
|
||||
this.visitor = new DorisStreamLoadObserver (options);
|
||||
flushQueue = new LinkedBlockingDeque<>(options.getFlushQueueLength());
|
||||
this.startScheduler();
|
||||
this.startAsyncFlushing();
|
||||
}
|
||||
|
||||
public void startScheduler() {
|
||||
stopScheduler();
|
||||
this.scheduler = Executors.newScheduledThreadPool(1, new BasicThreadFactory.Builder().namingPattern("Doris-interval-flush").daemon(true).build());
|
||||
this.scheduledFuture = this.scheduler.schedule(() -> {
|
||||
            synchronized (DorisWriterManager.this) {
                if (!closed) {
                    try {
                        String label = createBatchLabel();
                        LOG.info(String.format("Doris interval sinking triggered: label[%s].", label));
                        if (batchCount == 0) {
                            startScheduler();
                        }
                        flush(label, false);
                    } catch (Exception e) {
                        flushException = e;
                    }
                }
            }
        }, options.getFlushInterval(), TimeUnit.MILLISECONDS);
    }

    public void stopScheduler() {
        if (this.scheduledFuture != null) {
            scheduledFuture.cancel(false);
            this.scheduler.shutdown();
        }
    }

    public final synchronized void writeRecord(String record) throws IOException {
        checkFlushException();
        try {
            byte[] bts = record.getBytes(StandardCharsets.UTF_8);
            buffer.add(bts);
            batchCount++;
            batchSize += bts.length;
            if (batchCount >= options.getBatchRows() || batchSize >= options.getBatchSize()) {
                String label = createBatchLabel();
                LOG.debug(String.format("Doris buffer sinking triggered: rows[%d] label[%s].", batchCount, label));
                flush(label, false);
            }
        } catch (Exception e) {
            throw new IOException("Writing records to Doris failed.", e);
        }
    }

    public synchronized void flush(String label, boolean waitUntilDone) throws Exception {
        checkFlushException();
        if (batchCount == 0) {
            if (waitUntilDone) {
                waitAsyncFlushingDone();
            }
            return;
        }
        flushQueue.put(new WriterTuple(label, batchSize, new ArrayList<>(buffer)));
        if (waitUntilDone) {
            // wait for the last flush to be taken and processed
            waitAsyncFlushingDone();
        }
        buffer.clear();
        batchCount = 0;
        batchSize = 0;
    }

    public synchronized void close() {
        if (!closed) {
            closed = true;
            try {
                String label = createBatchLabel();
                if (batchCount > 0) {
                    LOG.debug(String.format("Doris sink is about to close: label[%s].", label));
                }
                flush(label, true);
            } catch (Exception e) {
                throw new RuntimeException("Writing records to Doris failed.", e);
            }
        }
        checkFlushException();
    }

    public String createBatchLabel() {
        StringBuilder sb = new StringBuilder();
        if (!Strings.isNullOrEmpty(options.getLabelPrefix())) {
            sb.append(options.getLabelPrefix());
        }
        return sb.append(UUID.randomUUID().toString())
                .toString();
    }

    private void startAsyncFlushing() {
        // start the background flush thread
        Thread flushThread = new Thread(new Runnable() {
            public void run() {
                while (true) {
                    try {
                        asyncFlush();
                    } catch (Exception e) {
                        flushException = e;
                    }
                }
            }
        });
        flushThread.setDaemon(true);
        flushThread.start();
    }

    private void waitAsyncFlushingDone() throws InterruptedException {
        // drain earlier flushes by enqueueing empty-label sentinels
        for (int i = 0; i <= options.getFlushQueueLength(); i++) {
            flushQueue.put(new WriterTuple("", 0L, null));
        }
        checkFlushException();
    }

    private void asyncFlush() throws Exception {
        WriterTuple flushData = flushQueue.take();
        if (Strings.isNullOrEmpty(flushData.getLabel())) {
            return;
        }
        stopScheduler();
        LOG.debug(String.format("Async stream load: rows[%d] bytes[%d] label[%s].", flushData.getRows().size(), flushData.getBytes(), flushData.getLabel()));
        for (int i = 0; i <= options.getMaxRetries(); i++) {
            try {
                // flush to Doris with stream load
                visitor.streamLoad(flushData);
                LOG.info(String.format("Async stream load finished: label[%s].", flushData.getLabel()));
                startScheduler();
                break;
            } catch (Exception e) {
                LOG.warn("Failed to flush batch data to Doris, retry times = {}", i, e);
                if (i >= options.getMaxRetries()) {
                    throw new IOException(e);
                }
                if (e instanceof DorisWriterExcetion && ((DorisWriterExcetion) e).needReCreateLabel()) {
                    String newLabel = createBatchLabel();
                    LOG.warn(String.format("Batch label changed from [%s] to [%s]", flushData.getLabel(), newLabel));
                    flushData.setLabel(newLabel);
                }
                try {
                    Thread.sleep(1000L * Math.min(i + 1, 10));
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    throw new IOException("Unable to flush; interrupted while doing another attempt", e);
                }
            }
        }
    }

    private void checkFlushException() {
        if (flushException != null) {
            throw new RuntimeException("Writing records to Doris failed.", flushException);
        }
    }
}
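The drain in waitAsyncFlushingDone only works because flushQueue is bounded: once the caller manages to enqueue flushQueueLength + 1 empty-label sentinels, every real batch queued before them must already have been taken (and, since the flush thread processes serially, fully loaded). A stripped-down sketch of the same sentinel pattern, not part of this commit; the queue capacity here is a stand-in, assuming flushQueue is created with capacity flushQueueLength:

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class DrainDemo {
    public static void main(String[] args) throws InterruptedException {
        int queueLength = 2; // stands in for options.getFlushQueueLength()
        BlockingQueue<String> queue = new ArrayBlockingQueue<>(queueLength);
        Thread consumer = new Thread(() -> {
            try {
                while (true) {
                    String label = queue.take();
                    if (label.isEmpty()) {
                        continue; // sentinel: nothing to load
                    }
                    System.out.println("stream load " + label); // stands in for visitor.streamLoad(...)
                }
            } catch (InterruptedException ignored) {
            }
        });
        consumer.setDaemon(true);
        consumer.start();

        queue.put("batch-1");
        // Putting queueLength + 1 sentinels into a queue of capacity queueLength
        // can only complete after the consumer has taken, and therefore finished
        // processing, every batch queued before them.
        for (int i = 0; i <= queueLength; i++) {
            queue.put("");
        }
        System.out.println("all earlier batches have been handed off and processed");
    }
}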
@ -0,0 +1,177 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;

import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class Keys implements Serializable {

    private static final long serialVersionUID = 1L;
    private static final int MAX_RETRIES = 3;
    private static final int BATCH_ROWS = 500000;
    private static final long DEFAULT_FLUSH_INTERVAL = 30000;

    private static final String LOAD_PROPS_FORMAT = "format";

    public enum StreamLoadFormat {
        CSV, JSON;
    }

    private static final String USERNAME = "username";
    private static final String PASSWORD = "password";
    private static final String DATABASE = "connection[0].selectedDatabase";
    private static final String TABLE = "connection[0].table[0]";
    private static final String COLUMN = "column";
    private static final String PRE_SQL = "preSql";
    private static final String POST_SQL = "postSql";
    private static final String JDBC_URL = "connection[0].jdbcUrl";
    private static final String LABEL_PREFIX = "labelPrefix";
    private static final String MAX_BATCH_ROWS = "maxBatchRows";
    private static final String MAX_BATCH_SIZE = "batchSize";
    private static final String FLUSH_INTERVAL = "flushInterval";
    private static final String LOAD_URL = "loadUrl";
    private static final String FLUSH_QUEUE_LENGTH = "flushQueueLength";
    private static final String LOAD_PROPS = "loadProps";

    private static final String DEFAULT_LABEL_PREFIX = "datax_doris_writer_";

    private static final long DEFAULT_MAX_BATCH_SIZE = 90 * 1024 * 1024; // default 90MB

    private final Configuration options;

    private List<String> infoSchemaColumns;
    private List<String> userSetColumns;
    private boolean isWildcardColumn;

    public Keys(Configuration options) {
        this.options = options;
        this.userSetColumns = options.getList(COLUMN, String.class).stream().map(str -> str.replace("`", "")).collect(Collectors.toList());
        if (1 == options.getList(COLUMN, String.class).size() && "*".equals(options.getList(COLUMN, String.class).get(0))) {
            this.isWildcardColumn = true;
        }
    }

    public void doPretreatment() {
        validateRequired();
        validateStreamLoadUrl();
    }

    public String getJdbcUrl() {
        return options.getString(JDBC_URL);
    }

    public String getDatabase() {
        return options.getString(DATABASE);
    }

    public String getTable() {
        return options.getString(TABLE);
    }

    public String getUsername() {
        return options.getString(USERNAME);
    }

    public String getPassword() {
        return options.getString(PASSWORD);
    }

    public String getLabelPrefix() {
        String label = options.getString(LABEL_PREFIX);
        return null == label ? DEFAULT_LABEL_PREFIX : label;
    }

    public List<String> getLoadUrlList() {
        return options.getList(LOAD_URL, String.class);
    }

    public List<String> getColumns() {
        if (isWildcardColumn) {
            return this.infoSchemaColumns;
        }
        return this.userSetColumns;
    }

    public boolean isWildcardColumn() {
        return this.isWildcardColumn;
    }

    public void setInfoCchemaColumns(List<String> cols) {
        this.infoSchemaColumns = cols;
    }

    public List<String> getPreSqlList() {
        return options.getList(PRE_SQL, String.class);
    }

    public List<String> getPostSqlList() {
        return options.getList(POST_SQL, String.class);
    }

    public Map<String, Object> getLoadProps() {
        return options.getMap(LOAD_PROPS);
    }

    public int getMaxRetries() {
        return MAX_RETRIES;
    }

    public int getBatchRows() {
        Integer rows = options.getInt(MAX_BATCH_ROWS);
        return null == rows ? BATCH_ROWS : rows;
    }

    public long getBatchSize() {
        Long size = options.getLong(MAX_BATCH_SIZE);
        return null == size ? DEFAULT_MAX_BATCH_SIZE : size;
    }

    public long getFlushInterval() {
        Long interval = options.getLong(FLUSH_INTERVAL);
        return null == interval ? DEFAULT_FLUSH_INTERVAL : interval;
    }

    public int getFlushQueueLength() {
        Integer len = options.getInt(FLUSH_QUEUE_LENGTH);
        return null == len ? 1 : len;
    }

    public StreamLoadFormat getStreamLoadFormat() {
        Map<String, Object> loadProps = getLoadProps();
        if (null == loadProps) {
            return StreamLoadFormat.CSV;
        }
        if (loadProps.containsKey(LOAD_PROPS_FORMAT)
                && StreamLoadFormat.JSON.name().equalsIgnoreCase(String.valueOf(loadProps.get(LOAD_PROPS_FORMAT)))) {
            return StreamLoadFormat.JSON;
        }
        return StreamLoadFormat.CSV;
    }

    private void validateStreamLoadUrl() {
        List<String> urlList = getLoadUrlList();
        for (String host : urlList) {
            if (host.split(":").length < 2) {
                throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR,
                        "The format of loadUrl is not correct, please enter: [`fe_ip:fe_http_port;fe_ip:fe_http_port`].");
            }
        }
    }

    private void validateRequired() {
        final String[] requiredOptionKeys = new String[]{
                USERNAME,
                DATABASE,
                TABLE,
                COLUMN,
                LOAD_URL
        };
        for (String optionKey : requiredOptionKeys) {
            options.getNecessaryValue(optionKey, DBUtilErrorCode.REQUIRED_VALUE);
        }
    }
}
@ -0,0 +1,20 @@
package com.alibaba.datax.plugin.writer.doriswriter;

import java.util.List;

public class WriterTuple {
    private String label;
    private Long bytes;
    private List<byte[]> rows;

    public WriterTuple(String label, Long bytes, List<byte[]> rows) {
        this.label = label;
        this.rows = rows;
        this.bytes = bytes;
    }

    public String getLabel() { return label; }
    public void setLabel(String label) { this.label = label; }
    public Long getBytes() { return bytes; }
    public List<byte[]> getRows() { return rows; }
}
doriswriter/src/main/resources/plugin.json Normal file
@ -0,0 +1,6 @@
{
    "name": "doriswriter",
    "class": "com.alibaba.datax.plugin.writer.doriswriter.DorisWriter",
    "description": "apache doris writer plugin",
    "developer": "apache doris"
}
doriswriter/src/main/resources/plugin_job_template.json Normal file
@ -0,0 +1,20 @@
{
    "name": "doriswriter",
    "parameter": {
        "username": "",
        "password": "",
        "column": [],
        "preSql": [],
        "postSql": [],
        "beLoadUrl": [],
        "loadUrl": [],
        "loadProps": {},
        "connection": [
            {
                "jdbcUrl": "",
                "selectedDatabase": "",
                "table": []
            }
        ]
    }
}
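The template's nested paths map directly onto the Configuration keys hard-coded in Keys above (connection[0].jdbcUrl, connection[0].selectedDatabase, connection[0].table[0]). A minimal sketch, not part of this commit, of how a filled-in job fragment resolves through Keys; every value is a placeholder:

package com.alibaba.datax.plugin.writer.doriswriter;

import com.alibaba.datax.common.util.Configuration;

public class KeysDemo {
    public static void main(String[] args) {
        // hypothetical job fragment; all values below are placeholders
        Configuration conf = Configuration.from(
                "{\"username\":\"root\",\"password\":\"\"," +
                "\"column\":[\"*\"]," +
                "\"loadUrl\":[\"127.0.0.1:8030\"]," +
                "\"loadProps\":{\"format\":\"json\"}," +
                "\"connection\":[{\"jdbcUrl\":\"jdbc:mysql://127.0.0.1:9030/demo\"," +
                "\"selectedDatabase\":\"demo\",\"table\":[\"t1\"]}]}");
        Keys keys = new Keys(conf);
        System.out.println(keys.getDatabase());         // demo
        System.out.println(keys.getTable());            // t1
        System.out.println(keys.isWildcardColumn());    // true: a single "*" defers to the table schema
        System.out.println(keys.getStreamLoadFormat()); // JSON (falls back to CSV without loadProps.format)
    }
}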
@ -35,12 +35,12 @@
        <dependency>
            <groupId>io.searchbox</groupId>
            <artifactId>jest-common</artifactId>
            <version>2.4.0</version>
            <version>6.3.1</version>
        </dependency>
        <dependency>
            <groupId>io.searchbox</groupId>
            <artifactId>jest</artifactId>
            <version>2.4.0</version>
            <version>6.3.1</version>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
@ -1,236 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.searchbox.action.Action;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.JestResult;
import io.searchbox.client.config.HttpClientConfig;
import io.searchbox.client.config.HttpClientConfig.Builder;
import io.searchbox.core.Bulk;
import io.searchbox.indices.CreateIndex;
import io.searchbox.indices.DeleteIndex;
import io.searchbox.indices.IndicesExists;
import io.searchbox.indices.aliases.*;
import io.searchbox.indices.mapping.PutMapping;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Created by xiongfeng.bxf on 17/2/8.
 */
public class ESClient {
    private static final Logger log = LoggerFactory.getLogger(ESClient.class);

    private JestClient jestClient;

    public JestClient getClient() {
        return jestClient;
    }

    public void createClient(String endpoint,
                             String user,
                             String passwd,
                             boolean multiThread,
                             int readTimeout,
                             boolean compression,
                             boolean discovery) {

        JestClientFactory factory = new JestClientFactory();
        Builder httpClientConfig = new HttpClientConfig
                .Builder(endpoint)
                .setPreemptiveAuth(new HttpHost(endpoint))
                .multiThreaded(multiThread)
                .connTimeout(30000)
                .readTimeout(readTimeout)
                .maxTotalConnection(200)
                .requestCompressionEnabled(compression)
                .discoveryEnabled(discovery)
                .discoveryFrequency(5L, TimeUnit.MINUTES);

        if (!("".equals(user) || "".equals(passwd))) {
            httpClientConfig.defaultCredentials(user, passwd);
        }

        factory.setHttpClientConfig(httpClientConfig.build());

        jestClient = factory.getObject();
    }

    public boolean indicesExists(String indexName) throws Exception {
        boolean isIndicesExists = false;
        JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build());
        if (rst.isSucceeded()) {
            isIndicesExists = true;
        } else {
            switch (rst.getResponseCode()) {
                case 404:
                    isIndicesExists = false;
                    break;
                case 401:
                    // no permission to access
                default:
                    log.warn(rst.getErrorMessage());
                    break;
            }
        }
        return isIndicesExists;
    }

    public boolean deleteIndex(String indexName) throws Exception {
        log.info("delete index " + indexName);
        if (indicesExists(indexName)) {
            JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
            if (!rst.isSucceeded()) {
                return false;
            }
        } else {
            log.info("index cannot be found, skip delete " + indexName);
        }
        return true;
    }

    public boolean createIndex(String indexName, String typeName,
                               Object mappings, String settings, boolean dynamic) throws Exception {
        JestResult rst = null;
        if (!indicesExists(indexName)) {
            log.info("create index " + indexName);
            rst = jestClient.execute(
                    new CreateIndex.Builder(indexName)
                            .settings(settings)
                            .setParameter("master_timeout", "5m")
                            .build()
            );
            // index_already_exists_exception
            if (!rst.isSucceeded()) {
                if (getStatus(rst) == 400) {
                    log.info(String.format("index [%s] already exists", indexName));
                    return true;
                } else {
                    log.error(rst.getErrorMessage());
                    return false;
                }
            } else {
                log.info(String.format("create [%s] index success", indexName));
            }
        }

        int idx = 0;
        while (idx < 5) {
            if (indicesExists(indexName)) {
                break;
            }
            Thread.sleep(2000);
            idx++;
        }
        if (idx >= 5) {
            return false;
        }

        if (dynamic) {
            log.info("ignore mappings");
            return true;
        }
        log.info("create mappings for " + indexName + " " + mappings);
        rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings)
                .setParameter("master_timeout", "5m").build());
        if (!rst.isSucceeded()) {
            if (getStatus(rst) == 400) {
                log.info(String.format("index [%s] mappings already exists", indexName));
            } else {
                log.error(rst.getErrorMessage());
                return false;
            }
        } else {
            log.info(String.format("index [%s] put mappings success", indexName));
        }
        return true;
    }

    public JestResult execute(Action<JestResult> clientRequest) throws Exception {
        JestResult rst = jestClient.execute(clientRequest);
        if (!rst.isSucceeded()) {
            //log.warn(rst.getErrorMessage());
        }
        return rst;
    }

    public Integer getStatus(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        if (jsonObject.has("status")) {
            return jsonObject.get("status").getAsInt();
        }
        return 600;
    }

    public boolean isBulkResult(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        return jsonObject.has("items");
    }

    public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
        GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
        AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
        JestResult rst = jestClient.execute(getAliases);
        log.info(rst.getJsonString());
        List<AliasMapping> list = new ArrayList<AliasMapping>();
        if (rst.isSucceeded()) {
            JsonParser jp = new JsonParser();
            JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
            for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
                String tindex = entry.getKey();
                if (indexname.equals(tindex)) {
                    continue;
                }
                AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
                String s = new Gson().toJson(m.getData());
                log.info(s);
                if (needClean) {
                    list.add(m);
                }
            }
        }

        ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build();
        rst = jestClient.execute(modifyAliases);
        if (!rst.isSucceeded()) {
            log.error(rst.getErrorMessage());
            return false;
        }
        return true;
    }

    public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception {
        // es_rejected_execution_exception
        // illegal_argument_exception
        // cluster_block_exception
        JestResult rst = jestClient.execute(bulk.build());
        if (!rst.isSucceeded()) {
            log.warn(rst.getErrorMessage());
        }
        return rst;
    }

    /**
     * Close the JestClient.
     */
    public void closeJestClient() {
        if (jestClient != null) {
            jestClient.shutdownClient();
        }
    }
}
@ -1,65 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

/**
 * Created by xiongfeng.bxf on 17/3/2.
 */
public class ESColumn {

    private String name;//: "appkey",

    private String type;//": "TEXT",

    private String timezone;

    private String format;

    private Boolean array;

    public void setName(String name) {
        this.name = name;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setTimeZone(String timezone) {
        this.timezone = timezone;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public String getName() {
        return name;
    }

    public String getType() {
        return type;
    }

    public String getTimezone() {
        return timezone;
    }

    public String getFormat() {
        return format;
    }

    public void setTimezone(String timezone) {
        this.timezone = timezone;
    }

    public Boolean isArray() {
        return array;
    }

    public void setArray(Boolean array) {
        this.array = array;
    }

    public Boolean getArray() {
        return array;
    }
}
@ -1,460 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import io.searchbox.client.JestResult;
import io.searchbox.core.Bulk;
import io.searchbox.core.BulkResult;
import io.searchbox.core.Index;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.*;
import java.util.concurrent.Callable;

public class ESWriter extends Writer {
    private final static String WRITE_COLUMNS = "write_columns";

    public static class Job extends Writer.Job {
        private static final Logger log = LoggerFactory.getLogger(Job.class);

        private Configuration conf = null;

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
        }

        @Override
        public void prepare() {
            /**
             * Note: this method runs exactly once.
             * Best practice: do any work needed before the data sync here; remove it if unnecessary.
             */
            ESClient esClient = new ESClient();
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    false,
                    300000,
                    false,
                    false);

            String indexName = Key.getIndexName(conf);
            String typeName = Key.getTypeName(conf);
            boolean dynamic = Key.getDynamic(conf);
            String mappings = genMappings(typeName);
            String settings = JSONObject.toJSONString(
                    Key.getSettings(conf)
            );
            log.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings));

            try {
                boolean isIndicesExists = esClient.indicesExists(indexName);
                if (Key.isCleanup(this.conf) && isIndicesExists) {
                    esClient.deleteIndex(indexName);
                }
                // force creation; an already-existing index is ignored internally
                if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) {
                    throw new IOException("create index or mapping failed");
                }
            } catch (Exception ex) {
                throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString());
            }
            esClient.closeJestClient();
        }

        private String genMappings(String typeName) {
            String mappings = null;
            Map<String, Object> propMap = new HashMap<String, Object>();
            List<ESColumn> columnList = new ArrayList<ESColumn>();

            List column = conf.getList("column");
            if (column != null) {
                for (Object col : column) {
                    JSONObject jo = JSONObject.parseObject(col.toString());
                    String colName = jo.getString("name");
                    String colTypeStr = jo.getString("type");
                    if (colTypeStr == null) {
                        throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
                    }
                    ESFieldType colType = ESFieldType.getESFieldType(colTypeStr);
                    if (colType == null) {
                        throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
                    }

                    ESColumn columnItem = new ESColumn();

                    if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) {
                        // compatibility with existing versions
                        colType = ESFieldType.ID;
                        colTypeStr = "id";
                    }

                    columnItem.setName(colName);
                    columnItem.setType(colTypeStr);

                    if (colType == ESFieldType.ID) {
                        columnList.add(columnItem);
                        // id columns carry no properties
                        continue;
                    }

                    Boolean array = jo.getBoolean("array");
                    if (array != null) {
                        columnItem.setArray(array);
                    }
                    Map<String, Object> field = new HashMap<String, Object>();
                    field.put("type", colTypeStr);
                    //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
                    // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
                    field.put("doc_values", jo.getBoolean("doc_values"));
                    field.put("ignore_above", jo.getInteger("ignore_above"));
                    field.put("index", jo.getBoolean("index"));

                    switch (colType) {
                        case STRING:
                            // compatibility with the string type of pre-5.x ES
                            break;
                        case KEYWORD:
                            // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
                            field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
                        case TEXT:
                            field.put("analyzer", jo.getString("analyzer"));
                            // reduce disk usage; this also improves indexing performance
                            // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
                            field.put("norms", jo.getBoolean("norms"));
                            field.put("index_options", jo.getBoolean("index_options"));
                            break;
                        case DATE:
                            columnItem.setTimeZone(jo.getString("timezone"));
                            columnItem.setFormat(jo.getString("format"));
                            // dates are later normalized to zoned standard time, so no format needs to be passed to ES
                            break;
                        case GEO_SHAPE:
                            field.put("tree", jo.getString("tree"));
                            field.put("precision", jo.getString("precision"));
                        default:
                            break;
                    }
                    propMap.put(colName, field);
                    columnList.add(columnItem);
                }
            }

            conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList));

            log.info(JSON.toJSONString(columnList));

            Map<String, Object> rootMappings = new HashMap<String, Object>();
            Map<String, Object> typeMappings = new HashMap<String, Object>();
            typeMappings.put("properties", propMap);
            rootMappings.put(typeName, typeMappings);

            mappings = JSON.toJSONString(rootMappings);

            if (mappings == null || "".equals(mappings)) {
                throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
            }

            return mappings;
        }

        @Override
        public List<Configuration> split(int mandatoryNumber) {
            List<Configuration> configurations = new ArrayList<Configuration>(mandatoryNumber);
            for (int i = 0; i < mandatoryNumber; i++) {
                configurations.add(conf);
            }
            return configurations;
        }

        @Override
        public void post() {
            ESClient esClient = new ESClient();
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    false,
                    300000,
                    false,
                    false);
            String alias = Key.getAlias(conf);
            if (!"".equals(alias)) {
                log.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf)));
                try {
                    esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
                } catch (IOException e) {
                    throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e);
                }
            }
        }

        @Override
        public void destroy() {

        }
    }

    public static class Task extends Writer.Task {

        private static final Logger log = LoggerFactory.getLogger(Job.class);

        private Configuration conf;

        ESClient esClient = null;
        private List<ESFieldType> typeList;
        private List<ESColumn> columnList;

        private int trySize;
        private int batchSize;
        private String index;
        private String type;
        private String splitter;

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
            index = Key.getIndexName(conf);
            type = Key.getTypeName(conf);

            trySize = Key.getTrySize(conf);
            batchSize = Key.getBatchSize(conf);
            splitter = Key.getSplitter(conf);
            columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference<List<ESColumn>>() {
            });

            typeList = new ArrayList<ESFieldType>();

            for (ESColumn col : columnList) {
                typeList.add(ESFieldType.getESFieldType(col.getType()));
            }

            esClient = new ESClient();
        }

        @Override
        public void prepare() {
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    Key.isMultiThread(conf),
                    Key.getTimeout(conf),
                    Key.isCompression(conf),
                    Key.isDiscovery(conf));
        }

        @Override
        public void startWrite(RecordReceiver recordReceiver) {
            List<Record> writerBuffer = new ArrayList<Record>(this.batchSize);
            Record record = null;
            long total = 0;
            while ((record = recordReceiver.getFromReader()) != null) {
                writerBuffer.add(record);
                if (writerBuffer.size() >= this.batchSize) {
                    total += doBatchInsert(writerBuffer);
                    writerBuffer.clear();
                }
            }

            if (!writerBuffer.isEmpty()) {
                total += doBatchInsert(writerBuffer);
                writerBuffer.clear();
            }

            String msg = String.format("task end, write size: %d", total);
            getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
            log.info(msg);
            esClient.closeJestClient();
        }

        private String getDateStr(ESColumn esColumn, Column column) {
            DateTime date = null;
            DateTimeZone dtz = DateTimeZone.getDefault();
            if (esColumn.getTimezone() != null) {
                // for all time zones see http://www.joda.org/joda-time/timezones.html
                dtz = DateTimeZone.forID(esColumn.getTimezone());
            }
            if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) {
                DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat());
                date = formatter.withZone(dtz).parseDateTime(column.asString());
                return date.toString();
            } else if (column.getType() == Column.Type.DATE) {
                date = new DateTime(column.asLong(), dtz);
                return date.toString();
            } else {
                return column.asString();
            }
        }

        private long doBatchInsert(final List<Record> writerBuffer) {
            Map<String, Object> data = null;
            final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
            for (Record record : writerBuffer) {
                data = new HashMap<String, Object>();
                String id = null;
                for (int i = 0; i < record.getColumnNumber(); i++) {
                    Column column = record.getColumn(i);
                    String columnName = columnList.get(i).getName();
                    ESFieldType columnType = typeList.get(i);
                    // an array column must arrive as a string
                    if (columnList.get(i).isArray() != null && columnList.get(i).isArray()) {
                        String[] dataList = column.asString().split(splitter);
                        if (!columnType.equals(ESFieldType.DATE)) {
                            data.put(columnName, dataList);
                        } else {
                            for (int pos = 0; pos < dataList.length; pos++) {
                                dataList[pos] = getDateStr(columnList.get(i), column);
                            }
                            data.put(columnName, dataList);
                        }
                    } else {
                        switch (columnType) {
                            case ID:
                                if (id != null) {
                                    id += record.getColumn(i).asString();
                                } else {
                                    id = record.getColumn(i).asString();
                                }
                                break;
                            case DATE:
                                try {
                                    String dateStr = getDateStr(columnList.get(i), column);
                                    data.put(columnName, dateStr);
                                } catch (Exception e) {
                                    getTaskPluginCollector().collectDirtyRecord(record, String.format("failed to parse date [%s:%s] exception: %s", columnName, column.toString(), e.toString()));
                                }
                                break;
                            case KEYWORD:
                            case STRING:
                            case TEXT:
                            case IP:
                            case GEO_POINT:
                                data.put(columnName, column.asString());
                                break;
                            case BOOLEAN:
                                data.put(columnName, column.asBoolean());
                                break;
                            case BYTE:
                            case BINARY:
                                data.put(columnName, column.asBytes());
                                break;
                            case LONG:
                                data.put(columnName, column.asLong());
                                break;
                            case INTEGER:
                                data.put(columnName, column.asBigInteger());
                                break;
                            case SHORT:
                                data.put(columnName, column.asBigInteger());
                                break;
                            case FLOAT:
                            case DOUBLE:
                                data.put(columnName, column.asDouble());
                                break;
                            case NESTED:
                            case OBJECT:
                            case GEO_SHAPE:
                                data.put(columnName, JSON.parse(column.asString()));
                                break;
                            default:
                                getTaskPluginCollector().collectDirtyRecord(record, "type error: unsupported type " + columnType + " for column " + columnName);
                        }
                    }
                }

                if (id == null) {
                    //id = UUID.randomUUID().toString();
                    bulkaction.addAction(new Index.Builder(data).build());
                } else {
                    bulkaction.addAction(new Index.Builder(data).id(id).build());
                }
            }

            try {
                return RetryUtil.executeWithRetry(new Callable<Integer>() {
                    @Override
                    public Integer call() throws Exception {
                        JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
                        if (jestResult.isSucceeded()) {
                            return writerBuffer.size();
                        }

                        String msg = String.format("response code: [%d] error: [%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
                        log.warn(msg);
                        if (esClient.isBulkResult(jestResult)) {
                            BulkResult brst = (BulkResult) jestResult;
                            List<BulkResult.BulkResultItem> failedItems = brst.getFailedItems();
                            for (BulkResult.BulkResultItem item : failedItems) {
                                if (item.status != 400) {
                                    // 400 BAD_REQUEST: only data errors may be ignored; request errors must not be
                                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
                                } else {
                                    // throw if the user chose not to ignore parse errors; the default is to ignore them
                                    if (!Key.isIgnoreParseError(conf)) {
                                        throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
                                    }
                                }
                            }

                            List<BulkResult.BulkResultItem> items = brst.getItems();
                            for (int idx = 0; idx < items.size(); ++idx) {
                                BulkResult.BulkResultItem item = items.get(idx);
                                if (item.error != null && !"".equals(item.error)) {
                                    getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error));
                                }
                            }
                            return writerBuffer.size() - brst.getFailedItems().size();
                        } else {
                            Integer status = esClient.getStatus(jestResult);
                            switch (status) {
                                case 429: // TOO_MANY_REQUESTS
                                    log.warn("server responded with too many requests, so auto reduce speed");
                                    break;
                            }
                            throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
                        }
                    }
                }, trySize, 60000L, true);
            } catch (Exception e) {
                if (Key.isIgnoreWriteError(this.conf)) {
                    log.warn(String.format("write failed after [%d] retries; ignoring the error and continuing", trySize));
                } else {
                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
                }
            }
            return 0;
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
            esClient.closeJestClient();
        }
    }
}
@ -1,37 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.spi.ErrorCode;

public enum ESWriterErrorCode implements ErrorCode {
    BAD_CONFIG_VALUE("ESWriter-00", "The value you configured is not valid."),
    ES_INDEX_DELETE("ESWriter-01", "Delete index error."),
    ES_INDEX_CREATE("ESWriter-02", "Index creation error."),
    ES_MAPPINGS("ESWriter-03", "The mappings error."),
    ES_INDEX_INSERT("ESWriter-04", "Insert data error."),
    ES_ALIAS_MODIFY("ESWriter-05", "Alias modification error."),
    ;

    private final String code;
    private final String description;

    ESWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
@ -0,0 +1,312 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfo;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfoResult;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.PutMapping7;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.searchbox.action.Action;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.JestResult;
import io.searchbox.client.config.HttpClientConfig;
import io.searchbox.client.config.HttpClientConfig.Builder;
import io.searchbox.core.Bulk;
import io.searchbox.indices.CreateIndex;
import io.searchbox.indices.DeleteIndex;
import io.searchbox.indices.IndicesExists;
import io.searchbox.indices.aliases.*;
import io.searchbox.indices.mapping.GetMapping;
import io.searchbox.indices.mapping.PutMapping;

import io.searchbox.indices.settings.GetSettings;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Created by xiongfeng.bxf on 17/2/8.
 */
public class ElasticSearchClient {
    private static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchClient.class);

    private JestClient jestClient;
    private Configuration conf;

    public JestClient getClient() {
        return jestClient;
    }

    public ElasticSearchClient(Configuration conf) {
        this.conf = conf;
        String endpoint = Key.getEndpoint(conf);
        String user = Key.getUsername(conf);
        String passwd = Key.getPassword(conf);
        boolean multiThread = Key.isMultiThread(conf);
        int readTimeout = Key.getTimeout(conf);
        boolean compression = Key.isCompression(conf);
        boolean discovery = Key.isDiscovery(conf);
        String discoveryFilter = Key.getDiscoveryFilter(conf);
        int totalConnection = this.conf.getInt("maxTotalConnection", 200);
        JestClientFactory factory = new JestClientFactory();
        Builder httpClientConfig = new HttpClientConfig
                .Builder(endpoint)
                // .setPreemptiveAuth(new HttpHost(endpoint))
                .multiThreaded(multiThread)
                .connTimeout(readTimeout)
                .readTimeout(readTimeout)
                .maxTotalConnection(totalConnection)
                .requestCompressionEnabled(compression)
                .discoveryEnabled(discovery)
                .discoveryFrequency(5L, TimeUnit.MINUTES)
                .discoveryFilter(discoveryFilter);
        if (!(StringUtils.isBlank(user) || StringUtils.isBlank(passwd))) {
            // use basic auth when credentials are provided; otherwise connect anonymously
            httpClientConfig.defaultCredentials(user, passwd);
        }
        factory.setHttpClientConfig(httpClientConfig.build());
        this.jestClient = factory.getObject();
    }

    public boolean indicesExists(String indexName) throws Exception {
        boolean isIndicesExists = false;
        JestResult rst = execute(new IndicesExists.Builder(indexName).build());
        if (rst.isSucceeded()) {
            isIndicesExists = true;
        } else {
            LOGGER.warn("IndicesExists got ResponseCode: {} ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
            switch (rst.getResponseCode()) {
                case 404:
                    isIndicesExists = false;
                    break;
                case 401:
                    // no permission to access
                default:
                    LOGGER.warn(rst.getErrorMessage());
                    break;
            }
        }
        return isIndicesExists;
    }

    public boolean deleteIndex(String indexName) throws Exception {
        LOGGER.info("delete index {}", indexName);
        if (indicesExists(indexName)) {
            JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
            if (!rst.isSucceeded()) {
                LOGGER.warn("DeleteIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
                return false;
            } else {
                LOGGER.info("delete index {} success", indexName);
            }
        } else {
            LOGGER.info("index cannot be found, skip delete index {}", indexName);
        }
        return true;
    }

    public boolean isGreaterOrEqualThan7() throws Exception {
        try {
            ClusterInfoResult result = execute(new ClusterInfo.Builder().build());
            LOGGER.info("ClusterInfoResult: {}", result.getJsonString());
            return result.isGreaterOrEqualThan7();
        } catch (Exception e) {
            LOGGER.warn(e.getMessage());
            return false;
        }
    }

    /**
     * Get the settings of an index.
     *
     * @param indexName index name
     * @return the settings as a JSON string
     */
    public String getIndexSettings(String indexName) {
        GetSettings.Builder builder = new GetSettings.Builder();
        builder.addIndex(indexName);
        GetSettings getSettings = builder.build();
        try {
            LOGGER.info("begin GetSettings for index: {}", indexName);
            JestResult result = this.execute(getSettings);
            return result.getJsonString();
        } catch (Exception e) {
            String message = "GetSettings for index error: " + e.getMessage();
            LOGGER.warn(message, e);
            throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_GET_SETTINGS, e.getMessage(), e);
        }
    }

    public boolean createIndexIfNotExists(String indexName, String typeName,
                                          Object mappings, String settings,
                                          boolean dynamic, boolean isGreaterOrEqualThan7) throws Exception {
        JestResult rst;
        if (!indicesExists(indexName)) {
            LOGGER.info("create index {}", indexName);
            rst = execute(
                    new CreateIndex.Builder(indexName)
                            .settings(settings)
                            .setParameter("master_timeout", Key.getMasterTimeout(this.conf))
                            .build()
            );
            // index_already_exists_exception
            if (!rst.isSucceeded()) {
                LOGGER.warn("CreateIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
                if (getStatus(rst) == 400) {
                    LOGGER.info("index {} already exists", indexName);
                    return true;
                } else {
                    return false;
                }
            } else {
                LOGGER.info("create {} index success", indexName);
            }
        }

        if (dynamic) {
            LOGGER.info("dynamic is true, ignore mappings");
            return true;
        }
        LOGGER.info("create mappings for {} {}", indexName, mappings);
        // from 7.x on, the PUT mapping URI must not carry a type and the mapping body must not be nested under one
        if (isGreaterOrEqualThan7) {
            rst = execute(new PutMapping7.Builder(indexName, mappings)
                    .setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
        } else {
            rst = execute(new PutMapping.Builder(indexName, typeName, mappings)
                    .setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
        }
        if (!rst.isSucceeded()) {
            LOGGER.error("PutMapping got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
            return false;
        } else {
            LOGGER.info("index {} put mappings success", indexName);
        }
        return true;
    }

    public <T extends JestResult> T execute(Action<T> clientRequest) throws IOException {
        T rst = jestClient.execute(clientRequest);
        if (!rst.isSucceeded()) {
            LOGGER.warn(rst.getJsonString());
        }
        return rst;
    }

    public Integer getStatus(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        if (jsonObject.has("status")) {
            return jsonObject.get("status").getAsInt();
        }
        return 600;
    }

    public boolean isBulkResult(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        return jsonObject.has("items");
    }

    public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
        GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
        AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
        JestResult rst = null;
        List<AliasMapping> list = new ArrayList<AliasMapping>();
        if (needClean) {
            rst = execute(getAliases);
            if (rst.isSucceeded()) {
                JsonParser jp = new JsonParser();
                JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
                for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
                    String tindex = entry.getKey();
                    if (indexname.equals(tindex)) {
                        continue;
                    }
                    AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
                    String s = new Gson().toJson(m.getData());
                    LOGGER.info(s);
                    list.add(m);
                }
            }
        }

        ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build();
        rst = execute(modifyAliases);
        if (!rst.isSucceeded()) {
            LOGGER.error(rst.getErrorMessage());
            throw new IOException(rst.getErrorMessage());
        }
        return true;
    }

    /**
     * Get the mapping of an index.
     */
    public String getIndexMapping(String indexName) {
        GetMapping.Builder builder = new GetMapping.Builder();
        builder.addIndex(indexName);
        GetMapping getMapping = builder.build();
        try {
            LOGGER.info("begin GetMapping for index: {}", indexName);
            JestResult result = this.execute(getMapping);
            return result.getJsonString();
        } catch (Exception e) {
            String message = "GetMapping for index error: " + e.getMessage();
            LOGGER.warn(message, e);
            throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, e.getMessage(), e);
        }
    }

    public String getMappingForIndexType(String indexName, String typeName) {
        String indexMapping = this.getIndexMapping(indexName);
        JSONObject indexMappingInJson = JSON.parseObject(indexMapping);
        List<String> paths = Arrays.asList(indexName, "mappings");
        JSONObject properties = JsonPathUtil.getJsonObject(paths, indexMappingInJson);
        JSONObject propertiesParent = properties;
        if (StringUtils.isNotBlank(typeName) && properties.containsKey(typeName)) {
            propertiesParent = (JSONObject) properties.get(typeName);
        }
        JSONObject mapping = (JSONObject) propertiesParent.get("properties");
        return JSON.toJSONString(mapping);
    }

    public JestResult bulkInsert(Bulk.Builder bulk) throws Exception {
        // es_rejected_execution_exception
        // illegal_argument_exception
        // cluster_block_exception
        JestResult rst = execute(bulk.build());
        if (!rst.isSucceeded()) {
            LOGGER.warn(rst.getErrorMessage());
        }
        return rst;
    }

    /**
     * Close the JestClient.
     */
    public void closeJestClient() {
        if (jestClient != null) {
            try {
                // jestClient.shutdownClient();
                jestClient.close();
            } catch (IOException e) {
                LOGGER.warn("ignore error: {}", e.getMessage());
            }
        }
    }
}
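The cluster version probe above is what drives the 7.x mapping path. A brief caller sketch, not part of this commit, assuming an ES cluster is reachable at the placeholder endpoint and that index, type, settings, and mappings are all placeholders:

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.util.Configuration;

public class ElasticSearchClientDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical job fragment; endpoint and index are placeholders
        Configuration conf = Configuration.from(
                "{\"endpoint\":\"http://127.0.0.1:9200\",\"index\":\"demo_index\"}");
        ElasticSearchClient client = new ElasticSearchClient(conf);
        boolean es7 = client.isGreaterOrEqualThan7(); // probe the cluster version once
        String settings = "{\"index\":{\"number_of_shards\":1}}";
        String mappings = "{\"properties\":{\"id\":{\"type\":\"keyword\"}}}";
        // on a 7.x cluster the type name is dropped and PutMapping7 is used internally
        client.createIndexIfNotExists("demo_index", "_doc", mappings, settings, false, es7);
        client.closeJestClient();
    }
}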
@ -0,0 +1,126 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import java.util.List;

/**
 * Created by xiongfeng.bxf on 17/3/2.
 */
public class ElasticSearchColumn {

    private String name;//: "appkey",

    private String type;//": "TEXT",

    private String timezone;

    /**
     * Source-side formatting; handled by DataX itself.
     */
    private String format;

    /**
     * Target-side formatting; a format natively supported by ES.
     */
    private String dstFormat;

    private boolean array;

    /**
     * Whether to use the target-side (native ES) array type.
     *
     * Defaults to false.
     */
    private boolean dstArray = false;

    private boolean jsonArray;

    private boolean origin;

    private List<String> combineFields;

    private String combineFieldsValueSeparator = "-";

    public String getCombineFieldsValueSeparator() {
        return combineFieldsValueSeparator;
    }

    public void setCombineFieldsValueSeparator(String combineFieldsValueSeparator) {
        this.combineFieldsValueSeparator = combineFieldsValueSeparator;
    }

    public List<String> getCombineFields() {
        return combineFields;
    }

    public void setCombineFields(List<String> combineFields) {
        this.combineFields = combineFields;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setTimeZone(String timezone) {
        this.timezone = timezone;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public String getName() {
        return name;
    }

    public String getType() {
        return type;
    }

    public boolean isOrigin() { return origin; }

    public void setOrigin(boolean origin) { this.origin = origin; }

    public String getTimezone() {
        return timezone;
    }

    public String getFormat() {
        return format;
    }

    public void setTimezone(String timezone) {
        this.timezone = timezone;
    }

    public boolean isArray() {
        return array;
    }

    public void setArray(boolean array) {
        this.array = array;
    }

    public boolean isJsonArray() { return jsonArray; }

    public void setJsonArray(boolean jsonArray) { this.jsonArray = jsonArray; }

    public String getDstFormat() {
        return dstFormat;
    }

    public void setDstFormat(String dstFormat) {
        this.dstFormat = dstFormat;
    }

    public boolean isDstArray() {
        return dstArray;
    }

    public void setDstArray(boolean dstArray) {
        this.dstArray = dstArray;
    }
}
@ -3,8 +3,11 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter;
/**
 * Created by xiongfeng.bxf on 17/3/1.
 */
public enum ESFieldType {
public enum ElasticSearchFieldType {
    ID,
    PARENT,
    ROUTING,
    VERSION,
    STRING,
    TEXT,
    KEYWORD,
@ -24,20 +27,18 @@ public enum ESFieldType {
    DATE_RANGE,
    GEO_POINT,
    GEO_SHAPE,

    IP,
    IP_RANGE,
    COMPLETION,
    TOKEN_COUNT,

    ARRAY,
    OBJECT,
    NESTED;

    public static ESFieldType getESFieldType(String type) {
    public static ElasticSearchFieldType getESFieldType(String type) {
        if (type == null) {
            return null;
        }
        for (ESFieldType f : ESFieldType.values()) {
        for (ElasticSearchFieldType f : ElasticSearchFieldType.values()) {
            if (f.name().compareTo(type.toUpperCase()) == 0) {
                return f;
            }
File diff suppressed because it is too large
@ -0,0 +1,41 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.spi.ErrorCode;

public enum ElasticSearchWriterErrorCode implements ErrorCode {
    BAD_CONFIG_VALUE("ESWriter-00", "The value you configured is not valid."),
    ES_INDEX_DELETE("ESWriter-01", "Delete index error."),
    ES_INDEX_CREATE("ESWriter-02", "Index creation error."),
    ES_MAPPINGS("ESWriter-03", "The mappings error."),
    ES_INDEX_INSERT("ESWriter-04", "Insert data error."),
    ES_ALIAS_MODIFY("ESWriter-05", "Alias modification error."),
    JSON_PARSE("ESWriter-06", "Json format parsing error."),
    UPDATE_WITH_ID("ESWriter-07", "Update mode must specify column type with id."),
    RECORD_FIELD_NOT_FOUND("ESWriter-08", "Field does not exist in the original table."),
    ES_GET_SETTINGS("ESWriter-09", "Get settings failed.");

    private final String code;
    private final String description;

    ElasticSearchWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import java.util.List;

import com.alibaba.fastjson.JSONObject;

public class JsonPathUtil {

    public static JSONObject getJsonObject(List<String> paths, JSONObject data) {
        if (null == paths || paths.isEmpty()) {
            return data;
        }

        if (null == data) {
            return null;
        }

        JSONObject dataTmp = data;
        for (String each : paths) {
            if (null != dataTmp) {
                dataTmp = dataTmp.getJSONObject(each);
            } else {
                return null;
            }
        }
        return dataTmp;
    }
}
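A small usage sketch, not part of this commit, of the path walk; a missing segment or a null input yields null rather than throwing:

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import java.util.Arrays;

public class JsonPathUtilDemo {
    public static void main(String[] args) {
        JSONObject data = JSON.parseObject("{\"a\":{\"b\":{\"c\":1}}}");
        // walk the path a -> b into the nested object
        JSONObject inner = JsonPathUtil.getJsonObject(Arrays.asList("a", "b"), data);
        System.out.println(inner.toJSONString()); // {"c":1}
    }
}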
@ -0,0 +1,54 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;

/**
 * @author bozu
 * @date 2021/01/06
 */
public class JsonUtil {

    /**
     * Merge two JSON strings.
     * @param source source JSON
     * @param target target JSON
     * @return the merged JSON string
     * @throws JSONException
     */
    public static String mergeJsonStr(String source, String target) throws JSONException {
        if (source == null) {
            return target;
        }
        if (target == null) {
            return source;
        }
        return JSON.toJSONString(deepMerge(JSON.parseObject(source), JSON.parseObject(target)));
    }

    /**
     * Deep-merge two JSON objects, merging the values of source into target.
     * @param source source JSON object
     * @param target target JSON object
     * @return the merged JSON object
     * @throws JSONException
     */
    private static JSONObject deepMerge(JSONObject source, JSONObject target) throws JSONException {
        for (String key : source.keySet()) {
            Object value = source.get(key);
            if (target.containsKey(key)) {
                // existing value for "key" - recursively deep merge:
                if (value instanceof JSONObject) {
                    JSONObject valueJson = (JSONObject) value;
                    deepMerge(valueJson, target.getJSONObject(key));
                } else {
                    target.put(key, value);
                }
            } else {
                target.put(key, value);
            }
        }
        return target;
    }
}
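A usage sketch, not part of this commit, of the merge semantics: nested objects are merged recursively, while scalar keys from source overwrite those in target:

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

public class JsonUtilDemo {
    public static void main(String[] args) {
        // "settings" exists on both sides as an object, so its children are merged
        String source = "{\"settings\":{\"replicas\":2},\"dynamic\":true}";
        String target = "{\"settings\":{\"shards\":1}}";
        System.out.println(JsonUtil.mergeJsonStr(source, target));
        // -> {"settings":{"shards":1,"replicas":2},"dynamic":true} (key order may vary)
    }
}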
@ -1,9 +1,13 @@
 package com.alibaba.datax.plugin.writer.elasticsearchwriter;
 
 import com.alibaba.datax.common.util.Configuration;
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.TypeReference;
 
 import org.apache.commons.lang3.StringUtils;
 
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 public final class Key {
@ -37,31 +41,35 @@ public final class Key {
 
 
     public static String getEndpoint(Configuration conf) {
-        return conf.getNecessaryValue("endpoint", ESWriterErrorCode.BAD_CONFIG_VALUE);
+        return conf.getNecessaryValue("endpoint", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
     }
 
-    public static String getAccessID(Configuration conf) {
-        return conf.getString("accessId", "");
+    public static String getUsername(Configuration conf) {
+        return conf.getString("username", conf.getString("accessId"));
     }
 
-    public static String getAccessKey(Configuration conf) {
-        return conf.getString("accessKey", "");
+    public static String getPassword(Configuration conf) {
+        return conf.getString("password", conf.getString("accessKey"));
     }
 
     public static int getBatchSize(Configuration conf) {
-        return conf.getInt("batchSize", 1000);
+        return conf.getInt("batchSize", 1024);
     }
 
     public static int getTrySize(Configuration conf) {
        return conf.getInt("trySize", 30);
     }
 
+    public static long getTryInterval(Configuration conf) {
+        return conf.getLong("tryInterval", 60000L);
+    }
+
     public static int getTimeout(Configuration conf) {
        return conf.getInt("timeout", 600000);
     }
 
-    public static boolean isCleanup(Configuration conf) {
-        return conf.getBool("cleanup", false);
+    public static boolean isTruncate(Configuration conf) {
+        return conf.getBool("truncate", conf.getBool("cleanup", false));
     }
 
     public static boolean isDiscovery(Configuration conf) {
@ -69,7 +77,7 @@ public final class Key {
     }
 
     public static boolean isCompression(Configuration conf) {
-        return conf.getBool("compression", true);
+        return conf.getBool("compress", conf.getBool("compression", true));
     }
 
     public static boolean isMultiThread(Configuration conf) {
@ -77,9 +85,17 @@ public final class Key {
     }
 
     public static String getIndexName(Configuration conf) {
-        return conf.getNecessaryValue("index", ESWriterErrorCode.BAD_CONFIG_VALUE);
+        return conf.getNecessaryValue("index", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
     }
 
+    public static String getDeleteBy(Configuration conf) {
+        return conf.getString("deleteBy");
+    }
+
+
+    /**
+     * TODO: 在7.0开始,一个索引只能建一个Type为_doc
+     * */
     public static String getTypeName(Configuration conf) {
        String indexType = conf.getString("indexType");
        if(StringUtils.isBlank(indexType)){
@ -128,4 +144,58 @@ public final class Key {
     public static boolean getDynamic(Configuration conf) {
        return conf.getBool("dynamic", false);
     }
+
+    public static String getDstDynamic(Configuration conf) {
+        return conf.getString("dstDynamic");
+    }
+
+    public static String getDiscoveryFilter(Configuration conf){
+        return conf.getString("discoveryFilter","_all");
+    }
+
+    public static Boolean getVersioning(Configuration conf) {
+        return conf.getBool("versioning", false);
+    }
+
+    public static Long getUnifiedVersion(Configuration conf) {
+        return conf.getLong("version", System.currentTimeMillis());
+    }
+
+    public static Map<String, Object> getUrlParams(Configuration conf) {
+        return conf.getMap("urlParams", new HashMap<String, Object>());
+    }
+
+    public static Integer getESVersion(Configuration conf) {
+        return conf.getInt("esVersion");
+    }
+
+    public static String getMasterTimeout(Configuration conf) {
+        return conf.getString("masterTimeout", "5m");
+    }
+
+    public static boolean isEnableNullUpdate(Configuration conf) {
+        return conf.getBool("enableWriteNull", true);
+    }
+
+    public static String getFieldDelimiter(Configuration conf) {
+        return conf.getString("fieldDelimiter", "");
+    }
+
+    public static PrimaryKeyInfo getPrimaryKeyInfo(Configuration conf) {
+        String primaryKeyInfoString = conf.getString("primaryKeyInfo");
+        if (StringUtils.isNotBlank(primaryKeyInfoString)) {
+            return JSON.parseObject(primaryKeyInfoString, new TypeReference<PrimaryKeyInfo>() {});
+        } else {
+            return null;
+        }
+    }
+
+    public static List<PartitionColumn> getEsPartitionColumn(Configuration conf) {
+        String esPartitionColumnString = conf.getString("esPartitionColumn");
+        if (StringUtils.isNotBlank(esPartitionColumnString)) {
+            return JSON.parseObject(esPartitionColumnString, new TypeReference<List<PartitionColumn>>() {});
+        } else {
+            return null;
+        }
+    }
 }
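The renamed keys keep backward compatibility by falling back to the old names. A small sketch of that behavior (not part of the commit; the demo class and config values are made up, and it assumes DataX's Configuration.from(String) JSON loader):

import com.alibaba.datax.common.util.Configuration;

public class KeyFallbackDemo {
    public static void main(String[] args) {
        // A legacy job config that still uses the pre-rename keys.
        Configuration conf = Configuration.from(
                "{\"accessId\":\"user1\",\"accessKey\":\"secret\",\"cleanup\":true}");
        System.out.println(Key.getUsername(conf)); // user1  (falls back from "username" to "accessId")
        System.out.println(Key.getPassword(conf)); // secret (falls back from "password" to "accessKey")
        System.out.println(Key.isTruncate(conf));  // true   (falls back from "truncate" to "cleanup")
    }
}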
@ -0,0 +1,16 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.spi.ErrorCode;

public class NoReRunException extends DataXException {
    public NoReRunException(String errorMessage) {
        super(errorMessage);
    }

    public NoReRunException(ErrorCode errorCode, String errorMessage) {
        super(errorCode, errorMessage);
    }

    private static final long serialVersionUID = 1L;
}
@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

public class PartitionColumn {
    private String name;
    // like: DATA
    private String metaType;
    private String comment;
    // like: VARCHAR
    private String type;

    public String getName() {
        return name;
    }

    public String getMetaType() {
        return metaType;
    }

    public String getComment() {
        return comment;
    }

    public String getType() {
        return type;
    }

    public void setName(String name) {
        this.name = name;
    }

    public void setMetaType(String metaType) {
        this.metaType = metaType;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    public void setType(String type) {
        this.type = type;
    }
}
@ -0,0 +1,47 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import java.util.List;

public class PrimaryKeyInfo {

    /**
     * 主键类型:PrimaryKeyTypeEnum
     *
     * pk: 单个(业务)主键 specific: 联合主键
     */
    private String type;

    /**
     * 用户定义的联合主键的连接符号
     */
    private String fieldDelimiter;

    /**
     * 主键的列的名称
     */
    private List<String> column;

    public String getType() {
        return type;
    }

    public String getFieldDelimiter() {
        return fieldDelimiter;
    }

    public List<String> getColumn() {
        return column;
    }

    public void setType(String type) {
        this.type = type;
    }

    public void setFieldDelimiter(String fieldDelimiter) {
        this.fieldDelimiter = fieldDelimiter;
    }

    public void setColumn(List<String> column) {
        this.column = column;
    }
}
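For orientation (not part of the commit; the field values below are invented), the writer's primaryKeyInfo job setting parses into this bean via Key.getPrimaryKeyInfo; "specific" marks a composite key whose columns are joined with fieldDelimiter:

import com.alibaba.fastjson.JSON;

public class PrimaryKeyInfoDemo {
    public static void main(String[] args) {
        String json = "{\"type\":\"specific\",\"fieldDelimiter\":\"_\",\"column\":[\"pk1\",\"pk2\"]}";
        PrimaryKeyInfo info = JSON.parseObject(json, PrimaryKeyInfo.class);
        System.out.println(info.getType());   // specific
        System.out.println(info.getColumn()); // [pk1, pk2]
    }
}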
@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;

import com.google.gson.Gson;
import io.searchbox.action.AbstractAction;
import io.searchbox.client.config.ElasticsearchVersion;

public class ClusterInfo extends AbstractAction<ClusterInfoResult> {
    @Override
    protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
        return "";
    }

    @Override
    public String getRestMethodName() {
        return "GET";
    }

    @Override
    public ClusterInfoResult createNewElasticSearchResult(String responseBody, int statusCode, String reasonPhrase, Gson gson) {
        return createNewElasticSearchResult(new ClusterInfoResult(gson), responseBody, statusCode, reasonPhrase, gson);
    }

    public static class Builder extends AbstractAction.Builder<ClusterInfo, ClusterInfo.Builder> {

        public Builder() {
            setHeader("accept", "application/json");
            setHeader("content-type", "application/json");
        }

        @Override
        public ClusterInfo build() {
            return new ClusterInfo();
        }
    }
}
@ -0,0 +1,49 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;

import com.google.gson.Gson;
import io.searchbox.client.JestResult;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ClusterInfoResult extends JestResult {

    private static final Pattern FIRST_NUMBER = Pattern.compile("\\d");

    private static final int SEVEN = 7;

    public ClusterInfoResult(Gson gson) {
        super(gson);
    }

    public ClusterInfoResult(JestResult source) {
        super(source);
    }

    /**
     * 判断es集群的部署版本是否大于7.x
     * 大于7.x的es对于Index的type有较大改动,需要做额外判定
     * 对于7.x与6.x版本的es都做过测试,返回符合预期;5.x以下版本直接try-catch后返回false,向下兼容
     * @return
     */
    public Boolean isGreaterOrEqualThan7() throws Exception {
        // 如果是没有权限,直接返回false,兼容老版本
        if (responseCode == 403) {
            return false;
        }
        if (!isSucceeded) {
            throw new Exception(getJsonString());
        }
        try {
            String version = jsonObject.getAsJsonObject("version").get("number").toString();
            Matcher matcher = FIRST_NUMBER.matcher(version);
            matcher.find();
            String number = matcher.group();
            Integer versionNum = Integer.valueOf(number);
            return versionNum >= SEVEN;
        } catch (Exception e) {
            //5.x 以下版本不做兼容测试,如果返回json格式解析失败,有可能是以下版本,所以认为不大于7.x
            return false;
        }
    }
}
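A hedged sketch (not from the commit) of how these two classes fit together to gate 7.x-specific behavior; it assumes a Jest client has already been built elsewhere:

import io.searchbox.client.JestClient;

public class VersionCheckDemo {
    static boolean clusterIsAtLeast7(JestClient jestClient) throws Exception {
        // GET / returns the cluster info document whose version.number is inspected.
        ClusterInfoResult result = jestClient.execute(new ClusterInfo.Builder().build());
        return result.isGreaterOrEqualThan7();
    }
}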
@ -0,0 +1,39 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;

import io.searchbox.action.GenericResultAbstractAction;
import io.searchbox.client.config.ElasticsearchVersion;

public class PutMapping7 extends GenericResultAbstractAction {
    protected PutMapping7(PutMapping7.Builder builder) {
        super(builder);

        this.indexName = builder.index;
        this.payload = builder.source;
    }

    @Override
    protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
        return super.buildURI(elasticsearchVersion) + "/_mapping";
    }

    @Override
    public String getRestMethodName() {
        return "PUT";
    }

    public static class Builder extends GenericResultAbstractAction.Builder<PutMapping7, PutMapping7.Builder> {
        private String index;
        private Object source;

        public Builder(String index, Object source) {
            this.index = index;
            this.source = source;
        }

        @Override
        public PutMapping7 build() {
            return new PutMapping7(this);
        }
    }

}
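On 7.x clusters the typeless mapping endpoint is PUT /<index>/_mapping, which is what this action targets. A minimal usage sketch (not part of the commit; the index name and mapping body are illustrative):

import io.searchbox.client.JestClient;
import io.searchbox.client.JestResult;

public class PutMapping7Demo {
    static void putMapping(JestClient jestClient) throws Exception {
        // Typeless mapping body, as required from Elasticsearch 7.x onward.
        String mapping = "{\"properties\":{\"name\":{\"type\":\"keyword\"}}}";
        JestResult result = jestClient.execute(new PutMapping7.Builder("my_index", mapping).build());
        System.out.println(result.isSucceeded());
    }
}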
@ -1,6 +1,6 @@
 {
     "name": "elasticsearchwriter",
-    "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter",
+    "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ElasticSearchWriter",
     "description": "适用于: 生产环境. 原理: TODO",
     "developer": "alibaba"
 }
@ -175,7 +175,7 @@ public class HBase20SQLReaderHelper {
         if (querySql == null || querySql.isEmpty()) {
             // 如果splitPoints为空,则根据splitKey自动切分,不过这种切分方式无法保证数据均分,且只支持整形和字符型列
             if (splitPoints == null || splitPoints.isEmpty()) {
-                LOG.info("Split accoring min and max value of splitColumn...");
+                LOG.info("Split according min and max value of splitColumn...");
                 Pair<Object, Object> minMaxPK = getPkRange(configuration);
                 if (null == minMaxPK) {
                     throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
@ -208,7 +208,7 @@ public class HBase20SQLReaderHelper {
             }
 
         } else {
-            LOG.info("Split accoring splitPoints...");
+            LOG.info("Split according splitPoints...");
             // 根据指定splitPoints进行切分
             rangeList = buildSplitRange();
         }
@ -2,6 +2,6 @@
     "name": "hbase20xsqlreader",
     "class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader",
     "description": "useScene: prod. mechanism: read data from phoenix through queryserver.",
-    "developer": "bake"
+    "developer": "alibaba"
 }
@ -2,6 +2,6 @@
     "name": "hbase20xsqlwriter",
     "class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter",
     "description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.",
-    "developer": "bake"
+    "developer": "alibaba"
 }
@ -231,6 +231,7 @@ HdfsWriter提供向HDFS文件系统指定路径中写入TEXTFile文件和ORCFile
 
 	* append,写入前不做任何处理,DataX hdfswriter直接使用filename写入,并保证文件名不冲突。
 	* nonConflict,如果目录下有fileName前缀的文件,直接报错。
+	* truncate,如果目录下有fileName前缀的文件,先删除后写入。
 
 	* 必选:是 <br />
@ -1,6 +1,6 @@
 {
-    "name": "hologreswriter",
-    "class": "com.alibaba.datax.plugin.writer.hologreswriter.HologresWriter",
+    "name": "hologresjdbcwriter",
+    "class": "com.alibaba.datax.plugin.writer.hologresjdbcwriter.HologresJdbcWriter",
     "description": "",
     "developer": "alibaba"
 }
@ -1,5 +1,5 @@
 {
-    "name": "hologreswriter",
+    "name": "hologresjdbcwriter",
     "parameter": {
         "url": "",
         "username": "",
loghubreader/pom.xml (new file, 73 lines)
@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>datax-all</artifactId>
        <groupId>com.alibaba.datax</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>loghubreader</artifactId>

    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>datax-common</artifactId>
            <version>${datax-project-version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
        </dependency>
        <dependency>
            <groupId>com.aliyun.openservices</groupId>
            <artifactId>aliyun-log</artifactId>
            <version>0.6.22</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compiler plugin -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${jdk-version}</source>
                    <target>${jdk-version}</target>
                    <encoding>${project-sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <!-- assembly plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/package.xml</descriptor>
                    </descriptors>
                    <finalName>datax</finalName>
                </configuration>
                <executions>
                    <execution>
                        <id>dwzip</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
loghubreader/src/main/assembly/package.xml (new file, 34 lines)
@ -0,0 +1,34 @@
<assembly
        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
    <id></id>
    <formats>
        <format>dir</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <fileSets>
        <fileSet>
            <directory>src/main/resources</directory>
            <includes>
                <include>plugin.json</include>
            </includes>
            <outputDirectory>plugin/reader/loghubreader</outputDirectory>
        </fileSet>
        <fileSet>
            <directory>target/</directory>
            <includes>
                <include>loghubreader-0.0.1-SNAPSHOT.jar</include>
            </includes>
            <outputDirectory>plugin/reader/loghubreader</outputDirectory>
        </fileSet>
    </fileSets>

    <dependencySets>
        <dependencySet>
            <useProjectArtifact>false</useProjectArtifact>
            <outputDirectory>plugin/reader/loghubreader/libs</outputDirectory>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
</assembly>
@ -0,0 +1,26 @@
package com.alibaba.datax.plugin.reader.loghubreader;

public class Constant {

    public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
    public static String DATE_FORMAT = "yyyyMMdd";

    static String META_COL_SOURCE = "__source__";
    static String META_COL_TOPIC = "__topic__";
    static String META_COL_CATEGORY = "__category__";
    static String META_COL_MACHINEUUID = "__machineUUID__";
    static String META_COL_HOSTNAME = "__hostname__";
    static String META_COL_PATH = "__path__";
    static String META_COL_LOGTIME = "__logtime__";
    public static String META_COL_RECEIVE_TIME = "__receive_time__";

    /**
     * 除用户手动配置的列之外,其余数据列作为一个 json 读取到一列
     */
    static String COL_EXTRACT_OTHERS = "C__extract_others__";

    /**
     * 将所有元数据列作为一个 json 读取到一列
     */
    static String COL_EXTRACT_ALL_META = "C__extract_all_meta__";
}
@ -0,0 +1,38 @@
package com.alibaba.datax.plugin.reader.loghubreader;

public final class Key {

    /**
     * 此处声明插件用到的需要插件使用者提供的配置项
     */
    public static final String ENDPOINT = "endpoint";

    public static final String ACCESSKEYID = "accessId";

    public static final String ACCESSKEYSECRET = "accessKey";

    public static final String PROJECT = "project";

    public static final String LOGSTORE = "logstore";

    public static final String TOPIC = "topic";

    public static final String COLUMN = "column";

    public static final String BATCHSIZE = "batchSize";

    public static final String BEGINTIMESTAMPMILLIS = "beginTimestampMillis";

    public static final String ENDTIMESTAMPMILLIS = "endTimestampMillis";

    public static final String BEGINDATETIME = "beginDateTime";

    public static final String ENDDATETIME = "endDateTime";

    public static final String TIMEFORMAT = "timeformat";

    public static final String SOURCE = "source";

    public static final String SHARD = "shard";

}
@ -0,0 +1,482 @@
package com.alibaba.datax.plugin.reader.loghubreader;

import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.Consts.CursorMode;
import com.aliyun.openservices.log.common.*;
import com.aliyun.openservices.log.exception.LogException;
import com.aliyun.openservices.log.response.BatchGetLogResponse;
import com.aliyun.openservices.log.response.GetCursorResponse;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.Callable;

public class LogHubReader extends Reader {
    public static class Job extends Reader.Job {

        private static final Logger LOG = LoggerFactory.getLogger(Job.class);

        private Client client;
        private Configuration originalConfig;

        private Long beginTimestampMillis;
        private Long endTimestampMillis;

        @Override
        public void init() {
            LOG.info("loghub reader job init begin ...");
            this.originalConfig = super.getPluginJobConf();
            validateParameter(originalConfig);

            String endPoint = this.originalConfig.getString(Key.ENDPOINT);
            String accessKeyId = this.originalConfig.getString(Key.ACCESSKEYID);
            String accessKeySecret = this.originalConfig.getString(Key.ACCESSKEYSECRET);

            client = new Client(endPoint, accessKeyId, accessKeySecret);
            LOG.info("loghub reader job init end.");
        }

        private void validateParameter(Configuration conf){
            conf.getNecessaryValue(Key.ENDPOINT,LogHubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESSKEYID,LogHubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESSKEYSECRET,LogHubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.PROJECT,LogHubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.LOGSTORE,LogHubReaderErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.COLUMN,LogHubReaderErrorCode.REQUIRE_VALUE);

            int batchSize = this.originalConfig.getInt(Key.BATCHSIZE);
            if (batchSize > 1000) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid batchSize[" + batchSize + "] value (0,1000]!");
            }

            beginTimestampMillis = this.originalConfig.getLong(Key.BEGINTIMESTAMPMILLIS);
            String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);

            if (beginDateTime != null) {
                try {
                    beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
                } catch (ParseException e) {
                    throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                            "Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
                }
            }

            if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
            }

            endTimestampMillis = this.originalConfig.getLong(Key.ENDTIMESTAMPMILLIS);
            String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);

            if (endDateTime != null) {
                try {
                    endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
                } catch (ParseException e) {
                    throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                            "Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
                }
            }

            if (endTimestampMillis != null && endTimestampMillis <= 0) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid endTimestampMillis[" + endTimestampMillis + "]!");
            }

            if (beginTimestampMillis != null && endTimestampMillis != null
                    && endTimestampMillis <= beginTimestampMillis) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
            }
        }

        private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
            try {
                String format = Constant.DATETIME_FORMAT;
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
                return simpleDateFormat.parse(dateTime).getTime() / 1000;
            } catch (ParseException ignored) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "Invalid DateTime[" + dateTime + "]!");
            }
        }

        @Override
        public void prepare() {
        }

        @Override
        public List<Configuration> split(int adviceNumber) {
            LOG.info("split() begin...");

            List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();

            final String project = this.originalConfig.getString(Key.PROJECT);
            final String logstore = this.originalConfig.getString(Key.LOGSTORE);

            List<Shard> logStore = null;
            try {
                logStore = RetryUtil.executeWithRetry(new Callable<List<Shard>>() {
                    @Override
                    public List<Shard> call() throws Exception {
                        return client.ListShard(project, logstore).GetShards();
                    }
                }, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
            } catch (Exception e) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "get LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
            }

            if (logStore == null) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
                        "LogStore[" + logstore + "] isn't exists, please check !");
            }

            int splitNumber = logStore.size();
            if (0 == splitNumber) {
                throw DataXException.asDataXException(LogHubReaderErrorCode.EMPTY_LOGSTORE_VALUE,
                        "LogStore[" + logstore + "] has 0 shard, please check !");
            }

            Collections.shuffle(logStore);
            for (int i = 0; i < logStore.size(); i++) {
                if (beginTimestampMillis != null && endTimestampMillis != null) {
                    try {
                        String beginCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), beginTimestampMillis).GetCursor();
                        String endCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), endTimestampMillis).GetCursor();
                        if (beginCursor.equals(endCursor)) {
                            if ((i == logStore.size() - 1) && (readerSplitConfigs.size() == 0)) {

                            } else {
                                LOG.info("skip empty shard[" + logStore.get(i) + "]!");
                                continue;
                            }
                        }
                    } catch (Exception e) {
                        LOG.error("Check Shard[" + logStore.get(i) + "] Error, please check !" + e.toString());
                        throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
                    }
                }
                Configuration splitedConfig = this.originalConfig.clone();
                splitedConfig.set(Key.SHARD, logStore.get(i).GetShardId());
                readerSplitConfigs.add(splitedConfig);
            }

            if (splitNumber < adviceNumber) {
                // LOG.info(MESSAGE_SOURCE.message("hdfsreader.12",
                // splitNumber, adviceNumber, splitNumber, splitNumber));
            }
            LOG.info("split() ok and end...");

            return readerSplitConfigs;
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
        }

        private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
            return
                    RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
                        @Override
                        public GetCursorResponse call() throws Exception {
                            LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
                            return client.GetCursor(project, logstore, shard, fromTime);
                        }
                    }, 7, 1000L, true);
        }

    }

    public static class Task extends Reader.Task {

        private static final Logger LOG = LoggerFactory.getLogger(Task.class);

        private Configuration taskConfig;
        private Client client;
        private String endPoint;
        private String accessKeyId;
        private String accessKeySecret;
        private String project;
        private String logstore;
        private long beginTimestampMillis;
        private long endTimestampMillis;
        private int batchSize;
        private int shard;
        private List<String> columns;

        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();

            endPoint = this.taskConfig.getString(Key.ENDPOINT);
            accessKeyId = this.taskConfig.getString(Key.ACCESSKEYID);
            accessKeySecret = this.taskConfig.getString(Key.ACCESSKEYSECRET);
            project = this.taskConfig.getString(Key.PROJECT);
            logstore = this.taskConfig.getString(Key.LOGSTORE);
            batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 128);

            this.beginTimestampMillis = this.taskConfig.getLong(Key.BEGINTIMESTAMPMILLIS, -1);
            String beginDateTime = this.taskConfig.getString(Key.BEGINDATETIME);

            if (beginDateTime != null) {
                try {
                    beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
                } catch (ParseException e) {
                }
            }

            this.endTimestampMillis = this.taskConfig.getLong(Key.ENDTIMESTAMPMILLIS, -1);
            String endDateTime = this.taskConfig.getString(Key.ENDDATETIME);

            if (endDateTime != null) {
                try {
                    endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
                } catch (ParseException e) {
                }
            }

            columns = this.taskConfig.getList(Key.COLUMN, String.class);

            shard = this.taskConfig.getInt(Key.SHARD);

            client = new Client(endPoint, accessKeyId, accessKeySecret);
            LOG.info("init loghub reader task finished.project:{} logstore:{} batchSize:{}", project, logstore, batchSize);
        }

        @Override
        public void prepare() {
        }

        private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
            try {
                String format = Constant.DATETIME_FORMAT;
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
                return simpleDateFormat.parse(dateTime).getTime() / 1000;
            } catch (ParseException ignored) {
            }
            String format = Constant.DATE_FORMAT;
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
            return simpleDateFormat.parse(dateTime).getTime() / 1000;
        }

        private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
            return
                    RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
                        @Override
                        public GetCursorResponse call() throws Exception {
                            LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
                            return client.GetCursor(project, logstore, shard, fromTime);
                        }
                    }, 7, 1000L, true);
        }

        private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final CursorMode mode) throws Exception {
            return
                    RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
                        @Override
                        public GetCursorResponse call() throws Exception {
                            LOG.info("loghug get cursor with project: {} logstore: {} shard: {} mode: {}", project, logstore, shard, mode);
                            return client.GetCursor(project, logstore, shard, mode);
                        }
                    }, 7, 1000L, true);
        }

        private BatchGetLogResponse batchGetLogWithRetry(final Client client, final String project, final String logstore, final int shard, final int batchSize,
                                                         final String curCursor, final String endCursor) throws Exception {
            return
                    RetryUtil.executeWithRetry(new Callable<BatchGetLogResponse>() {
                        @Override
                        public BatchGetLogResponse call() throws Exception {
                            return client.BatchGetLog(project, logstore, shard, batchSize, curCursor, endCursor);
                        }
                    }, 7, 1000L, true);
        }

        @Override
        public void startRead(RecordSender recordSender) {
            LOG.info("read start");

            try {
                GetCursorResponse cursorRes;
                if (this.beginTimestampMillis != -1) {
                    cursorRes = getCursorWithRetry(client, project, logstore, this.shard, beginTimestampMillis);
                } else {
                    cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.BEGIN);
                }
                String beginCursor = cursorRes.GetCursor();

                LOG.info("the begin cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());

                if (this.endTimestampMillis != -1) {
                    cursorRes = getCursorWithRetry(client, project, logstore, this.shard, endTimestampMillis);
                } else {
                    cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.END);
                }
                String endCursor = cursorRes.GetCursor();
                LOG.info("the end cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());

                if (StringUtils.equals(beginCursor, endCursor)) {
                    LOG.info("beginCursor:{} equals endCursor:{}, end directly!", beginCursor, endCursor);
                    return;
                }

                String currentCursor = null;
                String nextCursor = beginCursor;

                HashMap<String, String> metaMap = new HashMap<String, String>();
                HashMap<String, String> dataMap = new HashMap<String, String>();
                JSONObject allMetaJson = new JSONObject();
                while (!StringUtils.equals(currentCursor, nextCursor)) {
                    currentCursor = nextCursor;
                    BatchGetLogResponse logDataRes = batchGetLogWithRetry(client, project, logstore, this.shard, this.batchSize, currentCursor, endCursor);

                    List<LogGroupData> logGroups = logDataRes.GetLogGroups();

                    for (LogGroupData logGroup : logGroups) {
                        metaMap.clear();
                        allMetaJson.clear();
                        FastLogGroup flg = logGroup.GetFastLogGroup();

                        metaMap.put("C_Category", flg.getCategory());
                        metaMap.put(Constant.META_COL_CATEGORY, flg.getCategory());
                        allMetaJson.put(Constant.META_COL_CATEGORY, flg.getCategory());

                        metaMap.put("C_Source", flg.getSource());
                        metaMap.put(Constant.META_COL_SOURCE, flg.getSource());
                        allMetaJson.put(Constant.META_COL_SOURCE, flg.getSource());

                        metaMap.put("C_Topic", flg.getTopic());
                        metaMap.put(Constant.META_COL_TOPIC, flg.getTopic());
                        allMetaJson.put(Constant.META_COL_TOPIC, flg.getTopic());

                        metaMap.put("C_MachineUUID", flg.getMachineUUID());
                        metaMap.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());
                        allMetaJson.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());

                        for (int tagIdx = 0; tagIdx < flg.getLogTagsCount(); ++tagIdx) {
                            FastLogTag logtag = flg.getLogTags(tagIdx);
                            String tagKey = logtag.getKey();
                            String tagValue = logtag.getValue();
                            if (tagKey.equals(Constant.META_COL_HOSTNAME)) {
                                metaMap.put("C_HostName", logtag.getValue());
                            } else if (tagKey.equals(Constant.META_COL_PATH)) {
                                metaMap.put("C_Path", logtag.getValue());
                            }
                            metaMap.put(tagKey, tagValue);
                            allMetaJson.put(tagKey, tagValue);
                        }

                        for (int lIdx = 0; lIdx < flg.getLogsCount(); ++lIdx) {
                            dataMap.clear();
                            FastLog log = flg.getLogs(lIdx);

                            String logTime = String.valueOf(log.getTime());
                            metaMap.put("C_LogTime", logTime);
                            metaMap.put(Constant.META_COL_LOGTIME, logTime);
                            allMetaJson.put(Constant.META_COL_LOGTIME, logTime);

                            for (int cIdx = 0; cIdx < log.getContentsCount(); ++cIdx) {
                                FastLogContent content = log.getContents(cIdx);
                                dataMap.put(content.getKey(), content.getValue());
                            }

                            Record record = recordSender.createRecord();

                            JSONObject extractOthers = new JSONObject();
                            if (columns.contains(Constant.COL_EXTRACT_OTHERS)) {
                                List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
                                for (String otherKey : keyList) {
                                    if (!columns.contains(otherKey)) {
                                        extractOthers.put(otherKey, dataMap.get(otherKey));
                                    }
                                }
                            }
                            if (null != this.columns && 1 == this.columns.size()) {
                                String columnsInStr = columns.get(0).toString();
                                if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
                                    List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
                                    Collections.sort(keyList);

                                    for (String key : keyList) {
                                        record.addColumn(new StringColumn(key + ":" + dataMap.get(key)));
                                    }
                                } else {
                                    if (dataMap.containsKey(columnsInStr)) {
                                        record.addColumn(new StringColumn(dataMap.get(columnsInStr)));
                                    } else if (metaMap.containsKey(columnsInStr)) {
                                        record.addColumn(new StringColumn(metaMap.get(columnsInStr)));
                                    } else if (Constant.COL_EXTRACT_OTHERS.equals(columnsInStr)) {
                                        record.addColumn(new StringColumn(extractOthers.toJSONString()));
                                    } else if (Constant.COL_EXTRACT_ALL_META.equals(columnsInStr)) {
                                        record.addColumn(new StringColumn(allMetaJson.toJSONString()));
                                    }
                                }
                            } else {
                                for (String col : this.columns) {
                                    if (dataMap.containsKey(col)) {
                                        record.addColumn(new StringColumn(dataMap.get(col)));
                                    } else if (metaMap.containsKey(col)) {
                                        record.addColumn(new StringColumn(metaMap.get(col)));
                                    } else if (col != null && col.startsWith("'") && col.endsWith("'")) {
                                        String constant = col.substring(1, col.length() - 1);
                                        record.addColumn(new StringColumn(constant));
                                    } else if (Constant.COL_EXTRACT_OTHERS.equals(col)) {
                                        record.addColumn(new StringColumn(extractOthers.toJSONString()));
                                    } else if (Constant.COL_EXTRACT_ALL_META.equals(col)) {
                                        record.addColumn(new StringColumn(allMetaJson.toJSONString()));
                                    } else {
                                        record.addColumn(new StringColumn(null));
                                    }
                                }
                            }

                            recordSender.sendToWriter(record);
                        }
                    }

                    nextCursor = logDataRes.GetNextCursor();
                }
            } catch (LogException e) {
                if (e.GetErrorCode().equals("LogStoreNotExist")) {
                    LOG.info("logStore[" + logstore + "] Not Exits! detail error messsage: " + e.toString());
                } else {
                    LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
                    throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
                }

            } catch (Exception e) {
                LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
                throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
            }

            LOG.info("end read loghub shard...");
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
        }
    }
}
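The Task's getUnixTimeFromDateTime accepts either yyyyMMddHHmmss or yyyyMMdd. A standalone sketch of that fallback (not part of the commit; the demo class is made up):

import java.text.ParseException;
import java.text.SimpleDateFormat;

public class TimeParseDemo {
    // Mirrors the reader's fallback: try yyyyMMddHHmmss first, then yyyyMMdd.
    static long toUnixTime(String dateTime) throws ParseException {
        try {
            return new SimpleDateFormat("yyyyMMddHHmmss").parse(dateTime).getTime() / 1000;
        } catch (ParseException ignored) {
            return new SimpleDateFormat("yyyyMMdd").parse(dateTime).getTime() / 1000;
        }
    }

    public static void main(String[] args) throws ParseException {
        System.out.println(toUnixTime("20220101120000")); // seconds since epoch
        System.out.println(toUnixTime("20220101"));       // midnight of that day, local time
    }
}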
@ -0,0 +1,34 @@
package com.alibaba.datax.plugin.reader.loghubreader;

import com.alibaba.datax.common.spi.ErrorCode;

public enum LogHubReaderErrorCode implements ErrorCode {
    BAD_CONFIG_VALUE("LogHuReader-00", "The value you configured is invalid."),
    LOG_HUB_ERROR("LogHubReader-01","LogHub access encounter exception"),
    REQUIRE_VALUE("LogHubReader-02","Missing parameters"),
    EMPTY_LOGSTORE_VALUE("LogHubReader-03","There is no shard in this LogStore");

    private final String code;
    private final String description;

    private LogHubReaderErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    @Override
    public String getCode() {
        return this.code;
    }

    @Override
    public String getDescription() {
        return this.description;
    }

    @Override
    public String toString() {
        return String.format("Code:[%s], Description:[%s]. ", this.code,
                this.description);
    }
}
loghubreader/src/main/resources/plugin.json (new file, 6 lines)
@ -0,0 +1,6 @@
{
    "name": "loghubreader",
    "class": "com.alibaba.datax.plugin.reader.loghubreader.LogHubReader",
    "description": "适用于: 从SLS LogHub中读取数据",
    "developer": "alibaba"
}
loghubreader/src/main/resources/plugin_job_template.json (new file, 12 lines)
@ -0,0 +1,12 @@
{
    "name": "loghubreader",
    "parameter": {
        "endpoint": "",
        "accessId": "",
        "accessKey": "",
        "project": "",
        "logstore": "",
        "batchSize":1024,
        "column": []
    }
}
loghubwriter/pom.xml (new file, 73 lines)
@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>datax-all</artifactId>
        <groupId>com.alibaba.datax</groupId>
        <version>0.0.1-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>loghubwriter</artifactId>

    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.datax</groupId>
            <artifactId>datax-common</artifactId>
            <version>${datax-project-version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </dependency>
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
        </dependency>
        <dependency>
            <groupId>com.aliyun.openservices</groupId>
            <artifactId>aliyun-log</artifactId>
            <version>0.6.12</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- compiler plugin -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${jdk-version}</source>
                    <target>${jdk-version}</target>
                    <encoding>${project-sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <!-- assembly plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptors>
                        <descriptor>src/main/assembly/package.xml</descriptor>
                    </descriptors>
                    <finalName>datax</finalName>
                </configuration>
                <executions>
                    <execution>
                        <id>dwzip</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
loghubwriter/src/main/assembly/package.xml (new file, 34 lines)
@ -0,0 +1,34 @@
<assembly
        xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
    <id></id>
    <formats>
        <format>dir</format>
    </formats>
    <includeBaseDirectory>false</includeBaseDirectory>
    <fileSets>
        <fileSet>
            <directory>src/main/resources</directory>
            <includes>
                <include>plugin.json</include>
            </includes>
            <outputDirectory>plugin/writer/loghubwriter</outputDirectory>
        </fileSet>
        <fileSet>
            <directory>target/</directory>
            <includes>
                <include>loghubwriter-0.0.1-SNAPSHOT.jar</include>
            </includes>
            <outputDirectory>plugin/writer/loghubwriter</outputDirectory>
        </fileSet>
    </fileSets>

    <dependencySets>
        <dependencySet>
            <useProjectArtifact>false</useProjectArtifact>
            <outputDirectory>plugin/writer/loghubwriter/libs</outputDirectory>
            <scope>runtime</scope>
        </dependencySet>
    </dependencySets>
</assembly>
@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.writer.loghubwriter;

/**
 * 配置关键字
 * @author
 */
public final class Key {

    /**
     * 此处声明插件用到的需要插件使用者提供的配置项
     */
    public static final String ENDPOINT = "endpoint";

    public static final String ACCESS_KEY_ID = "accessId";

    public static final String ACCESS_KEY_SECRET = "accessKey";

    public static final String PROJECT = "project";

    public static final String LOG_STORE = "logstore";

    public static final String TOPIC = "topic";

    public static final String COLUMN = "column";

    public static final String BATCH_SIZE = "batchSize";

    public static final String TIME = "time";

    public static final String TIME_FORMAT = "timeformat";

    public static final String SOURCE = "source";

    public static final String HASH_BY_KEY = "hashKey";
}
@ -0,0 +1,315 @@
package com.alibaba.datax.plugin.writer.loghubwriter;

import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.datax.common.util.StrUtil;
import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.LogItem;
import com.aliyun.openservices.log.common.Shard;
import com.aliyun.openservices.log.exception.LogException;
import com.aliyun.openservices.log.request.ListShardRequest;
import com.aliyun.openservices.log.request.PutLogsRequest;
import com.aliyun.openservices.log.response.ListShardResponse;
import com.aliyun.openservices.log.response.PutLogsResponse;

import org.apache.commons.codec.digest.Md5Crypt;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.security.provider.MD5;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;

/**
 * SLS 写插件
 * @author
 */
public class LogHubWriter extends Writer {

    public static class Job extends Writer.Job {
        private static final Logger LOG = LoggerFactory.getLogger(Job.class);

        private Configuration jobConfig = null;

        @Override
        public void init() {
            info(LOG, "loghub writer job init begin ...");
            this.jobConfig = super.getPluginJobConf();
            validateParameter(jobConfig);
            info(LOG, "loghub writer job init end.");
        }

        private void validateParameter(Configuration conf){
            conf.getNecessaryValue(Key.ENDPOINT,LogHubWriterErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESS_KEY_ID,LogHubWriterErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.ACCESS_KEY_SECRET,LogHubWriterErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.PROJECT,LogHubWriterErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.LOG_STORE,LogHubWriterErrorCode.REQUIRE_VALUE);
            conf.getNecessaryValue(Key.COLUMN,LogHubWriterErrorCode.REQUIRE_VALUE);
        }

        @Override
        public List<Configuration> split(int mandatoryNumber) {
            info(LOG, "split begin...");
            List<Configuration> configurationList = new ArrayList<Configuration>();
            for (int i = 0; i < mandatoryNumber; i++) {
                configurationList.add(this.jobConfig.clone());
            }
            info(LOG, "split end...");
            return configurationList;
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
        }
    }

    public static class Task extends Writer.Task {
        private static final Logger LOG = LoggerFactory.getLogger(Task.class);
        private Configuration taskConfig;
        private com.aliyun.openservices.log.Client logHubClient;
        private String logStore;
        private String topic;
        private String project;
        private List<String> columnList;
        private int batchSize;
        private String timeCol;
        private String timeFormat;
        private String source;
        private boolean isHashKey;
        private List<Shard> shards;

        public void init() {
            this.taskConfig = super.getPluginJobConf();
            String endpoint = taskConfig.getString(Key.ENDPOINT);
            String accessKeyId = taskConfig.getString(Key.ACCESS_KEY_ID);
            String accessKeySecret = taskConfig.getString(Key.ACCESS_KEY_SECRET);
            project = taskConfig.getString(Key.PROJECT);
            logStore = taskConfig.getString(Key.LOG_STORE);
            topic = taskConfig.getString(Key.TOPIC,"");
            columnList = taskConfig.getList(Key.COLUMN,String.class);
            batchSize = taskConfig.getInt(Key.BATCH_SIZE,1024);
            timeCol = taskConfig.getString(Key.TIME,"");
            timeFormat = taskConfig.getString(Key.TIME_FORMAT,"");
            source = taskConfig.getString(Key.SOURCE,"");
            isHashKey = taskConfig.getBool(Key.HASH_BY_KEY,false);
            logHubClient = new Client(endpoint, accessKeyId, accessKeySecret);
            if (isHashKey) {
                listShard();
                info(LOG, "init loghub writer with hash key mode.");
            }
            if (LOG.isInfoEnabled()) {
                LOG.info("init loghub writer task finished.project:{} logstore:{} topic:{} batchSize:{}",project,logStore,topic,batchSize);
            }
        }

        /**
         * 获取通道的分片信息
         */
        private void listShard() {
            try {
                ListShardResponse response = logHubClient.ListShard(new ListShardRequest(project,logStore));
                shards = response.GetShards();
                if (LOG.isInfoEnabled()) {
                    LOG.info("Get shard count:{}", shards.size());
                }
            } catch (LogException e) {
                info(LOG, "Get shard failed!");
                throw new RuntimeException("Get shard failed!", e);
            }
        }

        @Override
        public void prepare() {
        }

        private int getTime(String v) {
            try {
                if ("bigint".equalsIgnoreCase(timeFormat)) {
                    return Integer.valueOf(v);
                }

                DateFormat sdf = new SimpleDateFormat(timeFormat);
                Date date = sdf.parse(v);
                return (int)(date.getTime()/1000);
            } catch (Exception e) {
                LOG.warn("Format time failed!", e);
            }
            return (int)(((new Date())).getTime()/1000);
        }

        @Override
        public void startWrite(RecordReceiver recordReceiver) {
            info(LOG, "start to write.....................");
            // 按照shared做hash处理
            if (isHashKey) {
                processDataWithHashKey(recordReceiver);
            } else {
                processDataWithoutHashKey(recordReceiver);
            }
            info(LOG, "finish to write.........");
        }

        private void processDataWithHashKey(RecordReceiver receiver) {
            Record record;
            Map<String, List<LogItem>> logMap = new HashMap<String, List<LogItem>>(shards.size());
            int count = 0;
            try {
                while ((record = receiver.getFromReader()) != null) {
                    LogItem logItem = new LogItem();
                    if (record.getColumnNumber() != columnList.size()) {
                        this.getTaskPluginCollector().collectDirtyRecord(record, "column not match");
                    }

                    String id = "";
                    for (int i = 0; i < record.getColumnNumber(); i++) {
                        String colName = columnList.get(i);
                        String colValue = record.getColumn(i).asString();
                        if (colName.endsWith("_id")) {
                            id = colValue;
                        }

                        logItem.PushBack(colName, colValue);
                        if (colName.equals(timeCol)) {
                            logItem.SetTime(getTime(colValue));
                        }
                    }

                    String hashKey = getShardHashKey(StrUtil.getMd5(id), shards);
                    if (!logMap.containsKey(hashKey)) {
                        info(LOG, "Hash key:" + hashKey);
                        logMap.put(hashKey, new ArrayList<LogItem>());
                    }
                    logMap.get(hashKey).add(logItem);

                    if (logMap.get(hashKey).size() % batchSize == 0) {
                        PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logMap.get(hashKey), hashKey);
                        PutLogsResponse response = putLog(request);
                        count += logMap.get(hashKey).size();
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("record count:{}, request id:{}", logMap.get(hashKey).size(), response.GetRequestId());
                        }
                        logMap.get(hashKey).clear();
                    }
                }

                for (Map.Entry<String, List<LogItem>> entry : logMap.entrySet()) {
                    if (!entry.getValue().isEmpty()) {
                        // 将剩余的数据发送
                        PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, entry.getValue(), entry.getKey());
                        PutLogsResponse response = putLog(request);
                        count += entry.getValue().size();
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("record count:{}, request id:{}", entry.getValue().size(), response.GetRequestId());
                        }
                        entry.getValue().clear();
                    }
                }
                LOG.info("{} records have been sent", count);
            } catch (LogException ex) {
                throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
            } catch (Exception e) {
                throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
            }
        }

        private void processDataWithoutHashKey(RecordReceiver receiver) {
            Record record;
            ArrayList<LogItem> logGroup = new ArrayList<LogItem>();
            int count = 0;
            try {
                while ((record = receiver.getFromReader()) != null) {
                    LogItem logItem = new LogItem();
                    if(record.getColumnNumber() != columnList.size()){
                        this.getTaskPluginCollector().collectDirtyRecord(record,"column not match");
                    }
                    for (int i = 0; i < record.getColumnNumber(); i++) {
                        String colName = columnList.get(i);
                        String colValue = record.getColumn(i).asString();
                        logItem.PushBack(colName, colValue);
                        if(colName.equals(timeCol)){
                            logItem.SetTime(getTime(colValue));
                        }
                    }

                    logGroup.add(logItem);
                    count++;
                    if (count % batchSize == 0) {
                        PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
                        PutLogsResponse response = putLog(request);
                        logGroup.clear();
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
                        }
                    }
                }
                if (!logGroup.isEmpty()) {
                    //将剩余的数据发送
                    PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
                    PutLogsResponse response = putLog(request);
                    logGroup.clear();
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
                    }
                }
                LOG.info("{} records have been sent", count);
            } catch (LogException ex) {
                throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
            } catch (Exception e) {
                throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
            }
        }

        private PutLogsResponse putLog(final PutLogsRequest request) throws Exception{
            final Client client = this.logHubClient;

            return RetryUtil.executeWithRetry(new Callable<PutLogsResponse>() {
                public PutLogsResponse call() throws LogException{
                    return client.PutLogs(request);
                }
            }, 3, 1000L, false);
        }

        private String getShardHashKey(String hashKey, List<Shard> shards) {
            for (Shard shard : shards) {
                if (hashKey.compareTo(shard.getExclusiveEndKey()) < 0 && hashKey.compareTo(shard.getInclusiveBeginKey()) >= 0) {
                    return shard.getInclusiveBeginKey();
                }
            }
            return shards.get(0).getInclusiveBeginKey();
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
        }
    }

    /**
     * 日志打印控制
     *
     * @param logger
     * @param message
     */
    public static void info(Logger logger, String message) {
        if (logger.isInfoEnabled()) {
            logger.info(message);
        }
    }
}
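In hash-key mode, getShardHashKey routes each record's md5 to the shard whose [InclusiveBeginKey, ExclusiveEndKey) range contains it. A standalone sketch of that routing (not part of the commit; the shard ranges and md5 value are hypothetical):

import java.util.Arrays;
import java.util.List;

public class ShardRouteDemo {
    // Mirrors getShardHashKey: pick the shard whose [begin, end) hash range contains the key.
    static String route(String md5Hex, List<String[]> shards) {
        for (String[] shard : shards) { // shard[0] = inclusive begin, shard[1] = exclusive end
            if (md5Hex.compareTo(shard[1]) < 0 && md5Hex.compareTo(shard[0]) >= 0) {
                return shard[0];
            }
        }
        return shards.get(0)[0]; // fall back to the first shard, as the writer does
    }

    public static void main(String[] args) {
        // Two hypothetical shards splitting the 128-bit md5 space in half.
        List<String[]> shards = Arrays.asList(
                new String[]{"00000000000000000000000000000000", "80000000000000000000000000000000"},
                new String[]{"80000000000000000000000000000000", "ffffffffffffffffffffffffffffffff"});
        System.out.println(route("9a0364b9e99bb480dd25e1f0284c8555", shards)); // second shard's begin key
    }
}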
Some files were not shown because too many files have changed in this diff.