DataX 2209: add plugins datahubreader/datahubwriter/loghubreader/loghubwriter/starrocksreader; update plugins odpsreader/oceanbasev10reader/oceanbasev10writer/elasticsearchwriter/mysqlreader/mysqlwriter

dingxiaobo 2022-09-22 16:11:34 +08:00
parent ced5a454b9
commit 874a256a03
133 changed files with 5505 additions and 2537 deletions

View File

@ -25,7 +25,7 @@ DataX本身作为数据同步框架将不同数据源的同步抽象为从源
# Quick Start
##### Download [DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/20220530/datax.tar.gz)
##### Download [DataX下载地址](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202209/datax.tar.gz)
##### 请点击:[Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md)
@ -95,6 +95,9 @@ DataX目前已经有了比较全面的插件体系主流的RDBMS数据库、N
DataX plans to keep iterating on a monthly release cadence, and interested contributors are welcome to submit Pull Requests. Monthly release notes are listed below.
- [datax_v202209](https://github.com/alibaba/DataX/releases/tag/datax_v202209)
  - Connector updates (MaxCompute, Datahub, SLS, etc.), security vulnerability fixes, and general packaging updates
- [datax_v202205](https://github.com/alibaba/DataX/releases/tag/datax_v202205)
  - Connector updates (MaxCompute, Hologres, OSS, Tdengine, etc.), security vulnerability fixes, and general packaging updates

View File

@ -2,5 +2,5 @@
"name": "clickhousewriter",
"class": "com.alibaba.datax.plugin.writer.clickhousewriter.ClickhouseWriter",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql.",
"developer": "jiye.tjy"
"developer": "alibaba"
}

View File

@ -411,6 +411,15 @@ public class Configuration {
return list;
}
public <T> List<T> getListWithJson(final String path, Class<T> t) {
Object object = this.get(path, List.class);
if (null == object) {
return null;
}
return JSON.parseArray(JSON.toJSONString(object),t);
}
/**
* Look up a List object at the user-supplied JSON path; returns null if the object does not exist.
*/
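A minimal usage sketch for the new `getListWithJson` helper (the `Connection` POJO and the sample JSON fragment are illustrative, not part of this commit), assuming fastjson can bind the POJO's public fields:

```java
import com.alibaba.datax.common.util.Configuration;

import java.util.List;

public class GetListWithJsonDemo {

    // Hypothetical POJO matching the objects in the sample JSON below
    public static class Connection {
        public String jdbcUrl;
        public String table;
    }

    public static void main(String[] args) {
        // Illustrative config fragment; a real job would pass its reader/writer parameter block
        Configuration conf = Configuration.from(
                "{\"connection\":[{\"jdbcUrl\":\"jdbc:mysql://127.0.0.1:3306/demo\",\"table\":\"t1\"}]}");
        // getListWithJson serializes the addressed list back to JSON and deserializes it into typed elements
        List<Connection> connections = conf.getListWithJson("connection", Connection.class);
        System.out.println(connections.size() + " connection(s), first table: " + connections.get(0).table);
    }
}
```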

View File

@ -3,6 +3,8 @@ package com.alibaba.datax.common.util;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;
@ -82,4 +84,20 @@ public class StrUtil {
return s.substring(0, headLength) + "..." + s.substring(s.length() - tailLength);
}
public static String getMd5(String plainText) {
try {
StringBuilder builder = new StringBuilder();
for (byte b : MessageDigest.getInstance("MD5").digest(plainText.getBytes())) {
int i = b & 0xff;
if (i < 0x10) {
builder.append('0');
}
builder.append(Integer.toHexString(i));
}
return builder.toString();
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
}
}
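A quick, hedged sketch of calling the new `getMd5` helper; the input string is made up:

```java
import com.alibaba.datax.common.util.StrUtil;

public class Md5Demo {
    public static void main(String[] args) {
        // getMd5 returns a 32-character lowercase hex digest of the input
        String digest = StrUtil.getMd5("datax-demo-value");
        System.out.println(digest.length()); // 32
        System.out.println(digest);
    }
}
```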

datahubreader/pom.xml Normal file
View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>datahubreader</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>com.aliyun.datahub</groupId>
<artifactId>aliyun-sdk-datahub</artifactId>
<version>2.21.6-public</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,34 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
</includes>
<outputDirectory>plugin/reader/datahubreader</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>datahubreader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/datahubreader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/datahubreader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,8 @@
package com.alibaba.datax.plugin.reader.datahubreader;
public class Constant {
public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
public static String DATE_FORMAT = "yyyyMMdd";
}

View File

@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.reader.datahubreader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.Account;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
import org.apache.commons.lang3.StringUtils;
public class DatahubClientHelper {
public static DatahubClient getDatahubClient(Configuration jobConfig) {
String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
Account account = new AliyunAccount(accessId, accessKey);
// Whether to enable binary transport; supported by the server since version 2.12
boolean enableBinary = jobConfig.getBool("enableBinary", false);
DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
// HttpConfig is optional; defaults are used when it is not set
// For reading/writing data, enabling LZ4 compression for network transport is recommended
HttpConfig httpConfig = null;
String httpConfigStr = jobConfig.getString("httpConfig");
if (StringUtils.isNotBlank(httpConfigStr)) {
httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
});
}
DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
if (null != httpConfig) {
builder.setHttpConfig(httpConfig);
}
DatahubClient datahubClient = builder.build();
return datahubClient;
}
}
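A hedged sketch of wiring this helper up outside the plugin (endpoint and credentials are placeholders; the optional `httpConfig` and `enableBinary` settings are simply omitted here):

```java
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.datahubreader.DatahubClientHelper;
import com.aliyun.datahub.client.DatahubClient;

public class DatahubClientDemo {
    public static void main(String[] args) {
        // Placeholder endpoint and credentials; enableBinary defaults to false when omitted
        Configuration jobConfig = Configuration.from(
                "{\"endpoint\":\"<datahub-endpoint>\","
                + "\"accessId\":\"<your-access-id>\","
                + "\"accessKey\":\"<your-access-key>\"}");
        DatahubClient client = DatahubClientHelper.getDatahubClient(jobConfig);
        // The client is then used for getTopic/listShard/getCursor/getRecords calls, as in DatahubReader
        System.out.println("client created: " + (client != null));
    }
}
```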

View File

@ -0,0 +1,292 @@
package com.alibaba.datax.plugin.reader.datahubreader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import com.aliyun.datahub.client.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.aliyun.datahub.client.DatahubClient;
public class DatahubReader extends Reader {
public static class Job extends Reader.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Configuration originalConfig;
private Long beginTimestampMillis;
private Long endTimestampMillis;
DatahubClient datahubClient;
@Override
public void init() {
LOG.info("datahub reader job init begin ...");
this.originalConfig = super.getPluginJobConf();
validateParameter(originalConfig);
this.datahubClient = DatahubClientHelper.getDatahubClient(this.originalConfig);
LOG.info("datahub reader job init end.");
}
private void validateParameter(Configuration conf){
conf.getNecessaryValue(Key.ENDPOINT,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESSKEYID,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESSKEYSECRET,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.PROJECT,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.TOPIC,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.COLUMN,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.BEGINDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ENDDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
int batchSize = this.originalConfig.getInt(Key.BATCHSIZE, 1024);
if (batchSize > 10000) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid batchSize[" + batchSize + "] value (0,10000]!");
}
String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
if (beginDateTime != null) {
try {
beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(beginDateTime);
} catch (ParseException e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss]!");
}
}
if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
}
String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
if (endDateTime != null) {
try {
endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(endDateTime);
} catch (ParseException e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss]!");
}
}
if (endTimestampMillis != null && endTimestampMillis <= 0) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid endTimestampMillis[" + endTimestampMillis + "]!");
}
if (beginTimestampMillis != null && endTimestampMillis != null
&& endTimestampMillis <= beginTimestampMillis) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
}
}
@Override
public void prepare() {
// create datahub client
String project = originalConfig.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
String topic = originalConfig.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
RecordType recordType = null;
try {
DatahubClient client = DatahubClientHelper.getDatahubClient(this.originalConfig);
GetTopicResult getTopicResult = client.getTopic(project, topic);
recordType = getTopicResult.getRecordType();
} catch (Exception e) {
LOG.warn("get topic type error: {}", e.getMessage());
}
if (null != recordType) {
if (recordType == RecordType.BLOB) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"DatahubReader only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
}
}
}
@Override
public void destroy() {
}
@Override
public List<Configuration> split(int adviceNumber) {
LOG.info("split() begin...");
List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
String project = this.originalConfig.getString(Key.PROJECT);
String topic = this.originalConfig.getString(Key.TOPIC);
List<ShardEntry> shardEntrys = DatahubReaderUtils.getShardsWithRetry(this.datahubClient, project, topic);
if (shardEntrys == null || shardEntrys.isEmpty()) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Project [" + project + "] Topic [" + topic + "] has no shards, please check !");
}
for (ShardEntry shardEntry : shardEntrys) {
Configuration splitedConfig = this.originalConfig.clone();
splitedConfig.set(Key.SHARDID, shardEntry.getShardId());
readerSplitConfigs.add(splitedConfig);
}
LOG.info("split() ok and end...");
return readerSplitConfigs;
}
}
public static class Task extends Reader.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private Configuration taskConfig;
private String accessId;
private String accessKey;
private String endpoint;
private String project;
private String topic;
private String shardId;
private Long beginTimestampMillis;
private Long endTimestampMillis;
private int batchSize;
private List<String> columns;
private RecordSchema schema;
private String timeStampUnit;
DatahubClient datahubClient;
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
this.accessId = this.taskConfig.getString(Key.ACCESSKEYID);
this.accessKey = this.taskConfig.getString(Key.ACCESSKEYSECRET);
this.endpoint = this.taskConfig.getString(Key.ENDPOINT);
this.project = this.taskConfig.getString(Key.PROJECT);
this.topic = this.taskConfig.getString(Key.TOPIC);
this.shardId = this.taskConfig.getString(Key.SHARDID);
this.batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 1024);
this.timeStampUnit = this.taskConfig.getString(Key.TIMESTAMP_UNIT, "MICROSECOND");
try {
this.beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.BEGINDATETIME));
} catch (ParseException e) {
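// beginDateTime was already validated in Job.init(), so a parse failure is not expected here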
}
try {
this.endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.ENDDATETIME));
} catch (ParseException e) {
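// endDateTime was already validated in Job.init(), so a parse failure is not expected here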
}
this.columns = this.taskConfig.getList(Key.COLUMN, String.class);
this.datahubClient = DatahubClientHelper.getDatahubClient(this.taskConfig);
this.schema = DatahubReaderUtils.getDatahubSchemaWithRetry(this.datahubClient, this.project, topic);
LOG.info("init datahub reader task finished.project:{} topic:{} batchSize:{}", project, topic, batchSize);
}
@Override
public void destroy() {
}
@Override
public void startRead(RecordSender recordSender) {
LOG.info("read start");
String beginCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
this.topic, this.shardId, this.beginTimestampMillis);
String endCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
this.topic, this.shardId, this.endTimestampMillis);
if (beginCursor == null) {
LOG.info("Shard:{} has no data!", this.shardId);
return;
} else if (endCursor == null) {
endCursor = DatahubReaderUtils.getLatestCursorWithRetry(this.datahubClient, this.project,
this.topic, this.shardId);
}
String curCursor = beginCursor;
boolean exit = false;
while (true) {
GetRecordsResult result = DatahubReaderUtils.getRecordsResultWithRetry(this.datahubClient, this.project, this.topic,
this.shardId, this.batchSize, curCursor, this.schema);
List<RecordEntry> records = result.getRecords();
if (records.size() > 0) {
for (RecordEntry record : records) {
if (record.getSystemTime() >= this.endTimestampMillis) {
exit = true;
break;
}
HashMap<String, Column> dataMap = new HashMap<String, Column>();
List<Field> fields = ((TupleRecordData) record.getRecordData()).getRecordSchema().getFields();
for (int i = 0; i < fields.size(); i++) {
Field field = fields.get(i);
Column column = DatahubReaderUtils.getColumnFromField(record, field, this.timeStampUnit);
dataMap.put(field.getName(), column);
}
Record dataxRecord = recordSender.createRecord();
if (null != this.columns && 1 == this.columns.size()) {
String columnsInStr = columns.get(0).toString();
if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
for (int i = 0; i < fields.size(); i++) {
dataxRecord.addColumn(dataMap.get(fields.get(i).getName()));
}
} else {
if (dataMap.containsKey(columnsInStr)) {
dataxRecord.addColumn(dataMap.get(columnsInStr));
} else {
dataxRecord.addColumn(new StringColumn(null));
}
}
} else {
for (String col : this.columns) {
if (dataMap.containsKey(col)) {
dataxRecord.addColumn(dataMap.get(col));
} else {
dataxRecord.addColumn(new StringColumn(null));
}
}
}
recordSender.sendToWriter(dataxRecord);
}
} else {
break;
}
if (exit) {
break;
}
curCursor = result.getNextCursor();
}
LOG.info("end read datahub shard...");
}
}
}

View File

@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.reader.datahubreader;
import com.alibaba.datax.common.spi.ErrorCode;
public enum DatahubReaderErrorCode implements ErrorCode {
BAD_CONFIG_VALUE("DatahubReader-00", "The value you configured is invalid."),
LOG_HUB_ERROR("DatahubReader-01","Datahub exception"),
REQUIRE_VALUE("DatahubReader-02","Missing parameters"),
EMPTY_LOGSTORE_VALUE("DatahubReader-03","There is no shard under this LogStore");
private final String code;
private final String description;
private DatahubReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,200 @@
package com.alibaba.datax.plugin.reader.datahubreader;
import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.concurrent.Callable;
import com.alibaba.datax.common.element.*;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.model.*;
public class DatahubReaderUtils {
public static long getUnixTimeFromDateTime(String dateTime) throws ParseException {
try {
String format = Constant.DATETIME_FORMAT;
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
return simpleDateFormat.parse(dateTime).getTime();
} catch (ParseException ignored) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid DateTime[" + dateTime + "]!");
}
}
public static List<ShardEntry> getShardsWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
List<ShardEntry> shards = null;
try {
shards = RetryUtil.executeWithRetry(new Callable<List<ShardEntry>>() {
@Override
public List<ShardEntry> call() throws Exception {
ListShardResult listShardResult = datahubClient.listShard(project, topic);
return listShardResult.getShards();
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"get Shards error, please check ! detail error messsage: " + e.toString());
}
return shards;
}
public static String getCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
final String shardId, final long timestamp) {
String cursor;
try {
cursor = RetryUtil.executeWithRetry(new Callable<String>() {
@Override
public String call() throws Exception {
try {
return datahubClient.getCursor(project, topic, shardId, CursorType.SYSTEM_TIME, timestamp).getCursor();
} catch (InvalidParameterException e) {
if (e.getErrorMessage().indexOf("Time in seek request is out of range") >= 0) {
return null;
} else {
throw e;
}
}
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"get Cursor error, please check ! detail error messsage: " + e.toString());
}
return cursor;
}
public static String getLatestCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
final String shardId) {
String cursor;
try {
cursor = RetryUtil.executeWithRetry(new Callable<String>() {
@Override
public String call() throws Exception {
return datahubClient.getCursor(project, topic, shardId, CursorType.LATEST).getCursor();
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"get Cursor error, please check ! detail error messsage: " + e.toString());
}
return cursor;
}
public static RecordSchema getDatahubSchemaWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
RecordSchema schema;
try {
schema = RetryUtil.executeWithRetry(new Callable<RecordSchema>() {
@Override
public RecordSchema call() throws Exception {
return datahubClient.getTopic(project, topic).getRecordSchema();
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"get Topic Schema error, please check ! detail error messsage: " + e.toString());
}
return schema;
}
public static GetRecordsResult getRecordsResultWithRetry(final DatahubClient datahubClient, final String project,
final String topic, final String shardId, final int batchSize, final String cursor, final RecordSchema schema) {
GetRecordsResult result;
try {
result = RetryUtil.executeWithRetry(new Callable<GetRecordsResult>() {
@Override
public GetRecordsResult call() throws Exception {
return datahubClient.getRecords(project, topic, shardId, schema, cursor, batchSize);
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
"get Record Result error, please check ! detail error messsage: " + e.toString());
}
return result;
}
public static Column getColumnFromField(RecordEntry record, Field field, String timeStampUnit) {
Column col = null;
TupleRecordData o = (TupleRecordData) record.getRecordData();
switch (field.getType()) {
case SMALLINT:
Short shortValue = ((Short) o.getField(field.getName()));
col = new LongColumn(shortValue == null ? null: shortValue.longValue());
break;
case INTEGER:
col = new LongColumn((Integer) o.getField(field.getName()));
break;
case BIGINT: {
col = new LongColumn((Long) o.getField(field.getName()));
break;
}
case TINYINT: {
Byte byteValue = ((Byte) o.getField(field.getName()));
col = new LongColumn(byteValue == null ? null : byteValue.longValue());
break;
}
case BOOLEAN: {
col = new BoolColumn((Boolean) o.getField(field.getName()));
break;
}
case FLOAT:
col = new DoubleColumn((Float) o.getField(field.getName()));
break;
case DOUBLE: {
col = new DoubleColumn((Double) o.getField(field.getName()));
break;
}
case STRING: {
col = new StringColumn((String) o.getField(field.getName()));
break;
}
case DECIMAL: {
BigDecimal value = (BigDecimal) o.getField(field.getName());
col = new DoubleColumn(value == null ? null : value.doubleValue());
break;
}
case TIMESTAMP: {
Long value = (Long) o.getField(field.getName());
if ("MILLISECOND".equals(timeStampUnit)) {
// MILLISECOND: 13-digit precision, use new Date() directly
col = new DateColumn(value == null ? null : new Date(value));
}
else if ("SECOND".equals(timeStampUnit)){
col = new DateColumn(value == null ? null : new Date(value * 1000));
}
else {
// Default is MICROSECOND: 16-digit precision, kept consistent with the previous logic
col = new DateColumn(value == null ? null : new Date(value / 1000));
}
break;
}
default:
throw new RuntimeException("Unknown column type: " + field.getType());
}
return col;
}
}
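A small illustration of the datetime handling above: `getUnixTimeFromDateTime` expects `Constant.DATETIME_FORMAT` (`yyyyMMddHHmmss`), and the TIMESTAMP branch of `getColumnFromField` interprets the raw long according to `timeStampUnit`. The sample values below are illustrative only:

```java
import com.alibaba.datax.plugin.reader.datahubreader.DatahubReaderUtils;

import java.util.Date;

public class DatahubTimeDemo {
    public static void main(String[] args) throws Exception {
        // beginDateTime/endDateTime use "yyyyMMddHHmmss", parsed in the JVM default time zone
        long beginMillis = DatahubReaderUtils.getUnixTimeFromDateTime("20220922161134");
        System.out.println(beginMillis);

        // How a TIMESTAMP field value is converted for each supported timeStampUnit
        long micro = 1663834294123456L;
        System.out.println(new Date(micro / 1000));        // default: MICROSECOND
        System.out.println(new Date(1663834294123L));      // MILLISECOND
        System.out.println(new Date(1663834294L * 1000));  // SECOND
    }
}
```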

View File

@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.reader.datahubreader;
import com.alibaba.datax.common.spi.ErrorCode;
import com.alibaba.datax.common.util.MessageSource;
public enum DatahubWriterErrorCode implements ErrorCode {
MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
;
private final String code;
private final String description;
private DatahubWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.reader.datahubreader;
public final class Key {
/**
* Configuration keys used by this plugin that the plugin user must provide.
*/
public static final String ENDPOINT = "endpoint";
public static final String ACCESSKEYID = "accessId";
public static final String ACCESSKEYSECRET = "accessKey";
public static final String PROJECT = "project";
public static final String TOPIC = "topic";
public static final String BEGINDATETIME = "beginDateTime";
public static final String ENDDATETIME = "endDateTime";
public static final String BATCHSIZE = "batchSize";
public static final String COLUMN = "column";
public static final String SHARDID = "shardId";
public static final String CONFIG_KEY_ENDPOINT = "endpoint";
public static final String CONFIG_KEY_ACCESS_ID = "accessId";
public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
public static final String TIMESTAMP_UNIT = "timeStampUnit";
}

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.

View File

@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.

View File

@ -0,0 +1,14 @@
{
"name": "datahubreader",
"parameter": {
"endpoint":"",
"accessId": "",
"accessKey": "",
"project": "",
"topic": "",
"beginDateTime": "20180913121019",
"endDateTime": "20180913121119",
"batchSize": 1024,
"column": []
}
}

View File

@ -0,0 +1,6 @@
{
"name": "datahubreader",
"class": "com.alibaba.datax.plugin.reader.datahubreader.DatahubReader",
"description": "datahub reader",
"developer": "alibaba"
}

datahubwriter/pom.xml Normal file
View File

@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>datahubwriter</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>com.aliyun.datahub</groupId>
<artifactId>aliyun-sdk-datahub</artifactId>
<version>2.21.6-public</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,34 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
</includes>
<outputDirectory>plugin/writer/datahubwriter</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>datahubwriter-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/datahubwriter</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/datahubwriter/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,43 @@
package com.alibaba.datax.plugin.writer.datahubwriter;
import org.apache.commons.lang3.StringUtils;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.Account;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
public class DatahubClientHelper {
public static DatahubClient getDatahubClient(Configuration jobConfig) {
String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
Account account = new AliyunAccount(accessId, accessKey);
// Whether to enable binary transport; supported by the server since version 2.12
boolean enableBinary = jobConfig.getBool("enableBinary", false);
DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
// HttpConfig is optional; defaults are used when it is not set
// For reading/writing data, enabling LZ4 compression for network transport is recommended
HttpConfig httpConfig = null;
String httpConfigStr = jobConfig.getString("httpConfig");
if (StringUtils.isNotBlank(httpConfigStr)) {
httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
});
}
DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
if (null != httpConfig) {
builder.setHttpConfig(httpConfig);
}
DatahubClient datahubClient = builder.build();
return datahubClient;
}
}

View File

@ -0,0 +1,355 @@
package com.alibaba.datax.plugin.writer.datahubwriter;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSON;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.model.FieldType;
import com.aliyun.datahub.client.model.GetTopicResult;
import com.aliyun.datahub.client.model.ListShardResult;
import com.aliyun.datahub.client.model.PutErrorEntry;
import com.aliyun.datahub.client.model.PutRecordsResult;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.RecordSchema;
import com.aliyun.datahub.client.model.RecordType;
import com.aliyun.datahub.client.model.ShardEntry;
import com.aliyun.datahub.client.model.ShardState;
import com.aliyun.datahub.client.model.TupleRecordData;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
public class DatahubWriter extends Writer {
/**
* Methods on the Job run only once; the framework starts multiple Task threads that run the Task methods in parallel.
* <p/>
* The overall Writer execution flow is:
* <pre>
* Job: init --> prepare --> split
*
* Task: init --> prepare --> startWrite --> post --> destroy
* Task: init --> prepare --> startWrite --> post --> destroy
*
* Job: post --> destroy
* </pre>
*/
public static class Job extends Writer.Job {
private static final Logger LOG = LoggerFactory
.getLogger(Job.class);
private Configuration jobConfig = null;
@Override
public void init() {
this.jobConfig = super.getPluginJobConf();
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
}
@Override
public void prepare() {
String project = jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
String topic = jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC,
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
RecordType recordType = null;
DatahubClient client = DatahubClientHelper.getDatahubClient(this.jobConfig);
try {
GetTopicResult getTopicResult = client.getTopic(project, topic);
recordType = getTopicResult.getRecordType();
} catch (Exception e) {
LOG.warn("get topic type error: {}", e.getMessage());
}
if (null != recordType) {
if (recordType == RecordType.BLOB) {
throw DataXException.asDataXException(DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
"DatahubWriter only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
}
}
}
@Override
public List<Configuration> split(int mandatoryNumber) {
List<Configuration> configs = new ArrayList<Configuration>();
for (int i = 0; i < mandatoryNumber; ++i) {
configs.add(jobConfig.clone());
}
return configs;
}
@Override
public void post() {}
@Override
public void destroy() {}
}
public static class Task extends Writer.Task {
private static final Logger LOG = LoggerFactory
.getLogger(Task.class);
private static final List<String> FATAL_ERRORS_DEFAULT = Arrays.asList(
"InvalidParameterM",
"MalformedRecord",
"INVALID_SHARDID",
"NoSuchTopic",
"NoSuchShard"
);
private Configuration taskConfig;
private DatahubClient client;
private String project;
private String topic;
private List<String> shards;
private int maxCommitSize;
private int maxRetryCount;
private RecordSchema schema;
private long retryInterval;
private Random random;
private List<String> column;
private List<Integer> columnIndex;
private boolean enableColumnConfig;
private List<String> fatalErrors;
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
project = taskConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
topic = taskConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
maxCommitSize = taskConfig.getInt(Key.CONFIG_KEY_MAX_COMMIT_SIZE, 1024*1024);
maxRetryCount = taskConfig.getInt(Key.CONFIG_KEY_MAX_RETRY_COUNT, 500);
this.retryInterval = taskConfig.getInt(Key.RETRY_INTERVAL, 650);
this.random = new Random();
this.column = this.taskConfig.getList(Key.CONFIG_KEY_COLUMN, String.class);
// ["*"]
if (null != this.column && 1 == this.column.size()) {
if (StringUtils.equals("*", this.column.get(0))) {
this.column = null;
}
}
this.columnIndex = new ArrayList<Integer>();
// keep this switch as a safeguard
this.enableColumnConfig = this.taskConfig.getBool("enableColumnConfig", true);
this.fatalErrors = this.taskConfig.getList("fatalErrors", Task.FATAL_ERRORS_DEFAULT, String.class);
this.client = DatahubClientHelper.getDatahubClient(this.taskConfig);
}
@Override
public void prepare() {
final String shardIdConfig = this.taskConfig.getString(Key.CONFIG_KEY_SHARD_ID);
this.shards = new ArrayList<String>();
try {
RetryUtil.executeWithRetry(new Callable<Void>() {
@Override
public Void call() throws Exception {
ListShardResult result = client.listShard(project, topic);
if (StringUtils.isNotBlank(shardIdConfig)) {
shards.add(shardIdConfig);
} else {
for (ShardEntry shard : result.getShards()) {
if (shard.getState() == ShardState.ACTIVE || shard.getState() == ShardState.OPENING) {
shards.add(shard.getShardId());
}
}
}
schema = client.getTopic(project, topic).getRecordSchema();
return null;
}
}, DataXCaseEnvUtil.getRetryTimes(5), DataXCaseEnvUtil.getRetryInterval(10000L), DataXCaseEnvUtil.getRetryExponential(false));
} catch (Exception e) {
throw DataXException.asDataXException(DatahubWriterErrorCode.GET_TOPOIC_INFO_FAIL,
"get topic info failed", e);
}
LOG.info("datahub topic {} shard to write: {}", this.topic, JSON.toJSONString(this.shards));
LOG.info("datahub topic {} has schema: {}", this.topic, JSON.toJSONString(this.schema));
// Based on the schema order and the user-configured column list, compute the write order to DataHub so that column reordering is supported.
// All subsequent writes to DataHub use the columnIndex ordering.
int totalSize = this.schema.getFields().size();
if (null != this.column && !this.column.isEmpty() && this.enableColumnConfig) {
for (String eachCol : this.column) {
int indexFound = -1;
for (int i = 0; i < totalSize; i++) {
// note: column names are matched case-insensitively
if (StringUtils.equalsIgnoreCase(eachCol, this.schema.getField(i).getName())) {
indexFound = i;
break;
}
}
if (indexFound >= 0) {
this.columnIndex.add(indexFound);
} else {
throw DataXException.asDataXException(DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
String.format("can not find column %s in datahub topic %s", eachCol, this.topic));
}
}
} else {
for (int i = 0; i < totalSize; i++) {
this.columnIndex.add(i);
}
}
}
@Override
public void startWrite(RecordReceiver recordReceiver) {
Record record;
List<RecordEntry> records = new ArrayList<RecordEntry>();
String shardId = null;
if (1 == this.shards.size()) {
shardId = shards.get(0);
} else {
shardId = shards.get(this.random.nextInt(shards.size()));
}
int commitSize = 0;
try {
while ((record = recordReceiver.getFromReader()) != null) {
RecordEntry dhRecord = convertRecord(record, shardId);
if (dhRecord != null) {
records.add(dhRecord);
}
commitSize += record.getByteSize();
if (commitSize >= maxCommitSize) {
commit(records);
records.clear();
commitSize = 0;
if (1 == this.shards.size()) {
shardId = shards.get(0);
} else {
shardId = shards.get(this.random.nextInt(shards.size()));
}
}
}
if (commitSize > 0) {
commit(records);
}
} catch (Exception e) {
throw DataXException.asDataXException(
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, e);
}
}
@Override
public void post() {}
@Override
public void destroy() {}
private void commit(List<RecordEntry> records) throws InterruptedException {
PutRecordsResult result = client.putRecords(project, topic, records);
if (result.getFailedRecordCount() > 0) {
for (int i = 0; i < maxRetryCount; ++i) {
boolean limitExceededMessagePrinted = false;
for (PutErrorEntry error : result.getPutErrorEntries()) {
// For LimitExceeded errors, do not log once per failed record
if (StringUtils.equalsIgnoreCase("LimitExceeded", error.getErrorcode())) {
if (!limitExceededMessagePrinted) {
LOG.warn("write record error, request id: {}, error code: {}, error message: {}",
result.getRequestId(), error.getErrorcode(), error.getMessage());
limitExceededMessagePrinted = true;
}
} else {
LOG.error("write record error, request id: {}, error code: {}, error message: {}",
result.getRequestId(), error.getErrorcode(), error.getMessage());
}
if (this.fatalErrors.contains(error.getErrorcode())) {
throw DataXException.asDataXException(
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
error.getMessage());
}
}
if (this.retryInterval >= 0) {
Thread.sleep(this.retryInterval);
} else {
Thread.sleep(new Random().nextInt(700) + 300);
}
result = client.putRecords(project, topic, result.getFailedRecords());
if (result.getFailedRecordCount() == 0) {
return;
}
}
throw DataXException.asDataXException(
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
"write datahub failed");
}
}
private RecordEntry convertRecord(Record dxRecord, String shardId) {
try {
RecordEntry dhRecord = new RecordEntry();
dhRecord.setShardId(shardId);
TupleRecordData data = new TupleRecordData(this.schema);
for (int i = 0; i < this.columnIndex.size(); ++i) {
int orderInSchema = this.columnIndex.get(i);
FieldType type = this.schema.getField(orderInSchema).getType();
Column column = dxRecord.getColumn(i);
switch (type) {
case BIGINT:
data.setField(orderInSchema, column.asLong());
break;
case DOUBLE:
data.setField(orderInSchema, column.asDouble());
break;
case STRING:
data.setField(orderInSchema, column.asString());
break;
case BOOLEAN:
data.setField(orderInSchema, column.asBoolean());
break;
case TIMESTAMP:
if (null == column.asDate()) {
data.setField(orderInSchema, null);
} else {
data.setField(orderInSchema, column.asDate().getTime() * 1000);
}
break;
case DECIMAL:
// warn
data.setField(orderInSchema, column.asBigDecimal());
break;
case INTEGER:
data.setField(orderInSchema, column.asLong());
break;
case FLOAT:
data.setField(orderInSchema, column.asDouble());
break;
case TINYINT:
data.setField(orderInSchema, column.asLong());
break;
case SMALLINT:
data.setField(orderInSchema, column.asLong());
break;
default:
throw DataXException.asDataXException(
DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
String.format("does not support type: %s", type));
}
}
dhRecord.setRecordData(data);
return dhRecord;
} catch (Exception e) {
super.getTaskPluginCollector().collectDirtyRecord(dxRecord, e, "convert record failed");
}
return null;
}
}
}
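To illustrate the column-mapping logic in `Task.prepare()` above: the configured `column` list is resolved case-insensitively against the topic schema, and the resulting `columnIndex` decides where each DataX column lands in the `TupleRecordData`. A standalone sketch with made-up field names:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ColumnIndexDemo {
    public static void main(String[] args) {
        // Hypothetical topic schema order and user-configured column order
        List<String> schemaFields = Arrays.asList("id", "name", "gmt_create");
        List<String> configuredColumns = Arrays.asList("gmt_create", "id");

        // Same resolution as DatahubWriter.Task.prepare(): find each configured
        // column's position in the schema (case-insensitive) and record that index
        List<Integer> columnIndex = new ArrayList<Integer>();
        for (String col : configuredColumns) {
            int found = -1;
            for (int i = 0; i < schemaFields.size(); i++) {
                if (schemaFields.get(i).equalsIgnoreCase(col)) {
                    found = i;
                    break;
                }
            }
            if (found < 0) {
                throw new IllegalArgumentException("column not in schema: " + col);
            }
            columnIndex.add(found);
        }
        // DataX record column 0 -> schema position 2, column 1 -> schema position 0
        System.out.println(columnIndex); // [2, 0]
    }
}
```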

View File

@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.writer.datahubwriter;
import com.alibaba.datax.common.spi.ErrorCode;
import com.alibaba.datax.common.util.MessageSource;
public enum DatahubWriterErrorCode implements ErrorCode {
MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
;
private final String code;
private final String description;
private DatahubWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,26 @@
package com.alibaba.datax.plugin.writer.datahubwriter;
public final class Key {
/**
* Configuration keys used by this plugin that the plugin user must provide.
*/
public static final String CONFIG_KEY_ENDPOINT = "endpoint";
public static final String CONFIG_KEY_ACCESS_ID = "accessId";
public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
public static final String CONFIG_KEY_PROJECT = "project";
public static final String CONFIG_KEY_TOPIC = "topic";
public static final String CONFIG_KEY_WRITE_MODE = "mode";
public static final String CONFIG_KEY_SHARD_ID = "shardId";
public static final String CONFIG_KEY_MAX_COMMIT_SIZE = "maxCommitSize";
public static final String CONFIG_KEY_MAX_RETRY_COUNT = "maxRetryCount";
public static final String CONFIG_VALUE_SEQUENCE_MODE = "sequence";
public static final String CONFIG_VALUE_RANDOM_MODE = "random";
public final static String MAX_RETRY_TIME = "maxRetryTime";
public final static String RETRY_INTERVAL = "retryInterval";
public final static String CONFIG_KEY_COLUMN = "column";
}

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,5 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.

View File

@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.

View File

@ -0,0 +1,9 @@
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
errorcode.invalid_config_value=您的參數配寘錯誤.
errorcode.get_topic_info_fail=獲取shard清單失敗.
errorcode.write_datahub_fail=寫數據失敗.
errorcode.schema_not_match=數據格式錯誤.

View File

@ -0,0 +1,14 @@
{
"name": "datahubwriter",
"parameter": {
"endpoint":"",
"accessId": "",
"accessKey": "",
"project": "",
"topic": "",
"mode": "random",
"shardId": "",
"maxCommitSize": 524288,
"maxRetryCount": 500
}
}

View File

@ -0,0 +1,6 @@
{
"name": "datahubwriter",
"class": "com.alibaba.datax.plugin.writer.datahubwriter.DatahubWriter",
"description": "datahub writer",
"developer": "alibaba"
}

View File

@ -35,12 +35,12 @@
<dependency>
<groupId>io.searchbox</groupId>
<artifactId>jest-common</artifactId>
<version>2.4.0</version>
<version>6.3.1</version>
</dependency>
<dependency>
<groupId>io.searchbox</groupId>
<artifactId>jest</artifactId>
<version>2.4.0</version>
<version>6.3.1</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>

View File

@ -1,236 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.searchbox.action.Action;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.JestResult;
import io.searchbox.client.config.HttpClientConfig;
import io.searchbox.client.config.HttpClientConfig.Builder;
import io.searchbox.core.Bulk;
import io.searchbox.indices.CreateIndex;
import io.searchbox.indices.DeleteIndex;
import io.searchbox.indices.IndicesExists;
import io.searchbox.indices.aliases.*;
import io.searchbox.indices.mapping.PutMapping;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Created by xiongfeng.bxf on 17/2/8.
*/
public class ESClient {
private static final Logger log = LoggerFactory.getLogger(ESClient.class);
private JestClient jestClient;
public JestClient getClient() {
return jestClient;
}
public void createClient(String endpoint,
String user,
String passwd,
boolean multiThread,
int readTimeout,
boolean compression,
boolean discovery) {
JestClientFactory factory = new JestClientFactory();
Builder httpClientConfig = new HttpClientConfig
.Builder(endpoint)
.setPreemptiveAuth(new HttpHost(endpoint))
.multiThreaded(multiThread)
.connTimeout(30000)
.readTimeout(readTimeout)
.maxTotalConnection(200)
.requestCompressionEnabled(compression)
.discoveryEnabled(discovery)
.discoveryFrequency(5l, TimeUnit.MINUTES);
if (!("".equals(user) || "".equals(passwd))) {
httpClientConfig.defaultCredentials(user, passwd);
}
factory.setHttpClientConfig(httpClientConfig.build());
jestClient = factory.getObject();
}
public boolean indicesExists(String indexName) throws Exception {
boolean isIndicesExists = false;
JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build());
if (rst.isSucceeded()) {
isIndicesExists = true;
} else {
switch (rst.getResponseCode()) {
case 404:
isIndicesExists = false;
break;
case 401:
// unauthorized access
default:
log.warn(rst.getErrorMessage());
break;
}
}
return isIndicesExists;
}
public boolean deleteIndex(String indexName) throws Exception {
log.info("delete index " + indexName);
if (indicesExists(indexName)) {
JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
if (!rst.isSucceeded()) {
return false;
}
} else {
log.info("index cannot found, skip delete " + indexName);
}
return true;
}
public boolean createIndex(String indexName, String typeName,
Object mappings, String settings, boolean dynamic) throws Exception {
JestResult rst = null;
if (!indicesExists(indexName)) {
log.info("create index " + indexName);
rst = jestClient.execute(
new CreateIndex.Builder(indexName)
.settings(settings)
.setParameter("master_timeout", "5m")
.build()
);
//index_already_exists_exception
if (!rst.isSucceeded()) {
if (getStatus(rst) == 400) {
log.info(String.format("index [%s] already exists", indexName));
return true;
} else {
log.error(rst.getErrorMessage());
return false;
}
} else {
log.info(String.format("create [%s] index success", indexName));
}
}
int idx = 0;
while (idx < 5) {
if (indicesExists(indexName)) {
break;
}
Thread.sleep(2000);
idx ++;
}
if (idx >= 5) {
return false;
}
if (dynamic) {
log.info("ignore mappings");
return true;
}
log.info("create mappings for " + indexName + " " + mappings);
rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings)
.setParameter("master_timeout", "5m").build());
if (!rst.isSucceeded()) {
if (getStatus(rst) == 400) {
log.info(String.format("index [%s] mappings already exists", indexName));
} else {
log.error(rst.getErrorMessage());
return false;
}
} else {
log.info(String.format("index [%s] put mappings success", indexName));
}
return true;
}
public JestResult execute(Action<JestResult> clientRequest) throws Exception {
JestResult rst = null;
rst = jestClient.execute(clientRequest);
if (!rst.isSucceeded()) {
//log.warn(rst.getErrorMessage());
}
return rst;
}
public Integer getStatus(JestResult rst) {
JsonObject jsonObject = rst.getJsonObject();
if (jsonObject.has("status")) {
return jsonObject.get("status").getAsInt();
}
return 600;
}
public boolean isBulkResult(JestResult rst) {
JsonObject jsonObject = rst.getJsonObject();
return jsonObject.has("items");
}
public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
JestResult rst = jestClient.execute(getAliases);
log.info(rst.getJsonString());
List<AliasMapping> list = new ArrayList<AliasMapping>();
if (rst.isSucceeded()) {
JsonParser jp = new JsonParser();
JsonObject jo = (JsonObject)jp.parse(rst.getJsonString());
for(Map.Entry<String, JsonElement> entry : jo.entrySet()){
String tindex = entry.getKey();
if (indexname.equals(tindex)) {
continue;
}
AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
String s = new Gson().toJson(m.getData());
log.info(s);
if (needClean) {
list.add(m);
}
}
}
ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build();
rst = jestClient.execute(modifyAliases);
if (!rst.isSucceeded()) {
log.error(rst.getErrorMessage());
return false;
}
return true;
}
public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception {
// es_rejected_execution_exception
// illegal_argument_exception
// cluster_block_exception
JestResult rst = null;
rst = jestClient.execute(bulk.build());
if (!rst.isSucceeded()) {
log.warn(rst.getErrorMessage());
}
return rst;
}
/**
* 关闭JestClient客户端
*
*/
public void closeJestClient() {
if (jestClient != null) {
jestClient.shutdownClient();
}
}
}

View File

@ -1,65 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
/**
* Created by xiongfeng.bxf on 17/3/2.
*/
public class ESColumn {
private String name;//: "appkey",
private String type;//": "TEXT",
private String timezone;
private String format;
private Boolean array;
public void setName(String name) {
this.name = name;
}
public void setType(String type) {
this.type = type;
}
public void setTimeZone(String timezone) {
this.timezone = timezone;
}
public void setFormat(String format) {
this.format = format;
}
public String getName() {
return name;
}
public String getType() {
return type;
}
public String getTimezone() {
return timezone;
}
public String getFormat() {
return format;
}
public void setTimezone(String timezone) {
this.timezone = timezone;
}
public Boolean isArray() {
return array;
}
public void setArray(Boolean array) {
this.array = array;
}
public Boolean getArray() {
return array;
}
}

View File

@ -1,460 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import io.searchbox.client.JestResult;
import io.searchbox.core.Bulk;
import io.searchbox.core.BulkResult;
import io.searchbox.core.Index;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.*;
import java.util.concurrent.Callable;
public class ESWriter extends Writer {
private final static String WRITE_COLUMNS = "write_columns";
public static class Job extends Writer.Job {
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Configuration conf = null;
@Override
public void init() {
this.conf = super.getPluginJobConf();
}
@Override
public void prepare() {
/**
* 注意此方法仅执行一次
* 最佳实践如果 Job 中有需要进行数据同步之前的处理可以在此处完成如果没有必要则可以直接去掉
*/
ESClient esClient = new ESClient();
esClient.createClient(Key.getEndpoint(conf),
Key.getAccessID(conf),
Key.getAccessKey(conf),
false,
300000,
false,
false);
String indexName = Key.getIndexName(conf);
String typeName = Key.getTypeName(conf);
boolean dynamic = Key.getDynamic(conf);
String mappings = genMappings(typeName);
String settings = JSONObject.toJSONString(
Key.getSettings(conf)
);
log.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings));
try {
boolean isIndicesExists = esClient.indicesExists(indexName);
if (Key.isCleanup(this.conf) && isIndicesExists) {
esClient.deleteIndex(indexName);
}
// 强制创建,内部自动忽略已存在的情况
if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) {
throw new IOException("create index or mapping failed");
}
} catch (Exception ex) {
throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString());
}
esClient.closeJestClient();
}
private String genMappings(String typeName) {
String mappings = null;
Map<String, Object> propMap = new HashMap<String, Object>();
List<ESColumn> columnList = new ArrayList<ESColumn>();
List column = conf.getList("column");
if (column != null) {
for (Object col : column) {
JSONObject jo = JSONObject.parseObject(col.toString());
String colName = jo.getString("name");
String colTypeStr = jo.getString("type");
if (colTypeStr == null) {
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
}
ESFieldType colType = ESFieldType.getESFieldType(colTypeStr);
if (colType == null) {
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
}
ESColumn columnItem = new ESColumn();
if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) {
// 兼容已有版本
colType = ESFieldType.ID;
colTypeStr = "id";
}
columnItem.setName(colName);
columnItem.setType(colTypeStr);
if (colType == ESFieldType.ID) {
columnList.add(columnItem);
// 如果是id,则properties为空
continue;
}
Boolean array = jo.getBoolean("array");
if (array != null) {
columnItem.setArray(array);
}
Map<String, Object> field = new HashMap<String, Object>();
field.put("type", colTypeStr);
//https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
// https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
field.put("doc_values", jo.getBoolean("doc_values"));
field.put("ignore_above", jo.getInteger("ignore_above"));
field.put("index", jo.getBoolean("index"));
switch (colType) {
case STRING:
// 兼容string类型,ES5之前版本
break;
case KEYWORD:
// https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
case TEXT:
field.put("analyzer", jo.getString("analyzer"));
// 优化disk使用,也同步会提高index性能
// https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
field.put("norms", jo.getBoolean("norms"));
field.put("index_options", jo.getBoolean("index_options"));
break;
case DATE:
columnItem.setTimeZone(jo.getString("timezone"));
columnItem.setFormat(jo.getString("format"));
// 后面时间会处理为带时区的标准时间,所以不需要给ES指定格式
/*
if (jo.getString("format") != null) {
field.put("format", jo.getString("format"));
} else {
//field.put("format", "strict_date_optional_time||epoch_millis||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd");
}
*/
break;
case GEO_SHAPE:
field.put("tree", jo.getString("tree"));
field.put("precision", jo.getString("precision"));
default:
break;
}
propMap.put(colName, field);
columnList.add(columnItem);
}
}
conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList));
log.info(JSON.toJSONString(columnList));
Map<String, Object> rootMappings = new HashMap<String, Object>();
Map<String, Object> typeMappings = new HashMap<String, Object>();
typeMappings.put("properties", propMap);
rootMappings.put(typeName, typeMappings);
mappings = JSON.toJSONString(rootMappings);
if (mappings == null || "".equals(mappings)) {
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
}
return mappings;
}
@Override
public List<Configuration> split(int mandatoryNumber) {
List<Configuration> configurations = new ArrayList<Configuration>(mandatoryNumber);
for (int i = 0; i < mandatoryNumber; i++) {
configurations.add(conf);
}
return configurations;
}
@Override
public void post() {
ESClient esClient = new ESClient();
esClient.createClient(Key.getEndpoint(conf),
Key.getAccessID(conf),
Key.getAccessKey(conf),
false,
300000,
false,
false);
String alias = Key.getAlias(conf);
if (!"".equals(alias)) {
log.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf)));
try {
esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
} catch (IOException e) {
throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e);
}
}
}
@Override
public void destroy() {
}
}
public static class Task extends Writer.Task {
private static final Logger log = LoggerFactory.getLogger(Job.class);
private Configuration conf;
ESClient esClient = null;
private List<ESFieldType> typeList;
private List<ESColumn> columnList;
private int trySize;
private int batchSize;
private String index;
private String type;
private String splitter;
@Override
public void init() {
this.conf = super.getPluginJobConf();
index = Key.getIndexName(conf);
type = Key.getTypeName(conf);
trySize = Key.getTrySize(conf);
batchSize = Key.getBatchSize(conf);
splitter = Key.getSplitter(conf);
columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference<List<ESColumn>>() {
});
typeList = new ArrayList<ESFieldType>();
for (ESColumn col : columnList) {
typeList.add(ESFieldType.getESFieldType(col.getType()));
}
esClient = new ESClient();
}
@Override
public void prepare() {
esClient.createClient(Key.getEndpoint(conf),
Key.getAccessID(conf),
Key.getAccessKey(conf),
Key.isMultiThread(conf),
Key.getTimeout(conf),
Key.isCompression(conf),
Key.isDiscovery(conf));
}
@Override
public void startWrite(RecordReceiver recordReceiver) {
List<Record> writerBuffer = new ArrayList<Record>(this.batchSize);
Record record = null;
long total = 0;
while ((record = recordReceiver.getFromReader()) != null) {
writerBuffer.add(record);
if (writerBuffer.size() >= this.batchSize) {
total += doBatchInsert(writerBuffer);
writerBuffer.clear();
}
}
if (!writerBuffer.isEmpty()) {
total += doBatchInsert(writerBuffer);
writerBuffer.clear();
}
String msg = String.format("task end, write size :%d", total);
getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
log.info(msg);
esClient.closeJestClient();
}
private String getDateStr(ESColumn esColumn, Column column) {
DateTime date = null;
DateTimeZone dtz = DateTimeZone.getDefault();
if (esColumn.getTimezone() != null) {
// 所有时区参考 http://www.joda.org/joda-time/timezones.html
dtz = DateTimeZone.forID(esColumn.getTimezone());
}
if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) {
DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat());
date = formatter.withZone(dtz).parseDateTime(column.asString());
return date.toString();
} else if (column.getType() == Column.Type.DATE) {
date = new DateTime(column.asLong(), dtz);
return date.toString();
} else {
return column.asString();
}
}
private long doBatchInsert(final List<Record> writerBuffer) {
Map<String, Object> data = null;
final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
for (Record record : writerBuffer) {
data = new HashMap<String, Object>();
String id = null;
for (int i = 0; i < record.getColumnNumber(); i++) {
Column column = record.getColumn(i);
String columnName = columnList.get(i).getName();
ESFieldType columnType = typeList.get(i);
//如果是数组类型那它传入的必是字符串类型
if (columnList.get(i).isArray() != null && columnList.get(i).isArray()) {
String[] dataList = column.asString().split(splitter);
if (!columnType.equals(ESFieldType.DATE)) {
data.put(columnName, dataList);
} else {
for (int pos = 0; pos < dataList.length; pos++) {
dataList[pos] = getDateStr(columnList.get(i), column);
}
data.put(columnName, dataList);
}
} else {
switch (columnType) {
case ID:
if (id != null) {
id += record.getColumn(i).asString();
} else {
id = record.getColumn(i).asString();
}
break;
case DATE:
try {
String dateStr = getDateStr(columnList.get(i), column);
data.put(columnName, dateStr);
} catch (Exception e) {
getTaskPluginCollector().collectDirtyRecord(record, String.format("时间类型解析失败 [%s:%s] exception: %s", columnName, column.toString(), e.toString()));
}
break;
case KEYWORD:
case STRING:
case TEXT:
case IP:
case GEO_POINT:
data.put(columnName, column.asString());
break;
case BOOLEAN:
data.put(columnName, column.asBoolean());
break;
case BYTE:
case BINARY:
data.put(columnName, column.asBytes());
break;
case LONG:
data.put(columnName, column.asLong());
break;
case INTEGER:
data.put(columnName, column.asBigInteger());
break;
case SHORT:
data.put(columnName, column.asBigInteger());
break;
case FLOAT:
case DOUBLE:
data.put(columnName, column.asDouble());
break;
case NESTED:
case OBJECT:
case GEO_SHAPE:
data.put(columnName, JSON.parse(column.asString()));
break;
default:
getTaskPluginCollector().collectDirtyRecord(record, "类型错误:不支持的类型:" + columnType + " " + columnName);
}
}
}
if (id == null) {
//id = UUID.randomUUID().toString();
bulkaction.addAction(new Index.Builder(data).build());
} else {
bulkaction.addAction(new Index.Builder(data).id(id).build());
}
}
try {
return RetryUtil.executeWithRetry(new Callable<Integer>() {
@Override
public Integer call() throws Exception {
JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
if (jestResult.isSucceeded()) {
return writerBuffer.size();
}
String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
log.warn(msg);
if (esClient.isBulkResult(jestResult)) {
BulkResult brst = (BulkResult) jestResult;
List<BulkResult.BulkResultItem> failedItems = brst.getFailedItems();
for (BulkResult.BulkResultItem item : failedItems) {
if (item.status != 400) {
// 400 BAD_REQUEST 如果非数据异常,请求异常,则不允许忽略
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
} else {
// 如果用户选择不忽略解析错误,则抛异常,默认为忽略
if (!Key.isIgnoreParseError(conf)) {
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
}
}
}
List<BulkResult.BulkResultItem> items = brst.getItems();
for (int idx = 0; idx < items.size(); ++idx) {
BulkResult.BulkResultItem item = items.get(idx);
if (item.error != null && !"".equals(item.error)) {
getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error));
}
}
return writerBuffer.size() - brst.getFailedItems().size();
} else {
Integer status = esClient.getStatus(jestResult);
switch (status) {
case 429: //TOO_MANY_REQUESTS
log.warn("server response too many requests, so auto reduce speed");
break;
}
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
}
}
}, trySize, 60000L, true);
} catch (Exception e) {
if (Key.isIgnoreWriteError(this.conf)) {
log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize));
} else {
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
}
}
return 0;
}
@Override
public void post() {
}
@Override
public void destroy() {
esClient.closeJestClient();
}
}
}

View File

@ -1,37 +0,0 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.spi.ErrorCode;
public enum ESWriterErrorCode implements ErrorCode {
BAD_CONFIG_VALUE("ESWriter-00", "您配置的值不合法."),
ES_INDEX_DELETE("ESWriter-01", "删除index错误."),
ES_INDEX_CREATE("ESWriter-02", "创建index错误."),
ES_MAPPINGS("ESWriter-03", "mappings错误."),
ES_INDEX_INSERT("ESWriter-04", "插入数据错误."),
ES_ALIAS_MODIFY("ESWriter-05", "别名修改错误."),
;
private final String code;
private final String description;
ESWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,312 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfo;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfoResult;
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.PutMapping7;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.searchbox.action.Action;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.JestResult;
import io.searchbox.client.config.HttpClientConfig;
import io.searchbox.client.config.HttpClientConfig.Builder;
import io.searchbox.core.Bulk;
import io.searchbox.indices.CreateIndex;
import io.searchbox.indices.DeleteIndex;
import io.searchbox.indices.IndicesExists;
import io.searchbox.indices.aliases.*;
import io.searchbox.indices.mapping.GetMapping;
import io.searchbox.indices.mapping.PutMapping;
import io.searchbox.indices.settings.GetSettings;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Created by xiongfeng.bxf on 17/2/8.
*/
public class ElasticSearchClient {
private static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchClient.class);
private JestClient jestClient;
private Configuration conf;
public JestClient getClient() {
return jestClient;
}
public ElasticSearchClient(Configuration conf) {
this.conf = conf;
String endpoint = Key.getEndpoint(conf);
String user = Key.getUsername(conf);
String passwd = Key.getPassword(conf);
boolean multiThread = Key.isMultiThread(conf);
int readTimeout = Key.getTimeout(conf);
boolean compression = Key.isCompression(conf);
boolean discovery = Key.isDiscovery(conf);
String discoveryFilter = Key.getDiscoveryFilter(conf);
int totalConnection = this.conf.getInt("maxTotalConnection", 200);
JestClientFactory factory = new JestClientFactory();
Builder httpClientConfig = new HttpClientConfig
.Builder(endpoint)
// .setPreemptiveAuth(new HttpHost(endpoint))
.multiThreaded(multiThread)
.connTimeout(readTimeout)
.readTimeout(readTimeout)
.maxTotalConnection(totalConnection)
.requestCompressionEnabled(compression)
.discoveryEnabled(discovery)
.discoveryFrequency(5L, TimeUnit.MINUTES)
.discoveryFilter(discoveryFilter);
if (!(StringUtils.isBlank(user) || StringUtils.isBlank(passwd))) {
// username/password provided: enable basic auth (otherwise connect anonymously)
httpClientConfig.defaultCredentials(user, passwd);
}
factory.setHttpClientConfig(httpClientConfig.build());
this.jestClient = factory.getObject();
}
public boolean indicesExists(String indexName) throws Exception {
boolean isIndicesExists = false;
JestResult rst = execute(new IndicesExists.Builder(indexName).build());
if (rst.isSucceeded()) {
isIndicesExists = true;
} else {
LOGGER.warn("IndicesExists got ResponseCode: {} ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
switch (rst.getResponseCode()) {
case 404:
isIndicesExists = false;
break;
case 401:
// 无权访问
default:
LOGGER.warn(rst.getErrorMessage());
break;
}
}
return isIndicesExists;
}
public boolean deleteIndex(String indexName) throws Exception {
LOGGER.info("delete index {}", indexName);
if (indicesExists(indexName)) {
JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
if (!rst.isSucceeded()) {
LOGGER.warn("DeleteIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
return false;
} else {
LOGGER.info("delete index {} success", indexName);
}
} else {
LOGGER.info("index cannot found, skip delete index {}", indexName);
}
return true;
}
public boolean isGreaterOrEqualThan7() throws Exception {
try {
ClusterInfoResult result = execute(new ClusterInfo.Builder().build());
LOGGER.info("ClusterInfoResult: {}", result.getJsonString());
return result.isGreaterOrEqualThan7();
}catch(Exception e) {
LOGGER.warn(e.getMessage());
return false;
}
}
/**
* 获取索引的settings
* @param indexName 索引名
* @return 设置
*/
public String getIndexSettings(String indexName) {
GetSettings.Builder builder = new GetSettings.Builder();
builder.addIndex(indexName);
GetSettings getSettings = builder.build();
try {
LOGGER.info("begin GetSettings for index: {}", indexName);
JestResult result = this.execute(getSettings);
return result.getJsonString();
} catch (Exception e) {
String message = "GetSettings for index error: " + e.getMessage();
LOGGER.warn(message, e);
throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_GET_SETTINGS, e.getMessage(), e);
}
}
public boolean createIndexIfNotExists(String indexName, String typeName,
Object mappings, String settings,
boolean dynamic, boolean isGreaterOrEqualThan7) throws Exception {
JestResult rst;
if (!indicesExists(indexName)) {
LOGGER.info("create index {}", indexName);
rst = execute(
new CreateIndex.Builder(indexName)
.settings(settings)
.setParameter("master_timeout", Key.getMasterTimeout(this.conf))
.build()
);
//index_already_exists_exception
if (!rst.isSucceeded()) {
LOGGER.warn("CreateIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
if (getStatus(rst) == 400) {
LOGGER.info("index [{}] already exists", indexName);
return true;
} else {
return false;
}
} else {
LOGGER.info("create {} index success", indexName);
}
}
if (dynamic) {
LOGGER.info("dynamic is true, ignore mappings");
return true;
}
LOGGER.info("create mappings for {} {}", indexName, mappings);
//如果大于7.xmapping的PUT请求URI中不能带type并且mapping设置中不能带有嵌套结构
if (isGreaterOrEqualThan7) {
rst = execute(new PutMapping7.Builder(indexName, mappings).
setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
} else {
rst = execute(new PutMapping.Builder(indexName, typeName, mappings)
.setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
}
if (!rst.isSucceeded()) {
LOGGER.error("PutMapping got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
return false;
} else {
LOGGER.info("index {} put mappings success", indexName);
}
return true;
}
public <T extends JestResult> T execute(Action<T> clientRequest) throws IOException {
T rst = jestClient.execute(clientRequest);
if (!rst.isSucceeded()) {
LOGGER.warn(rst.getJsonString());
}
return rst;
}
public Integer getStatus(JestResult rst) {
JsonObject jsonObject = rst.getJsonObject();
if (jsonObject.has("status")) {
return jsonObject.get("status").getAsInt();
}
return 600;
}
public boolean isBulkResult(JestResult rst) {
JsonObject jsonObject = rst.getJsonObject();
return jsonObject.has("items");
}
public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
JestResult rst = null;
List<AliasMapping> list = new ArrayList<AliasMapping>();
if (needClean) {
rst = execute(getAliases);
if (rst.isSucceeded()) {
JsonParser jp = new JsonParser();
JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
String tindex = entry.getKey();
if (indexname.equals(tindex)) {
continue;
}
AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
String s = new Gson().toJson(m.getData());
LOGGER.info(s);
list.add(m);
}
}
}
ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build();
rst = execute(modifyAliases);
if (!rst.isSucceeded()) {
LOGGER.error(rst.getErrorMessage());
throw new IOException(rst.getErrorMessage());
}
return true;
}
/**
* 获取index的mapping
*/
public String getIndexMapping(String indexName) {
GetMapping.Builder builder = new GetMapping.Builder();
builder.addIndex(indexName);
GetMapping getMapping = builder.build();
try {
LOGGER.info("begin GetMapping for index: {}", indexName);
JestResult result = this.execute(getMapping);
return result.getJsonString();
} catch (Exception e) {
String message = "GetMapping for index error: " + e.getMessage();
LOGGER.warn(message, e);
throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, e.getMessage(), e);
}
}
public String getMappingForIndexType(String indexName, String typeName) {
String indexMapping = this.getIndexMapping(indexName);
JSONObject indexMappingInJson = JSON.parseObject(indexMapping);
List<String> paths = Arrays.asList(indexName, "mappings");
JSONObject properties = JsonPathUtil.getJsonObject(paths, indexMappingInJson);
JSONObject propertiesParent = properties;
if (StringUtils.isNotBlank(typeName) && properties.containsKey(typeName)) {
propertiesParent = (JSONObject) properties.get(typeName);
}
JSONObject mapping = (JSONObject) propertiesParent.get("properties");
return JSON.toJSONString(mapping);
}
public JestResult bulkInsert(Bulk.Builder bulk) throws Exception {
// es_rejected_execution_exception
// illegal_argument_exception
// cluster_block_exception
JestResult rst = null;
rst = execute(bulk.build());
if (!rst.isSucceeded()) {
LOGGER.warn(rst.getErrorMessage());
}
return rst;
}
/**
* 关闭JestClient客户端
*
*/
public void closeJestClient() {
if (jestClient != null) {
try {
// jestClient.shutdownClient();
jestClient.close();
} catch (IOException e) {
LOGGER.warn("ignore error: ", e.getMessage());
}
}
}
}
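
For orientation, a minimal usage sketch of the new client (hypothetical, not part of this commit; the endpoint, index, settings and mappings values are placeholders, and only the happy path is shown):

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.util.Configuration;

public class ElasticSearchClientDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = Configuration.from(
                "{\"endpoint\":\"http://127.0.0.1:9200\",\"index\":\"demo_idx\",\"indexType\":\"_doc\"}");
        ElasticSearchClient client = new ElasticSearchClient(conf);
        try {
            // the version check decides whether PutMapping7 (no type in the URI) is used
            boolean es7 = client.isGreaterOrEqualThan7();
            if (!client.indicesExists(Key.getIndexName(conf))) {
                String settings = "{\"index\":{\"number_of_shards\":1}}";
                String mappings = "{\"properties\":{\"pv\":{\"type\":\"long\"}}}";
                client.createIndexIfNotExists(Key.getIndexName(conf), Key.getTypeName(conf),
                        mappings, settings, false, es7);
            }
        } finally {
            client.closeJestClient();
        }
    }
}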

View File

@ -0,0 +1,126 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import java.util.List;
/**
* Created by xiongfeng.bxf on 17/3/2.
*/
public class ElasticSearchColumn {
private String name;//: "appkey",
private String type;//": "TEXT",
private String timezone;
/**
* 源头数据格式化处理datax做的事情
*/
private String format;
/**
* 目标端格式化es原生支持的格式
*/
private String dstFormat;
private boolean array;
/**
* 是否使用目标端(ES原生)数组类型
*
* 默认是false
*/
private boolean dstArray = false;
private boolean jsonArray;
private boolean origin;
private List<String> combineFields;
private String combineFieldsValueSeparator = "-";
public String getCombineFieldsValueSeparator() {
return combineFieldsValueSeparator;
}
public void setCombineFieldsValueSeparator(String combineFieldsValueSeparator) {
this.combineFieldsValueSeparator = combineFieldsValueSeparator;
}
public List<String> getCombineFields() {
return combineFields;
}
public void setCombineFields(List<String> combineFields) {
this.combineFields = combineFields;
}
public void setName(String name) {
this.name = name;
}
public void setType(String type) {
this.type = type;
}
public void setTimeZone(String timezone) {
this.timezone = timezone;
}
public void setFormat(String format) {
this.format = format;
}
public String getName() {
return name;
}
public String getType() {
return type;
}
public boolean isOrigin() { return origin; }
public void setOrigin(boolean origin) { this.origin = origin; }
public String getTimezone() {
return timezone;
}
public String getFormat() {
return format;
}
public void setTimezone(String timezone) {
this.timezone = timezone;
}
public boolean isArray() {
return array;
}
public void setArray(boolean array) {
this.array = array;
}
public boolean isJsonArray() {return jsonArray;}
public void setJsonArray(boolean jsonArray) {this.jsonArray = jsonArray;}
public String getDstFormat() {
return dstFormat;
}
public void setDstFormat(String dstFormat) {
this.dstFormat = dstFormat;
}
public boolean isDstArray() {
return dstArray;
}
public void setDstArray(boolean dstArray) {
this.dstArray = dstArray;
}
}
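
A quick sketch of how a single column definition maps onto these fields via fastjson binding (hypothetical values, not part of this commit; format drives the source-side parsing done by DataX, dstFormat is what gets written to ES):

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.fastjson.JSON;

public class ElasticSearchColumnDemo {
    public static void main(String[] args) {
        String colJson = "{\"name\":\"gmt_create\",\"type\":\"date\",\"timezone\":\"+08:00\","
                + "\"format\":\"yyyy-MM-dd HH:mm:ss\",\"dstFormat\":\"yyyy-MM-dd'T'HH:mm:ssZZ\",\"array\":false}";
        // fastjson binds the JSON keys onto the setters above
        ElasticSearchColumn col = JSON.parseObject(colJson, ElasticSearchColumn.class);
        System.out.println(col.getName() + ": parse with " + col.getFormat()
                + ", store as " + col.getDstFormat());
    }
}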

View File

@ -3,8 +3,11 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter;
/**
* Created by xiongfeng.bxf on 17/3/1.
*/
public enum ESFieldType {
public enum ElasticSearchFieldType {
ID,
PARENT,
ROUTING,
VERSION,
STRING,
TEXT,
KEYWORD,
@ -24,20 +27,18 @@ public enum ESFieldType {
DATE_RANGE,
GEO_POINT,
GEO_SHAPE,
IP,
IP_RANGE,
COMPLETION,
TOKEN_COUNT,
ARRAY,
OBJECT,
NESTED;
public static ESFieldType getESFieldType(String type) {
public static ElasticSearchFieldType getESFieldType(String type) {
if (type == null) {
return null;
}
for (ESFieldType f : ESFieldType.values()) {
for (ElasticSearchFieldType f : ElasticSearchFieldType.values()) {
if (f.name().compareTo(type.toUpperCase()) == 0) {
return f;
}

View File

@ -0,0 +1,41 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.spi.ErrorCode;
public enum ElasticSearchWriterErrorCode implements ErrorCode {
BAD_CONFIG_VALUE("ESWriter-00", "The value you configured is not valid."),
ES_INDEX_DELETE("ESWriter-01", "Delete index error."),
ES_INDEX_CREATE("ESWriter-02", "Index creation error."),
ES_MAPPINGS("ESWriter-03", "The mappings error."),
ES_INDEX_INSERT("ESWriter-04", "Insert data error."),
ES_ALIAS_MODIFY("ESWriter-05", "Alias modification error."),
JSON_PARSE("ESWrite-06", "Json format parsing error"),
UPDATE_WITH_ID("ESWrite-07", "Update mode must specify column type with id"),
RECORD_FIELD_NOT_FOUND("ESWrite-08", "Field does not exist in the original table"),
ES_GET_SETTINGS("ESWriter-09", "get settings failed");
;
private final String code;
private final String description;
ElasticSearchWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import java.util.List;
import com.alibaba.fastjson.JSONObject;
public class JsonPathUtil {
public static JSONObject getJsonObject(List<String> paths, JSONObject data) {
if (null == paths || paths.isEmpty()) {
return data;
}
if (null == data) {
return null;
}
JSONObject dataTmp = data;
for (String each : paths) {
if (null != dataTmp) {
dataTmp = dataTmp.getJSONObject(each);
} else {
return null;
}
}
return dataTmp;
}
}
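
A small demo of the path walk (hypothetical, not part of this commit): getJsonObject descends one JSONObject per path element and returns null as soon as a segment is missing.

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import java.util.Arrays;

public class JsonPathUtilDemo {
    public static void main(String[] args) {
        JSONObject data = JSON.parseObject(
                "{\"demo_idx\":{\"mappings\":{\"properties\":{\"pv\":{\"type\":\"long\"}}}}}");
        // returns the object under demo_idx.mappings
        JSONObject mappings = JsonPathUtil.getJsonObject(Arrays.asList("demo_idx", "mappings"), data);
        System.out.println(mappings.getJSONObject("properties"));
        // a missing segment yields null instead of throwing
        System.out.println(JsonPathUtil.getJsonObject(Arrays.asList("no_such_index"), data));
    }
}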

View File

@ -0,0 +1,54 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
/**
* @author bozu
* @date 2021/01/06
*/
public class JsonUtil {
/**
* 合并两个json
* @param source 源json
* @param target 目标json
* @return 合并后的json
* @throws JSONException
*/
public static String mergeJsonStr(String source, String target) throws JSONException {
if(source == null) {
return target;
}
if(target == null) {
return source;
}
return JSON.toJSONString(deepMerge(JSON.parseObject(source), JSON.parseObject(target)));
}
/**
* 深度合并两个json对象将source的值merge到target中
* @param source 源json
* @param target 目标json
* @return 合并后的json
* @throws JSONException
*/
private static JSONObject deepMerge(JSONObject source, JSONObject target) throws JSONException {
for (String key: source.keySet()) {
Object value = source.get(key);
if (target.containsKey(key)) {
// existing value for "key" - recursively deep merge:
if (value instanceof JSONObject) {
JSONObject valueJson = (JSONObject)value;
deepMerge(valueJson, target.getJSONObject(key));
} else {
target.put(key, value);
}
} else {
target.put(key, value);
}
}
return target;
}
}
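
A short demo of the merge semantics (hypothetical values, not part of this commit): scalar keys from source overwrite target, nested objects are merged recursively, and keys only present in target are kept.

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

public class JsonUtilDemo {
    public static void main(String[] args) {
        String source = "{\"settings\":{\"number_of_replicas\":0},\"dynamic\":true}";
        String target = "{\"settings\":{\"number_of_shards\":4}}";
        // expected (key order may differ):
        // {"settings":{"number_of_shards":4,"number_of_replicas":0},"dynamic":true}
        System.out.println(JsonUtil.mergeJsonStr(source, target));
    }
}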

View File

@ -1,9 +1,13 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import org.apache.commons.lang3.StringUtils;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public final class Key {
@ -37,31 +41,35 @@ public final class Key {
public static String getEndpoint(Configuration conf) {
return conf.getNecessaryValue("endpoint", ESWriterErrorCode.BAD_CONFIG_VALUE);
return conf.getNecessaryValue("endpoint", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
}
public static String getAccessID(Configuration conf) {
return conf.getString("accessId", "");
public static String getUsername(Configuration conf) {
return conf.getString("username", conf.getString("accessId"));
}
public static String getAccessKey(Configuration conf) {
return conf.getString("accessKey", "");
public static String getPassword(Configuration conf) {
return conf.getString("password", conf.getString("accessKey"));
}
public static int getBatchSize(Configuration conf) {
return conf.getInt("batchSize", 1000);
return conf.getInt("batchSize", 1024);
}
public static int getTrySize(Configuration conf) {
return conf.getInt("trySize", 30);
}
public static long getTryInterval(Configuration conf) {
return conf.getLong("tryInterval", 60000L);
}
public static int getTimeout(Configuration conf) {
return conf.getInt("timeout", 600000);
}
public static boolean isCleanup(Configuration conf) {
return conf.getBool("cleanup", false);
public static boolean isTruncate(Configuration conf) {
return conf.getBool("truncate", conf.getBool("cleanup", false));
}
public static boolean isDiscovery(Configuration conf) {
@ -69,7 +77,7 @@ public final class Key {
}
public static boolean isCompression(Configuration conf) {
return conf.getBool("compression", true);
return conf.getBool("compress", conf.getBool("compression", true));
}
public static boolean isMultiThread(Configuration conf) {
@ -77,9 +85,17 @@ public final class Key {
}
public static String getIndexName(Configuration conf) {
return conf.getNecessaryValue("index", ESWriterErrorCode.BAD_CONFIG_VALUE);
return conf.getNecessaryValue("index", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
}
public static String getDeleteBy(Configuration conf) {
return conf.getString("deleteBy");
}
/**
* TODO: 在7.0开始一个索引只能建一个Type为_doc
* */
public static String getTypeName(Configuration conf) {
String indexType = conf.getString("indexType");
if(StringUtils.isBlank(indexType)){
@ -128,4 +144,58 @@ public final class Key {
public static boolean getDynamic(Configuration conf) {
return conf.getBool("dynamic", false);
}
public static String getDstDynamic(Configuration conf) {
return conf.getString("dstDynamic");
}
public static String getDiscoveryFilter(Configuration conf){
return conf.getString("discoveryFilter","_all");
}
public static Boolean getVersioning(Configuration conf) {
return conf.getBool("versioning", false);
}
public static Long getUnifiedVersion(Configuration conf) {
return conf.getLong("version", System.currentTimeMillis());
}
public static Map<String, Object> getUrlParams(Configuration conf) {
return conf.getMap("urlParams", new HashMap<String, Object>());
}
public static Integer getESVersion(Configuration conf) {
return conf.getInt("esVersion");
}
public static String getMasterTimeout(Configuration conf) {
return conf.getString("masterTimeout", "5m");
}
public static boolean isEnableNullUpdate(Configuration conf) {
return conf.getBool("enableWriteNull", true);
}
public static String getFieldDelimiter(Configuration conf) {
return conf.getString("fieldDelimiter", "");
}
public static PrimaryKeyInfo getPrimaryKeyInfo(Configuration conf) {
String primaryKeyInfoString = conf.getString("primaryKeyInfo");
if (StringUtils.isNotBlank(primaryKeyInfoString)) {
return JSON.parseObject(primaryKeyInfoString, new TypeReference<PrimaryKeyInfo>() {});
} else {
return null;
}
}
public static List<PartitionColumn> getEsPartitionColumn(Configuration conf) {
String esPartitionColumnString = conf.getString("esPartitionColumn");
if (StringUtils.isNotBlank(esPartitionColumnString)) {
return JSON.parseObject(esPartitionColumnString, new TypeReference<List<PartitionColumn>>() {});
} else {
return null;
}
}
}

View File

@ -0,0 +1,16 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.spi.ErrorCode;
public class NoReRunException extends DataXException {
public NoReRunException(String errorMessage) {
super(errorMessage);
}
public NoReRunException(ErrorCode errorCode, String errorMessage) {
super(errorCode, errorMessage);
}
private static final long serialVersionUID = 1L;
}

View File

@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
public class PartitionColumn {
private String name;
// like: DATA
private String metaType;
private String comment;
// like: VARCHAR
private String type;
public String getName() {
return name;
}
public String getMetaType() {
return metaType;
}
public String getComment() {
return comment;
}
public String getType() {
return type;
}
public void setName(String name) {
this.name = name;
}
public void setMetaType(String metaType) {
this.metaType = metaType;
}
public void setComment(String comment) {
this.comment = comment;
}
public void setType(String type) {
this.type = type;
}
}

View File

@ -0,0 +1,47 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
import java.util.List;
public class PrimaryKeyInfo {
/**
* 主键类型:PrimaryKeyTypeEnum
*
* pk: 单个(业务)主键 specific: 联合主键
*/
private String type;
/**
* 用户定义的联合主键的连接符号
*/
private String fieldDelimiter;
/**
* 主键的列的名称
*/
private List<String> column;
public String getType() {
return type;
}
public String getFieldDelimiter() {
return fieldDelimiter;
}
public List<String> getColumn() {
return column;
}
public void setType(String type) {
this.type = type;
}
public void setFieldDelimiter(String fieldDelimiter) {
this.fieldDelimiter = fieldDelimiter;
}
public void setColumn(List<String> column) {
this.column = column;
}
}
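
For illustration, a primaryKeyInfo block as it would be bound to this class (hypothetical values, not part of this commit; "specific" marks a composite key and fieldDelimiter is the connector between its columns):

package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.fastjson.JSON;

public class PrimaryKeyInfoDemo {
    public static void main(String[] args) {
        String pkJson = "{\"type\":\"specific\",\"fieldDelimiter\":\"-\",\"column\":[\"uid\",\"ts\"]}";
        PrimaryKeyInfo pk = JSON.parseObject(pkJson, PrimaryKeyInfo.class);
        // composite key: its columns are joined with fieldDelimiter (presumably to build the document id)
        System.out.println(pk.getType() + " " + pk.getColumn() + " joined by \"" + pk.getFieldDelimiter() + "\"");
    }
}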

View File

@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
import com.google.gson.Gson;
import io.searchbox.action.AbstractAction;
import io.searchbox.client.config.ElasticsearchVersion;
public class ClusterInfo extends AbstractAction<ClusterInfoResult> {
@Override
protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
return "";
}
@Override
public String getRestMethodName() {
return "GET";
}
@Override
public ClusterInfoResult createNewElasticSearchResult(String responseBody, int statusCode, String reasonPhrase, Gson gson) {
return createNewElasticSearchResult(new ClusterInfoResult(gson), responseBody, statusCode, reasonPhrase, gson);
}
public static class Builder extends AbstractAction.Builder<ClusterInfo, ClusterInfo.Builder> {
public Builder() {
setHeader("accept", "application/json");
setHeader("content-type", "application/json");
}
@Override
public ClusterInfo build() {
return new ClusterInfo();
}
}
}

View File

@ -0,0 +1,49 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
import com.google.gson.Gson;
import io.searchbox.client.JestResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ClusterInfoResult extends JestResult {
private static final Pattern FIRST_NUMBER = Pattern.compile("\\d");
private static final int SEVEN = 7;
public ClusterInfoResult(Gson gson) {
super(gson);
}
public ClusterInfoResult(JestResult source) {
super(source);
}
/**
* 判断es集群的部署版本是否大于7.x
* 大于7.x的es对于Index的type有较大改动需要做额外判定
* 对于7.x与6.x版本的es都做过测试返回符合预期;5.x以下版本直接try-catch后返回false向下兼容
* @return
*/
public Boolean isGreaterOrEqualThan7() throws Exception {
// 如果是没有权限直接返回false兼容老版本
if (responseCode == 403) {
return false;
}
if (!isSucceeded) {
throw new Exception(getJsonString());
}
try {
String version = jsonObject.getAsJsonObject("version").get("number").toString();
Matcher matcher = FIRST_NUMBER.matcher(version);
matcher.find();
String number = matcher.group();
Integer versionNum = Integer.valueOf(number);
return versionNum >= SEVEN;
} catch (Exception e) {
//5.x 以下版本不做兼容测试如果返回json格式解析失败有可能是以下版本所以认为不大于7.x
return false;
}
}
}

View File

@ -0,0 +1,39 @@
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
import io.searchbox.action.GenericResultAbstractAction;
import io.searchbox.client.config.ElasticsearchVersion;
public class PutMapping7 extends GenericResultAbstractAction {
protected PutMapping7(PutMapping7.Builder builder) {
super(builder);
this.indexName = builder.index;
this.payload = builder.source;
}
@Override
protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
return super.buildURI(elasticsearchVersion) + "/_mapping";
}
@Override
public String getRestMethodName() {
return "PUT";
}
public static class Builder extends GenericResultAbstractAction.Builder<PutMapping7, PutMapping7.Builder> {
private String index;
private Object source;
public Builder(String index, Object source) {
this.index = index;
this.source = source;
}
@Override
public PutMapping7 build() {
return new PutMapping7(this);
}
}
}

View File

@ -1,6 +1,6 @@
{
"name": "elasticsearchwriter",
"class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter",
"class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ElasticSearchWriter",
"description": "适用于: 生产环境. 原理: TODO",
"developer": "alibaba"
}

View File

@ -2,6 +2,6 @@
"name": "hbase20xsqlreader",
"class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader",
"description": "useScene: prod. mechanism: read data from phoenix through queryserver.",
"developer": "bake"
"developer": "alibaba"
}

View File

@ -2,6 +2,6 @@
"name": "hbase20xsqlwriter",
"class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter",
"description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.",
"developer": "bake"
"developer": "alibaba"
}

73
loghubreader/pom.xml Normal file
View File

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>loghubreader</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>com.aliyun.openservices</groupId>
<artifactId>aliyun-log</artifactId>
<version>0.6.22</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,34 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
</includes>
<outputDirectory>plugin/reader/loghubreader</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>loghubreader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/loghubreader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/loghubreader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,26 @@
package com.alibaba.datax.plugin.reader.loghubreader;
public class Constant {
public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
public static String DATE_FORMAT = "yyyyMMdd";
static String META_COL_SOURCE = "__source__";
static String META_COL_TOPIC = "__topic__";
static String META_COL_CATEGORY = "__category__";
static String META_COL_MACHINEUUID = "__machineUUID__";
static String META_COL_HOSTNAME = "__hostname__";
static String META_COL_PATH = "__path__";
static String META_COL_LOGTIME = "__logtime__";
public static String META_COL_RECEIVE_TIME = "__receive_time__";
/**
* 除用户手动配置的列之外其余数据列作为一个 json 读取到一列
*/
static String COL_EXTRACT_OTHERS = "C__extract_others__";
/**
* 将所有元数据列作为一个 json 读取到一列
*/
static String COL_EXTRACT_ALL_META = "C__extract_all_meta__";
}
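
As a sketch (hypothetical column list, not part of this commit), a reader column config can presumably mix named log fields, meta columns, and the two aggregate columns declared above: fields not listed explicitly end up as JSON in C__extract_others__, and all meta fields as JSON in C__extract_all_meta__.

package com.alibaba.datax.plugin.reader.loghubreader;

import com.alibaba.datax.common.util.Configuration;

import java.util.List;

public class ColumnConfigDemo {
    public static void main(String[] args) {
        Configuration conf = Configuration.from(
                "{\"column\":[\"request_uri\",\"__topic__\",\"C__extract_others__\",\"C__extract_all_meta__\"]}");
        List<String> columns = conf.getList("column", String.class);
        // request_uri and __topic__ would be copied as-is; the remaining log fields and the
        // meta fields would be packed into the two aggregate JSON columns respectively
        System.out.println(columns);
    }
}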

View File

@ -0,0 +1,38 @@
package com.alibaba.datax.plugin.reader.loghubreader;
public final class Key {
/**
* 此处声明插件用到的需要插件使用者提供的配置项
*/
public static final String ENDPOINT = "endpoint";
public static final String ACCESSKEYID = "accessId";
public static final String ACCESSKEYSECRET = "accessKey";
public static final String PROJECT = "project";
public static final String LOGSTORE = "logstore";
public static final String TOPIC = "topic";
public static final String COLUMN = "column";
public static final String BATCHSIZE = "batchSize";
public static final String BEGINTIMESTAMPMILLIS = "beginTimestampMillis";
public static final String ENDTIMESTAMPMILLIS = "endTimestampMillis";
public static final String BEGINDATETIME = "beginDateTime";
public static final String ENDDATETIME = "endDateTime";
public static final String TIMEFORMAT = "timeformat";
public static final String SOURCE = "source";
public static final String SHARD = "shard";
}

View File

@ -0,0 +1,482 @@
package com.alibaba.datax.plugin.reader.loghubreader;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSONObject;
import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.Consts.CursorMode;
import com.aliyun.openservices.log.common.*;
import com.aliyun.openservices.log.exception.LogException;
import com.aliyun.openservices.log.response.BatchGetLogResponse;
import com.aliyun.openservices.log.response.GetCursorResponse;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.Callable;
public class LogHubReader extends Reader {
public static class Job extends Reader.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Client client;
private Configuration originalConfig;
private Long beginTimestampMillis;
private Long endTimestampMillis;
@Override
public void init() {
LOG.info("loghub reader job init begin ...");
this.originalConfig = super.getPluginJobConf();
validateParameter(originalConfig);
String endPoint = this.originalConfig.getString(Key.ENDPOINT);
String accessKeyId = this.originalConfig.getString(Key.ACCESSKEYID);
String accessKeySecret = this.originalConfig.getString(Key.ACCESSKEYSECRET);
client = new Client(endPoint, accessKeyId, accessKeySecret);
LOG.info("loghub reader job init end.");
}
private void validateParameter(Configuration conf){
conf.getNecessaryValue(Key.ENDPOINT,LogHubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESSKEYID,LogHubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESSKEYSECRET,LogHubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.PROJECT,LogHubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.LOGSTORE,LogHubReaderErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.COLUMN,LogHubReaderErrorCode.REQUIRE_VALUE);
int batchSize = this.originalConfig.getInt(Key.BATCHSIZE);
if (batchSize > 1000) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid batchSize[" + batchSize + "] value (0,1000]!");
}
beginTimestampMillis = this.originalConfig.getLong(Key.BEGINTIMESTAMPMILLIS);
String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
if (beginDateTime != null) {
try {
beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
} catch (ParseException e) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
}
}
if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
}
endTimestampMillis = this.originalConfig.getLong(Key.ENDTIMESTAMPMILLIS);
String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
if (endDateTime != null) {
try {
endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
} catch (ParseException e) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
}
}
if (endTimestampMillis != null && endTimestampMillis <= 0) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid endTimestampMillis[" + endTimestampMillis + "]!");
}
if (beginTimestampMillis != null && endTimestampMillis != null
&& endTimestampMillis <= beginTimestampMillis) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
}
}
private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
try {
String format = Constant.DATETIME_FORMAT;
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
return simpleDateFormat.parse(dateTime).getTime() / 1000;
} catch (ParseException ignored) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"Invalid DateTime[" + dateTime + "]!");
}
}
@Override
public void prepare() {
}
@Override
public List<Configuration> split(int adviceNumber) {
LOG.info("split() begin...");
List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
final String project = this.originalConfig.getString(Key.PROJECT);
final String logstore = this.originalConfig.getString(Key.LOGSTORE);
List<Shard> logStore = null;
try {
logStore = RetryUtil.executeWithRetry(new Callable<List<Shard>>() {
@Override
public List<Shard> call() throws Exception {
return client.ListShard(project, logstore).GetShards();
}
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
} catch (Exception e) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"get LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
}
if (logStore == null) {
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
"LogStore[" + logstore + "] isn't exists, please check !");
}
int splitNumber = logStore.size();
if (0 == splitNumber) {
throw DataXException.asDataXException(LogHubReaderErrorCode.EMPTY_LOGSTORE_VALUE,
"LogStore[" + logstore + "] has 0 shard, please check !");
}
Collections.shuffle(logStore);
for (int i = 0; i < logStore.size(); i++) {
if (beginTimestampMillis != null && endTimestampMillis != null) {
try {
String beginCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), beginTimestampMillis).GetCursor();
String endCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), endTimestampMillis).GetCursor();
if (beginCursor.equals(endCursor)) {
// keep the last shard when no split has been produced yet, so the job always gets at least one split
if ((i == logStore.size() - 1) && readerSplitConfigs.isEmpty()) {
    // fall through and keep this shard
} else {
    LOG.info("skip empty shard[" + logStore.get(i) + "]!");
    continue;
}
}
} catch (Exception e) {
LOG.error("Check Shard[" + logStore.get(i) + "] Error, please check !" + e.toString());
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
}
}
Configuration splitedConfig = this.originalConfig.clone();
splitedConfig.set(Key.SHARD, logStore.get(i).GetShardId());
readerSplitConfigs.add(splitedConfig);
}
if (splitNumber < adviceNumber) {
// LOG.info(MESSAGE_SOURCE.message("hdfsreader.12",
// splitNumber, adviceNumber, splitNumber, splitNumber));
}
LOG.info("split() ok and end...");
return readerSplitConfigs;
}
@Override
public void post() {
}
@Override
public void destroy() {
}
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
return
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
@Override
public GetCursorResponse call() throws Exception {
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
return client.GetCursor(project, logstore, shard, fromTime);
}
}, 7, 1000L, true);
}
}
public static class Task extends Reader.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private Configuration taskConfig;
private Client client;
private String endPoint;
private String accessKeyId;
private String accessKeySecret;
private String project;
private String logstore;
private long beginTimestampMillis;
private long endTimestampMillis;
private int batchSize;
private int shard;
private List<String> columns;
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
endPoint = this.taskConfig.getString(Key.ENDPOINT);
accessKeyId = this.taskConfig.getString(Key.ACCESSKEYID);
accessKeySecret = this.taskConfig.getString(Key.ACCESSKEYSECRET);
project = this.taskConfig.getString(Key.PROJECT);
logstore = this.taskConfig.getString(Key.LOGSTORE);
batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 128);
this.beginTimestampMillis = this.taskConfig.getLong(Key.BEGINTIMESTAMPMILLIS, -1);
String beginDateTime = this.taskConfig.getString(Key.BEGINDATETIME);
if (beginDateTime != null) {
try {
beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
} catch (ParseException e) {
    // parsing failed: keep the beginTimestampMillis value read from config (default -1)
}
}
this.endTimestampMillis = this.taskConfig.getLong(Key.ENDTIMESTAMPMILLIS, -1);
String endDateTime = this.taskConfig.getString(Key.ENDDATETIME);
if (endDateTime != null) {
try {
endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
} catch (ParseException e) {
    // parsing failed: keep the endTimestampMillis value read from config (default -1)
}
}
columns = this.taskConfig.getList(Key.COLUMN, String.class);
shard = this.taskConfig.getInt(Key.SHARD);
client = new Client(endPoint, accessKeyId, accessKeySecret);
LOG.info("init loghub reader task finished.project:{} logstore:{} batchSize:{}", project, logstore, batchSize);
}
@Override
public void prepare() {
}
private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
try {
String format = Constant.DATETIME_FORMAT;
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
return simpleDateFormat.parse(dateTime).getTime() / 1000;
} catch (ParseException ignored) {
}
String format = Constant.DATE_FORMAT;
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
return simpleDateFormat.parse(dateTime).getTime() / 1000;
}
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
return
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
@Override
public GetCursorResponse call() throws Exception {
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
return client.GetCursor(project, logstore, shard, fromTime);
}
}, 7, 1000L, true);
}
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final CursorMode mode) throws Exception {
return
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
@Override
public GetCursorResponse call() throws Exception {
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} mode: {}", project, logstore, shard, mode);
return client.GetCursor(project, logstore, shard, mode);
}
}, 7, 1000L, true);
}
private BatchGetLogResponse batchGetLogWithRetry(final Client client, final String project, final String logstore, final int shard, final int batchSize,
final String curCursor, final String endCursor) throws Exception {
return
RetryUtil.executeWithRetry(new Callable<BatchGetLogResponse>() {
@Override
public BatchGetLogResponse call() throws Exception {
return client.BatchGetLog(project, logstore, shard, batchSize, curCursor, endCursor);
}
}, 7, 1000L, true);
}
@Override
public void startRead(RecordSender recordSender) {
LOG.info("read start");
try {
GetCursorResponse cursorRes;
if (this.beginTimestampMillis != -1) {
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, beginTimestampMillis);
} else {
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.BEGIN);
}
String beginCursor = cursorRes.GetCursor();
LOG.info("the begin cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());
if (this.endTimestampMillis != -1) {
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, endTimestampMillis);
} else {
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.END);
}
String endCursor = cursorRes.GetCursor();
LOG.info("the end cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());
if (StringUtils.equals(beginCursor, endCursor)) {
LOG.info("beginCursor:{} equals endCursor:{}, end directly!", beginCursor, endCursor);
return;
}
String currentCursor = null;
String nextCursor = beginCursor;
HashMap<String, String> metaMap = new HashMap<String, String>();
HashMap<String, String> dataMap = new HashMap<String, String>();
JSONObject allMetaJson = new JSONObject();
while (!StringUtils.equals(currentCursor, nextCursor)) {
currentCursor = nextCursor;
BatchGetLogResponse logDataRes = batchGetLogWithRetry(client, project, logstore, this.shard, this.batchSize, currentCursor, endCursor);
List<LogGroupData> logGroups = logDataRes.GetLogGroups();
for(LogGroupData logGroup: logGroups) {
metaMap.clear();
allMetaJson.clear();
FastLogGroup flg = logGroup.GetFastLogGroup();
metaMap.put("C_Category", flg.getCategory());
metaMap.put(Constant.META_COL_CATEGORY, flg.getCategory());
allMetaJson.put(Constant.META_COL_CATEGORY, flg.getCategory());
metaMap.put("C_Source", flg.getSource());
metaMap.put(Constant.META_COL_SOURCE, flg.getSource());
allMetaJson.put(Constant.META_COL_SOURCE, flg.getSource());
metaMap.put("C_Topic", flg.getTopic());
metaMap.put(Constant.META_COL_TOPIC, flg.getTopic());
allMetaJson.put(Constant.META_COL_TOPIC, flg.getTopic());
metaMap.put("C_MachineUUID", flg.getMachineUUID());
metaMap.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());
allMetaJson.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());
for (int tagIdx = 0; tagIdx < flg.getLogTagsCount(); ++tagIdx) {
FastLogTag logtag = flg.getLogTags(tagIdx);
String tagKey = logtag.getKey();
String tagValue = logtag.getValue();
if (tagKey.equals(Constant.META_COL_HOSTNAME)) {
metaMap.put("C_HostName", logtag.getValue());
} else if (tagKey.equals(Constant.META_COL_PATH)) {
metaMap.put("C_Path", logtag.getValue());
}
metaMap.put(tagKey, tagValue);
allMetaJson.put(tagKey, tagValue);
}
for (int lIdx = 0; lIdx < flg.getLogsCount(); ++lIdx) {
dataMap.clear();
FastLog log = flg.getLogs(lIdx);
String logTime = String.valueOf(log.getTime());
metaMap.put("C_LogTime", logTime);
metaMap.put(Constant.META_COL_LOGTIME, logTime);
allMetaJson.put(Constant.META_COL_LOGTIME, logTime);
for (int cIdx = 0; cIdx < log.getContentsCount(); ++cIdx) {
FastLogContent content = log.getContents(cIdx);
dataMap.put(content.getKey(), content.getValue());
}
Record record = recordSender.createRecord();
JSONObject extractOthers = new JSONObject();
if(columns.contains(Constant.COL_EXTRACT_OTHERS)){
List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
for (String otherKey:keyList) {
if (!columns.contains(otherKey)){
extractOthers.put(otherKey,dataMap.get(otherKey));
}
}
}
if (null != this.columns && 1 == this.columns.size()) {
String columnsInStr = columns.get(0).toString();
if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
Collections.sort(keyList);
for (String key : keyList) {
record.addColumn(new StringColumn(key + ":" + dataMap.get(key)));
}
} else {
if (dataMap.containsKey(columnsInStr)) {
record.addColumn(new StringColumn(dataMap.get(columnsInStr)));
} else if (metaMap.containsKey(columnsInStr)) {
record.addColumn(new StringColumn(metaMap.get(columnsInStr)));
} else if (Constant.COL_EXTRACT_OTHERS.equals(columnsInStr)){
record.addColumn(new StringColumn(extractOthers.toJSONString()));
} else if (Constant.COL_EXTRACT_ALL_META.equals(columnsInStr)) {
record.addColumn(new StringColumn(allMetaJson.toJSONString()));
}
}
} else {
for (String col : this.columns) {
if (dataMap.containsKey(col)) {
record.addColumn(new StringColumn(dataMap.get(col)));
} else if (metaMap.containsKey(col)) {
record.addColumn(new StringColumn(metaMap.get(col)));
} else if (col != null && col.startsWith("'") && col.endsWith("'")){
String constant = col.substring(1, col.length()-1);
record.addColumn(new StringColumn(constant));
}else if (Constant.COL_EXTRACT_OTHERS.equals(col)){
record.addColumn(new StringColumn(extractOthers.toJSONString()));
} else if (Constant.COL_EXTRACT_ALL_META.equals(col)) {
record.addColumn(new StringColumn(allMetaJson.toJSONString()));
} else {
record.addColumn(new StringColumn(null));
}
}
}
recordSender.sendToWriter(record);
}
}
nextCursor = logDataRes.GetNextCursor();
}
} catch (LogException e) {
if (e.GetErrorCode().equals("LogStoreNotExist")) {
LOG.info("logStore[" + logstore +"] Not Exits! detail error messsage: " + e.toString());
} else {
LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
}
} catch (Exception e) {
LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
}
LOG.info("end read loghub shard...");
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
}

View File

@ -0,0 +1,34 @@
package com.alibaba.datax.plugin.reader.loghubreader;
import com.alibaba.datax.common.spi.ErrorCode;
public enum LogHubReaderErrorCode implements ErrorCode {
BAD_CONFIG_VALUE("LogHuReader-00", "The value you configured is invalid."),
LOG_HUB_ERROR("LogHubReader-01","LogHub access encounter exception"),
REQUIRE_VALUE("LogHubReader-02","Missing parameters"),
EMPTY_LOGSTORE_VALUE("LogHubReader-03","There is no shard in this LogStore");
private final String code;
private final String description;
private LogHubReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,6 @@
{
"name": "loghubreader",
"class": "com.alibaba.datax.plugin.reader.loghubreader.LogHubReader",
"description": "适用于: 从SLS LogHub中读取数据",
"developer": "alibaba"
}

View File

@ -0,0 +1,12 @@
{
"name": "loghubreader",
"parameter": {
"endpoint": "",
"accessId": "",
"accessKey": "",
"project": "",
"logstore": "",
"batchSize":1024,
"column": []
}
}
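
Beyond plain log field names, the column list of the reader is also resolved against built-in meta columns (C_Category, C_Source, C_Topic, C_MachineUUID, C_HostName, C_Path, C_LogTime), single-quoted string constants, and the "*" wildcard, as handled in the startRead logic above. A hedged sketch of such a configuration; endpoint and credentials are placeholders, and the field names level/message are assumptions for illustration:

{
    "name": "loghubreader",
    "parameter": {
        "endpoint": "http://<region>.log.aliyuncs.com",
        "accessId": "<yourAccessId>",
        "accessKey": "<yourAccessKey>",
        "project": "<yourProject>",
        "logstore": "<yourLogstore>",
        "batchSize": 128,
        "column": ["level", "message", "C_Topic", "C_LogTime", "'const_tag'"]
    }
}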

73
loghubwriter/pom.xml Normal file
View File

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>loghubwriter</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<dependency>
<groupId>com.aliyun.openservices</groupId>
<artifactId>aliyun-log</artifactId>
<version>0.6.12</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${jdk-version}</source>
<target>${jdk-version}</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,34 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
</includes>
<outputDirectory>plugin/writer/loghubwriter</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>loghubwriter-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/loghubwriter</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/loghubwriter/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.writer.loghubwriter;
/**
* Configuration keys.
* @author
*/
public final class Key {
/**
* Configuration items used by the plugin that must be provided by the plugin user.
*/
public static final String ENDPOINT = "endpoint";
public static final String ACCESS_KEY_ID = "accessId";
public static final String ACCESS_KEY_SECRET = "accessKey";
public static final String PROJECT = "project";
public static final String LOG_STORE = "logstore";
public static final String TOPIC = "topic";
public static final String COLUMN = "column";
public static final String BATCH_SIZE = "batchSize";
public static final String TIME = "time";
public static final String TIME_FORMAT = "timeformat";
public static final String SOURCE = "source";
public static final String HASH_BY_KEY = "hashKey";
}
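
For reference, a hedged sketch of a writer job parameter block exercising the keys declared above; endpoint and credentials are placeholders, and the gmt_create time column with its format is an assumption for illustration:

{
    "name": "loghubwriter",
    "parameter": {
        "endpoint": "http://<region>.log.aliyuncs.com",
        "accessId": "<yourAccessId>",
        "accessKey": "<yourAccessKey>",
        "project": "<yourProject>",
        "logstore": "<yourLogstore>",
        "topic": "",
        "batchSize": 1024,
        "column": ["id", "name", "gmt_create"],
        "time": "gmt_create",
        "timeformat": "yyyy-MM-dd HH:mm:ss",
        "source": "",
        "hashKey": false
    }
}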

View File

@ -0,0 +1,315 @@
package com.alibaba.datax.plugin.writer.loghubwriter;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.datax.common.util.StrUtil;
import com.aliyun.openservices.log.Client;
import com.aliyun.openservices.log.common.LogItem;
import com.aliyun.openservices.log.common.Shard;
import com.aliyun.openservices.log.exception.LogException;
import com.aliyun.openservices.log.request.ListShardRequest;
import com.aliyun.openservices.log.request.PutLogsRequest;
import com.aliyun.openservices.log.response.ListShardResponse;
import com.aliyun.openservices.log.response.PutLogsResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
/**
* SLS writer plugin.
* @author
*/
public class LogHubWriter extends Writer {
public static class Job extends Writer.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Configuration jobConfig = null;
@Override
public void init() {
info(LOG, "loghub writer job init begin ...");
this.jobConfig = super.getPluginJobConf();
validateParameter(jobConfig);
info(LOG, "loghub writer job init end.");
}
private void validateParameter(Configuration conf){
conf.getNecessaryValue(Key.ENDPOINT,LogHubWriterErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESS_KEY_ID,LogHubWriterErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.ACCESS_KEY_SECRET,LogHubWriterErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.PROJECT,LogHubWriterErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.LOG_STORE,LogHubWriterErrorCode.REQUIRE_VALUE);
conf.getNecessaryValue(Key.COLUMN,LogHubWriterErrorCode.REQUIRE_VALUE);
}
@Override
public List<Configuration> split(int mandatoryNumber) {
info(LOG, "split begin...");
List<Configuration> configurationList = new ArrayList<Configuration>();
for (int i = 0; i < mandatoryNumber; i++) {
configurationList.add(this.jobConfig.clone());
}
info(LOG, "split end...");
return configurationList;
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
public static class Task extends Writer.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private Configuration taskConfig;
private com.aliyun.openservices.log.Client logHubClient;
private String logStore;
private String topic;
private String project;
private List<String> columnList;
private int batchSize;
private String timeCol;
private String timeFormat;
private String source;
private boolean isHashKey;
private List<Shard> shards;
public void init() {
this.taskConfig = super.getPluginJobConf();
String endpoint = taskConfig.getString(Key.ENDPOINT);
String accessKeyId = taskConfig.getString(Key.ACCESS_KEY_ID);
String accessKeySecret = taskConfig.getString(Key.ACCESS_KEY_SECRET);
project = taskConfig.getString(Key.PROJECT);
logStore = taskConfig.getString(Key.LOG_STORE);
topic = taskConfig.getString(Key.TOPIC,"");
columnList = taskConfig.getList(Key.COLUMN,String.class);
batchSize = taskConfig.getInt(Key.BATCH_SIZE,1024);
timeCol = taskConfig.getString(Key.TIME,"");
timeFormat = taskConfig.getString(Key.TIME_FORMAT,"");
source = taskConfig.getString(Key.SOURCE,"");
isHashKey = taskConfig.getBool(Key.HASH_BY_KEY,false);
logHubClient = new Client(endpoint, accessKeyId, accessKeySecret);
if (isHashKey) {
listShard();
info(LOG, "init loghub writer with hash key mode.");
}
if (LOG.isInfoEnabled()) {
LOG.info("init loghub writer task finished.project:{} logstore:{} topic:{} batchSize:{}",project,logStore,topic,batchSize);
}
}
/**
* Fetch the shard list of the target logstore.
*/
private void listShard() {
try {
ListShardResponse response = logHubClient.ListShard(new ListShardRequest(project,logStore));
shards = response.GetShards();
if (LOG.isInfoEnabled()) {
LOG.info("Get shard count:{}", shards.size());
}
} catch (LogException e) {
info(LOG, "Get shard failed!");
throw new RuntimeException("Get shard failed!", e);
}
}
@Override
public void prepare() {
}
private int getTime(String v) {
try {
if ("bigint".equalsIgnoreCase(timeFormat)) {
return Integer.valueOf(v);
}
DateFormat sdf = new SimpleDateFormat(timeFormat);
Date date = sdf.parse(v);
return (int)(date.getTime()/1000);
} catch (Exception e) {
LOG.warn("Format time failed!", e);
}
return (int)(((new Date())).getTime()/1000);
}
@Override
public void startWrite(RecordReceiver recordReceiver) {
info(LOG, "start to write.....................");
// hash records onto shards by key
if (isHashKey) {
processDataWithHashKey(recordReceiver);
} else {
processDataWithoutHashKey(recordReceiver);
}
info(LOG, "finish to write.........");
}
private void processDataWithHashKey(RecordReceiver receiver) {
Record record;
Map<String, List<LogItem>> logMap = new HashMap<String, List<LogItem>>(shards.size());
int count = 0;
try {
while ((record = receiver.getFromReader()) != null) {
LogItem logItem = new LogItem();
if (record.getColumnNumber() != columnList.size()) {
this.getTaskPluginCollector().collectDirtyRecord(record, "column not match");
// skip records whose column count does not match the configured columns
continue;
}
String id = "";
for (int i = 0; i < record.getColumnNumber(); i++) {
String colName = columnList.get(i);
String colValue = record.getColumn(i).asString();
if (colName.endsWith("_id")) {
id = colValue;
}
logItem.PushBack(colName, colValue);
if (colName.equals(timeCol)) {
logItem.SetTime(getTime(colValue));
}
}
String hashKey = getShardHashKey(StrUtil.getMd5(id), shards);
if (!logMap.containsKey(hashKey)) {
info(LOG, "Hash key:" + hashKey);
logMap.put(hashKey, new ArrayList<LogItem>());
}
logMap.get(hashKey).add(logItem);
if (logMap.get(hashKey).size() % batchSize == 0) {
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logMap.get(hashKey), hashKey);
PutLogsResponse response = putLog(request);
count += logMap.get(hashKey).size();
if (LOG.isDebugEnabled()) {
LOG.debug("record count:{}, request id:{}", logMap.get(hashKey).size(), response.GetRequestId());
}
logMap.get(hashKey).clear();
}
}
for (Map.Entry<String, List<LogItem>> entry : logMap.entrySet()) {
if (!entry.getValue().isEmpty()) {
// flush the remaining buffered data
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, entry.getValue(), entry.getKey());
PutLogsResponse response = putLog(request);
count += entry.getValue().size();
if (LOG.isDebugEnabled()) {
LOG.debug("record count:{}, request id:{}", entry.getValue().size(), response.GetRequestId());
}
entry.getValue().clear();
}
}
LOG.info("{} records have been sent", count);
} catch (LogException ex) {
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
} catch (Exception e) {
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
}
}
private void processDataWithoutHashKey(RecordReceiver receiver) {
Record record;
ArrayList<LogItem> logGroup = new ArrayList<LogItem>();
int count = 0;
try {
while ((record = receiver.getFromReader()) != null) {
LogItem logItem = new LogItem();
if(record.getColumnNumber() != columnList.size()){
this.getTaskPluginCollector().collectDirtyRecord(record,"column not match");
// skip records whose column count does not match the configured columns
continue;
}
for (int i = 0; i < record.getColumnNumber(); i++) {
String colName = columnList.get(i);
String colValue = record.getColumn(i).asString();
logItem.PushBack(colName, colValue);
if(colName.equals(timeCol)){
logItem.SetTime(getTime(colValue));
}
}
logGroup.add(logItem);
count++;
if (count % batchSize == 0) {
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
PutLogsResponse response = putLog(request);
logGroup.clear();
if (LOG.isDebugEnabled()) {
LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
}
}
}
if (!logGroup.isEmpty()) {
// flush the remaining buffered data
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
PutLogsResponse response = putLog(request);
logGroup.clear();
if (LOG.isDebugEnabled()) {
LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
}
}
LOG.info("{} records have been sent", count);
} catch (LogException ex) {
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
} catch (Exception e) {
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
}
}
private PutLogsResponse putLog(final PutLogsRequest request) throws Exception{
final Client client = this.logHubClient;
return RetryUtil.executeWithRetry(new Callable<PutLogsResponse>() {
public PutLogsResponse call() throws LogException{
return client.PutLogs(request);
}
}, 3, 1000L, false);
}
private String getShardHashKey(String hashKey, List<Shard> shards) {
for (Shard shard : shards) {
if (hashKey.compareTo(shard.getExclusiveEndKey()) < 0 && hashKey.compareTo(shard.getInclusiveBeginKey()) >= 0) {
return shard.getInclusiveBeginKey();
}
}
return shards.get(0).getInclusiveBeginKey();
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
/**
* Log an INFO message only when INFO level is enabled.
* @param logger target logger
* @param message message to log
*/
public static void info(Logger logger, String message) {
if (logger.isInfoEnabled()) {
logger.info(message);
}
}
}

View File

@ -0,0 +1,33 @@
package com.alibaba.datax.plugin.writer.loghubwriter;
import com.alibaba.datax.common.spi.ErrorCode;
public enum LogHubWriterErrorCode implements ErrorCode {
BAD_CONFIG_VALUE("LogHubWriter-00", "The value you configured is invalid."),
LOG_HUB_ERROR("LogHubWriter-01","LogHub access encounter exception"),
REQUIRE_VALUE("LogHubWriter-02","Missing parameters");
private final String code;
private final String description;
private LogHubWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code,
this.description);
}
}

View File

@ -0,0 +1,6 @@
{
"name": "loghubwriter",
"class": "com.alibaba.datax.plugin.writer.loghubwriter.LogHubWriter",
"description": "适用于: 将数据导入到SLS LogHub中",
"developer": "alibaba"
}

View File

@ -0,0 +1,13 @@
{
"name": "loghubwriter",
"parameter": {
"endpoint": "",
"accessId": "",
"accessKey": "",
"project": "",
"logstore": "",
"topic": "",
"batchSize":1024,
"column": []
}
}

View File

@ -197,9 +197,9 @@ MysqlReader插件实现了从Mysql读取数据。在底层实现上MysqlReade
* **querySql**
* 描述在有些业务场景下where这一配置项不足以描述所筛选的条件用户可以通过该配置型来自定义筛选SQL。当用户配置了这一项之后DataX系统就会忽略column这些配置型直接使用这个配置项的内容对数据进行筛选例如需要进行多表join后同步数据使用select a,b from table_a join table_b on table_a.id = table_b.id <br />
* 描述在有些业务场景下where这一配置项不足以描述所筛选的条件用户可以通过该配置型来自定义筛选SQL。当用户配置了这一项之后DataX系统就会忽略tablecolumn这些配置型直接使用这个配置项的内容对数据进行筛选例如需要进行多表join后同步数据使用select a,b from table_a join table_b on table_a.id = table_b.id <br />
`当用户配置querySql时MysqlReader直接忽略column、where条件的配置`querySql优先级大于column、where选项。querySql和table不能同时存在
`当用户配置querySql时MysqlReader直接忽略table、column、where条件的配置`querySql优先级大于table、column、where选项。
* 必选:否 <br />
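
A hedged sketch of a reader configuration using querySql (credentials and jdbcUrl are placeholders; table, column and where are omitted on purpose):

{
    "name": "mysqlreader",
    "parameter": {
        "username": "<username>",
        "password": "<password>",
        "connection": [
            {
                "querySql": [
                    "select a,b from table_a join table_b on table_a.id = table_b.id"
                ],
                "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/<database>"]
            }
        ]
    }
}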

View File

@ -1,6 +1,5 @@
package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext;
import java.util.Arrays;
import java.util.List;
import com.alibaba.datax.common.constant.CommonConstant;
@ -27,7 +26,7 @@ public class ReaderJob extends CommonRdbmsReader.Job {
public void init(Configuration originalConfig) {
// escape database keywords in the column and table entries of the config
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
ObReaderUtils.escapeDatabaseKeywords(columns);
ObReaderUtils.escapeDatabaseKeyword(columns);
originalConfig.set(Key.COLUMN, columns);
List<JSONObject> conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class);
@ -38,7 +37,7 @@ public class ReaderJob extends CommonRdbmsReader.Job {
// tables will be null when querySql is configured
if (tables != null) {
ObReaderUtils.escapeDatabaseKeywords(tables);
ObReaderUtils.escapeDatabaseKeyword(tables);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE),
tables);
}
@ -79,7 +78,8 @@ public class ReaderJob extends CommonRdbmsReader.Job {
final String obJdbcDelimiter = com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING;
if (jdbcUrl.startsWith(obJdbcDelimiter)) {
String[] ss = jdbcUrl.split(obJdbcDelimiter);
if (ss.length >= 2) {
int elementCount = 2;
if (ss.length >= elementCount) {
String tenant = ss[1].trim();
String[] sss = tenant.split(":");
return sss[0];

View File

@ -37,12 +37,15 @@ public class ObReaderUtils {
public static final DataBaseType databaseType = DataBaseType.OceanBase;
private static final String TABLE_SCHEMA_DELIMITER = ".";
private static final Pattern JDBC_PATTERN = Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)");
private static Set<String> keywordsFromString2HashSet(final String keywords) {
return new HashSet(Arrays.asList(keywords.split(",")));
}
public static String escapeDatabaseKeywords(String keyword) {
public static String escapeDatabaseKeyword(String keyword) {
if (databaseKeywords == null) {
if (isOracleMode(compatibleMode)) {
databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS);
@ -57,10 +60,10 @@ public class ObReaderUtils {
return keyword;
}
public static void escapeDatabaseKeywords(List<String> ids) {
public static void escapeDatabaseKeyword(List<String> ids) {
if (ids != null && ids.size() > 0) {
for (int i = 0; i < ids.size(); i++) {
ids.set(i, escapeDatabaseKeywords(ids.get(i)));
ids.set(i, escapeDatabaseKeyword(ids.get(i)));
}
}
}
@ -144,7 +147,7 @@ public class ObReaderUtils {
if (isOracleMode(context.getCompatibleMode())) {
tableName = tableName.toUpperCase();
String schema;
if (tableName.contains(".")) {
if (tableName.contains(TABLE_SCHEMA_DELIMITER)) {
schema = String.format("'%s'", tableName.substring(0, tableName.indexOf(".")));
tableName = tableName.substring(tableName.indexOf(".") + 1);
} else {
@ -170,7 +173,7 @@ public class ObReaderUtils {
while (rs.next()) {
hasPk = true;
String columnName = rs.getString("Column_name");
columnName = escapeDatabaseKeywords(columnName);
columnName = escapeDatabaseKeyword(columnName);
if (!realIndex.contains(columnName)) {
realIndex.add(columnName);
}
@ -462,7 +465,7 @@ public class ObReaderUtils {
if (isOracleMode(compatibleMode)) {
String schema;
tableName = tableName.toUpperCase();
if (tableName.contains(".")) {
if (tableName.contains(TABLE_SCHEMA_DELIMITER)) {
schema = String.format("'%s'", tableName.substring(0, tableName.indexOf(".")));
tableName = tableName.substring(tableName.indexOf(".") + 1);
} else {
@ -513,7 +516,7 @@ public class ObReaderUtils {
Iterator<Map.Entry<String, List<String>>> iterator = allIndex.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String, List<String>> entry = iterator.next();
if (entry.getKey().equals("PRIMARY")) {
if ("PRIMARY".equals(entry.getKey())) {
continue;
}
@ -770,9 +773,7 @@ public class ObReaderUtils {
}
public static String getDbNameFromJdbcUrl(String jdbcUrl) {
final Pattern pattern = Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)");
Matcher matcher = pattern.matcher(jdbcUrl);
Matcher matcher = JDBC_PATTERN.matcher(jdbcUrl);
if (matcher.find()) {
return matcher.group(3);
} else {
@ -814,18 +815,52 @@ public class ObReaderUtils {
if (version1 == null || version2 == null) {
throw new RuntimeException("can not compare null version");
}
ObVersion v1 = new ObVersion(version1);
ObVersion v2 = new ObVersion(version2);
return v1.compareTo(v2);
}
String[] ver1Part = version1.split("\\.");
String[] ver2Part = version2.split("\\.");
for (int i = 0; i < ver1Part.length; i++) {
int v1 = Integer.parseInt(ver1Part[i]), v2 = Integer.parseInt(ver2Part[i]);
if (v1 > v2) {
return 1;
} else if (v1 < v2) {
return -1;
/**
* Execute a query and collect the first column of every result row.
* @param conn active database connection
* @param sql query to execute
* @return values of the first column; empty list if the query fails
*/
public static List<String> getResultsFromSql(Connection conn, String sql) {
List<String> list = new ArrayList();
Statement stmt = null;
ResultSet rs = null;
LOG.info("executing sql: " + sql);
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql);
while (rs.next()) {
list.add(rs.getString(1));
}
} catch (Exception e) {
LOG.error("error when executing sql: " + e.getMessage());
} finally {
DBUtil.closeDBResources(rs, stmt, null);
}
return 0;
return list;
}
/**
* Get the OB version: try ob_version() first, then fall back to version() if that fails.
* @param conn active database connection
* @return parsed ObVersion
*/
public static ObVersion getObVersion(Connection conn) {
List<String> results = getResultsFromSql(conn, "select ob_version()");
if (results.size() == 0) {
results = getResultsFromSql(conn, "select version()");
}
ObVersion obVersion = new ObVersion(results.get(0));
LOG.info("obVersion: " + obVersion);
return obVersion;
}
}

View File

@ -0,0 +1,86 @@
package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @author johnrobbet
*/
public class ObVersion implements Comparable<ObVersion> {
private static final Logger LOG = LoggerFactory.getLogger(ObVersion.class);
private int majorVersion;
private int minorVersion;
private int releaseNumber;
private int patchNumber;
public static final ObVersion V2276 = valueOf("2.2.76");
private static final ObVersion DEFAULT_VERSION =
valueOf(System.getProperty("defaultObVersion","3.2.3.0"));
private static final int VERSION_PART_COUNT = 4;
public ObVersion(String version) {
try {
String[] versionParts = version.split("\\.");
majorVersion = Integer.valueOf(versionParts[0]);
minorVersion = Integer.valueOf(versionParts[1]);
releaseNumber = Integer.valueOf(versionParts[2]);
int tempPatchNum = 0;
if (versionParts.length == VERSION_PART_COUNT) {
try {
tempPatchNum = Integer.valueOf(versionParts[3]);
} catch (Exception e) {
LOG.warn("fail to parse ob version: " + e.getMessage());
}
}
patchNumber = tempPatchNum;
} catch (Exception ex) {
LOG.warn("fail to get ob version, using default {} {}",
DEFAULT_VERSION, ex.getMessage());
majorVersion = DEFAULT_VERSION.majorVersion;
minorVersion = DEFAULT_VERSION.minorVersion;
releaseNumber = DEFAULT_VERSION.releaseNumber;
patchNumber = DEFAULT_VERSION.patchNumber;
}
}
public static ObVersion valueOf(String version) {
return new ObVersion(version);
}
@Override
public int compareTo(ObVersion o) {
if (this.majorVersion > o.majorVersion) {
return 1;
} else if (this.majorVersion < o.majorVersion) {
return -1;
}
if (this.minorVersion > o.minorVersion) {
return 1;
} else if (this.minorVersion < o.minorVersion) {
return -1;
}
if (this.releaseNumber > o.releaseNumber) {
return 1;
} else if (this.releaseNumber < o.releaseNumber) {
return -1;
}
if (this.patchNumber > o.patchNumber) {
return 1;
} else if (this.patchNumber < o.patchNumber) {
return -1;
}
return 0;
}
@Override
public String toString() {
return String.format("%d.%d.%d.%d", majorVersion, minorVersion, releaseNumber, patchNumber);
}
}

View File

@ -5,8 +5,13 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
*/
public enum PartType {
// Non partitioned table
NONPARTITION("NONPARTITION"),
// Partitioned table
PARTITION("PARTITION"),
// Subpartitioned table
SUBPARTITION("SUBPARTITION");
private String typeString;

View File

@ -3,7 +3,6 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import com.alibaba.datax.plugin.rdbms.reader.util.HintUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.ObReaderKey;
@ -11,8 +10,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
@ -22,12 +19,76 @@ import java.util.List;
public class PartitionSplitUtil {
private static final Logger LOG = LoggerFactory.getLogger(PartitionSplitUtil.class);
private static final String ORACLE_GET_SUBPART_TEMPLATE =
"select subpartition_name "
+ "from dba_tab_subpartitions "
+ "where table_name = '%s' and table_owner = '%s'";
private static final String ORACLE_GET_PART_TEMPLATE =
"select partition_name "
+ "from dba_tab_partitions "
+ "where table_name = '%s' and table_owner = '%s'";
private static final String MYSQL_GET_PART_TEMPLATE =
"select p.part_name "
+ "from oceanbase.__all_part p, oceanbase.%s t, oceanbase.__all_database d "
+ "where p.table_id = t.table_id "
+ "and d.database_id = t.database_id "
+ "and d.database_name = '%s' "
+ "and t.table_name = '%s'";
private static final String MYSQL_GET_SUBPART_TEMPLATE =
"select p.sub_part_name "
+ "from oceanbase.__all_sub_part p, oceanbase.%s t, oceanbase.__all_database d "
+ "where p.table_id = t.table_id "
+ "and d.database_id = t.database_id "
+ "and d.database_name = '%s' "
+ "and t.table_name = '%s'";
/**
* Get partition info from the data dictionary in OB Oracle mode.
* @param config reader configuration
* @param tableName table to inspect
* @return partition info: subpartition, partition, or non-partitioned
*/
public static PartInfo getObOraclePartInfoBySQL(Configuration config, String tableName) {
PartInfo partInfo;
DataBaseType dbType = ObReaderUtils.databaseType;
String jdbcUrl = config.getString(Key.JDBC_URL);
String username = config.getString(Key.USERNAME);
String password = config.getString(Key.PASSWORD);
String dbname = ObReaderUtils.getDbNameFromJdbcUrl(jdbcUrl).toUpperCase();
Connection conn = DBUtil.getConnection(dbType, jdbcUrl, username, password);
tableName = tableName.toUpperCase();
// check if the table has subpartitions or not
String getSubPartSql = String.format(ORACLE_GET_SUBPART_TEMPLATE, tableName, dbname);
List<String> partList = ObReaderUtils.getResultsFromSql(conn, getSubPartSql);
if (partList != null && partList.size() > 0) {
partInfo = new PartInfo(PartType.SUBPARTITION);
partInfo.addPart(partList);
return partInfo;
}
String getPartSql = String.format(ORACLE_GET_PART_TEMPLATE, tableName, dbname);
partList = ObReaderUtils.getResultsFromSql(conn, getPartSql);
if (partList != null && partList.size() > 0) {
partInfo = new PartInfo(PartType.PARTITION);
partInfo.addPart(partList);
return partInfo;
}
// table is not partitioned
partInfo = new PartInfo(PartType.NONPARTITION);
return partInfo;
}
public static List<Configuration> splitByPartition (Configuration configuration) {
List<Configuration> allSlices = new ArrayList<>();
List<Object> conns = configuration.getList(Constant.CONN_MARK, Object.class);
for (int i = 0, len = conns.size(); i < len; i++) {
List<Object> connections = configuration.getList(Constant.CONN_MARK, Object.class);
for (int i = 0, len = connections.size(); i < len; i++) {
Configuration sliceConfig = configuration.clone();
Configuration connConf = Configuration.from(conns.get(i).toString());
Configuration connConf = Configuration.from(connections.get(i).toString());
String jdbcUrl = connConf.getString(Key.JDBC_URL);
sliceConfig.set(Key.JDBC_URL, jdbcUrl);
sliceConfig.remove(Constant.CONN_MARK);
@ -64,7 +125,7 @@ public class PartitionSplitUtil {
slices.add(slice);
}
} else {
LOG.info("fail to get table part info or table is not partitioned, proceed as non-partitioned table.");
LOG.info("table is not partitioned.");
Configuration slice = configuration.clone();
slice.set(Key.QUERY_SQL, ObReaderUtils.buildQuerySql(weakRead, column, table, where));
@ -74,7 +135,16 @@ public class PartitionSplitUtil {
return slices;
}
private static PartInfo getObPartInfoBySQL(Configuration config, String table) {
public static PartInfo getObPartInfoBySQL(Configuration config, String table) {
boolean isOracleMode = "ORACLE".equals(config.getString(ObReaderKey.OB_COMPATIBILITY_MODE));
if (isOracleMode) {
return getObOraclePartInfoBySQL(config, table);
} else {
return getObMySQLPartInfoBySQL(config, table);
}
}
public static PartInfo getObMySQLPartInfoBySQL(Configuration config, String table) {
PartInfo partInfo = new PartInfo(PartType.NONPARTITION);
List<String> partList;
Connection conn = null;
@ -86,45 +156,22 @@ public class PartitionSplitUtil {
String allTable = "__all_table";
conn = DBUtil.getConnection(DataBaseType.OceanBase, jdbcUrl, username, password);
String obVersion = getResultsFromSql(conn, "select version()").get(0);
LOG.info("obVersion: " + obVersion);
if (ObReaderUtils.compareObVersion("2.2.76", obVersion) < 0) {
ObVersion obVersion = ObReaderUtils.getObVersion(conn);
if (obVersion.compareTo(ObVersion.V2276) >= 0) {
allTable = "__all_table_v2";
}
String queryPart = String.format(
"select p.part_name " +
"from oceanbase.__all_part p, oceanbase.%s t, oceanbase.__all_database d " +
"where p.table_id = t.table_id " +
"and d.database_id = t.database_id " +
"and d.database_name = '%s' " +
"and t.table_name = '%s'", allTable, dbname, table);
String querySubPart = String.format(
"select p.sub_part_name " +
"from oceanbase.__all_sub_part p, oceanbase.%s t, oceanbase.__all_database d " +
"where p.table_id = t.table_id " +
"and d.database_id = t.database_id " +
"and d.database_name = '%s' " +
"and t.table_name = '%s'", allTable, dbname, table);
if (config.getString(ObReaderKey.OB_COMPATIBILITY_MODE).equals("ORACLE")) {
queryPart = String.format(
"select partition_name from all_tab_partitions where TABLE_OWNER = '%s' and table_name = '%s'",
dbname.toUpperCase(), table.toUpperCase());
querySubPart = String.format(
"select subpartition_name from all_tab_subpartitions where TABLE_OWNER = '%s' and table_name = '%s'",
dbname.toUpperCase(), table.toUpperCase());
}
String querySubPart = String.format(MYSQL_GET_SUBPART_TEMPLATE, allTable, dbname, table);
PartType partType = PartType.SUBPARTITION;
// try subpartition first
partList = getResultsFromSql(conn, querySubPart);
partList = ObReaderUtils.getResultsFromSql(conn, querySubPart);
// if the table is not sub-partitioned, then try partitions
if (partList.isEmpty()) {
partList = getResultsFromSql(conn, queryPart);
String queryPart = String.format(MYSQL_GET_PART_TEMPLATE, allTable, dbname, table);
partList = ObReaderUtils.getResultsFromSql(conn, queryPart);
partType = PartType.PARTITION;
}
@ -140,26 +187,4 @@ public class PartitionSplitUtil {
return partInfo;
}
private static List<String> getResultsFromSql(Connection conn, String sql) {
List<String> list = new ArrayList();
Statement stmt = null;
ResultSet rs = null;
LOG.info("executing sql: " + sql);
try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql);
while (rs.next()) {
list.add(rs.getString(1));
}
} catch (Exception e) {
LOG.error("error when executing sql: " + e.getMessage());
} finally {
DBUtil.closeDBResources(rs, stmt, null);
}
return list;
}
}

View File

@ -19,6 +19,15 @@ public class TaskContext {
private boolean weakRead = true;
private String userSavePoint;
private String compatibleMode = ObReaderUtils.OB_COMPATIBLE_MODE_MYSQL;
public String getPartitionName() {
return partitionName;
}
public void setPartitionName(String partitionName) {
this.partitionName = partitionName;
}
private String partitionName;
// save point for resuming an interrupted read
@ -165,12 +174,4 @@ public class TaskContext {
public void setCompatibleMode(String compatibleMode) {
this.compatibleMode = compatibleMode;
}
public String getPartitionName() {
return partitionName;
}
public void setPartitionName(String partitionName) {
this.partitionName = partitionName;
}
}

View File

@ -18,5 +18,7 @@ public class ObReaderUtilsTest {
assert ObReaderUtils.compareObVersion("2.2.70", "2.2.50") == 1;
assert ObReaderUtils.compareObVersion("2.2.70", "3.1.2") == -1;
assert ObReaderUtils.compareObVersion("3.1.2", "3.1.2") == 0;
assert ObReaderUtils.compareObVersion("3.2.3.0", "3.2.3.0") == 0;
assert ObReaderUtils.compareObVersion("3.2.3.0-CE", "3.2.3.0") == 0;
}
}

View File

@ -61,7 +61,7 @@ public class OceanBaseV10Writer extends Writer {
checkCompatibleMode(originalConfig);
// escape database keywords in the column and table entries of the config
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
ObWriterUtils.escapeDatabaseKeywords(columns);
ObWriterUtils.escapeDatabaseKeyword(columns);
originalConfig.set(Key.COLUMN, columns);
List<JSONObject> conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class);
@ -69,7 +69,7 @@ public class OceanBaseV10Writer extends Writer {
JSONObject conn = conns.get(i);
Configuration connConfig = Configuration.from(conn.toString());
List<String> tables = connConfig.getList(Key.TABLE, String.class);
ObWriterUtils.escapeDatabaseKeywords(tables);
ObWriterUtils.escapeDatabaseKeyword(tables);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), tables);
}
this.commonJob = new CommonRdbmsWriter.Job(DATABASE_TYPE);

View File

@ -25,7 +25,7 @@ public class ObWriterUtils {
return new HashSet(Arrays.asList(keywords.split(",")));
}
public static String escapeDatabaseKeywords(String keyword) {
public static String escapeDatabaseKeyword(String keyword) {
if (databaseKeywords == null) {
if (isOracleMode()) {
databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS);
@ -40,9 +40,9 @@ public class ObWriterUtils {
return keyword;
}
public static void escapeDatabaseKeywords(List<String> keywords) {
public static void escapeDatabaseKeyword(List<String> keywords) {
for (int i = 0; i < keywords.size(); i++) {
keywords.set(i, escapeDatabaseKeywords(keywords.get(i)));
keywords.set(i, escapeDatabaseKeyword(keywords.get(i)));
}
}
public static Boolean isEscapeMode(String keyword){
@ -159,7 +159,7 @@ public class ObWriterUtils {
while (rs.next()) {
String keyName = rs.getString("Key_name");
String columnName = rs.getString("Column_name");
columnName=escapeDatabaseKeywords(columnName);
columnName= escapeDatabaseKeyword(columnName);
if(!ObWriterUtils.isEscapeMode(columnName)){
columnName = columnName.toUpperCase();
}

View File

@ -3,20 +3,6 @@ package com.alibaba.datax.plugin.reader.odpsreader;
public enum ColumnType {
PARTITION, NORMAL, CONSTANT, UNKNOWN, ;
@Override
public String toString() {
switch (this) {
case PARTITION:
return "partition";
case NORMAL:
return "normal";
case CONSTANT:
return "constant";
default:
return "unknown";
}
}
public static ColumnType asColumnType(String columnTypeString) {
if ("partition".equals(columnTypeString)) {
return PARTITION;

View File

@ -0,0 +1,24 @@
package com.alibaba.datax.plugin.reader.odpsreader;
public class InternalColumnInfo {
private String columnName;
private ColumnType columnType;
public String getColumnName() {
return columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public ColumnType getColumnType() {
return columnType;
}
public void setColumnType(ColumnType columnType) {
this.columnType = columnType;
}
}

View File

@ -15,8 +15,6 @@ import com.aliyun.odps.TableSchema;
import com.aliyun.odps.tunnel.TableTunnel.DownloadSession;
import com.aliyun.odps.type.TypeInfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -311,7 +309,7 @@ public class OdpsReader extends Reader {
*/
List<String> allPartitionColumns = this.originalConfig.getList(
Constant.PARTITION_COLUMNS, String.class);
List<Pair<String, ColumnType>> parsedColumns = OdpsUtil
List<InternalColumnInfo> parsedColumns = OdpsUtil
.parseColumns(allNormalColumns, allPartitionColumns,
userConfiguredColumns);
@ -320,13 +318,15 @@ public class OdpsReader extends Reader {
StringBuilder sb = new StringBuilder();
sb.append("[ ");
for (int i = 0, len = parsedColumns.size(); i < len; i++) {
Pair<String, ColumnType> pair = parsedColumns.get(i);
sb.append(String.format(" %s : %s", pair.getLeft(),
pair.getRight()));
InternalColumnInfo pair = parsedColumns.get(i);
sb.append(String.format(" %s : %s", pair.getColumnName(),
pair.getColumnType()));
if (i != len - 1) {
sb.append(",");
}
}
sb.append(" ]");
LOG.info("parsed column details: {} .", sb.toString());
}
@ -500,22 +500,11 @@ public class OdpsReader extends Reader {
}
try {
List<Configuration> parsedColumnsTmp = this.readerSliceConf
.getListConfiguration(Constant.PARSED_COLUMNS);
List<Pair<String, ColumnType>> parsedColumns = new ArrayList<Pair<String, ColumnType>>();
for (int i = 0; i < parsedColumnsTmp.size(); i++) {
Configuration eachColumnConfig = parsedColumnsTmp.get(i);
String columnName = eachColumnConfig.getString("left");
ColumnType columnType = ColumnType
.asColumnType(eachColumnConfig.getString("right"));
parsedColumns.add(new MutablePair<String, ColumnType>(
columnName, columnType));
}
List<InternalColumnInfo> parsedColumns = this.readerSliceConf.getListWithJson(Constant.PARSED_COLUMNS,
InternalColumnInfo.class);
ReaderProxy readerProxy = new ReaderProxy(recordSender, downloadSession,
columnTypeMap, parsedColumns, partition, this.isPartitionedTable,
start, count, this.isCompress, this.readerSliceConf);
readerProxy.doRead();
} catch (Exception e) {
throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL,

View File

@ -17,7 +17,6 @@ import com.aliyun.odps.type.MapTypeInfo;
import com.aliyun.odps.type.TypeInfo;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -34,7 +33,7 @@ public class ReaderProxy {
private RecordSender recordSender;
private TableTunnel.DownloadSession downloadSession;
private Map<String, TypeInfo> columnTypeMap;
private List<Pair<String, ColumnType>> parsedColumns;
private List<InternalColumnInfo> parsedColumns;
private String partition;
private boolean isPartitionTable;
@ -71,7 +70,7 @@ public class ReaderProxy {
public ReaderProxy(RecordSender recordSender, TableTunnel.DownloadSession downloadSession,
Map<String, TypeInfo> columnTypeMap,
List<Pair<String, ColumnType>> parsedColumns, String partition,
List<InternalColumnInfo> parsedColumns, String partition,
boolean isPartitionTable, long start, long count, boolean isCompress, Configuration taskConfig) {
this.recordSender = recordSender;
this.downloadSession = downloadSession;
@ -136,9 +135,9 @@ public class ReaderProxy {
// warn: for PARTITION||NORMAL, the key set of columnTypeMap (column names)
// is a superset of the column names in parsedColumns, so the lookups below
// always find a match
for (Pair<String, ColumnType> pair : this.parsedColumns) {
String columnName = pair.getLeft();
switch (pair.getRight()) {
for (InternalColumnInfo pair : this.parsedColumns) {
String columnName = pair.getColumnName();
switch (pair.getColumnType()) {
case PARTITION:
String partitionColumnValue = this
.getPartitionColumnValue(partitionMap,

View File

@ -7,6 +7,7 @@ import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.datax.plugin.reader.odpsreader.ColumnType;
import com.alibaba.datax.plugin.reader.odpsreader.Constant;
import com.alibaba.datax.plugin.reader.odpsreader.InternalColumnInfo;
import com.alibaba.datax.plugin.reader.odpsreader.Key;
import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode;
import com.aliyun.odps.*;
@ -215,19 +216,18 @@ public final class OdpsUtil {
return userConfiguredPartitionClassification;
}
public static List<Pair<String, ColumnType>> parseColumns(
public static List<InternalColumnInfo> parseColumns(
List<String> allNormalColumns, List<String> allPartitionColumns,
List<String> userConfiguredColumns) {
List<Pair<String, ColumnType>> parsededColumns = new ArrayList<Pair<String, ColumnType>>();
List<InternalColumnInfo> parsededColumns = new ArrayList<InternalColumnInfo>();
// warn: upper & lower case
for (String column : userConfiguredColumns) {
MutablePair<String, ColumnType> pair = new MutablePair<String, ColumnType>();
InternalColumnInfo pair = new InternalColumnInfo();
// if constant column
if (OdpsUtil.checkIfConstantColumn(column)) {
// remove first and last '
pair.setLeft(column.substring(1, column.length() - 1));
pair.setRight(ColumnType.CONSTANT);
pair.setColumnName(column.substring(1, column.length() - 1));
pair.setColumnType(ColumnType.CONSTANT);
parsededColumns.add(pair);
continue;
}
@ -236,8 +236,8 @@ public final class OdpsUtil {
// repeated in partitioning columns
int index = OdpsUtil.indexOfIgnoreCase(allNormalColumns, column);
if (0 <= index) {
pair.setLeft(allNormalColumns.get(index));
pair.setRight(ColumnType.NORMAL);
pair.setColumnName(allNormalColumns.get(index));
pair.setColumnType(ColumnType.NORMAL);
parsededColumns.add(pair);
continue;
}
@ -245,8 +245,8 @@ public final class OdpsUtil {
// if partition column
index = OdpsUtil.indexOfIgnoreCase(allPartitionColumns, column);
if (0 <= index) {
pair.setLeft(allPartitionColumns.get(index));
pair.setRight(ColumnType.PARTITION);
pair.setColumnName(allPartitionColumns.get(index));
pair.setColumnType(ColumnType.PARTITION);
parsededColumns.add(pair);
continue;
}
@ -431,13 +431,13 @@ public final class OdpsUtil {
MESSAGE_SOURCE.message("odpsutil.12", tableName), e);
}
public static List<Column> getNormalColumns(List<Pair<String, ColumnType>> parsedColumns,
public static List<Column> getNormalColumns(List<InternalColumnInfo> parsedColumns,
Map<String, TypeInfo> columnTypeMap) {
List<Column> userConfigNormalColumns = new ArrayList<Column>();
Set<String> columnNameSet = new HashSet<String>();
for (Pair<String, ColumnType> columnInfo : parsedColumns) {
if (columnInfo.getValue() == ColumnType.NORMAL) {
String columnName = columnInfo.getKey();
for (InternalColumnInfo columnInfo : parsedColumns) {
if (columnInfo.getColumnType() == ColumnType.NORMAL) {
String columnName = columnInfo.getColumnName();
if (!columnNameSet.contains(columnName)) {
Column column = new Column(columnName, columnTypeMap.get(columnName));
userConfigNormalColumns.add(column);

View File

@ -24,9 +24,6 @@
<httpclient.version>4.5</httpclient.version>
<commons-io.version>2.4</commons-io.version>
<!-- json -->
<fastjson.version>1.2.28</fastjson.version>
<!-- opentsdb -->
<opentsdb.version>2.3.2</opentsdb.version>
@ -94,7 +91,6 @@
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- opentsdb -->

View File

@ -6,5 +6,5 @@
"mechanism": "根据时间和 metric 直连底层 HBase 存储,从而 Scan 出符合条件的数据点",
"warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)"
},
"developer": "Benedict Jin"
"developer": "alibaba"
}

View File

@ -41,11 +41,9 @@
</dependency>
<dependency>
<groupId>com.oracle</groupId>
<groupId>oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.3</version>
<scope>system</scope>
<systemPath>${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar</systemPath>
</dependency>
</dependencies>

View File

@ -15,13 +15,6 @@
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/oraclereader</outputDirectory>
</fileSet>
<fileSet>
<directory>src/main/lib</directory>
<includes>
<include>ojdbc6-11.2.0.3.jar</include>
</includes>
<outputDirectory>plugin/reader/oraclereader/libs</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>

View File

@ -39,11 +39,9 @@
</dependency>
<dependency>
<groupId>com.oracle</groupId>
<groupId>oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.3</version>
<scope>system</scope>
<systemPath>${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar</systemPath>
</dependency>
</dependencies>

View File

@ -16,13 +16,6 @@
</includes>
<outputDirectory>plugin/writer/oraclewriter</outputDirectory>
</fileSet>
<fileSet>
<directory>src/main/lib</directory>
<includes>
<include>ojdbc6-11.2.0.3.jar</include>
</includes>
<outputDirectory>plugin/writer/oraclewriter/libs</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>

View File

@ -2,5 +2,5 @@
"name": "oscarwriter",
"class": "com.alibaba.datax.plugin.writer.oscarwriter.OscarWriter",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. warn: The more you know about the database, the less problems you encounter.",
"developer": "linjiayu"
"developer": "alibaba"
}

View File

@ -1,179 +0,0 @@
#!/bin/usr/env python
#-*- coding: utf-8 -*-
from optparse import OptionParser
import sys
import json
import tabulate
import zlib
from ots2 import *
class ConsoleConfig:
def __init__(self, config_file):
f = open(config_file, 'r')
config = json.loads(f.read())
self.endpoint = str(config['endpoint'])
self.accessid = str(config['accessId'])
self.accesskey = str(config['accessKey'])
self.instance_name = str(config['instanceName'])
self.status_table = str(config['statusTable'])
self.ots = OTSClient(self.endpoint, self.accessid, self.accesskey, self.instance_name)
def describe_job(config, options):
'''
1. get job's description
2. get all job's checkpoints and check if it is done
'''
if not options.stream_id:
print "Error: Should set the stream id using '-s' or '--streamid'."
sys.exit(-1)
if not options.timestamp:
print "Error: Should set the timestamp using '-t' or '--timestamp'."
sys.exit(-1)
pk = [('StreamId', options.stream_id), ('StatusType', 'DataxJobDesc'), ('StatusValue', '%16d' % int(options.timestamp))]
consumed, pk, attrs, next_token = config.ots.get_row(config.status_table, pk, [], None, 1)
if not attrs:
print 'Stream job is not found.'
sys.exit(-1)
job_detail = parse_job_detail(attrs)
print '----------JobDescriptions----------'
print json.dumps(job_detail, indent=2)
print '-----------------------------------'
stream_checkpoints = _list_checkpoints(config, options.stream_id, int(options.timestamp))
cps_headers = ['ShardId', 'SendRecordCount', 'Checkpoint', 'SkipCount', 'Version']
table_content = []
for cp in stream_checkpoints:
table_content.append([cp['ShardId'], cp['SendRecordCount'], cp['Checkpoint'], cp['SkipCount'], cp['Version']])
print tabulate.tabulate(table_content, headers=cps_headers)
# check if stream job has finished
finished = True
if len(job_detail['ShardIds']) != len(stream_checkpoints):
finished = False
for cp in stream_checkpoints:
if cp['Version'] != job_detail['Version']:
finished = False
print '----------JobSummary----------'
print 'ShardsCount:', len(job_detail['ShardIds'])
print 'CheckPointsCount:', len(stream_checkpoints)
print 'JobStatus:', 'Finished' if finished else 'NotFinished'
print '------------------------------'
def _list_checkpoints(config, stream_id, timestamp):
start_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % timestamp)]
end_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % (timestamp + 1))]
consumed_counter = CapacityUnit(0, 0)
columns_to_get = []
checkpoints = []
range_iter = config.ots.xget_range(
config.status_table, Direction.FORWARD,
start_pk, end_pk,
consumed_counter, columns_to_get, 100,
column_filter=None, max_version=1
)
rows = []
for (primary_key, attrs) in range_iter:
checkpoint = {}
for attr in attrs:
checkpoint[attr[0]] = attr[1]
if not checkpoint.has_key('SendRecordCount'):
checkpoint['SendRecordCount'] = 0
checkpoint['ShardId'] = primary_key[2][1].split('\t')[1]
checkpoints.append(checkpoint)
return checkpoints
def list_job(config, options):
'''
Two options:
1. list all jobs of stream
2. list all jobs and all streams
'''
consumed_counter = CapacityUnit(0, 0)
if options.stream_id:
start_pk = [('StreamId', options.stream_id), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)]
end_pk = [('StreamId', options.stream_id), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)]
else:
start_pk = [('StreamId', INF_MIN), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)]
end_pk = [('StreamId', INF_MAX), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)]
columns_to_get = []
range_iter = config.ots.xget_range(
config.status_table, Direction.FORWARD,
start_pk, end_pk,
consumed_counter, columns_to_get, None,
column_filter=None, max_version=1
)
rows = []
for (primary_key, attrs) in range_iter:
if primary_key[1][1] == 'DataxJobDesc':
job_detail = parse_job_detail(attrs)
rows.append([job_detail['TableName'], job_detail['JobStreamId'], job_detail['EndTime'], job_detail['StartTime'], job_detail['EndTime'], job_detail['Version']])
headers = ['TableName', 'JobStreamId', 'Timestamp', 'StartTime', 'EndTime', 'Version']
print tabulate.tabulate(rows, headers=headers)
def parse_job_detail(attrs):
job_details = {}
shard_ids_content = ''
for attr in attrs:
if attr[0].startswith('ShardIds_'):
shard_ids_content += attr[1]
else:
job_details[attr[0]] = attr[1]
shard_ids = json.loads(zlib.decompress(shard_ids_content))
if not job_details.has_key('Version'):
job_details['Version'] = ''
if not job_details.has_key('SkipCount'):
job_details['SkipCount'] = 0
job_details['ShardIds'] = shard_ids
return job_details
def parse_time(value):
try:
return int(value)
except Exception,e:
return int(time.mktime(time.strptime(value, '%Y-%m-%d %H:%M:%S')))
if __name__ == '__main__':
parser = OptionParser()
parser.add_option('-c', '--config', dest='config_file', help='path of config file', metavar='tablestore_streamreader_config.json')
parser.add_option('-a', '--action', dest='action', help='the action to do', choices = ['describe_job', 'list_job'], metavar='')
parser.add_option('-t', '--timestamp', dest='timestamp', help='the timestamp', metavar='')
parser.add_option('-s', '--streamid', dest='stream_id', help='the id of stream', metavar='')
parser.add_option('-d', '--shardid', dest='shard_id', help='the id of shard', metavar='')
options, args = parser.parse_args()
if not options.config_file:
print "Error: Should set the path of config file using '-c' or '--config'."
sys.exit(-1)
if not options.action:
print "Error: Should set the action using '-a' or '--action'."
sys.exit(-1)
console_config = ConsoleConfig(options.config_file)
if options.action == 'list_job':
list_job(console_config, options)
elif options.action == 'describe_job':
describe_job(console_config, options)

File diff suppressed because it is too large

View File

@ -60,13 +60,6 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>db2reader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>postgresqlreader/target/datax/</directory>
<includes>
@ -103,13 +96,13 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>otsstreamreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>otsstreamreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>txtfilereader/target/datax/</directory>
<includes>
@ -215,6 +208,27 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>datahubreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>loghubreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>starrocksreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<!-- writer -->
<fileSet>
@ -448,5 +462,19 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>datahubwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>loghubwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
</fileSets>
</assembly>

View File

@ -22,7 +22,8 @@ public enum DataBaseType {
ClickHouse("clickhouse", "ru.yandex.clickhouse.ClickHouseDriver"),
KingbaseES("kingbasees", "com.kingbase8.Driver"),
Oscar("oscar", "com.oscar.Driver"),
OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver");
OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver"),
StarRocks("starrocks", "com.mysql.jdbc.Driver");
private String typeName;

Some files were not shown because too many files have changed in this diff