Mirror of https://github.com/alibaba/DataX.git, synced 2025-05-02 02:31:01 +08:00

Commit 874a256a03 (parent ced5a454b9): DataX 2209. Adds plugins datahubreader, datahubwriter, loghubreader, loghubwriter, and starrocksreader; updates plugins odpsreader, oceanbasev10reader, oceanbasev10writer, elasticsearchwriter, mysqlreader, and mysqlwriter.
@ -25,7 +25,7 @@ DataX本身作为数据同步框架,将不同数据源的同步抽象为从源

# Quick Start

##### Download [DataX download](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/20220530/datax.tar.gz)
##### Download [DataX download](https://datax-opensource.oss-cn-hangzhou.aliyuncs.com/202209/datax.tar.gz)

##### Please see: [Quick Start](https://github.com/alibaba/DataX/blob/master/userGuid.md)
@ -95,6 +95,9 @@ DataX目前已经有了比较全面的插件体系,主流的RDBMS数据库、N

DataX plans to keep iterating with monthly releases, and Pull Requests from interested contributors are welcome. The contents of each monthly release are summarized below.

- [datax_v202209](https://github.com/alibaba/DataX/releases/tag/datax_v202209)
  - Connector capability updates (MaxCompute, DataHub, SLS, etc.), security vulnerability fixes, and general packaging updates
- [datax_v202205](https://github.com/alibaba/DataX/releases/tag/datax_v202205)
  - Connector capability updates (MaxCompute, Hologres, OSS, TDengine, etc.), security vulnerability fixes, and general packaging updates
@ -2,5 +2,5 @@
    "name": "clickhousewriter",
    "class": "com.alibaba.datax.plugin.writer.clickhousewriter.ClickhouseWriter",
    "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql.",
    "developer": "jiye.tjy"
    "developer": "alibaba"
}
@ -411,6 +411,15 @@ public class Configuration {
        return list;
    }

    public <T> List<T> getListWithJson(final String path, Class<T> t) {
        Object object = this.get(path, List.class);
        if (null == object) {
            return null;
        }

        return JSON.parseArray(JSON.toJSONString(object), t);
    }

    /**
     * Resolve a List object at the user-provided JSON path; return null if it does not exist.
     */
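As an illustration of the new getListWithJson helper, a sketch of how a plugin might deserialize a list of connection blocks into a POJO. The ConnectionConf class and the configuration path are hypothetical, not part of this commit:

// Hypothetical POJO: fastjson maps each list element onto its public fields.
public static class ConnectionConf {
    public String jdbcUrl;
    public java.util.List<String> table;
}

// conf is a com.alibaba.datax.common.util.Configuration loaded from the job JSON.
List<ConnectionConf> connections =
        conf.getListWithJson("job.content[0].reader.parameter.connection", ConnectionConf.class);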
@ -3,6 +3,8 @@ package com.alibaba.datax.common.util;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;

@ -82,4 +84,20 @@ public class StrUtil {
        return s.substring(0, headLength) + "..." + s.substring(s.length() - tailLength);
    }

    public static String getMd5(String plainText) {
        try {
            StringBuilder builder = new StringBuilder();
            for (byte b : MessageDigest.getInstance("MD5").digest(plainText.getBytes())) {
                int i = b & 0xff;
                if (i < 0x10) {
                    builder.append('0');
                }
                builder.append(Integer.toHexString(i));
            }
            return builder.toString();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
    }

}
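A quick sanity check for the new getMd5 helper; the expected value is the standard MD5 digest of "abc". Note that plainText.getBytes() uses the platform default charset, so non-ASCII input is encoding-dependent:

String digest = StrUtil.getMd5("abc");
// digest equals "900150983cd24fb0d6963f7d28e17f72"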
datahubreader/pom.xml (new file, 79 lines)
@ -0,0 +1,79 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>datax-all</artifactId>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>datahubreader</artifactId>
|
||||
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-common</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<groupId>org.slf4j</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.aliyun.datahub</groupId>
|
||||
<artifactId>aliyun-sdk-datahub</artifactId>
|
||||
<version>2.21.6-public</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.12</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- compiler plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>${jdk-version}</source>
|
||||
<target>${jdk-version}</target>
|
||||
<encoding>${project-sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- assembly plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||
</descriptors>
|
||||
<finalName>datax</finalName>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dwzip</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
datahubreader/src/main/assembly/package.xml (new file, 34 lines)
@ -0,0 +1,34 @@
|
||||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>src/main/resources</directory>
|
||||
<includes>
|
||||
<include>plugin.json</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/datahubreader</outputDirectory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>target/</directory>
|
||||
<includes>
|
||||
<include>datahubreader-0.0.1-SNAPSHOT.jar</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/datahubreader</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<outputDirectory>plugin/reader/datahubreader/libs</outputDirectory>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
@ -0,0 +1,8 @@
package com.alibaba.datax.plugin.reader.datahubreader;

public class Constant {

    public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
    public static String DATE_FORMAT = "yyyyMMdd";

}
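These patterns are what DatahubReaderUtils.getUnixTimeFromDateTime feeds into SimpleDateFormat to turn the beginDateTime/endDateTime strings into epoch milliseconds; a small sketch with an illustrative value:

SimpleDateFormat fmt = new SimpleDateFormat(Constant.DATETIME_FORMAT);
long beginTimestampMillis = fmt.parse("20220901120000").getTime();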
@ -0,0 +1,42 @@
package com.alibaba.datax.plugin.reader.datahubreader;

import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.TypeReference;
import com.aliyun.datahub.client.DatahubClient;
import com.aliyun.datahub.client.DatahubClientBuilder;
import com.aliyun.datahub.client.auth.Account;
import com.aliyun.datahub.client.auth.AliyunAccount;
import com.aliyun.datahub.client.common.DatahubConfig;
import com.aliyun.datahub.client.http.HttpConfig;
import org.apache.commons.lang3.StringUtils;

public class DatahubClientHelper {
    public static DatahubClient getDatahubClient(Configuration jobConfig) {
        String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
                DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
        Account account = new AliyunAccount(accessId, accessKey);
        // Whether to enable binary transport; supported by the server since version 2.12.
        boolean enableBinary = jobConfig.getBool("enableBinary", false);
        DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
        // HttpConfig is optional; defaults are used when it is not set.
        // Enabling LZ4 compression on the wire is recommended for reads and writes.
        HttpConfig httpConfig = null;
        String httpConfigStr = jobConfig.getString("httpConfig");
        if (StringUtils.isNotBlank(httpConfigStr)) {
            httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
            });
        }

        DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
        if (null != httpConfig) {
            builder.setHttpConfig(httpConfig);
        }
        DatahubClient datahubClient = builder.build();
        return datahubClient;
    }
}
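The helper reads its connection settings directly from the job parameter block; a fragment it would accept might look like the following. All values are placeholders, and the compressType field inside httpConfig is an assumption about the DataHub SDK's HttpConfig shape, not something defined by this commit. Note that httpConfig is passed as a JSON string and parsed with fastjson:

"endpoint": "https://dh-cn-hangzhou.aliyuncs.com",
"accessId": "<yourAccessId>",
"accessKey": "<yourAccessKey>",
"enableBinary": false,
"httpConfig": "{\"compressType\":\"LZ4\"}"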
@ -0,0 +1,292 @@
|
||||
package com.alibaba.datax.plugin.reader.datahubreader;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import com.aliyun.datahub.client.model.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.alibaba.datax.common.element.Column;
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.element.StringColumn;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.plugin.RecordSender;
|
||||
import com.alibaba.datax.common.spi.Reader;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
|
||||
|
||||
import com.aliyun.datahub.client.DatahubClient;
|
||||
|
||||
|
||||
public class DatahubReader extends Reader {
|
||||
public static class Job extends Reader.Job {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
|
||||
|
||||
private Configuration originalConfig;
|
||||
|
||||
private Long beginTimestampMillis;
|
||||
private Long endTimestampMillis;
|
||||
|
||||
DatahubClient datahubClient;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
LOG.info("datahub reader job init begin ...");
|
||||
this.originalConfig = super.getPluginJobConf();
|
||||
validateParameter(originalConfig);
|
||||
this.datahubClient = DatahubClientHelper.getDatahubClient(this.originalConfig);
|
||||
LOG.info("datahub reader job init end.");
|
||||
}
|
||||
|
||||
private void validateParameter(Configuration conf){
|
||||
conf.getNecessaryValue(Key.ENDPOINT,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESSKEYID,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESSKEYSECRET,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.PROJECT,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.TOPIC,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.COLUMN,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.BEGINDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ENDDATETIME,DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
|
||||
int batchSize = this.originalConfig.getInt(Key.BATCHSIZE, 1024);
|
||||
if (batchSize > 10000) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid batchSize[" + batchSize + "] value (0,10000]!");
|
||||
}
|
||||
|
||||
String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
|
||||
if (beginDateTime != null) {
|
||||
try {
|
||||
beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(beginDateTime);
|
||||
} catch (ParseException e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss]!");
|
||||
}
|
||||
}
|
||||
|
||||
if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
|
||||
}
|
||||
|
||||
String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
|
||||
if (endDateTime != null) {
|
||||
try {
|
||||
endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(endDateTime);
|
||||
} catch (ParseException e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss]!");
|
||||
}
|
||||
}
|
||||
|
||||
if (endTimestampMillis != null && endTimestampMillis <= 0) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid endTimestampMillis[" + endTimestampMillis + "]!");
|
||||
}
|
||||
|
||||
if (beginTimestampMillis != null && endTimestampMillis != null
|
||||
&& endTimestampMillis <= beginTimestampMillis) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
// create datahub client
|
||||
String project = originalConfig.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
String topic = originalConfig.getNecessaryValue(Key.TOPIC, DatahubReaderErrorCode.REQUIRE_VALUE);
|
||||
RecordType recordType = null;
|
||||
try {
|
||||
DatahubClient client = DatahubClientHelper.getDatahubClient(this.originalConfig);
|
||||
GetTopicResult getTopicResult = client.getTopic(project, topic);
|
||||
recordType = getTopicResult.getRecordType();
|
||||
} catch (Exception e) {
|
||||
LOG.warn("get topic type error: {}", e.getMessage());
|
||||
}
|
||||
if (null != recordType) {
|
||||
if (recordType == RecordType.BLOB) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"DatahubReader only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int adviceNumber) {
|
||||
LOG.info("split() begin...");
|
||||
|
||||
List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
|
||||
|
||||
String project = this.originalConfig.getString(Key.PROJECT);
|
||||
String topic = this.originalConfig.getString(Key.TOPIC);
|
||||
|
||||
List<ShardEntry> shardEntrys = DatahubReaderUtils.getShardsWithRetry(this.datahubClient, project, topic);
|
||||
if (shardEntrys == null || shardEntrys.isEmpty() || shardEntrys.size() == 0) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Project [" + project + "] Topic [" + topic + "] has no shards, please check !");
|
||||
}
|
||||
|
||||
for (ShardEntry shardEntry : shardEntrys) {
|
||||
Configuration splitedConfig = this.originalConfig.clone();
|
||||
splitedConfig.set(Key.SHARDID, shardEntry.getShardId());
|
||||
readerSplitConfigs.add(splitedConfig);
|
||||
}
|
||||
|
||||
LOG.info("split() ok and end...");
|
||||
return readerSplitConfigs;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class Task extends Reader.Task {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
|
||||
|
||||
private Configuration taskConfig;
|
||||
|
||||
private String accessId;
|
||||
private String accessKey;
|
||||
private String endpoint;
|
||||
private String project;
|
||||
private String topic;
|
||||
private String shardId;
|
||||
private Long beginTimestampMillis;
|
||||
private Long endTimestampMillis;
|
||||
private int batchSize;
|
||||
private List<String> columns;
|
||||
private RecordSchema schema;
|
||||
private String timeStampUnit;
|
||||
|
||||
DatahubClient datahubClient;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.taskConfig = super.getPluginJobConf();
|
||||
|
||||
this.accessId = this.taskConfig.getString(Key.ACCESSKEYID);
|
||||
this.accessKey = this.taskConfig.getString(Key.ACCESSKEYSECRET);
|
||||
this.endpoint = this.taskConfig.getString(Key.ENDPOINT);
|
||||
this.project = this.taskConfig.getString(Key.PROJECT);
|
||||
this.topic = this.taskConfig.getString(Key.TOPIC);
|
||||
this.shardId = this.taskConfig.getString(Key.SHARDID);
|
||||
this.batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 1024);
|
||||
this.timeStampUnit = this.taskConfig.getString(Key.TIMESTAMP_UNIT, "MICROSECOND");
|
||||
try {
|
||||
this.beginTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.BEGINDATETIME));
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
|
||||
try {
|
||||
this.endTimestampMillis = DatahubReaderUtils.getUnixTimeFromDateTime(this.taskConfig.getString(Key.ENDDATETIME));
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
|
||||
this.columns = this.taskConfig.getList(Key.COLUMN, String.class);
|
||||
|
||||
this.datahubClient = DatahubClientHelper.getDatahubClient(this.taskConfig);
|
||||
|
||||
|
||||
this.schema = DatahubReaderUtils.getDatahubSchemaWithRetry(this.datahubClient, this.project, topic);
|
||||
|
||||
LOG.info("init datahub reader task finished.project:{} topic:{} batchSize:{}", project, topic, batchSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startRead(RecordSender recordSender) {
|
||||
LOG.info("read start");
|
||||
|
||||
String beginCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
|
||||
this.topic, this.shardId, this.beginTimestampMillis);
|
||||
String endCursor = DatahubReaderUtils.getCursorWithRetry(this.datahubClient, this.project,
|
||||
this.topic, this.shardId, this.endTimestampMillis);
|
||||
|
||||
if (beginCursor == null) {
|
||||
LOG.info("Shard:{} has no data!", this.shardId);
|
||||
return;
|
||||
} else if (endCursor == null) {
|
||||
endCursor = DatahubReaderUtils.getLatestCursorWithRetry(this.datahubClient, this.project,
|
||||
this.topic, this.shardId);
|
||||
}
|
||||
|
||||
String curCursor = beginCursor;
|
||||
|
||||
boolean exit = false;
|
||||
|
||||
while (true) {
|
||||
|
||||
GetRecordsResult result = DatahubReaderUtils.getRecordsResultWithRetry(this.datahubClient, this.project, this.topic,
|
||||
this.shardId, this.batchSize, curCursor, this.schema);
|
||||
|
||||
List<RecordEntry> records = result.getRecords();
|
||||
if (records.size() > 0) {
|
||||
for (RecordEntry record : records) {
|
||||
if (record.getSystemTime() >= this.endTimestampMillis) {
|
||||
exit = true;
|
||||
break;
|
||||
}
|
||||
|
||||
HashMap<String, Column> dataMap = new HashMap<String, Column>();
|
||||
List<Field> fields = ((TupleRecordData) record.getRecordData()).getRecordSchema().getFields();
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
Field field = fields.get(i);
|
||||
Column column = DatahubReaderUtils.getColumnFromField(record, field, this.timeStampUnit);
|
||||
dataMap.put(field.getName(), column);
|
||||
}
|
||||
|
||||
Record dataxRecord = recordSender.createRecord();
|
||||
|
||||
if (null != this.columns && 1 == this.columns.size()) {
|
||||
String columnsInStr = columns.get(0).toString();
|
||||
if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
dataxRecord.addColumn(dataMap.get(fields.get(i).getName()));
|
||||
}
|
||||
|
||||
} else {
|
||||
if (dataMap.containsKey(columnsInStr)) {
|
||||
dataxRecord.addColumn(dataMap.get(columnsInStr));
|
||||
} else {
|
||||
dataxRecord.addColumn(new StringColumn(null));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (String col : this.columns) {
|
||||
if (dataMap.containsKey(col)) {
|
||||
dataxRecord.addColumn(dataMap.get(col));
|
||||
} else {
|
||||
dataxRecord.addColumn(new StringColumn(null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordSender.sendToWriter(dataxRecord);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (exit) {
|
||||
break;
|
||||
}
|
||||
|
||||
curCursor = result.getNextCursor();
|
||||
}
|
||||
|
||||
|
||||
LOG.info("end read datahub shard...");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
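Putting the keys checked in validateParameter() together, a reader parameter block for this plugin might look like this (all values illustrative):

"reader": {
    "name": "datahubreader",
    "parameter": {
        "endpoint": "https://dh-cn-hangzhou.aliyuncs.com",
        "accessId": "<yourAccessId>",
        "accessKey": "<yourAccessKey>",
        "project": "test_project",
        "topic": "test_topic",
        "beginDateTime": "20220901000000",
        "endDateTime": "20220901010000",
        "batchSize": 1024,
        "column": ["*"],
        "timeStampUnit": "MICROSECOND"
    }
}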
@ -0,0 +1,35 @@
|
||||
package com.alibaba.datax.plugin.reader.datahubreader;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public enum DatahubReaderErrorCode implements ErrorCode {
|
||||
BAD_CONFIG_VALUE("DatahubReader-00", "The value you configured is invalid."),
|
||||
LOG_HUB_ERROR("DatahubReader-01","Datahub exception"),
|
||||
REQUIRE_VALUE("DatahubReader-02","Missing parameters"),
|
||||
EMPTY_LOGSTORE_VALUE("DatahubReader-03","There is no shard under this topic");
|
||||
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
private DatahubReaderErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
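REQUIRE_VALUE is the code raised when Job.init() finds a missing key; for example (illustrative):

conf.getNecessaryValue(Key.PROJECT, DatahubReaderErrorCode.REQUIRE_VALUE);
// throws DataXException carrying: Code:[DatahubReader-02], Description:[Missing parameters].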
@ -0,0 +1,200 @@
|
||||
package com.alibaba.datax.plugin.reader.datahubreader;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
import com.alibaba.datax.common.element.*;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
|
||||
import com.alibaba.datax.common.util.RetryUtil;
|
||||
|
||||
import com.aliyun.datahub.client.DatahubClient;
|
||||
import com.aliyun.datahub.client.exception.InvalidParameterException;
|
||||
import com.aliyun.datahub.client.model.*;
|
||||
|
||||
public class DatahubReaderUtils {
|
||||
|
||||
public static long getUnixTimeFromDateTime(String dateTime) throws ParseException {
|
||||
try {
|
||||
String format = Constant.DATETIME_FORMAT;
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
|
||||
return simpleDateFormat.parse(dateTime).getTime();
|
||||
} catch (ParseException ignored) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid DateTime[" + dateTime + "]!");
|
||||
}
|
||||
}
|
||||
|
||||
public static List<ShardEntry> getShardsWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
|
||||
|
||||
List<ShardEntry> shards = null;
|
||||
try {
|
||||
shards = RetryUtil.executeWithRetry(new Callable<List<ShardEntry>>() {
|
||||
@Override
|
||||
public List<ShardEntry> call() throws Exception {
|
||||
ListShardResult listShardResult = datahubClient.listShard(project, topic);
|
||||
return listShardResult.getShards();
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get Shards error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
return shards;
|
||||
}
|
||||
|
||||
public static String getCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
|
||||
final String shardId, final long timestamp) {
|
||||
|
||||
String cursor;
|
||||
try {
|
||||
cursor = RetryUtil.executeWithRetry(new Callable<String>() {
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
try {
|
||||
return datahubClient.getCursor(project, topic, shardId, CursorType.SYSTEM_TIME, timestamp).getCursor();
|
||||
} catch (InvalidParameterException e) {
|
||||
if (e.getErrorMessage().indexOf("Time in seek request is out of range") >= 0) {
|
||||
return null;
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get Cursor error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
return cursor;
|
||||
}
|
||||
|
||||
public static String getLatestCursorWithRetry(final DatahubClient datahubClient, final String project, final String topic,
|
||||
final String shardId) {
|
||||
|
||||
String cursor;
|
||||
try {
|
||||
cursor = RetryUtil.executeWithRetry(new Callable<String>() {
|
||||
@Override
|
||||
public String call() throws Exception {
|
||||
return datahubClient.getCursor(project, topic, shardId, CursorType.LATEST).getCursor();
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get Cursor error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
return cursor;
|
||||
}
|
||||
|
||||
public static RecordSchema getDatahubSchemaWithRetry(final DatahubClient datahubClient, final String project, final String topic) {
|
||||
|
||||
RecordSchema schema;
|
||||
try {
|
||||
schema = RetryUtil.executeWithRetry(new Callable<RecordSchema>() {
|
||||
@Override
|
||||
public RecordSchema call() throws Exception {
|
||||
return datahubClient.getTopic(project, topic).getRecordSchema();
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get Topic Schema error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
|
||||
public static GetRecordsResult getRecordsResultWithRetry(final DatahubClient datahubClient, final String project,
|
||||
final String topic, final String shardId, final int batchSize, final String cursor, final RecordSchema schema) {
|
||||
|
||||
GetRecordsResult result;
|
||||
try {
|
||||
result = RetryUtil.executeWithRetry(new Callable<GetRecordsResult>() {
|
||||
@Override
|
||||
public GetRecordsResult call() throws Exception {
|
||||
return datahubClient.getRecords(project, topic, shardId, schema, cursor, batchSize);
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get Record Result error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
public static Column getColumnFromField(RecordEntry record, Field field, String timeStampUnit) {
|
||||
Column col = null;
|
||||
TupleRecordData o = (TupleRecordData) record.getRecordData();
|
||||
|
||||
switch (field.getType()) {
|
||||
case SMALLINT:
|
||||
Short shortValue = ((Short) o.getField(field.getName()));
|
||||
col = new LongColumn(shortValue == null ? null: shortValue.longValue());
|
||||
break;
|
||||
case INTEGER:
|
||||
col = new LongColumn((Integer) o.getField(field.getName()));
|
||||
break;
|
||||
case BIGINT: {
|
||||
col = new LongColumn((Long) o.getField(field.getName()));
|
||||
break;
|
||||
}
|
||||
case TINYINT: {
|
||||
Byte byteValue = ((Byte) o.getField(field.getName()));
|
||||
col = new LongColumn(byteValue == null ? null : byteValue.longValue());
|
||||
break;
|
||||
}
|
||||
case BOOLEAN: {
|
||||
col = new BoolColumn((Boolean) o.getField(field.getName()));
|
||||
break;
|
||||
}
|
||||
case FLOAT:
|
||||
col = new DoubleColumn((Float) o.getField(field.getName()));
|
||||
break;
|
||||
case DOUBLE: {
|
||||
col = new DoubleColumn((Double) o.getField(field.getName()));
|
||||
break;
|
||||
}
|
||||
case STRING: {
|
||||
col = new StringColumn((String) o.getField(field.getName()));
|
||||
break;
|
||||
}
|
||||
case DECIMAL: {
|
||||
BigDecimal value = (BigDecimal) o.getField(field.getName());
|
||||
col = new DoubleColumn(value == null ? null : value.doubleValue());
|
||||
break;
|
||||
}
|
||||
case TIMESTAMP: {
|
||||
Long value = (Long) o.getField(field.getName());
|
||||
|
||||
if ("MILLISECOND".equals(timeStampUnit)) {
|
||||
// MILLISECOND: 13-digit precision, use new Date(value) directly.
|
||||
col = new DateColumn(value == null ? null : new Date(value));
|
||||
}
|
||||
else if ("SECOND".equals(timeStampUnit)){
|
||||
col = new DateColumn(value == null ? null : new Date(value * 1000));
|
||||
}
|
||||
else {
|
||||
// Default is MICROSECOND: 16-digit precision, kept consistent with the previous logic.
|
||||
col = new DateColumn(value == null ? null : new Date(value / 1000));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw new RuntimeException("Unknown column type: " + field.getType());
|
||||
}
|
||||
|
||||
return col;
|
||||
}
|
||||
|
||||
}
|
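The TIMESTAMP branch above scales the raw DataHub value according to timeStampUnit before building a DateColumn; restated with an illustrative raw value:

long raw = 1662004800000000L;          // a microsecond-precision value, the default case here
// MICROSECOND (default): new Date(raw / 1000)
// MILLISECOND:           new Date(raw)
// SECOND:                new Date(raw * 1000)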
@ -0,0 +1,37 @@
|
||||
package com.alibaba.datax.plugin.reader.datahubreader;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
import com.alibaba.datax.common.util.MessageSource;
|
||||
|
||||
public enum DatahubWriterErrorCode implements ErrorCode {
|
||||
MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
|
||||
INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
|
||||
GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
|
||||
WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
|
||||
SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
|
||||
;
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
private DatahubWriterErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
@ -0,0 +1,35 @@
package com.alibaba.datax.plugin.reader.datahubreader;

public final class Key {

    /**
     * Configuration keys that must be provided by the plugin user.
     */
    public static final String ENDPOINT = "endpoint";

    public static final String ACCESSKEYID = "accessId";

    public static final String ACCESSKEYSECRET = "accessKey";

    public static final String PROJECT = "project";

    public static final String TOPIC = "topic";

    public static final String BEGINDATETIME = "beginDateTime";

    public static final String ENDDATETIME = "endDateTime";

    public static final String BATCHSIZE = "batchSize";

    public static final String COLUMN = "column";

    public static final String SHARDID = "shardId";

    public static final String CONFIG_KEY_ENDPOINT = "endpoint";
    public static final String CONFIG_KEY_ACCESS_ID = "accessId";
    public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";


    public static final String TIMESTAMP_UNIT = "timeStampUnit";

}
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,9 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
|
||||
errorcode.invalid_config_value=您的參數配寘錯誤.
|
||||
errorcode.get_topic_info_fail=獲取shard清單失敗.
|
||||
errorcode.write_datahub_fail=寫數據失敗.
|
||||
errorcode.schema_not_match=數據格式錯誤.
|
@ -0,0 +1,9 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
|
||||
errorcode.invalid_config_value=您的參數配寘錯誤.
|
||||
errorcode.get_topic_info_fail=獲取shard清單失敗.
|
||||
errorcode.write_datahub_fail=寫數據失敗.
|
||||
errorcode.schema_not_match=數據格式錯誤.
|
datahubreader/src/main/resources/job_config_template.json (new file, 14 lines)
@ -0,0 +1,14 @@
{
    "name": "datahubreader",
    "parameter": {
        "endpoint": "",
        "accessId": "",
        "accessKey": "",
        "project": "",
        "topic": "",
        "beginDateTime": "20180913121019",
        "endDateTime": "20180913121119",
        "batchSize": 1024,
        "column": []
    }
}
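The template above only covers the reader parameters; a complete job wraps them in the usual DataX envelope. A minimal illustrative pairing with the built-in streamwriter:

{
    "job": {
        "setting": { "speed": { "channel": 1 } },
        "content": [
            {
                "reader": {
                    "name": "datahubreader",
                    "parameter": {
                        "endpoint": "",
                        "accessId": "",
                        "accessKey": "",
                        "project": "",
                        "topic": "",
                        "beginDateTime": "20180913121019",
                        "endDateTime": "20180913121119",
                        "column": ["*"]
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": { "print": true }
                }
            }
        ]
    }
}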
datahubreader/src/main/resources/plugin.json (new file, 6 lines)
@ -0,0 +1,6 @@
{
    "name": "datahubreader",
    "class": "com.alibaba.datax.plugin.reader.datahubreader.DatahubReader",
    "description": "datahub reader",
    "developer": "alibaba"
}
datahubwriter/pom.xml (new file, 79 lines)
@ -0,0 +1,79 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>datax-all</artifactId>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>datahubwriter</artifactId>
|
||||
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-common</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<groupId>org.slf4j</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.aliyun.datahub</groupId>
|
||||
<artifactId>aliyun-sdk-datahub</artifactId>
|
||||
<version>2.21.6-public</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.12</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- compiler plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>${jdk-version}</source>
|
||||
<target>${jdk-version}</target>
|
||||
<encoding>${project-sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- assembly plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||
</descriptors>
|
||||
<finalName>datax</finalName>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dwzip</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
datahubwriter/src/main/assembly/package.xml (new file, 34 lines)
@ -0,0 +1,34 @@
|
||||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>src/main/resources</directory>
|
||||
<includes>
|
||||
<include>plugin.json</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/writer/datahubwriter</outputDirectory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>target/</directory>
|
||||
<includes>
|
||||
<include>datahubwriter-0.0.1-SNAPSHOT.jar</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/writer/datahubwriter</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<outputDirectory>plugin/writer/datahubwriter/libs</outputDirectory>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
@ -0,0 +1,43 @@
|
||||
package com.alibaba.datax.plugin.writer.datahubwriter;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.TypeReference;
|
||||
import com.aliyun.datahub.client.DatahubClient;
|
||||
import com.aliyun.datahub.client.DatahubClientBuilder;
|
||||
import com.aliyun.datahub.client.auth.Account;
|
||||
import com.aliyun.datahub.client.auth.AliyunAccount;
|
||||
import com.aliyun.datahub.client.common.DatahubConfig;
|
||||
import com.aliyun.datahub.client.http.HttpConfig;
|
||||
|
||||
public class DatahubClientHelper {
|
||||
public static DatahubClient getDatahubClient(Configuration jobConfig) {
|
||||
String accessId = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID,
|
||||
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
String accessKey = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY,
|
||||
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
String endpoint = jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT,
|
||||
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
Account account = new AliyunAccount(accessId, accessKey);
|
||||
// Whether to enable binary transport; supported by the server since version 2.12.
|
||||
boolean enableBinary = jobConfig.getBool("enableBinary", false);
|
||||
DatahubConfig datahubConfig = new DatahubConfig(endpoint, account, enableBinary);
|
||||
// HttpConfig is optional; defaults are used when it is not set.
// Enabling LZ4 compression on the wire is recommended for reads and writes.
|
||||
HttpConfig httpConfig = null;
|
||||
String httpConfigStr = jobConfig.getString("httpConfig");
|
||||
if (StringUtils.isNotBlank(httpConfigStr)) {
|
||||
httpConfig = JSON.parseObject(httpConfigStr, new TypeReference<HttpConfig>() {
|
||||
});
|
||||
}
|
||||
|
||||
DatahubClientBuilder builder = DatahubClientBuilder.newBuilder().setDatahubConfig(datahubConfig);
|
||||
if (null != httpConfig) {
|
||||
builder.setHttpConfig(httpConfig);
|
||||
}
|
||||
DatahubClient datahubClient = builder.build();
|
||||
return datahubClient;
|
||||
}
|
||||
}
|
@ -0,0 +1,355 @@
|
||||
package com.alibaba.datax.plugin.writer.datahubwriter;
|
||||
|
||||
import com.alibaba.datax.common.element.Column;
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.plugin.RecordReceiver;
|
||||
import com.alibaba.datax.common.spi.Writer;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
|
||||
import com.alibaba.datax.common.util.RetryUtil;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.aliyun.datahub.client.DatahubClient;
|
||||
import com.aliyun.datahub.client.model.FieldType;
|
||||
import com.aliyun.datahub.client.model.GetTopicResult;
|
||||
import com.aliyun.datahub.client.model.ListShardResult;
|
||||
import com.aliyun.datahub.client.model.PutErrorEntry;
|
||||
import com.aliyun.datahub.client.model.PutRecordsResult;
|
||||
import com.aliyun.datahub.client.model.RecordEntry;
|
||||
import com.aliyun.datahub.client.model.RecordSchema;
|
||||
import com.aliyun.datahub.client.model.RecordType;
|
||||
import com.aliyun.datahub.client.model.ShardEntry;
|
||||
import com.aliyun.datahub.client.model.ShardState;
|
||||
import com.aliyun.datahub.client.model.TupleRecordData;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
public class DatahubWriter extends Writer {
|
||||
|
||||
/**
* Methods on Job run only once; methods on Task are run in parallel by multiple Task threads started by the framework.
* <p/>
* The overall Writer execution flow is:
* <pre>
* Job:  init --> prepare --> split
*
* Task: init --> prepare --> startWrite --> post --> destroy
* Task: init --> prepare --> startWrite --> post --> destroy
*
* Job:  post --> destroy
* </pre>
*/
|
||||
public static class Job extends Writer.Job {
|
||||
private static final Logger LOG = LoggerFactory
|
||||
.getLogger(Job.class);
|
||||
|
||||
private Configuration jobConfig = null;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.jobConfig = super.getPluginJobConf();
|
||||
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ENDPOINT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_ID, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
jobConfig.getNecessaryValue(Key.CONFIG_KEY_ACCESS_KEY, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
String project = jobConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT,
|
||||
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
String topic = jobConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC,
|
||||
DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
RecordType recordType = null;
|
||||
DatahubClient client = DatahubClientHelper.getDatahubClient(this.jobConfig);
|
||||
try {
|
||||
GetTopicResult getTopicResult = client.getTopic(project, topic);
|
||||
recordType = getTopicResult.getRecordType();
|
||||
} catch (Exception e) {
|
||||
LOG.warn("get topic type error: {}", e.getMessage());
|
||||
}
|
||||
if (null != recordType) {
|
||||
if (recordType == RecordType.BLOB) {
|
||||
throw DataXException.asDataXException(DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
|
||||
"DatahubWriter only support 'Tuple' RecordType now, but your RecordType is 'BLOB'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int mandatoryNumber) {
|
||||
List<Configuration> configs = new ArrayList<Configuration>();
|
||||
for (int i = 0; i < mandatoryNumber; ++i) {
|
||||
configs.add(jobConfig.clone());
|
||||
}
|
||||
return configs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {}
|
||||
|
||||
@Override
|
||||
public void destroy() {}
|
||||
|
||||
}
|
||||
|
||||
public static class Task extends Writer.Task {
|
||||
private static final Logger LOG = LoggerFactory
|
||||
.getLogger(Task.class);
|
||||
private static final List<String> FATAL_ERRORS_DEFAULT = Arrays.asList(
|
||||
"InvalidParameterM",
|
||||
"MalformedRecord",
|
||||
"INVALID_SHARDID",
|
||||
"NoSuchTopic",
|
||||
"NoSuchShard"
|
||||
);
|
||||
|
||||
private Configuration taskConfig;
|
||||
private DatahubClient client;
|
||||
private String project;
|
||||
private String topic;
|
||||
private List<String> shards;
|
||||
private int maxCommitSize;
|
||||
private int maxRetryCount;
|
||||
private RecordSchema schema;
|
||||
private long retryInterval;
|
||||
private Random random;
|
||||
private List<String> column;
|
||||
private List<Integer> columnIndex;
|
||||
private boolean enableColumnConfig;
|
||||
private List<String> fatalErrors;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.taskConfig = super.getPluginJobConf();
|
||||
project = taskConfig.getNecessaryValue(Key.CONFIG_KEY_PROJECT, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
topic = taskConfig.getNecessaryValue(Key.CONFIG_KEY_TOPIC, DatahubWriterErrorCode.MISSING_REQUIRED_VALUE);
|
||||
maxCommitSize = taskConfig.getInt(Key.CONFIG_KEY_MAX_COMMIT_SIZE, 1024*1024);
|
||||
maxRetryCount = taskConfig.getInt(Key.CONFIG_KEY_MAX_RETRY_COUNT, 500);
|
||||
this.retryInterval = taskConfig.getInt(Key.RETRY_INTERVAL, 650);
|
||||
this.random = new Random();
|
||||
this.column = this.taskConfig.getList(Key.CONFIG_KEY_COLUMN, String.class);
|
||||
// ["*"]
|
||||
if (null != this.column && 1 == this.column.size()) {
|
||||
if (StringUtils.equals("*", this.column.get(0))) {
|
||||
this.column = null;
|
||||
}
|
||||
}
|
||||
this.columnIndex = new ArrayList<Integer>();
|
||||
// Keep a switch here as a safety net to disable column mapping.
|
||||
this.enableColumnConfig = this.taskConfig.getBool("enableColumnConfig", true);
|
||||
this.fatalErrors = this.taskConfig.getList("fatalErrors", Task.FATAL_ERRORS_DEFAULT, String.class);
|
||||
this.client = DatahubClientHelper.getDatahubClient(this.taskConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
final String shardIdConfig = this.taskConfig.getString(Key.CONFIG_KEY_SHARD_ID);
|
||||
this.shards = new ArrayList<String>();
|
||||
try {
|
||||
RetryUtil.executeWithRetry(new Callable<Void>() {
|
||||
@Override
|
||||
public Void call() throws Exception {
|
||||
ListShardResult result = client.listShard(project, topic);
|
||||
if (StringUtils.isNotBlank(shardIdConfig)) {
|
||||
shards.add(shardIdConfig);
|
||||
} else {
|
||||
for (ShardEntry shard : result.getShards()) {
|
||||
if (shard.getState() == ShardState.ACTIVE || shard.getState() == ShardState.OPENING) {
|
||||
shards.add(shard.getShardId());
|
||||
}
|
||||
}
|
||||
}
|
||||
schema = client.getTopic(project, topic).getRecordSchema();
|
||||
return null;
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(5), DataXCaseEnvUtil.getRetryInterval(10000L), DataXCaseEnvUtil.getRetryExponential(false));
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(DatahubWriterErrorCode.GET_TOPOIC_INFO_FAIL,
|
||||
"get topic info failed", e);
|
||||
}
|
||||
LOG.info("datahub topic {} shard to write: {}", this.topic, JSON.toJSONString(this.shards));
|
||||
LOG.info("datahub topic {} has schema: {}", this.topic, JSON.toJSONString(this.schema));
|
||||
|
||||
// Compute the write order from the topic schema order and the user-configured columns, so column reordering is supported.
// All subsequent writes to DataHub use this columnIndex ordering.
|
||||
int totalSize = this.schema.getFields().size();
|
||||
if (null != this.column && !this.column.isEmpty() && this.enableColumnConfig) {
|
||||
for (String eachCol : this.column) {
|
||||
int indexFound = -1;
|
||||
for (int i = 0; i < totalSize; i++) {
|
||||
// warn: column-name matching is case-insensitive
|
||||
if (StringUtils.equalsIgnoreCase(eachCol, this.schema.getField(i).getName())) {
|
||||
indexFound = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (indexFound >= 0) {
|
||||
this.columnIndex.add(indexFound);
|
||||
} else {
|
||||
throw DataXException.asDataXException(DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
|
||||
String.format("can not find column %s in datahub topic %s", eachCol, this.topic));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < totalSize; i++) {
|
||||
this.columnIndex.add(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startWrite(RecordReceiver recordReceiver) {
|
||||
Record record;
|
||||
List<RecordEntry> records = new ArrayList<RecordEntry>();
|
||||
String shardId = null;
|
||||
if (1 == this.shards.size()) {
|
||||
shardId = shards.get(0);
|
||||
} else {
|
||||
shardId = shards.get(this.random.nextInt(shards.size()));
|
||||
}
|
||||
int commitSize = 0;
|
||||
try {
|
||||
while ((record = recordReceiver.getFromReader()) != null) {
|
||||
RecordEntry dhRecord = convertRecord(record, shardId);
|
||||
if (dhRecord != null) {
|
||||
records.add(dhRecord);
|
||||
}
|
||||
commitSize += record.getByteSize();
|
||||
if (commitSize >= maxCommitSize) {
|
||||
commit(records);
|
||||
records.clear();
|
||||
commitSize = 0;
|
||||
if (1 == this.shards.size()) {
|
||||
shardId = shards.get(0);
|
||||
} else {
|
||||
shardId = shards.get(this.random.nextInt(shards.size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (commitSize > 0) {
|
||||
commit(records);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {}
|
||||
|
||||
@Override
|
||||
public void destroy() {}
|
||||
|
||||
private void commit(List<RecordEntry> records) throws InterruptedException {
|
||||
PutRecordsResult result = client.putRecords(project, topic, records);
|
||||
if (result.getFailedRecordCount() > 0) {
|
||||
for (int i = 0; i < maxRetryCount; ++i) {
|
||||
boolean limitExceededMessagePrinted = false;
|
||||
for (PutErrorEntry error : result.getPutErrorEntries()) {
|
||||
// For LimitExceeded, log only once rather than once per failed record.
|
||||
if (StringUtils.equalsIgnoreCase("LimitExceeded", error.getErrorcode())) {
|
||||
if (!limitExceededMessagePrinted) {
|
||||
LOG.warn("write record error, request id: {}, error code: {}, error message: {}",
|
||||
result.getRequestId(), error.getErrorcode(), error.getMessage());
|
||||
limitExceededMessagePrinted = true;
|
||||
}
|
||||
} else {
|
||||
LOG.error("write record error, request id: {}, error code: {}, error message: {}",
|
||||
result.getRequestId(), error.getErrorcode(), error.getMessage());
|
||||
}
|
||||
if (this.fatalErrors.contains(error.getErrorcode())) {
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
|
||||
error.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
if (this.retryInterval >= 0) {
|
||||
Thread.sleep(this.retryInterval);
|
||||
} else {
|
||||
Thread.sleep(new Random().nextInt(700) + 300);
|
||||
}
|
||||
|
||||
result = client.putRecords(project, topic, result.getFailedRecords());
|
||||
if (result.getFailedRecordCount() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.WRITE_DATAHUB_FAIL,
|
||||
"write datahub failed");
|
||||
}
|
||||
}
|
||||
|
||||
private RecordEntry convertRecord(Record dxRecord, String shardId) {
|
||||
try {
|
||||
RecordEntry dhRecord = new RecordEntry();
|
||||
dhRecord.setShardId(shardId);
|
||||
TupleRecordData data = new TupleRecordData(this.schema);
|
||||
for (int i = 0; i < this.columnIndex.size(); ++i) {
|
||||
int orderInSchema = this.columnIndex.get(i);
|
||||
FieldType type = this.schema.getField(orderInSchema).getType();
|
||||
Column column = dxRecord.getColumn(i);
|
||||
switch (type) {
|
||||
case BIGINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case DOUBLE:
|
||||
data.setField(orderInSchema, column.asDouble());
|
||||
break;
|
||||
case STRING:
|
||||
data.setField(orderInSchema, column.asString());
|
||||
break;
|
||||
case BOOLEAN:
|
||||
data.setField(orderInSchema, column.asBoolean());
|
||||
break;
|
||||
case TIMESTAMP:
|
||||
if (null == column.asDate()) {
|
||||
data.setField(orderInSchema, null);
|
||||
} else {
|
||||
data.setField(orderInSchema, column.asDate().getTime() * 1000);
|
||||
}
|
||||
break;
|
||||
case DECIMAL:
|
||||
// warn
|
||||
data.setField(orderInSchema, column.asBigDecimal());
|
||||
break;
|
||||
case INTEGER:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case FLOAT:
|
||||
data.setField(orderInSchema, column.asDouble());
|
||||
break;
|
||||
case TINYINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
case SMALLINT:
|
||||
data.setField(orderInSchema, column.asLong());
|
||||
break;
|
||||
default:
|
||||
throw DataXException.asDataXException(
|
||||
DatahubWriterErrorCode.SCHEMA_NOT_MATCH,
|
||||
String.format("does not support type: %s", type));
|
||||
}
|
||||
}
|
||||
dhRecord.setRecordData(data);
|
||||
return dhRecord;
|
||||
} catch (Exception e) {
|
||||
super.getTaskPluginCollector().collectDirtyRecord(dxRecord, e, "convert record failed");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
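The prepare()/convertRecord() pair above uses columnIndex to map record columns onto schema positions, which is what makes column reordering work; a small worked example with a hypothetical schema and column setting:

// topic schema fields : [id, name, gmt_create]
// job "column" setting: ["name", "id"]
// columnIndex          : [1, 0]
// -> record column 0 is written to schema field "name", record column 1 to "id";
//    "gmt_create" is never set and remains null in the written record.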
@ -0,0 +1,37 @@
|
||||
package com.alibaba.datax.plugin.writer.datahubwriter;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
import com.alibaba.datax.common.util.MessageSource;
|
||||
|
||||
public enum DatahubWriterErrorCode implements ErrorCode {
|
||||
MISSING_REQUIRED_VALUE("DatahubWriter-01", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.missing_required_value")),
|
||||
INVALID_CONFIG_VALUE("DatahubWriter-02", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.invalid_config_value")),
|
||||
GET_TOPOIC_INFO_FAIL("DatahubWriter-03", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.get_topic_info_fail")),
|
||||
WRITE_DATAHUB_FAIL("DatahubWriter-04", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.write_datahub_fail")),
|
||||
SCHEMA_NOT_MATCH("DatahubWriter-05", MessageSource.loadResourceBundle(DatahubWriterErrorCode.class).message("errorcode.schema_not_match")),
|
||||
;
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
private DatahubWriterErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package com.alibaba.datax.plugin.writer.datahubwriter;
|
||||
|
||||
public final class Key {
|
||||
|
||||
/**
|
||||
* Configuration keys that the plugin requires its users to provide
|
||||
*/
|
||||
public static final String CONFIG_KEY_ENDPOINT = "endpoint";
|
||||
public static final String CONFIG_KEY_ACCESS_ID = "accessId";
|
||||
public static final String CONFIG_KEY_ACCESS_KEY = "accessKey";
|
||||
public static final String CONFIG_KEY_PROJECT = "project";
|
||||
public static final String CONFIG_KEY_TOPIC = "topic";
|
||||
public static final String CONFIG_KEY_WRITE_MODE = "mode";
|
||||
public static final String CONFIG_KEY_SHARD_ID = "shardId";
|
||||
public static final String CONFIG_KEY_MAX_COMMIT_SIZE = "maxCommitSize";
|
||||
public static final String CONFIG_KEY_MAX_RETRY_COUNT = "maxRetryCount";
|
||||
|
||||
public static final String CONFIG_VALUE_SEQUENCE_MODE = "sequence";
|
||||
public static final String CONFIG_VALUE_RANDOM_MODE = "random";
|
||||
|
||||
public final static String MAX_RETRY_TIME = "maxRetryTime";
|
||||
|
||||
public final static String RETRY_INTERVAL = "retryInterval";
|
||||
|
||||
public final static String CONFIG_KEY_COLUMN = "column";
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,5 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
|
@ -0,0 +1,9 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
|
||||
errorcode.invalid_config_value=您的參數配寘錯誤.
|
||||
errorcode.get_topic_info_fail=獲取shard清單失敗.
|
||||
errorcode.write_datahub_fail=寫數據失敗.
|
||||
errorcode.schema_not_match=數據格式錯誤.
|
@ -0,0 +1,9 @@
|
||||
errorcode.missing_required_value=\u60A8\u7F3A\u5931\u4E86\u5FC5\u987B\u586B\u5199\u7684\u53C2\u6570\u503C.
|
||||
errorcode.invalid_config_value=\u60A8\u7684\u53C2\u6570\u914D\u7F6E\u9519\u8BEF.
|
||||
errorcode.get_topic_info_fail=\u83B7\u53D6shard\u5217\u8868\u5931\u8D25.
|
||||
errorcode.write_datahub_fail=\u5199\u6570\u636E\u5931\u8D25.
|
||||
errorcode.schema_not_match=\u6570\u636E\u683C\u5F0F\u9519\u8BEF.
errorcode.missing_required_value=您缺失了必須填寫的參數值.
|
||||
errorcode.invalid_config_value=您的參數配寘錯誤.
|
||||
errorcode.get_topic_info_fail=獲取shard清單失敗.
|
||||
errorcode.write_datahub_fail=寫數據失敗.
|
||||
errorcode.schema_not_match=數據格式錯誤.
|
14
datahubwriter/src/main/resources/job_config_template.json
Normal file
@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "datahubwriter",
|
||||
"parameter": {
|
||||
"endpoint":"",
|
||||
"accessId": "",
|
||||
"accessKey": "",
|
||||
"project": "",
|
||||
"topic": "",
|
||||
"mode": "random",
|
||||
"shardId": "",
|
||||
"maxCommitSize": 524288,
|
||||
"maxRetryCount": 500
|
||||
}
|
||||
}
|
6
datahubwriter/src/main/resources/plugin.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "datahubwriter",
|
||||
"class": "com.alibaba.datax.plugin.writer.datahubwriter.DatahubWriter",
|
||||
"description": "datahub writer",
|
||||
"developer": "alibaba"
|
||||
}
|
@ -35,12 +35,12 @@
|
||||
<dependency>
|
||||
<groupId>io.searchbox</groupId>
|
||||
<artifactId>jest-common</artifactId>
|
||||
<version>2.4.0</version>
|
||||
<version>6.3.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.searchbox</groupId>
|
||||
<artifactId>jest</artifactId>
|
||||
<version>2.4.0</version>
|
||||
<version>6.3.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
|
@ -1,236 +0,0 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
import io.searchbox.action.Action;
|
||||
import io.searchbox.client.JestClient;
|
||||
import io.searchbox.client.JestClientFactory;
|
||||
import io.searchbox.client.JestResult;
|
||||
import io.searchbox.client.config.HttpClientConfig;
|
||||
import io.searchbox.client.config.HttpClientConfig.Builder;
|
||||
import io.searchbox.core.Bulk;
|
||||
import io.searchbox.indices.CreateIndex;
|
||||
import io.searchbox.indices.DeleteIndex;
|
||||
import io.searchbox.indices.IndicesExists;
|
||||
import io.searchbox.indices.aliases.*;
|
||||
import io.searchbox.indices.mapping.PutMapping;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Created by xiongfeng.bxf on 17/2/8.
|
||||
*/
|
||||
public class ESClient {
|
||||
private static final Logger log = LoggerFactory.getLogger(ESClient.class);
|
||||
|
||||
private JestClient jestClient;
|
||||
|
||||
public JestClient getClient() {
|
||||
return jestClient;
|
||||
}
|
||||
|
||||
public void createClient(String endpoint,
|
||||
String user,
|
||||
String passwd,
|
||||
boolean multiThread,
|
||||
int readTimeout,
|
||||
boolean compression,
|
||||
boolean discovery) {
|
||||
|
||||
JestClientFactory factory = new JestClientFactory();
|
||||
Builder httpClientConfig = new HttpClientConfig
|
||||
.Builder(endpoint)
|
||||
.setPreemptiveAuth(new HttpHost(endpoint))
|
||||
.multiThreaded(multiThread)
|
||||
.connTimeout(30000)
|
||||
.readTimeout(readTimeout)
|
||||
.maxTotalConnection(200)
|
||||
.requestCompressionEnabled(compression)
|
||||
.discoveryEnabled(discovery)
|
||||
.discoveryFrequency(5l, TimeUnit.MINUTES);
|
||||
|
||||
if (!("".equals(user) || "".equals(passwd))) {
|
||||
httpClientConfig.defaultCredentials(user, passwd);
|
||||
}
|
||||
|
||||
factory.setHttpClientConfig(httpClientConfig.build());
|
||||
|
||||
jestClient = factory.getObject();
|
||||
}
|
||||
|
||||
public boolean indicesExists(String indexName) throws Exception {
|
||||
boolean isIndicesExists = false;
|
||||
JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build());
|
||||
if (rst.isSucceeded()) {
|
||||
isIndicesExists = true;
|
||||
} else {
|
||||
switch (rst.getResponseCode()) {
|
||||
case 404:
|
||||
isIndicesExists = false;
|
||||
break;
|
||||
case 401:
|
||||
// no access permission
|
||||
default:
|
||||
log.warn(rst.getErrorMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return isIndicesExists;
|
||||
}
|
||||
|
||||
public boolean deleteIndex(String indexName) throws Exception {
|
||||
log.info("delete index " + indexName);
|
||||
if (indicesExists(indexName)) {
|
||||
JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
|
||||
if (!rst.isSucceeded()) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
log.info("index cannot found, skip delete " + indexName);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean createIndex(String indexName, String typeName,
|
||||
Object mappings, String settings, boolean dynamic) throws Exception {
|
||||
JestResult rst = null;
|
||||
if (!indicesExists(indexName)) {
|
||||
log.info("create index " + indexName);
|
||||
rst = jestClient.execute(
|
||||
new CreateIndex.Builder(indexName)
|
||||
.settings(settings)
|
||||
.setParameter("master_timeout", "5m")
|
||||
.build()
|
||||
);
|
||||
//index_already_exists_exception
|
||||
if (!rst.isSucceeded()) {
|
||||
if (getStatus(rst) == 400) {
|
||||
log.info(String.format("index [%s] already exists", indexName));
|
||||
return true;
|
||||
} else {
|
||||
log.error(rst.getErrorMessage());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
log.info(String.format("create [%s] index success", indexName));
|
||||
}
|
||||
}
|
||||
|
||||
int idx = 0;
|
||||
while (idx < 5) {
|
||||
if (indicesExists(indexName)) {
|
||||
break;
|
||||
}
|
||||
Thread.sleep(2000);
|
||||
idx ++;
|
||||
}
|
||||
if (idx >= 5) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic) {
|
||||
log.info("ignore mappings");
|
||||
return true;
|
||||
}
|
||||
log.info("create mappings for " + indexName + " " + mappings);
|
||||
rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings)
|
||||
.setParameter("master_timeout", "5m").build());
|
||||
if (!rst.isSucceeded()) {
|
||||
if (getStatus(rst) == 400) {
|
||||
log.info(String.format("index [%s] mappings already exists", indexName));
|
||||
} else {
|
||||
log.error(rst.getErrorMessage());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
log.info(String.format("index [%s] put mappings success", indexName));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public JestResult execute(Action<JestResult> clientRequest) throws Exception {
|
||||
JestResult rst = null;
|
||||
rst = jestClient.execute(clientRequest);
|
||||
if (!rst.isSucceeded()) {
|
||||
//log.warn(rst.getErrorMessage());
|
||||
}
|
||||
return rst;
|
||||
}
|
||||
|
||||
public Integer getStatus(JestResult rst) {
|
||||
JsonObject jsonObject = rst.getJsonObject();
|
||||
if (jsonObject.has("status")) {
|
||||
return jsonObject.get("status").getAsInt();
|
||||
}
|
||||
return 600;
|
||||
}
|
||||
|
||||
public boolean isBulkResult(JestResult rst) {
|
||||
JsonObject jsonObject = rst.getJsonObject();
|
||||
return jsonObject.has("items");
|
||||
}
|
||||
|
||||
|
||||
public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
|
||||
GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
|
||||
AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
|
||||
JestResult rst = jestClient.execute(getAliases);
|
||||
log.info(rst.getJsonString());
|
||||
List<AliasMapping> list = new ArrayList<AliasMapping>();
|
||||
if (rst.isSucceeded()) {
|
||||
JsonParser jp = new JsonParser();
|
||||
JsonObject jo = (JsonObject)jp.parse(rst.getJsonString());
|
||||
for(Map.Entry<String, JsonElement> entry : jo.entrySet()){
|
||||
String tindex = entry.getKey();
|
||||
if (indexname.equals(tindex)) {
|
||||
continue;
|
||||
}
|
||||
AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
|
||||
String s = new Gson().toJson(m.getData());
|
||||
log.info(s);
|
||||
if (needClean) {
|
||||
list.add(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build();
|
||||
rst = jestClient.execute(modifyAliases);
|
||||
if (!rst.isSucceeded()) {
|
||||
log.error(rst.getErrorMessage());
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception {
|
||||
// es_rejected_execution_exception
|
||||
// illegal_argument_exception
|
||||
// cluster_block_exception
|
||||
JestResult rst = null;
|
||||
rst = jestClient.execute(bulk.build());
|
||||
if (!rst.isSucceeded()) {
|
||||
log.warn(rst.getErrorMessage());
|
||||
}
|
||||
return rst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the JestClient.
|
||||
*
|
||||
*/
|
||||
public void closeJestClient() {
|
||||
if (jestClient != null) {
|
||||
jestClient.shutdownClient();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,65 +0,0 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
/**
|
||||
* Created by xiongfeng.bxf on 17/3/2.
|
||||
*/
|
||||
public class ESColumn {
|
||||
|
||||
private String name;//: "appkey",
|
||||
|
||||
private String type;//": "TEXT",
|
||||
|
||||
private String timezone;
|
||||
|
||||
private String format;
|
||||
|
||||
private Boolean array;
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void setTimeZone(String timezone) {
|
||||
this.timezone = timezone;
|
||||
}
|
||||
|
||||
public void setFormat(String format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public String getTimezone() {
|
||||
return timezone;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setTimezone(String timezone) {
|
||||
this.timezone = timezone;
|
||||
}
|
||||
|
||||
public Boolean isArray() {
|
||||
return array;
|
||||
}
|
||||
|
||||
public void setArray(Boolean array) {
|
||||
this.array = array;
|
||||
}
|
||||
|
||||
public Boolean getArray() {
|
||||
return array;
|
||||
}
|
||||
}
|
@ -1,460 +0,0 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.element.Column;
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.plugin.RecordReceiver;
|
||||
import com.alibaba.datax.common.spi.Writer;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.datax.common.util.RetryUtil;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.alibaba.fastjson.TypeReference;
|
||||
import io.searchbox.client.JestResult;
|
||||
import io.searchbox.core.Bulk;
|
||||
import io.searchbox.core.BulkResult;
|
||||
import io.searchbox.core.Index;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
public class ESWriter extends Writer {
|
||||
private final static String WRITE_COLUMNS = "write_columns";
|
||||
|
||||
public static class Job extends Writer.Job {
|
||||
private static final Logger log = LoggerFactory.getLogger(Job.class);
|
||||
|
||||
private Configuration conf = null;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.conf = super.getPluginJobConf();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
/**
|
||||
* Note: this method is executed only once.
|
||||
* Best practice: any processing the Job needs before the data sync can be done here; if nothing is needed, this method can simply be removed.
|
||||
*/
|
||||
ESClient esClient = new ESClient();
|
||||
esClient.createClient(Key.getEndpoint(conf),
|
||||
Key.getAccessID(conf),
|
||||
Key.getAccessKey(conf),
|
||||
false,
|
||||
300000,
|
||||
false,
|
||||
false);
|
||||
|
||||
String indexName = Key.getIndexName(conf);
|
||||
String typeName = Key.getTypeName(conf);
|
||||
boolean dynamic = Key.getDynamic(conf);
|
||||
String mappings = genMappings(typeName);
|
||||
String settings = JSONObject.toJSONString(
|
||||
Key.getSettings(conf)
|
||||
);
|
||||
log.info(String.format("index:[%s], type:[%s], mappings:[%s]", indexName, typeName, mappings));
|
||||
|
||||
try {
|
||||
boolean isIndicesExists = esClient.indicesExists(indexName);
|
||||
if (Key.isCleanup(this.conf) && isIndicesExists) {
|
||||
esClient.deleteIndex(indexName);
|
||||
}
|
||||
// force creation; an index that already exists is ignored internally
|
||||
if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) {
|
||||
throw new IOException("create index or mapping failed");
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString());
|
||||
}
|
||||
esClient.closeJestClient();
|
||||
}
|
||||
|
||||
private String genMappings(String typeName) {
|
||||
String mappings = null;
|
||||
Map<String, Object> propMap = new HashMap<String, Object>();
|
||||
List<ESColumn> columnList = new ArrayList<ESColumn>();
|
||||
|
||||
List column = conf.getList("column");
|
||||
if (column != null) {
|
||||
for (Object col : column) {
|
||||
JSONObject jo = JSONObject.parseObject(col.toString());
|
||||
String colName = jo.getString("name");
|
||||
String colTypeStr = jo.getString("type");
|
||||
if (colTypeStr == null) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
|
||||
}
|
||||
ESFieldType colType = ESFieldType.getESFieldType(colTypeStr);
|
||||
if (colType == null) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
|
||||
}
|
||||
|
||||
ESColumn columnItem = new ESColumn();
|
||||
|
||||
if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) {
|
||||
// keep compatibility with existing versions
|
||||
colType = ESFieldType.ID;
|
||||
colTypeStr = "id";
|
||||
}
|
||||
|
||||
columnItem.setName(colName);
|
||||
columnItem.setType(colTypeStr);
|
||||
|
||||
if (colType == ESFieldType.ID) {
|
||||
columnList.add(columnItem);
|
||||
// if the column is id, its properties stay empty
|
||||
continue;
|
||||
}
|
||||
|
||||
Boolean array = jo.getBoolean("array");
|
||||
if (array != null) {
|
||||
columnItem.setArray(array);
|
||||
}
|
||||
Map<String, Object> field = new HashMap<String, Object>();
|
||||
field.put("type", colTypeStr);
|
||||
//https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
|
||||
// https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
|
||||
field.put("doc_values", jo.getBoolean("doc_values"));
|
||||
field.put("ignore_above", jo.getInteger("ignore_above"));
|
||||
field.put("index", jo.getBoolean("index"));
|
||||
|
||||
switch (colType) {
|
||||
case STRING:
|
||||
// keep the string type for compatibility with pre-ES5 versions
|
||||
break;
|
||||
case KEYWORD:
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
|
||||
field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
|
||||
case TEXT:
|
||||
field.put("analyzer", jo.getString("analyzer"));
|
||||
// reduces disk usage and also improves indexing performance
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
|
||||
field.put("norms", jo.getBoolean("norms"));
|
||||
field.put("index_options", jo.getBoolean("index_options"));
|
||||
break;
|
||||
case DATE:
|
||||
columnItem.setTimeZone(jo.getString("timezone"));
|
||||
columnItem.setFormat(jo.getString("format"));
|
||||
// dates are later normalized to timezone-aware standard times, so no format needs to be given to ES
|
||||
/*
|
||||
if (jo.getString("format") != null) {
|
||||
field.put("format", jo.getString("format"));
|
||||
} else {
|
||||
//field.put("format", "strict_date_optional_time||epoch_millis||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd");
|
||||
}
|
||||
*/
|
||||
break;
|
||||
case GEO_SHAPE:
|
||||
field.put("tree", jo.getString("tree"));
|
||||
field.put("precision", jo.getString("precision"));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
propMap.put(colName, field);
|
||||
columnList.add(columnItem);
|
||||
}
|
||||
}
|
||||
|
||||
conf.set(WRITE_COLUMNS, JSON.toJSONString(columnList));
|
||||
|
||||
log.info(JSON.toJSONString(columnList));
|
||||
|
||||
Map<String, Object> rootMappings = new HashMap<String, Object>();
|
||||
Map<String, Object> typeMappings = new HashMap<String, Object>();
|
||||
typeMappings.put("properties", propMap);
|
||||
rootMappings.put(typeName, typeMappings);
|
||||
|
||||
mappings = JSON.toJSONString(rootMappings);
|
||||
|
||||
if (mappings == null || "".equals(mappings)) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
|
||||
}
|
||||
|
||||
return mappings;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int mandatoryNumber) {
|
||||
List<Configuration> configurations = new ArrayList<Configuration>(mandatoryNumber);
|
||||
for (int i = 0; i < mandatoryNumber; i++) {
|
||||
configurations.add(conf);
|
||||
}
|
||||
return configurations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
ESClient esClient = new ESClient();
|
||||
esClient.createClient(Key.getEndpoint(conf),
|
||||
Key.getAccessID(conf),
|
||||
Key.getAccessKey(conf),
|
||||
false,
|
||||
300000,
|
||||
false,
|
||||
false);
|
||||
String alias = Key.getAlias(conf);
|
||||
if (!"".equals(alias)) {
|
||||
log.info(String.format("alias [%s] to [%s]", alias, Key.getIndexName(conf)));
|
||||
try {
|
||||
esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
|
||||
} catch (IOException e) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public static class Task extends Writer.Task {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(Job.class);
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
|
||||
ESClient esClient = null;
|
||||
private List<ESFieldType> typeList;
|
||||
private List<ESColumn> columnList;
|
||||
|
||||
private int trySize;
|
||||
private int batchSize;
|
||||
private String index;
|
||||
private String type;
|
||||
private String splitter;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.conf = super.getPluginJobConf();
|
||||
index = Key.getIndexName(conf);
|
||||
type = Key.getTypeName(conf);
|
||||
|
||||
trySize = Key.getTrySize(conf);
|
||||
batchSize = Key.getBatchSize(conf);
|
||||
splitter = Key.getSplitter(conf);
|
||||
columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference<List<ESColumn>>() {
|
||||
});
|
||||
|
||||
typeList = new ArrayList<ESFieldType>();
|
||||
|
||||
for (ESColumn col : columnList) {
|
||||
typeList.add(ESFieldType.getESFieldType(col.getType()));
|
||||
}
|
||||
|
||||
esClient = new ESClient();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
esClient.createClient(Key.getEndpoint(conf),
|
||||
Key.getAccessID(conf),
|
||||
Key.getAccessKey(conf),
|
||||
Key.isMultiThread(conf),
|
||||
Key.getTimeout(conf),
|
||||
Key.isCompression(conf),
|
||||
Key.isDiscovery(conf));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startWrite(RecordReceiver recordReceiver) {
|
||||
List<Record> writerBuffer = new ArrayList<Record>(this.batchSize);
|
||||
Record record = null;
|
||||
long total = 0;
|
||||
while ((record = recordReceiver.getFromReader()) != null) {
|
||||
writerBuffer.add(record);
|
||||
if (writerBuffer.size() >= this.batchSize) {
|
||||
total += doBatchInsert(writerBuffer);
|
||||
writerBuffer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
if (!writerBuffer.isEmpty()) {
|
||||
total += doBatchInsert(writerBuffer);
|
||||
writerBuffer.clear();
|
||||
}
|
||||
|
||||
String msg = String.format("task end, write size :%d", total);
|
||||
getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
|
||||
log.info(msg);
|
||||
esClient.closeJestClient();
|
||||
}
|
||||
|
||||
private String getDateStr(ESColumn esColumn, Column column) {
|
||||
DateTime date = null;
|
||||
DateTimeZone dtz = DateTimeZone.getDefault();
|
||||
if (esColumn.getTimezone() != null) {
|
||||
// see http://www.joda.org/joda-time/timezones.html for all time zones
|
||||
dtz = DateTimeZone.forID(esColumn.getTimezone());
|
||||
}
|
||||
if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) {
|
||||
DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat());
|
||||
date = formatter.withZone(dtz).parseDateTime(column.asString());
|
||||
return date.toString();
|
||||
} else if (column.getType() == Column.Type.DATE) {
|
||||
date = new DateTime(column.asLong(), dtz);
|
||||
return date.toString();
|
||||
} else {
|
||||
return column.asString();
|
||||
}
|
||||
}
|
||||
|
||||
private long doBatchInsert(final List<Record> writerBuffer) {
|
||||
Map<String, Object> data = null;
|
||||
final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
|
||||
for (Record record : writerBuffer) {
|
||||
data = new HashMap<String, Object>();
|
||||
String id = null;
|
||||
for (int i = 0; i < record.getColumnNumber(); i++) {
|
||||
Column column = record.getColumn(i);
|
||||
String columnName = columnList.get(i).getName();
|
||||
ESFieldType columnType = typeList.get(i);
|
||||
// array columns must be passed in as strings
|
||||
if (columnList.get(i).isArray() != null && columnList.get(i).isArray()) {
|
||||
String[] dataList = column.asString().split(splitter);
|
||||
if (!columnType.equals(ESFieldType.DATE)) {
|
||||
data.put(columnName, dataList);
|
||||
} else {
|
||||
for (int pos = 0; pos < dataList.length; pos++) {
|
||||
dataList[pos] = getDateStr(columnList.get(i), column);
|
||||
}
|
||||
data.put(columnName, dataList);
|
||||
}
|
||||
} else {
|
||||
switch (columnType) {
|
||||
case ID:
|
||||
if (id != null) {
|
||||
id += record.getColumn(i).asString();
|
||||
} else {
|
||||
id = record.getColumn(i).asString();
|
||||
}
|
||||
break;
|
||||
case DATE:
|
||||
try {
|
||||
String dateStr = getDateStr(columnList.get(i), column);
|
||||
data.put(columnName, dateStr);
|
||||
} catch (Exception e) {
|
||||
getTaskPluginCollector().collectDirtyRecord(record, String.format("时间类型解析失败 [%s:%s] exception: %s", columnName, column.toString(), e.toString()));
|
||||
}
|
||||
break;
|
||||
case KEYWORD:
|
||||
case STRING:
|
||||
case TEXT:
|
||||
case IP:
|
||||
case GEO_POINT:
|
||||
data.put(columnName, column.asString());
|
||||
break;
|
||||
case BOOLEAN:
|
||||
data.put(columnName, column.asBoolean());
|
||||
break;
|
||||
case BYTE:
|
||||
case BINARY:
|
||||
data.put(columnName, column.asBytes());
|
||||
break;
|
||||
case LONG:
|
||||
data.put(columnName, column.asLong());
|
||||
break;
|
||||
case INTEGER:
|
||||
data.put(columnName, column.asBigInteger());
|
||||
break;
|
||||
case SHORT:
|
||||
data.put(columnName, column.asBigInteger());
|
||||
break;
|
||||
case FLOAT:
|
||||
case DOUBLE:
|
||||
data.put(columnName, column.asDouble());
|
||||
break;
|
||||
case NESTED:
|
||||
case OBJECT:
|
||||
case GEO_SHAPE:
|
||||
data.put(columnName, JSON.parse(column.asString()));
|
||||
break;
|
||||
default:
|
||||
getTaskPluginCollector().collectDirtyRecord(record, "类型错误:不支持的类型:" + columnType + " " + columnName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (id == null) {
|
||||
//id = UUID.randomUUID().toString();
|
||||
bulkaction.addAction(new Index.Builder(data).build());
|
||||
} else {
|
||||
bulkaction.addAction(new Index.Builder(data).id(id).build());
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return RetryUtil.executeWithRetry(new Callable<Integer>() {
|
||||
@Override
|
||||
public Integer call() throws Exception {
|
||||
JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
|
||||
if (jestResult.isSucceeded()) {
|
||||
return writerBuffer.size();
|
||||
}
|
||||
|
||||
String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
|
||||
log.warn(msg);
|
||||
if (esClient.isBulkResult(jestResult)) {
|
||||
BulkResult brst = (BulkResult) jestResult;
|
||||
List<BulkResult.BulkResultItem> failedItems = brst.getFailedItems();
|
||||
for (BulkResult.BulkResultItem item : failedItems) {
|
||||
if (item.status != 400) {
|
||||
// 400 BAD_REQUEST: request-level (non-data) failures must not be ignored
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
|
||||
} else {
|
||||
// throw if the user chose not to ignore parse errors (ignored by default)
|
||||
if (!Key.isIgnoreParseError(conf)) {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<BulkResult.BulkResultItem> items = brst.getItems();
|
||||
for (int idx = 0; idx < items.size(); ++idx) {
|
||||
BulkResult.BulkResultItem item = items.get(idx);
|
||||
if (item.error != null && !"".equals(item.error)) {
|
||||
getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error));
|
||||
}
|
||||
}
|
||||
return writerBuffer.size() - brst.getFailedItems().size();
|
||||
} else {
|
||||
Integer status = esClient.getStatus(jestResult);
|
||||
switch (status) {
|
||||
case 429: //TOO_MANY_REQUESTS
|
||||
log.warn("server response too many requests, so auto reduce speed");
|
||||
break;
|
||||
}
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
|
||||
}
|
||||
}
|
||||
}, trySize, 60000L, true);
|
||||
} catch (Exception e) {
|
||||
if (Key.isIgnoreWriteError(this.conf)) {
|
||||
log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize));
|
||||
} else {
|
||||
throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
esClient.closeJestClient();
|
||||
}
|
||||
}
|
||||
}
|
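As a side note, the doBatchInsert method above leans on DataX's RetryUtil; a minimal sketch of that retry pattern, with an invented callable body and retry count, would look roughly like this.

import com.alibaba.datax.common.util.RetryUtil;

import java.util.concurrent.Callable;

public class RetrySketch {
    public static void main(String[] args) throws Exception {
        // Retries the callable up to 3 times with a 60s exponential back-off,
        // mirroring the (trySize, 60000L, true) arguments used above.
        int written = RetryUtil.executeWithRetry(new Callable<Integer>() {
            @Override
            public Integer call() throws Exception {
                // replace with the real bulk insert; throwing here triggers another attempt
                return 0;
            }
        }, 3, 60000L, true);
        System.out.println("written: " + written);
    }
}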
@ -1,37 +0,0 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public enum ESWriterErrorCode implements ErrorCode {
|
||||
BAD_CONFIG_VALUE("ESWriter-00", "您配置的值不合法."),
|
||||
ES_INDEX_DELETE("ESWriter-01", "删除index错误."),
|
||||
ES_INDEX_CREATE("ESWriter-02", "创建index错误."),
|
||||
ES_MAPPINGS("ESWriter-03", "mappings错误."),
|
||||
ES_INDEX_INSERT("ESWriter-04", "插入数据错误."),
|
||||
ES_ALIAS_MODIFY("ESWriter-05", "别名修改错误."),
|
||||
;
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
ESWriterErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
@ -0,0 +1,312 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfo;
|
||||
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.ClusterInfoResult;
|
||||
import com.alibaba.datax.plugin.writer.elasticsearchwriter.jest.PutMapping7;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.google.gson.Gson;
|
||||
import com.google.gson.JsonElement;
|
||||
import com.google.gson.JsonObject;
|
||||
import com.google.gson.JsonParser;
|
||||
import io.searchbox.action.Action;
|
||||
import io.searchbox.client.JestClient;
|
||||
import io.searchbox.client.JestClientFactory;
|
||||
import io.searchbox.client.JestResult;
|
||||
import io.searchbox.client.config.HttpClientConfig;
|
||||
import io.searchbox.client.config.HttpClientConfig.Builder;
|
||||
import io.searchbox.core.Bulk;
|
||||
import io.searchbox.indices.CreateIndex;
|
||||
import io.searchbox.indices.DeleteIndex;
|
||||
import io.searchbox.indices.IndicesExists;
|
||||
import io.searchbox.indices.aliases.*;
|
||||
import io.searchbox.indices.mapping.GetMapping;
|
||||
import io.searchbox.indices.mapping.PutMapping;
|
||||
|
||||
import io.searchbox.indices.settings.GetSettings;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* Created by xiongfeng.bxf on 17/2/8.
|
||||
*/
|
||||
public class ElasticSearchClient {
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(ElasticSearchClient.class);
|
||||
|
||||
private JestClient jestClient;
|
||||
private Configuration conf;
|
||||
|
||||
public JestClient getClient() {
|
||||
return jestClient;
|
||||
}
|
||||
|
||||
public ElasticSearchClient(Configuration conf) {
|
||||
this.conf = conf;
|
||||
String endpoint = Key.getEndpoint(conf);
|
||||
String user = Key.getUsername(conf);
|
||||
String passwd = Key.getPassword(conf);
|
||||
boolean multiThread = Key.isMultiThread(conf);
|
||||
int readTimeout = Key.getTimeout(conf);
|
||||
boolean compression = Key.isCompression(conf);
|
||||
boolean discovery = Key.isDiscovery(conf);
|
||||
String discoveryFilter = Key.getDiscoveryFilter(conf);
|
||||
int totalConnection = this.conf.getInt("maxTotalConnection", 200);
|
||||
JestClientFactory factory = new JestClientFactory();
|
||||
Builder httpClientConfig = new HttpClientConfig
|
||||
.Builder(endpoint)
|
||||
// .setPreemptiveAuth(new HttpHost(endpoint))
|
||||
.multiThreaded(multiThread)
|
||||
.connTimeout(readTimeout)
|
||||
.readTimeout(readTimeout)
|
||||
.maxTotalConnection(totalConnection)
|
||||
.requestCompressionEnabled(compression)
|
||||
.discoveryEnabled(discovery)
|
||||
.discoveryFrequency(5L, TimeUnit.MINUTES)
|
||||
.discoveryFilter(discoveryFilter);
|
||||
if (!(StringUtils.isBlank(user) || StringUtils.isBlank(passwd))) {
|
||||
// set credentials only when both username and password are provided; otherwise access is anonymous
|
||||
httpClientConfig.defaultCredentials(user, passwd);
|
||||
}
|
||||
factory.setHttpClientConfig(httpClientConfig.build());
|
||||
this.jestClient = factory.getObject();
|
||||
}
|
||||
|
||||
public boolean indicesExists(String indexName) throws Exception {
|
||||
boolean isIndicesExists = false;
|
||||
JestResult rst = execute(new IndicesExists.Builder(indexName).build());
|
||||
if (rst.isSucceeded()) {
|
||||
isIndicesExists = true;
|
||||
} else {
|
||||
LOGGER.warn("IndicesExists got ResponseCode: {} ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
|
||||
switch (rst.getResponseCode()) {
|
||||
case 404:
|
||||
isIndicesExists = false;
|
||||
break;
|
||||
case 401:
|
||||
// no access permission
|
||||
default:
|
||||
LOGGER.warn(rst.getErrorMessage());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return isIndicesExists;
|
||||
}
|
||||
|
||||
public boolean deleteIndex(String indexName) throws Exception {
|
||||
LOGGER.info("delete index {}", indexName);
|
||||
if (indicesExists(indexName)) {
|
||||
JestResult rst = execute(new DeleteIndex.Builder(indexName).build());
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.warn("DeleteIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
|
||||
return false;
|
||||
} else {
|
||||
LOGGER.info("delete index {} success", indexName);
|
||||
}
|
||||
} else {
|
||||
LOGGER.info("index cannot found, skip delete index {}", indexName);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isGreaterOrEqualThan7() throws Exception {
|
||||
try {
|
||||
ClusterInfoResult result = execute(new ClusterInfo.Builder().build());
|
||||
LOGGER.info("ClusterInfoResult: {}", result.getJsonString());
|
||||
return result.isGreaterOrEqualThan7();
|
||||
}catch(Exception e) {
|
||||
LOGGER.warn(e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the settings of an index
|
||||
* @param indexName the index name
|
||||
* @return the settings
|
||||
*/
|
||||
public String getIndexSettings(String indexName) {
|
||||
GetSettings.Builder builder = new GetSettings.Builder();
|
||||
builder.addIndex(indexName);
|
||||
GetSettings getSettings = builder.build();
|
||||
try {
|
||||
LOGGER.info("begin GetSettings for index: {}", indexName);
|
||||
JestResult result = this.execute(getSettings);
|
||||
return result.getJsonString();
|
||||
} catch (Exception e) {
|
||||
String message = "GetSettings for index error: " + e.getMessage();
|
||||
LOGGER.warn(message, e);
|
||||
throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_GET_SETTINGS, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean createIndexIfNotExists(String indexName, String typeName,
|
||||
Object mappings, String settings,
|
||||
boolean dynamic, boolean isGreaterOrEqualThan7) throws Exception {
|
||||
JestResult rst;
|
||||
if (!indicesExists(indexName)) {
|
||||
LOGGER.info("create index {}", indexName);
|
||||
rst = execute(
|
||||
new CreateIndex.Builder(indexName)
|
||||
.settings(settings)
|
||||
.setParameter("master_timeout", Key.getMasterTimeout(this.conf))
|
||||
.build()
|
||||
);
|
||||
//index_already_exists_exception
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.warn("CreateIndex got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
|
||||
if (getStatus(rst) == 400) {
|
||||
LOGGER.info("index [{}] already exists", indexName);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOGGER.info("create {} index success", indexName);
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamic) {
|
||||
LOGGER.info("dynamic is true, ignore mappings");
|
||||
return true;
|
||||
}
|
||||
LOGGER.info("create mappings for {} {}", indexName, mappings);
|
||||
// for ES 7.x and later, the PUT mapping request URI must not contain a type, and the mapping body must not be nested under the type
|
||||
if (isGreaterOrEqualThan7) {
|
||||
rst = execute(new PutMapping7.Builder(indexName, mappings).
|
||||
setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
|
||||
} else {
|
||||
rst = execute(new PutMapping.Builder(indexName, typeName, mappings)
|
||||
.setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build());
|
||||
}
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.error("PutMapping got ResponseCode: {}, ErrorMessage: {}", rst.getResponseCode(), rst.getErrorMessage());
|
||||
return false;
|
||||
} else {
|
||||
LOGGER.info("index {} put mappings success", indexName);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public <T extends JestResult> T execute(Action<T> clientRequest) throws IOException {
|
||||
T rst = jestClient.execute(clientRequest);
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.warn(rst.getJsonString());
|
||||
}
|
||||
return rst;
|
||||
}
|
||||
|
||||
public Integer getStatus(JestResult rst) {
|
||||
JsonObject jsonObject = rst.getJsonObject();
|
||||
if (jsonObject.has("status")) {
|
||||
return jsonObject.get("status").getAsInt();
|
||||
}
|
||||
return 600;
|
||||
}
|
||||
|
||||
public boolean isBulkResult(JestResult rst) {
|
||||
JsonObject jsonObject = rst.getJsonObject();
|
||||
return jsonObject.has("items");
|
||||
}
|
||||
|
||||
|
||||
public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
|
||||
GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
|
||||
AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
|
||||
JestResult rst = null;
|
||||
List<AliasMapping> list = new ArrayList<AliasMapping>();
|
||||
if (needClean) {
|
||||
rst = execute(getAliases);
|
||||
if (rst.isSucceeded()) {
|
||||
JsonParser jp = new JsonParser();
|
||||
JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
|
||||
for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
|
||||
String tindex = entry.getKey();
|
||||
if (indexname.equals(tindex)) {
|
||||
continue;
|
||||
}
|
||||
AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
|
||||
String s = new Gson().toJson(m.getData());
|
||||
LOGGER.info(s);
|
||||
list.add(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", Key.getMasterTimeout(this.conf)).build();
|
||||
rst = execute(modifyAliases);
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.error(rst.getErrorMessage());
|
||||
throw new IOException(rst.getErrorMessage());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the mapping of an index
|
||||
*/
|
||||
public String getIndexMapping(String indexName) {
|
||||
GetMapping.Builder builder = new GetMapping.Builder();
|
||||
builder.addIndex(indexName);
|
||||
GetMapping getMapping = builder.build();
|
||||
try {
|
||||
LOGGER.info("begin GetMapping for index: {}", indexName);
|
||||
JestResult result = this.execute(getMapping);
|
||||
return result.getJsonString();
|
||||
} catch (Exception e) {
|
||||
String message = "GetMapping for index error: " + e.getMessage();
|
||||
LOGGER.warn(message, e);
|
||||
throw DataXException.asDataXException(ElasticSearchWriterErrorCode.ES_MAPPINGS, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public String getMappingForIndexType(String indexName, String typeName) {
|
||||
String indexMapping = this.getIndexMapping(indexName);
|
||||
JSONObject indexMappingInJson = JSON.parseObject(indexMapping);
|
||||
List<String> paths = Arrays.asList(indexName, "mappings");
|
||||
JSONObject properties = JsonPathUtil.getJsonObject(paths, indexMappingInJson);
|
||||
JSONObject propertiesParent = properties;
|
||||
if (StringUtils.isNotBlank(typeName) && properties.containsKey(typeName)) {
|
||||
propertiesParent = (JSONObject) properties.get(typeName);
|
||||
}
|
||||
JSONObject mapping = (JSONObject) propertiesParent.get("properties");
|
||||
return JSON.toJSONString(mapping);
|
||||
}
|
||||
|
||||
public JestResult bulkInsert(Bulk.Builder bulk) throws Exception {
|
||||
// es_rejected_execution_exception
|
||||
// illegal_argument_exception
|
||||
// cluster_block_exception
|
||||
JestResult rst = null;
|
||||
rst = execute(bulk.build());
|
||||
if (!rst.isSucceeded()) {
|
||||
LOGGER.warn(rst.getErrorMessage());
|
||||
}
|
||||
return rst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the JestClient.
|
||||
*
|
||||
*/
|
||||
public void closeJestClient() {
|
||||
if (jestClient != null) {
|
||||
try {
|
||||
// jestClient.shutdownClient();
|
||||
jestClient.close();
|
||||
} catch (IOException e) {
|
||||
LOGGER.warn("ignore error: ", e.getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
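A hedged usage sketch of the new ElasticSearchClient, assuming the sketch is compiled in the same package; the endpoint, index name, mappings and settings strings are placeholders, and a reachable cluster is needed for the calls to succeed.

import com.alibaba.datax.common.util.Configuration;

public class ElasticSearchClientSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = Configuration.from(
                "{\"endpoint\":\"http://localhost:9200\",\"username\":\"\",\"password\":\"\","
                        + "\"index\":\"demo_index\",\"indexType\":\"_doc\"}");
        ElasticSearchClient client = new ElasticSearchClient(conf);
        // Version detection decides whether the type-less PutMapping7 path is taken.
        boolean es7 = client.isGreaterOrEqualThan7();
        client.createIndexIfNotExists("demo_index", "_doc",
                "{\"properties\":{\"name\":{\"type\":\"keyword\"}}}", "{}", false, es7);
        client.closeJestClient();
    }
}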
@ -0,0 +1,126 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by xiongfeng.bxf on 17/3/2.
|
||||
*/
|
||||
public class ElasticSearchColumn {
|
||||
|
||||
private String name;//: "appkey",
|
||||
|
||||
private String type;//": "TEXT",
|
||||
|
||||
private String timezone;
|
||||
|
||||
/**
|
||||
* Source-side formatting, handled by DataX
|
||||
*/
|
||||
private String format;
|
||||
|
||||
/**
|
||||
* Target-side format, natively supported by ES
|
||||
*/
|
||||
private String dstFormat;
|
||||
|
||||
private boolean array;
|
||||
|
||||
/**
|
||||
* Whether to use the target-side (ES-native) array type
|
||||
*
|
||||
* Defaults to false
|
||||
*/
|
||||
private boolean dstArray = false;
|
||||
|
||||
private boolean jsonArray;
|
||||
|
||||
private boolean origin;
|
||||
|
||||
private List<String> combineFields;
|
||||
|
||||
private String combineFieldsValueSeparator = "-";
|
||||
|
||||
public String getCombineFieldsValueSeparator() {
|
||||
return combineFieldsValueSeparator;
|
||||
}
|
||||
|
||||
public void setCombineFieldsValueSeparator(String combineFieldsValueSeparator) {
|
||||
this.combineFieldsValueSeparator = combineFieldsValueSeparator;
|
||||
}
|
||||
|
||||
public List<String> getCombineFields() {
|
||||
return combineFields;
|
||||
}
|
||||
|
||||
public void setCombineFields(List<String> combineFields) {
|
||||
this.combineFields = combineFields;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void setTimeZone(String timezone) {
|
||||
this.timezone = timezone;
|
||||
}
|
||||
|
||||
public void setFormat(String format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public boolean isOrigin() { return origin; }
|
||||
|
||||
public void setOrigin(boolean origin) { this.origin = origin; }
|
||||
|
||||
public String getTimezone() {
|
||||
return timezone;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public void setTimezone(String timezone) {
|
||||
this.timezone = timezone;
|
||||
}
|
||||
|
||||
public boolean isArray() {
|
||||
return array;
|
||||
}
|
||||
|
||||
public void setArray(boolean array) {
|
||||
this.array = array;
|
||||
}
|
||||
|
||||
public boolean isJsonArray() {return jsonArray;}
|
||||
|
||||
public void setJsonArray(boolean jsonArray) {this.jsonArray = jsonArray;}
|
||||
|
||||
public String getDstFormat() {
|
||||
return dstFormat;
|
||||
}
|
||||
|
||||
public void setDstFormat(String dstFormat) {
|
||||
this.dstFormat = dstFormat;
|
||||
}
|
||||
|
||||
public boolean isDstArray() {
|
||||
return dstArray;
|
||||
}
|
||||
|
||||
public void setDstArray(boolean dstArray) {
|
||||
this.dstArray = dstArray;
|
||||
}
|
||||
}
|
@ -3,8 +3,11 @@ package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
/**
|
||||
* Created by xiongfeng.bxf on 17/3/1.
|
||||
*/
|
||||
public enum ESFieldType {
|
||||
public enum ElasticSearchFieldType {
|
||||
ID,
|
||||
PARENT,
|
||||
ROUTING,
|
||||
VERSION,
|
||||
STRING,
|
||||
TEXT,
|
||||
KEYWORD,
|
||||
@ -24,20 +27,18 @@ public enum ESFieldType {
|
||||
DATE_RANGE,
|
||||
GEO_POINT,
|
||||
GEO_SHAPE,
|
||||
|
||||
IP,
|
||||
IP_RANGE,
|
||||
COMPLETION,
|
||||
TOKEN_COUNT,
|
||||
|
||||
ARRAY,
|
||||
OBJECT,
|
||||
NESTED;
|
||||
|
||||
public static ESFieldType getESFieldType(String type) {
|
||||
public static ElasticSearchFieldType getESFieldType(String type) {
|
||||
if (type == null) {
|
||||
return null;
|
||||
}
|
||||
for (ESFieldType f : ESFieldType.values()) {
|
||||
for (ElasticSearchFieldType f : ElasticSearchFieldType.values()) {
|
||||
if (f.name().compareTo(type.toUpperCase()) == 0) {
|
||||
return f;
|
||||
}
|
File diff suppressed because it is too large
@ -0,0 +1,41 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public enum ElasticSearchWriterErrorCode implements ErrorCode {
|
||||
BAD_CONFIG_VALUE("ESWriter-00", "The value you configured is not valid."),
|
||||
ES_INDEX_DELETE("ESWriter-01", "Delete index error."),
|
||||
ES_INDEX_CREATE("ESWriter-02", "Index creation error."),
|
||||
ES_MAPPINGS("ESWriter-03", "The mappings error."),
|
||||
ES_INDEX_INSERT("ESWriter-04", "Insert data error."),
|
||||
ES_ALIAS_MODIFY("ESWriter-05", "Alias modification error."),
|
||||
JSON_PARSE("ESWrite-06", "Json format parsing error"),
|
||||
UPDATE_WITH_ID("ESWrite-07", "Update mode must specify column type with id"),
|
||||
RECORD_FIELD_NOT_FOUND("ESWrite-08", "Field does not exist in the original table"),
|
||||
ES_GET_SETTINGS("ESWriter-09", "get settings failed");
|
||||
;
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
ElasticSearchWriterErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
|
||||
public class JsonPathUtil {
|
||||
|
||||
public static JSONObject getJsonObject(List<String> paths, JSONObject data) {
|
||||
if (null == paths || paths.isEmpty()) {
|
||||
return data;
|
||||
}
|
||||
|
||||
if (null == data) {
|
||||
return null;
|
||||
}
|
||||
|
||||
JSONObject dataTmp = data;
|
||||
for (String each : paths) {
|
||||
if (null != dataTmp) {
|
||||
dataTmp = dataTmp.getJSONObject(each);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return dataTmp;
|
||||
}
|
||||
}
|
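A small usage sketch of JsonPathUtil (same package assumed, index name invented), mirroring the Arrays.asList(indexName, "mappings") walk that getMappingForIndexType performs above.

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

import java.util.Arrays;

public class JsonPathUtilSketch {
    public static void main(String[] args) {
        JSONObject data = JSON.parseObject(
                "{\"demo_index\":{\"mappings\":{\"properties\":{\"name\":{\"type\":\"keyword\"}}}}}");
        // Walks demo_index -> mappings and returns the nested object, or null if any hop is missing.
        JSONObject mappings = JsonPathUtil.getJsonObject(Arrays.asList("demo_index", "mappings"), data);
        System.out.println(mappings); // {"properties":{"name":{"type":"keyword"}}}
    }
}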
@ -0,0 +1,54 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONException;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
|
||||
/**
|
||||
* @author bozu
|
||||
* @date 2021/01/06
|
||||
*/
|
||||
public class JsonUtil {
|
||||
|
||||
/**
|
||||
* Merge two JSON strings
|
||||
* @param source the source JSON
|
||||
* @param target the target JSON
|
||||
* @return the merged JSON
|
||||
* @throws JSONException
|
||||
*/
|
||||
public static String mergeJsonStr(String source, String target) throws JSONException {
|
||||
if(source == null) {
|
||||
return target;
|
||||
}
|
||||
if(target == null) {
|
||||
return source;
|
||||
}
|
||||
return JSON.toJSONString(deepMerge(JSON.parseObject(source), JSON.parseObject(target)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Deep-merge two JSON objects, merging the values of source into target
|
||||
* @param source the source JSON
|
||||
* @param target the target JSON
|
||||
* @return the merged JSON
|
||||
* @throws JSONException
|
||||
*/
|
||||
private static JSONObject deepMerge(JSONObject source, JSONObject target) throws JSONException {
|
||||
for (String key: source.keySet()) {
|
||||
Object value = source.get(key);
|
||||
if (target.containsKey(key)) {
|
||||
// existing value for "key" - recursively deep merge:
|
||||
if (value instanceof JSONObject) {
|
||||
JSONObject valueJson = (JSONObject)value;
|
||||
deepMerge(valueJson, target.getJSONObject(key));
|
||||
} else {
|
||||
target.put(key, value);
|
||||
}
|
||||
} else {
|
||||
target.put(key, value);
|
||||
}
|
||||
}
|
||||
return target;
|
||||
}
|
||||
}
|
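A short usage sketch of JsonUtil.mergeJsonStr (same package assumed, values invented): nested objects are merged recursively and, on conflicts, the source value wins.

public class JsonUtilSketch {
    public static void main(String[] args) {
        String source = "{\"settings\":{\"number_of_replicas\":0}}";
        String target = "{\"settings\":{\"number_of_shards\":1},\"aliases\":{}}";
        // Deep merge: target keeps its own keys and gains/overwrites the ones coming from source.
        String merged = JsonUtil.mergeJsonStr(source, target);
        System.out.println(merged);
        // e.g. {"settings":{"number_of_shards":1,"number_of_replicas":0},"aliases":{}}
    }
}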
@ -1,9 +1,13 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.TypeReference;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public final class Key {
|
||||
@ -37,31 +41,35 @@ public final class Key {
|
||||
|
||||
|
||||
public static String getEndpoint(Configuration conf) {
|
||||
return conf.getNecessaryValue("endpoint", ESWriterErrorCode.BAD_CONFIG_VALUE);
|
||||
return conf.getNecessaryValue("endpoint", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
|
||||
}
|
||||
|
||||
public static String getAccessID(Configuration conf) {
|
||||
return conf.getString("accessId", "");
|
||||
public static String getUsername(Configuration conf) {
|
||||
return conf.getString("username", conf.getString("accessId"));
|
||||
}
|
||||
|
||||
public static String getAccessKey(Configuration conf) {
|
||||
return conf.getString("accessKey", "");
|
||||
public static String getPassword(Configuration conf) {
|
||||
return conf.getString("password", conf.getString("accessKey"));
|
||||
}
|
||||
|
||||
public static int getBatchSize(Configuration conf) {
|
||||
return conf.getInt("batchSize", 1000);
|
||||
return conf.getInt("batchSize", 1024);
|
||||
}
|
||||
|
||||
public static int getTrySize(Configuration conf) {
|
||||
return conf.getInt("trySize", 30);
|
||||
}
|
||||
|
||||
public static long getTryInterval(Configuration conf) {
|
||||
return conf.getLong("tryInterval", 60000L);
|
||||
}
|
||||
|
||||
public static int getTimeout(Configuration conf) {
|
||||
return conf.getInt("timeout", 600000);
|
||||
}
|
||||
|
||||
public static boolean isCleanup(Configuration conf) {
|
||||
return conf.getBool("cleanup", false);
|
||||
public static boolean isTruncate(Configuration conf) {
|
||||
return conf.getBool("truncate", conf.getBool("cleanup", false));
|
||||
}
|
||||
|
||||
public static boolean isDiscovery(Configuration conf) {
|
||||
@ -69,7 +77,7 @@ public final class Key {
|
||||
}
|
||||
|
||||
public static boolean isCompression(Configuration conf) {
|
||||
return conf.getBool("compression", true);
|
||||
return conf.getBool("compress", conf.getBool("compression", true));
|
||||
}
|
||||
|
||||
public static boolean isMultiThread(Configuration conf) {
|
||||
@ -77,9 +85,17 @@ public final class Key {
|
||||
}
|
||||
|
||||
public static String getIndexName(Configuration conf) {
|
||||
return conf.getNecessaryValue("index", ESWriterErrorCode.BAD_CONFIG_VALUE);
|
||||
return conf.getNecessaryValue("index", ElasticSearchWriterErrorCode.BAD_CONFIG_VALUE);
|
||||
}
|
||||
|
||||
public static String getDeleteBy(Configuration conf) {
|
||||
return conf.getString("deleteBy");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* TODO: starting with ES 7.0, an index can only have a single type, _doc
|
||||
* */
|
||||
public static String getTypeName(Configuration conf) {
|
||||
String indexType = conf.getString("indexType");
|
||||
if(StringUtils.isBlank(indexType)){
|
||||
@ -128,4 +144,58 @@ public final class Key {
|
||||
public static boolean getDynamic(Configuration conf) {
|
||||
return conf.getBool("dynamic", false);
|
||||
}
|
||||
|
||||
public static String getDstDynamic(Configuration conf) {
|
||||
return conf.getString("dstDynamic");
|
||||
}
|
||||
|
||||
public static String getDiscoveryFilter(Configuration conf){
|
||||
return conf.getString("discoveryFilter","_all");
|
||||
}
|
||||
|
||||
public static Boolean getVersioning(Configuration conf) {
|
||||
return conf.getBool("versioning", false);
|
||||
}
|
||||
|
||||
public static Long getUnifiedVersion(Configuration conf) {
|
||||
return conf.getLong("version", System.currentTimeMillis());
|
||||
}
|
||||
|
||||
public static Map<String, Object> getUrlParams(Configuration conf) {
|
||||
return conf.getMap("urlParams", new HashMap<String, Object>());
|
||||
}
|
||||
|
||||
public static Integer getESVersion(Configuration conf) {
|
||||
return conf.getInt("esVersion");
|
||||
}
|
||||
|
||||
public static String getMasterTimeout(Configuration conf) {
|
||||
return conf.getString("masterTimeout", "5m");
|
||||
}
|
||||
|
||||
public static boolean isEnableNullUpdate(Configuration conf) {
|
||||
return conf.getBool("enableWriteNull", true);
|
||||
}
|
||||
|
||||
public static String getFieldDelimiter(Configuration conf) {
|
||||
return conf.getString("fieldDelimiter", "");
|
||||
}
|
||||
|
||||
public static PrimaryKeyInfo getPrimaryKeyInfo(Configuration conf) {
|
||||
String primaryKeyInfoString = conf.getString("primaryKeyInfo");
|
||||
if (StringUtils.isNotBlank(primaryKeyInfoString)) {
|
||||
return JSON.parseObject(primaryKeyInfoString, new TypeReference<PrimaryKeyInfo>() {});
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<PartitionColumn> getEsPartitionColumn(Configuration conf) {
|
||||
String esPartitionColumnString = conf.getString("esPartitionColumn");
|
||||
if (StringUtils.isNotBlank(esPartitionColumnString)) {
|
||||
return JSON.parseObject(esPartitionColumnString, new TypeReference<List<PartitionColumn>>() {});
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,16 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public class NoReRunException extends DataXException {
|
||||
public NoReRunException(String errorMessage) {
|
||||
super(errorMessage);
|
||||
}
|
||||
|
||||
public NoReRunException(ErrorCode errorCode, String errorMessage) {
|
||||
super(errorCode, errorMessage);
|
||||
}
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
}
|
@ -0,0 +1,42 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
public class PartitionColumn {
|
||||
private String name;
|
||||
// like: DATA
|
||||
private String metaType;
|
||||
private String comment;
|
||||
// like: VARCHAR
|
||||
private String type;
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getMetaType() {
|
||||
return metaType;
|
||||
}
|
||||
|
||||
public String getComment() {
|
||||
return comment;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public void setMetaType(String metaType) {
|
||||
this.metaType = metaType;
|
||||
}
|
||||
|
||||
public void setComment(String comment) {
|
||||
this.comment = comment;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class PrimaryKeyInfo {
|
||||
|
||||
/**
 * Primary key type (see PrimaryKeyTypeEnum):
 *
 * pk: a single (business) primary key; specific: a composite primary key
 */
|
||||
private String type;
|
||||
|
||||
/**
 * User-defined delimiter used to join the columns of a composite primary key
 */
|
||||
private String fieldDelimiter;
|
||||
|
||||
/**
 * Names of the primary key columns
 */
|
||||
private List<String> column;
|
||||
|
||||
public String getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public String getFieldDelimiter() {
|
||||
return fieldDelimiter;
|
||||
}
|
||||
|
||||
public List<String> getColumn() {
|
||||
return column;
|
||||
}
|
||||
|
||||
public void setType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void setFieldDelimiter(String fieldDelimiter) {
|
||||
this.fieldDelimiter = fieldDelimiter;
|
||||
}
|
||||
|
||||
public void setColumn(List<String> column) {
|
||||
this.column = column;
|
||||
}
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import io.searchbox.action.AbstractAction;
|
||||
import io.searchbox.client.config.ElasticsearchVersion;
|
||||
|
||||
public class ClusterInfo extends AbstractAction<ClusterInfoResult> {
|
||||
@Override
|
||||
protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
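// an empty URI means the request targets the cluster root endpoint, whose response carries the version metadata consumed by ClusterInfoResult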
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRestMethodName() {
|
||||
return "GET";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClusterInfoResult createNewElasticSearchResult(String responseBody, int statusCode, String reasonPhrase, Gson gson) {
|
||||
return createNewElasticSearchResult(new ClusterInfoResult(gson), responseBody, statusCode, reasonPhrase, gson);
|
||||
}
|
||||
|
||||
public static class Builder extends AbstractAction.Builder<ClusterInfo, ClusterInfo.Builder> {
|
||||
|
||||
public Builder() {
|
||||
setHeader("accept", "application/json");
|
||||
setHeader("content-type", "application/json");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClusterInfo build() {
|
||||
return new ClusterInfo();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import io.searchbox.client.JestResult;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ClusterInfoResult extends JestResult {
|
||||
|
||||
private static final Pattern FIRST_NUMBER = Pattern.compile("\\d");
|
||||
|
||||
private static final int SEVEN = 7;
|
||||
|
||||
public ClusterInfoResult(Gson gson) {
|
||||
super(gson);
|
||||
}
|
||||
|
||||
public ClusterInfoResult(JestResult source) {
|
||||
super(source);
|
||||
}
|
||||
|
||||
/**
 * Checks whether the deployed Elasticsearch cluster version is 7.x or above.
 * Starting with 7.x Elasticsearch changed how index types work, so an extra check is needed.
 * Tested against 7.x and 6.x clusters with the expected result; for 5.x and below the parse fails,
 * falls into the catch block and returns false, keeping backward compatibility.
 * @return whether the cluster version is >= 7
 */
|
||||
public Boolean isGreaterOrEqualThan7() throws Exception {
|
||||
// no permission to read the cluster info (403): return false to stay compatible with older deployments
|
||||
if (responseCode == 403) {
|
||||
return false;
|
||||
}
|
||||
if (!isSucceeded) {
|
||||
throw new Exception(getJsonString());
|
||||
}
|
||||
try {
|
||||
String version = jsonObject.getAsJsonObject("version").get("number").toString();
|
||||
Matcher matcher = FIRST_NUMBER.matcher(version);
|
||||
matcher.find();
|
||||
String number = matcher.group();
|
||||
Integer versionNum = Integer.valueOf(number);
|
||||
return versionNum >= SEVEN;
|
||||
} catch (Exception e) {
|
||||
// versions below 5.x are not covered by compatibility tests; if the JSON cannot be parsed it may be such an old version, so treat it as below 7.x
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,39 @@
|
||||
package com.alibaba.datax.plugin.writer.elasticsearchwriter.jest;
|
||||
|
||||
import io.searchbox.action.GenericResultAbstractAction;
|
||||
import io.searchbox.client.config.ElasticsearchVersion;
|
||||
|
||||
public class PutMapping7 extends GenericResultAbstractAction {
|
||||
protected PutMapping7(PutMapping7.Builder builder) {
|
||||
super(builder);
|
||||
|
||||
this.indexName = builder.index;
|
||||
this.payload = builder.source;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String buildURI(ElasticsearchVersion elasticsearchVersion) {
|
||||
return super.buildURI(elasticsearchVersion) + "/_mapping";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRestMethodName() {
|
||||
return "PUT";
|
||||
}
|
||||
|
||||
public static class Builder extends GenericResultAbstractAction.Builder<PutMapping7, PutMapping7.Builder> {
|
||||
private String index;
|
||||
private Object source;
|
||||
|
||||
public Builder(String index, Object source) {
|
||||
this.index = index;
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PutMapping7 build() {
|
||||
return new PutMapping7(this);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "elasticsearchwriter",
|
||||
"class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter",
|
||||
"class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ElasticSearchWriter",
|
||||
"description": "适用于: 生产环境. 原理: TODO",
|
||||
"developer": "alibaba"
|
||||
}
|
@ -2,6 +2,6 @@
|
||||
"name": "hbase20xsqlreader",
|
||||
"class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader",
|
||||
"description": "useScene: prod. mechanism: read data from phoenix through queryserver.",
|
||||
"developer": "bake"
|
||||
"developer": "alibaba"
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,6 @@
|
||||
"name": "hbase20xsqlwriter",
|
||||
"class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter",
|
||||
"description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.",
|
||||
"developer": "bake"
|
||||
"developer": "alibaba"
|
||||
}
|
||||
|
||||
|
73
loghubreader/pom.xml
Normal file
@ -0,0 +1,73 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>datax-all</artifactId>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>loghubreader</artifactId>
|
||||
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-common</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<groupId>org.slf4j</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.aliyun.openservices</groupId>
|
||||
<artifactId>aliyun-log</artifactId>
|
||||
<version>0.6.22</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- compiler plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>${jdk-version}</source>
|
||||
<target>${jdk-version}</target>
|
||||
<encoding>${project-sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- assembly plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||
</descriptors>
|
||||
<finalName>datax</finalName>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dwzip</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
34
loghubreader/src/main/assembly/package.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>src/main/resources</directory>
|
||||
<includes>
|
||||
<include>plugin.json</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/loghubreader</outputDirectory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>target/</directory>
|
||||
<includes>
|
||||
<include>loghubreader-0.0.1-SNAPSHOT.jar</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/reader/loghubreader</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<outputDirectory>plugin/reader/loghubreader/libs</outputDirectory>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
@ -0,0 +1,26 @@
|
||||
package com.alibaba.datax.plugin.reader.loghubreader;
|
||||
|
||||
public class Constant {
|
||||
|
||||
public static String DATETIME_FORMAT = "yyyyMMddHHmmss";
|
||||
public static String DATE_FORMAT = "yyyyMMdd";
|
||||
|
||||
static String META_COL_SOURCE = "__source__";
|
||||
static String META_COL_TOPIC = "__topic__";
|
||||
static String META_COL_CATEGORY = "__category__";
|
||||
static String META_COL_MACHINEUUID = "__machineUUID__";
|
||||
static String META_COL_HOSTNAME = "__hostname__";
|
||||
static String META_COL_PATH = "__path__";
|
||||
static String META_COL_LOGTIME = "__logtime__";
|
||||
public static String META_COL_RECEIVE_TIME = "__receive_time__";
|
||||
|
||||
/**
 * All data fields that the user did not configure explicitly are read into one column as a single JSON object
 */
|
||||
static String COL_EXTRACT_OTHERS = "C__extract_others__";
|
||||
|
||||
/**
 * All metadata fields are read into one column as a single JSON object
 */
|
||||
static String COL_EXTRACT_ALL_META = "C__extract_all_meta__";
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
package com.alibaba.datax.plugin.reader.loghubreader;
|
||||
|
||||
public final class Key {
|
||||
|
||||
/**
 * Configuration keys that the plugin user is expected to provide
 */
|
||||
public static final String ENDPOINT = "endpoint";
|
||||
|
||||
public static final String ACCESSKEYID = "accessId";
|
||||
|
||||
public static final String ACCESSKEYSECRET = "accessKey";
|
||||
|
||||
public static final String PROJECT = "project";
|
||||
|
||||
public static final String LOGSTORE = "logstore";
|
||||
|
||||
public static final String TOPIC = "topic";
|
||||
|
||||
public static final String COLUMN = "column";
|
||||
|
||||
public static final String BATCHSIZE = "batchSize";
|
||||
|
||||
public static final String BEGINTIMESTAMPMILLIS = "beginTimestampMillis";
|
||||
|
||||
public static final String ENDTIMESTAMPMILLIS = "endTimestampMillis";
|
||||
|
||||
public static final String BEGINDATETIME = "beginDateTime";
|
||||
|
||||
public static final String ENDDATETIME = "endDateTime";
|
||||
|
||||
public static final String TIMEFORMAT = "timeformat";
|
||||
|
||||
public static final String SOURCE = "source";
|
||||
|
||||
public static final String SHARD = "shard";
|
||||
|
||||
}
|
@ -0,0 +1,482 @@
|
||||
package com.alibaba.datax.plugin.reader.loghubreader;
|
||||
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.element.StringColumn;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.plugin.RecordSender;
|
||||
import com.alibaba.datax.common.spi.Reader;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.datax.common.util.DataXCaseEnvUtil;
|
||||
import com.alibaba.datax.common.util.RetryUtil;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.aliyun.openservices.log.Client;
|
||||
import com.aliyun.openservices.log.common.Consts.CursorMode;
|
||||
import com.aliyun.openservices.log.common.*;
|
||||
import com.aliyun.openservices.log.exception.LogException;
|
||||
import com.aliyun.openservices.log.response.BatchGetLogResponse;
|
||||
import com.aliyun.openservices.log.response.GetCursorResponse;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
public class LogHubReader extends Reader {
|
||||
public static class Job extends Reader.Job {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
|
||||
|
||||
private Client client;
|
||||
private Configuration originalConfig;
|
||||
|
||||
private Long beginTimestampMillis;
|
||||
private Long endTimestampMillis;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
LOG.info("loghub reader job init begin ...");
|
||||
this.originalConfig = super.getPluginJobConf();
|
||||
validateParameter(originalConfig);
|
||||
|
||||
String endPoint = this.originalConfig.getString(Key.ENDPOINT);
|
||||
String accessKeyId = this.originalConfig.getString(Key.ACCESSKEYID);
|
||||
String accessKeySecret = this.originalConfig.getString(Key.ACCESSKEYSECRET);
|
||||
|
||||
client = new Client(endPoint, accessKeyId, accessKeySecret);
|
||||
LOG.info("loghub reader job init end.");
|
||||
}
|
||||
|
||||
private void validateParameter(Configuration conf){
|
||||
conf.getNecessaryValue(Key.ENDPOINT,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESSKEYID,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESSKEYSECRET,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.PROJECT,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.LOGSTORE,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.COLUMN,LogHubReaderErrorCode.REQUIRE_VALUE);
|
||||
|
||||
int batchSize = this.originalConfig.getInt(Key.BATCHSIZE);
|
||||
if (batchSize > 1000) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid batchSize[" + batchSize + "] value (0,1000]!");
|
||||
}
|
||||
|
||||
beginTimestampMillis = this.originalConfig.getLong(Key.BEGINTIMESTAMPMILLIS);
|
||||
String beginDateTime = this.originalConfig.getString(Key.BEGINDATETIME);
|
||||
|
||||
if (beginDateTime != null) {
|
||||
try {
|
||||
beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
|
||||
} catch (ParseException e) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginDateTime[" + beginDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
|
||||
}
|
||||
}
|
||||
|
||||
if (beginTimestampMillis != null && beginTimestampMillis <= 0) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginTimestampMillis[" + beginTimestampMillis + "]!");
|
||||
}
|
||||
|
||||
endTimestampMillis = this.originalConfig.getLong(Key.ENDTIMESTAMPMILLIS);
|
||||
String endDateTime = this.originalConfig.getString(Key.ENDDATETIME);
|
||||
|
||||
if (endDateTime != null) {
|
||||
try {
|
||||
endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
|
||||
} catch (ParseException e) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid beginDateTime[" + endDateTime + "], format [yyyyMMddHHmmss or yyyyMMdd]!");
|
||||
}
|
||||
}
|
||||
|
||||
if (endTimestampMillis != null && endTimestampMillis <= 0) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid endTimestampMillis[" + endTimestampMillis + "]!");
|
||||
}
|
||||
|
||||
if (beginTimestampMillis != null && endTimestampMillis != null
|
||||
&& endTimestampMillis <= beginTimestampMillis) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"endTimestampMillis[" + endTimestampMillis + "] must bigger than beginTimestampMillis[" + beginTimestampMillis + "]!");
|
||||
}
|
||||
}
|
||||
|
||||
private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
|
||||
try {
|
||||
String format = Constant.DATETIME_FORMAT;
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
|
||||
return simpleDateFormat.parse(dateTime).getTime() / 1000;
|
||||
} catch (ParseException ignored) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"Invalid DateTime[" + dateTime + "]!");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int adviceNumber) {
|
||||
LOG.info("split() begin...");
|
||||
|
||||
List<Configuration> readerSplitConfigs = new ArrayList<Configuration>();
|
||||
|
||||
final String project = this.originalConfig.getString(Key.PROJECT);
|
||||
final String logstore = this.originalConfig.getString(Key.LOGSTORE);
|
||||
|
||||
List<Shard> logStore = null;
|
||||
try {
|
||||
logStore = RetryUtil.executeWithRetry(new Callable<List<Shard>>() {
|
||||
@Override
|
||||
public List<Shard> call() throws Exception {
|
||||
return client.ListShard(project, logstore).GetShards();
|
||||
}
|
||||
}, DataXCaseEnvUtil.getRetryTimes(7), DataXCaseEnvUtil.getRetryInterval(1000L), DataXCaseEnvUtil.getRetryExponential(true));
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"get LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
|
||||
}
|
||||
|
||||
if (logStore == null) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.BAD_CONFIG_VALUE,
|
||||
"LogStore[" + logstore + "] isn't exists, please check !");
|
||||
}
|
||||
|
||||
int splitNumber = logStore.size();
|
||||
if (0 == splitNumber) {
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.EMPTY_LOGSTORE_VALUE,
|
||||
"LogStore[" + logstore + "] has 0 shard, please check !");
|
||||
}
|
||||
|
||||
Collections.shuffle(logStore);
|
||||
for (int i = 0; i < logStore.size(); i++) {
|
||||
if (beginTimestampMillis != null && endTimestampMillis != null) {
|
||||
try {
|
||||
String beginCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), beginTimestampMillis).GetCursor();
|
||||
String endCursor = getCursorWithRetry(client, project, logstore, logStore.get(i).GetShardId(), endTimestampMillis).GetCursor();
|
||||
if (beginCursor.equals(endCursor)) {
|
||||
if ((i == logStore.size() - 1) && (readerSplitConfigs.size() == 0)) {
|
||||
|
||||
} else {
|
||||
LOG.info("skip empty shard[" + logStore.get(i) + "]!");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("Check Shard[" + logStore.get(i) + "] Error, please check !" + e.toString());
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
|
||||
}
|
||||
}
|
||||
Configuration splitedConfig = this.originalConfig.clone();
|
||||
splitedConfig.set(Key.SHARD, logStore.get(i).GetShardId());
|
||||
readerSplitConfigs.add(splitedConfig);
|
||||
}
|
||||
|
||||
if (splitNumber < adviceNumber) {
|
||||
// LOG.info(MESSAGE_SOURCE.message("hdfsreader.12",
|
||||
// splitNumber, adviceNumber, splitNumber, splitNumber));
|
||||
}
|
||||
LOG.info("split() ok and end...");
|
||||
|
||||
return readerSplitConfigs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
|
||||
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
|
||||
return
|
||||
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
|
||||
@Override
|
||||
public GetCursorResponse call() throws Exception {
|
||||
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
|
||||
return client.GetCursor(project, logstore, shard, fromTime);
|
||||
}
|
||||
}, 7, 1000L, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class Task extends Reader.Task {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
|
||||
|
||||
private Configuration taskConfig;
|
||||
private Client client;
|
||||
private String endPoint;
|
||||
private String accessKeyId;
|
||||
private String accessKeySecret;
|
||||
private String project;
|
||||
private String logstore;
|
||||
private long beginTimestampMillis;
|
||||
private long endTimestampMillis;
|
||||
private int batchSize;
|
||||
private int shard;
|
||||
private List<String> columns;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
this.taskConfig = super.getPluginJobConf();
|
||||
|
||||
endPoint = this.taskConfig.getString(Key.ENDPOINT);
|
||||
accessKeyId = this.taskConfig.getString(Key.ACCESSKEYID);
|
||||
accessKeySecret = this.taskConfig.getString(Key.ACCESSKEYSECRET);
|
||||
project = this.taskConfig.getString(Key.PROJECT);
|
||||
logstore = this.taskConfig.getString(Key.LOGSTORE);
|
||||
batchSize = this.taskConfig.getInt(Key.BATCHSIZE, 128);
|
||||
|
||||
this.beginTimestampMillis = this.taskConfig.getLong(Key.BEGINTIMESTAMPMILLIS, -1);
|
||||
String beginDateTime = this.taskConfig.getString(Key.BEGINDATETIME);
|
||||
|
||||
if (beginDateTime != null) {
|
||||
try {
|
||||
beginTimestampMillis = getUnixTimeFromDateTime(beginDateTime);
|
||||
} catch (ParseException e) {
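// not expected: beginDateTime was already validated in Job.init();
// if parsing still fails, keep the configured beginTimestampMillis as-is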
|
||||
}
|
||||
}
|
||||
|
||||
this.endTimestampMillis = this.taskConfig.getLong(Key.ENDTIMESTAMPMILLIS, -1);
|
||||
String endDateTime = this.taskConfig.getString(Key.ENDDATETIME);
|
||||
|
||||
if (endDateTime != null) {
|
||||
try {
|
||||
endTimestampMillis = getUnixTimeFromDateTime(endDateTime);
|
||||
} catch (ParseException e) {
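// not expected: endDateTime was already validated in Job.init();
// if parsing still fails, keep the configured endTimestampMillis as-is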
|
||||
}
|
||||
}
|
||||
|
||||
columns = this.taskConfig.getList(Key.COLUMN, String.class);
|
||||
|
||||
shard = this.taskConfig.getInt(Key.SHARD);
|
||||
|
||||
client = new Client(endPoint, accessKeyId, accessKeySecret);
|
||||
LOG.info("init loghub reader task finished.project:{} logstore:{} batchSize:{}", project, logstore, batchSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
}
|
||||
|
||||
private long getUnixTimeFromDateTime(String dateTime) throws ParseException {
|
||||
try {
|
||||
String format = Constant.DATETIME_FORMAT;
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
|
||||
return simpleDateFormat.parse(dateTime).getTime() / 1000;
|
||||
} catch (ParseException ignored) {
|
||||
}
|
||||
String format = Constant.DATE_FORMAT;
|
||||
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(format);
|
||||
return simpleDateFormat.parse(dateTime).getTime() / 1000;
|
||||
}
|
||||
|
||||
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final long fromTime) throws Exception {
|
||||
return
|
||||
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
|
||||
@Override
|
||||
public GetCursorResponse call() throws Exception {
|
||||
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} time: {}", project, logstore, shard, fromTime);
|
||||
return client.GetCursor(project, logstore, shard, fromTime);
|
||||
}
|
||||
}, 7, 1000L, true);
|
||||
}
|
||||
|
||||
private GetCursorResponse getCursorWithRetry(final Client client, final String project, final String logstore, final int shard, final CursorMode mode) throws Exception {
|
||||
return
|
||||
RetryUtil.executeWithRetry(new Callable<GetCursorResponse>() {
|
||||
@Override
|
||||
public GetCursorResponse call() throws Exception {
|
||||
LOG.info("loghug get cursor with project: {} logstore: {} shard: {} mode: {}", project, logstore, shard, mode);
|
||||
return client.GetCursor(project, logstore, shard, mode);
|
||||
}
|
||||
}, 7, 1000L, true);
|
||||
}
|
||||
|
||||
private BatchGetLogResponse batchGetLogWithRetry(final Client client, final String project, final String logstore, final int shard, final int batchSize,
|
||||
final String curCursor, final String endCursor) throws Exception {
|
||||
return
|
||||
RetryUtil.executeWithRetry(new Callable<BatchGetLogResponse>() {
|
||||
@Override
|
||||
public BatchGetLogResponse call() throws Exception {
|
||||
return client.BatchGetLog(project, logstore, shard, batchSize, curCursor, endCursor);
|
||||
}
|
||||
}, 7, 1000L, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startRead(RecordSender recordSender) {
|
||||
LOG.info("read start");
|
||||
|
||||
try {
|
||||
GetCursorResponse cursorRes;
|
||||
if (this.beginTimestampMillis != -1) {
|
||||
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, beginTimestampMillis);
|
||||
} else {
|
||||
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.BEGIN);
|
||||
}
|
||||
String beginCursor = cursorRes.GetCursor();
|
||||
|
||||
LOG.info("the begin cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());
|
||||
|
||||
if (this.endTimestampMillis != -1) {
|
||||
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, endTimestampMillis);
|
||||
} else {
|
||||
cursorRes = getCursorWithRetry(client, project, logstore, this.shard, CursorMode.END);
|
||||
}
|
||||
String endCursor = cursorRes.GetCursor();
|
||||
LOG.info("the end cursor, loghub requestId: {} cursor: {}", cursorRes.GetRequestId(), cursorRes.GetCursor());
|
||||
|
||||
if (StringUtils.equals(beginCursor, endCursor)) {
|
||||
LOG.info("beginCursor:{} equals endCursor:{}, end directly!", beginCursor, endCursor);
|
||||
return;
|
||||
}
|
||||
|
||||
String currentCursor = null;
|
||||
String nextCursor = beginCursor;
|
||||
|
||||
HashMap<String, String> metaMap = new HashMap<String, String>();
|
||||
HashMap<String, String> dataMap = new HashMap<String, String>();
|
||||
JSONObject allMetaJson = new JSONObject();
|
||||
while (!StringUtils.equals(currentCursor, nextCursor)) {
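// keep pulling log groups until the cursor stops advancing, i.e. everything up to endCursor has been consumed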
|
||||
currentCursor = nextCursor;
|
||||
BatchGetLogResponse logDataRes = batchGetLogWithRetry(client, project, logstore, this.shard, this.batchSize, currentCursor, endCursor);
|
||||
|
||||
List<LogGroupData> logGroups = logDataRes.GetLogGroups();
|
||||
|
||||
for(LogGroupData logGroup: logGroups) {
|
||||
metaMap.clear();
|
||||
allMetaJson.clear();
|
||||
FastLogGroup flg = logGroup.GetFastLogGroup();
|
||||
|
||||
metaMap.put("C_Category", flg.getCategory());
|
||||
metaMap.put(Constant.META_COL_CATEGORY, flg.getCategory());
|
||||
allMetaJson.put(Constant.META_COL_CATEGORY, flg.getCategory());
|
||||
|
||||
metaMap.put("C_Source", flg.getSource());
|
||||
metaMap.put(Constant.META_COL_SOURCE, flg.getSource());
|
||||
allMetaJson.put(Constant.META_COL_SOURCE, flg.getSource());
|
||||
|
||||
metaMap.put("C_Topic", flg.getTopic());
|
||||
metaMap.put(Constant.META_COL_TOPIC, flg.getTopic());
|
||||
allMetaJson.put(Constant.META_COL_TOPIC, flg.getTopic());
|
||||
|
||||
metaMap.put("C_MachineUUID", flg.getMachineUUID());
|
||||
metaMap.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());
|
||||
allMetaJson.put(Constant.META_COL_MACHINEUUID, flg.getMachineUUID());
|
||||
|
||||
for (int tagIdx = 0; tagIdx < flg.getLogTagsCount(); ++tagIdx) {
|
||||
FastLogTag logtag = flg.getLogTags(tagIdx);
|
||||
String tagKey = logtag.getKey();
|
||||
String tagValue = logtag.getValue();
|
||||
if (tagKey.equals(Constant.META_COL_HOSTNAME)) {
|
||||
metaMap.put("C_HostName", logtag.getValue());
|
||||
} else if (tagKey.equals(Constant.META_COL_PATH)) {
|
||||
metaMap.put("C_Path", logtag.getValue());
|
||||
}
|
||||
metaMap.put(tagKey, tagValue);
|
||||
allMetaJson.put(tagKey, tagValue);
|
||||
}
|
||||
|
||||
for (int lIdx = 0; lIdx < flg.getLogsCount(); ++lIdx) {
|
||||
dataMap.clear();
|
||||
FastLog log = flg.getLogs(lIdx);
|
||||
|
||||
String logTime = String.valueOf(log.getTime());
|
||||
metaMap.put("C_LogTime", logTime);
|
||||
metaMap.put(Constant.META_COL_LOGTIME, logTime);
|
||||
allMetaJson.put(Constant.META_COL_LOGTIME, logTime);
|
||||
|
||||
for (int cIdx = 0; cIdx < log.getContentsCount(); ++cIdx) {
|
||||
FastLogContent content = log.getContents(cIdx);
|
||||
dataMap.put(content.getKey(), content.getValue());
|
||||
}
|
||||
|
||||
Record record = recordSender.createRecord();
|
||||
|
||||
JSONObject extractOthers = new JSONObject();
|
||||
if(columns.contains(Constant.COL_EXTRACT_OTHERS)){
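// collect every data field that is not an explicitly configured column into one JSON object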
|
||||
List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
|
||||
for (String otherKey:keyList) {
|
||||
if (!columns.contains(otherKey)){
|
||||
extractOthers.put(otherKey,dataMap.get(otherKey));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (null != this.columns && 1 == this.columns.size()) {
|
||||
String columnsInStr = columns.get(0).toString();
|
||||
if ("\"*\"".equals(columnsInStr) || "*".equals(columnsInStr)) {
|
||||
List<String> keyList = Arrays.asList(dataMap.keySet().toArray(new String[dataMap.keySet().size()]));
|
||||
Collections.sort(keyList);
|
||||
|
||||
for (String key : keyList) {
|
||||
record.addColumn(new StringColumn(key + ":" + dataMap.get(key)));
|
||||
}
|
||||
} else {
|
||||
if (dataMap.containsKey(columnsInStr)) {
|
||||
record.addColumn(new StringColumn(dataMap.get(columnsInStr)));
|
||||
} else if (metaMap.containsKey(columnsInStr)) {
|
||||
record.addColumn(new StringColumn(metaMap.get(columnsInStr)));
|
||||
} else if (Constant.COL_EXTRACT_OTHERS.equals(columnsInStr)){
|
||||
record.addColumn(new StringColumn(extractOthers.toJSONString()));
|
||||
} else if (Constant.COL_EXTRACT_ALL_META.equals(columnsInStr)) {
|
||||
record.addColumn(new StringColumn(allMetaJson.toJSONString()));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (String col : this.columns) {
|
||||
if (dataMap.containsKey(col)) {
|
||||
record.addColumn(new StringColumn(dataMap.get(col)));
|
||||
} else if (metaMap.containsKey(col)) {
|
||||
record.addColumn(new StringColumn(metaMap.get(col)));
|
||||
} else if (col != null && col.startsWith("'") && col.endsWith("'")){
|
||||
String constant = col.substring(1, col.length()-1);
|
||||
record.addColumn(new StringColumn(constant));
|
||||
}else if (Constant.COL_EXTRACT_OTHERS.equals(col)){
|
||||
record.addColumn(new StringColumn(extractOthers.toJSONString()));
|
||||
} else if (Constant.COL_EXTRACT_ALL_META.equals(col)) {
|
||||
record.addColumn(new StringColumn(allMetaJson.toJSONString()));
|
||||
} else {
|
||||
record.addColumn(new StringColumn(null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordSender.sendToWriter(record);
|
||||
}
|
||||
}
|
||||
|
||||
nextCursor = logDataRes.GetNextCursor();
|
||||
}
|
||||
} catch (LogException e) {
|
||||
if (e.GetErrorCode().equals("LogStoreNotExist")) {
|
||||
LOG.info("logStore[" + logstore +"] Not Exits! detail error messsage: " + e.toString());
|
||||
} else {
|
||||
LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.error("read LogStore[" + logstore + "] error, please check ! detail error messsage: " + e.toString());
|
||||
throw DataXException.asDataXException(LogHubReaderErrorCode.LOG_HUB_ERROR, e);
|
||||
}
|
||||
|
||||
LOG.info("end read loghub shard...");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package com.alibaba.datax.plugin.reader.loghubreader;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public enum LogHubReaderErrorCode implements ErrorCode {
|
||||
BAD_CONFIG_VALUE("LogHuReader-00", "The value you configured is invalid."),
|
||||
LOG_HUB_ERROR("LogHubReader-01","LogHub access encounter exception"),
|
||||
REQUIRE_VALUE("LogHubReader-02","Missing parameters"),
|
||||
EMPTY_LOGSTORE_VALUE("LogHubReader-03","There is no shard in this LogStore");
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
private LogHubReaderErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
6
loghubreader/src/main/resources/plugin.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "loghubreader",
|
||||
"class": "com.alibaba.datax.plugin.reader.loghubreader.LogHubReader",
|
||||
"description": "适用于: 从SLS LogHub中读取数据",
|
||||
"developer": "alibaba"
|
||||
}
|
12
loghubreader/src/main/resources/plugin_job_template.json
Normal file
@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "loghubreader",
|
||||
"parameter": {
|
||||
"endpoint": "",
|
||||
"accessId": "",
|
||||
"accessKey": "",
|
||||
"project": "",
|
||||
"logstore": "",
|
||||
"batchSize":1024,
|
||||
"column": []
|
||||
}
|
||||
}
|
73
loghubwriter/pom.xml
Normal file
@ -0,0 +1,73 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>datax-all</artifactId>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<artifactId>loghubwriter</artifactId>
|
||||
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.alibaba.datax</groupId>
|
||||
<artifactId>datax-common</artifactId>
|
||||
<version>${datax-project-version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<groupId>org.slf4j</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>ch.qos.logback</groupId>
|
||||
<artifactId>logback-classic</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.aliyun.openservices</groupId>
|
||||
<artifactId>aliyun-log</artifactId>
|
||||
<version>0.6.12</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- compiler plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>${jdk-version}</source>
|
||||
<target>${jdk-version}</target>
|
||||
<encoding>${project-sourceEncoding}</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- assembly plugin -->
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/package.xml</descriptor>
|
||||
</descriptors>
|
||||
<finalName>datax</finalName>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>dwzip</id>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
34
loghubwriter/src/main/assembly/package.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>dir</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<fileSets>
|
||||
<fileSet>
|
||||
<directory>src/main/resources</directory>
|
||||
<includes>
|
||||
<include>plugin.json</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/writer/loghubwriter</outputDirectory>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>target/</directory>
|
||||
<includes>
|
||||
<include>loghubwriter-0.0.1-SNAPSHOT.jar</include>
|
||||
</includes>
|
||||
<outputDirectory>plugin/writer/loghubwriter</outputDirectory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<outputDirectory>plugin/writer/loghubwriter/libs</outputDirectory>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
@ -0,0 +1,35 @@
|
||||
package com.alibaba.datax.plugin.writer.loghubwriter;
|
||||
|
||||
/**
 * Configuration keys for the LogHub writer
 * @author
 */
|
||||
public final class Key {
|
||||
|
||||
/**
 * Configuration keys that the plugin user is expected to provide
 */
|
||||
public static final String ENDPOINT = "endpoint";
|
||||
|
||||
public static final String ACCESS_KEY_ID = "accessId";
|
||||
|
||||
public static final String ACCESS_KEY_SECRET = "accessKey";
|
||||
|
||||
public static final String PROJECT = "project";
|
||||
|
||||
public static final String LOG_STORE = "logstore";
|
||||
|
||||
public static final String TOPIC = "topic";
|
||||
|
||||
public static final String COLUMN = "column";
|
||||
|
||||
public static final String BATCH_SIZE = "batchSize";
|
||||
|
||||
public static final String TIME = "time";
|
||||
|
||||
public static final String TIME_FORMAT = "timeformat";
|
||||
|
||||
public static final String SOURCE = "source";
|
||||
|
||||
public static final String HASH_BY_KEY = "hashKey";
|
||||
}
|
@ -0,0 +1,315 @@
|
||||
package com.alibaba.datax.plugin.writer.loghubwriter;
|
||||
|
||||
import com.alibaba.datax.common.element.Record;
|
||||
import com.alibaba.datax.common.exception.DataXException;
|
||||
import com.alibaba.datax.common.plugin.RecordReceiver;
|
||||
import com.alibaba.datax.common.spi.Writer;
|
||||
import com.alibaba.datax.common.util.Configuration;
|
||||
import com.alibaba.datax.common.util.RetryUtil;
|
||||
import com.alibaba.datax.common.util.StrUtil;
|
||||
import com.aliyun.openservices.log.Client;
|
||||
import com.aliyun.openservices.log.common.LogItem;
|
||||
import com.aliyun.openservices.log.common.Shard;
|
||||
import com.aliyun.openservices.log.exception.LogException;
|
||||
import com.aliyun.openservices.log.request.ListShardRequest;
|
||||
import com.aliyun.openservices.log.request.PutLogsRequest;
|
||||
import com.aliyun.openservices.log.response.ListShardResponse;
|
||||
import com.aliyun.openservices.log.response.PutLogsResponse;
|
||||
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
/**
 * SLS (Aliyun Log Service) LogHub writer plugin
 * @author
 */
|
||||
public class LogHubWriter extends Writer {
|
||||
|
||||
public static class Job extends Writer.Job {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
|
||||
|
||||
private Configuration jobConfig = null;
|
||||
|
||||
@Override
|
||||
public void init() {
|
||||
info(LOG, "loghub writer job init begin ...");
|
||||
this.jobConfig = super.getPluginJobConf();
|
||||
validateParameter(jobConfig);
|
||||
info(LOG, "loghub writer job init end.");
|
||||
}
|
||||
|
||||
private void validateParameter(Configuration conf){
|
||||
conf.getNecessaryValue(Key.ENDPOINT,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESS_KEY_ID,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.ACCESS_KEY_SECRET,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.PROJECT,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.LOG_STORE,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
conf.getNecessaryValue(Key.COLUMN,LogHubWriterErrorCode.REQUIRE_VALUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Configuration> split(int mandatoryNumber) {
|
||||
info(LOG, "split begin...");
|
||||
List<Configuration> configurationList = new ArrayList<Configuration>();
|
||||
for (int i = 0; i < mandatoryNumber; i++) {
|
||||
configurationList.add(this.jobConfig.clone());
|
||||
}
|
||||
info(LOG, "split end...");
|
||||
return configurationList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
public static class Task extends Writer.Task {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
|
||||
private Configuration taskConfig;
|
||||
private com.aliyun.openservices.log.Client logHubClient;
|
||||
private String logStore;
|
||||
private String topic;
|
||||
private String project;
|
||||
private List<String> columnList;
|
||||
private int batchSize;
|
||||
private String timeCol;
|
||||
private String timeFormat;
|
||||
private String source;
|
||||
private boolean isHashKey;
|
||||
private List<Shard> shards;
|
||||
public void init() {
|
||||
this.taskConfig = super.getPluginJobConf();
|
||||
String endpoint = taskConfig.getString(Key.ENDPOINT);
|
||||
String accessKeyId = taskConfig.getString(Key.ACCESS_KEY_ID);
|
||||
String accessKeySecret = taskConfig.getString(Key.ACCESS_KEY_SECRET);
|
||||
project = taskConfig.getString(Key.PROJECT);
|
||||
logStore = taskConfig.getString(Key.LOG_STORE);
|
||||
topic = taskConfig.getString(Key.TOPIC,"");
|
||||
columnList = taskConfig.getList(Key.COLUMN,String.class);
|
||||
batchSize = taskConfig.getInt(Key.BATCH_SIZE,1024);
|
||||
timeCol = taskConfig.getString(Key.TIME,"");
|
||||
timeFormat = taskConfig.getString(Key.TIME_FORMAT,"");
|
||||
source = taskConfig.getString(Key.SOURCE,"");
|
||||
isHashKey = taskConfig.getBool(Key.HASH_BY_KEY,false);
|
||||
logHubClient = new Client(endpoint, accessKeyId, accessKeySecret);
|
||||
if (isHashKey) {
|
||||
listShard();
|
||||
info(LOG, "init loghub writer with hash key mode.");
|
||||
}
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("init loghub writer task finished.project:{} logstore:{} topic:{} batchSize:{}",project,logStore,topic,batchSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetch the shard list of the target logstore
 */
|
||||
private void listShard() {
|
||||
try {
|
||||
ListShardResponse response = logHubClient.ListShard(new ListShardRequest(project,logStore));
|
||||
shards = response.GetShards();
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("Get shard count:{}", shards.size());
|
||||
}
|
||||
} catch (LogException e) {
|
||||
info(LOG, "Get shard failed!");
|
||||
throw new RuntimeException("Get shard failed!", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepare() {
|
||||
}
|
||||
|
||||
private int getTime(String v) {
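// convert the configured time column value to a unix timestamp in seconds; "bigint" means the value is already a timestamp in seconds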
|
||||
try {
|
||||
if ("bigint".equalsIgnoreCase(timeFormat)) {
|
||||
return Integer.valueOf(v);
|
||||
}
|
||||
|
||||
DateFormat sdf = new SimpleDateFormat(timeFormat);
|
||||
Date date = sdf.parse(v);
|
||||
return (int)(date.getTime()/1000);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Format time failed!", e);
|
||||
}
|
||||
return (int) (new Date().getTime() / 1000);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startWrite(RecordReceiver recordReceiver) {
|
||||
info(LOG, "start to write.....................");
|
||||
// route records to shards by hash key
|
||||
if (isHashKey) {
|
||||
processDataWithHashKey(recordReceiver);
|
||||
} else {
|
||||
processDataWithoutHashKey(recordReceiver);
|
||||
}
|
||||
info(LOG, "finish to write.........");
|
||||
}
|
||||
|
||||
private void processDataWithHashKey(RecordReceiver receiver) {
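// buffer records per shard hash key and flush each buffer to its shard once it reaches batchSize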
|
||||
Record record;
|
||||
Map<String, List<LogItem>> logMap = new HashMap<String, List<LogItem>>(shards.size());
|
||||
int count = 0;
|
||||
try {
|
||||
while ((record = receiver.getFromReader()) != null) {
|
||||
LogItem logItem = new LogItem();
|
||||
if (record.getColumnNumber() != columnList.size()) {
|
||||
this.getTaskPluginCollector().collectDirtyRecord(record, "column not match");
|
||||
}
|
||||
|
||||
String id = "";
|
||||
for (int i = 0; i < record.getColumnNumber(); i++) {
|
||||
String colName = columnList.get(i);
|
||||
String colValue = record.getColumn(i).asString();
|
||||
if (colName.endsWith("_id")) {
|
||||
id = colValue;
|
||||
}
|
||||
|
||||
logItem.PushBack(colName, colValue);
|
||||
if (colName.equals(timeCol)) {
|
||||
logItem.SetTime(getTime(colValue));
|
||||
}
|
||||
}
|
||||
|
||||
String hashKey = getShardHashKey(StrUtil.getMd5(id), shards);
|
||||
if (!logMap.containsKey(hashKey)) {
|
||||
info(LOG, "Hash key:" + hashKey);
|
||||
logMap.put(hashKey, new ArrayList<LogItem>());
|
||||
}
|
||||
logMap.get(hashKey).add(logItem);
|
||||
|
||||
if (logMap.get(hashKey).size() % batchSize == 0) {
|
||||
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logMap.get(hashKey), hashKey);
|
||||
PutLogsResponse response = putLog(request);
|
||||
count += logMap.get(hashKey).size();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("record count:{}, request id:{}", logMap.get(hashKey).size(), response.GetRequestId());
|
||||
}
|
||||
logMap.get(hashKey).clear();
|
||||
}
|
||||
}
|
||||
|
||||
for (Map.Entry<String, List<LogItem>> entry : logMap.entrySet()) {
|
||||
if (!entry.getValue().isEmpty()) {
|
||||
// send the remaining buffered records
|
||||
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, entry.getValue(), entry.getKey());
|
||||
PutLogsResponse response = putLog(request);
|
||||
count += entry.getValue().size();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("record count:{}, request id:{}", entry.getValue().size(), response.GetRequestId());
|
||||
}
|
||||
entry.getValue().clear();
|
||||
}
|
||||
}
|
||||
LOG.info("{} records have been sent", count);
|
||||
} catch (LogException ex) {
|
||||
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private void processDataWithoutHashKey(RecordReceiver receiver) {
|
||||
Record record;
|
||||
ArrayList<LogItem> logGroup = new ArrayList<LogItem>();
|
||||
int count = 0;
|
||||
try {
|
||||
while ((record = receiver.getFromReader()) != null) {
|
||||
LogItem logItem = new LogItem();
|
||||
if(record.getColumnNumber() != columnList.size()){
|
||||
this.getTaskPluginCollector().collectDirtyRecord(record,"column not match");
|
||||
}
|
||||
for (int i = 0; i < record.getColumnNumber(); i++) {
|
||||
String colName = columnList.get(i);
|
||||
String colValue = record.getColumn(i).asString();
|
||||
logItem.PushBack(colName, colValue);
|
||||
if(colName.equals(timeCol)){
|
||||
logItem.SetTime(getTime(colValue));
|
||||
}
|
||||
}
|
||||
|
||||
logGroup.add(logItem);
|
||||
count++;
|
||||
if (count % batchSize == 0) {
|
||||
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
|
||||
PutLogsResponse response = putLog(request);
|
||||
logGroup.clear();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!logGroup.isEmpty()) {
|
||||
// send the remaining buffered records
|
||||
PutLogsRequest request = new PutLogsRequest(project, logStore, topic, source, logGroup);
|
||||
PutLogsResponse response = putLog(request);
|
||||
logGroup.clear();
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("record count:{}, request id:{}", count, response.GetRequestId());
|
||||
}
|
||||
}
|
||||
LOG.info("{} records have been sent", count);
|
||||
} catch (LogException ex) {
|
||||
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, ex.getMessage(), ex);
|
||||
} catch (Exception e) {
|
||||
throw DataXException.asDataXException(LogHubWriterErrorCode.LOG_HUB_ERROR, e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private PutLogsResponse putLog(final PutLogsRequest request) throws Exception{
|
||||
final Client client = this.logHubClient;
|
||||
|
||||
return RetryUtil.executeWithRetry(new Callable<PutLogsResponse>() {
|
||||
public PutLogsResponse call() throws LogException{
|
||||
return client.PutLogs(request);
|
||||
}
|
||||
}, 3, 1000L, false);
|
||||
}
|
||||
|
||||
private String getShardHashKey(String hashKey, List<Shard> shards) {
|
||||
for (Shard shard : shards) {
|
||||
if (hashKey.compareTo(shard.getExclusiveEndKey()) < 0 && hashKey.compareTo(shard.getInclusiveBeginKey()) >= 0) {
|
||||
return shard.getInclusiveBeginKey();
|
||||
}
|
||||
}
|
||||
return shards.get(0).getInclusiveBeginKey();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void post() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Log the message only when the INFO level is enabled
 *
 * @param logger
 * @param message
 */
|
||||
public static void info(Logger logger, String message) {
|
||||
if (logger.isInfoEnabled()) {
|
||||
logger.info(message);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
package com.alibaba.datax.plugin.writer.loghubwriter;
|
||||
|
||||
import com.alibaba.datax.common.spi.ErrorCode;
|
||||
|
||||
public enum LogHubWriterErrorCode implements ErrorCode {
|
||||
BAD_CONFIG_VALUE("LogHubWriter-00", "The value you configured is invalid."),
|
||||
LOG_HUB_ERROR("LogHubWriter-01","LogHub access encounter exception"),
|
||||
REQUIRE_VALUE("LogHubWriter-02","Missing parameters");
|
||||
|
||||
private final String code;
|
||||
private final String description;
|
||||
|
||||
private LogHubWriterErrorCode(String code, String description) {
|
||||
this.code = code;
|
||||
this.description = description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCode() {
|
||||
return this.code;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Code:[%s], Description:[%s]. ", this.code,
|
||||
this.description);
|
||||
}
|
||||
}
|
6
loghubwriter/src/main/resources/plugin.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"name": "loghubwriter",
|
||||
"class": "com.alibaba.datax.plugin.writer.loghubwriter.LogHubWriter",
|
||||
"description": "适用于: 将数据导入到SLS LogHub中",
|
||||
"developer": "alibaba"
|
||||
}
|
13
loghubwriter/src/main/resources/plugin_job_template.json
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "loghubwriter",
|
||||
"parameter": {
|
||||
"endpoint": "",
|
||||
"accessId": "",
|
||||
"accessKey": "",
|
||||
"project": "",
|
||||
"logstore": "",
|
||||
"topic": "",
|
||||
"batchSize":1024,
|
||||
"column": []
|
||||
}
|
||||
}
|
@ -197,9 +197,9 @@ MysqlReader插件实现了从Mysql读取数据。在底层实现上,MysqlReade
|
||||
|
||||
* **querySql**
|
||||
|
||||
* 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置型来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略column这些配置型,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select a,b from table_a join table_b on table_a.id = table_b.id <br />
|
||||
* 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置型来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略table,column这些配置型,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select a,b from table_a join table_b on table_a.id = table_b.id <br />
|
||||
|
||||
`当用户配置querySql时,MysqlReader直接忽略column、where条件的配置`,querySql优先级大于column、where选项。querySql和table不能同时存在
|
||||
`当用户配置querySql时,MysqlReader直接忽略table、column、where条件的配置`,querySql优先级大于table、column、where选项。
|
||||
|
||||
* 必选:否 <br />
|
||||
|
||||
|
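A minimal reader snippet illustrating the querySql mode described above (a sketch only: the jdbcUrl, credentials and SQL are placeholder values, not part of this commit):

```json
{
    "reader": {
        "name": "mysqlreader",
        "parameter": {
            "username": "xxx",
            "password": "xxx",
            "connection": [
                {
                    "querySql": [
                        "select a,b from table_a join table_b on table_a.id = table_b.id"
                    ],
                    "jdbcUrl": ["jdbc:mysql://127.0.0.1:3306/database"]
                }
            ]
        }
    }
}
```

As the note above states, once querySql is set, the table, column and where settings in the same connection are ignored.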
@ -1,6 +1,5 @@
|
||||
package com.alibaba.datax.plugin.reader.oceanbasev10reader.ext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.alibaba.datax.common.constant.CommonConstant;
|
||||
@ -27,7 +26,7 @@ public class ReaderJob extends CommonRdbmsReader.Job {
|
||||
public void init(Configuration originalConfig) {
|
||||
// escape database keywords in the column and table entries of the config
|
||||
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
|
||||
ObReaderUtils.escapeDatabaseKeywords(columns);
|
||||
ObReaderUtils.escapeDatabaseKeyword(columns);
|
||||
originalConfig.set(Key.COLUMN, columns);
|
||||
|
||||
List<JSONObject> conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class);
|
||||
@ -38,7 +37,7 @@ public class ReaderJob extends CommonRdbmsReader.Job {
|
||||
|
||||
// tables will be null when querySql is configured
|
||||
if (tables != null) {
|
||||
ObReaderUtils.escapeDatabaseKeywords(tables);
|
||||
ObReaderUtils.escapeDatabaseKeyword(tables);
|
||||
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE),
|
||||
tables);
|
||||
}
|
||||
@ -79,7 +78,8 @@ public class ReaderJob extends CommonRdbmsReader.Job {
|
||||
final String obJdbcDelimiter = com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING;
|
||||
if (jdbcUrl.startsWith(obJdbcDelimiter)) {
|
||||
String[] ss = jdbcUrl.split(obJdbcDelimiter);
|
||||
if (ss.length >= 2) {
|
||||
int elementCount = 2;
|
||||
if (ss.length >= elementCount) {
|
||||
String tenant = ss[1].trim();
|
||||
String[] sss = tenant.split(":");
|
||||
return sss[0];
|
||||
|
@ -37,12 +37,15 @@ public class ObReaderUtils {

public static final DataBaseType databaseType = DataBaseType.OceanBase;

private static final String TABLE_SCHEMA_DELIMITER = ".";

private static final Pattern JDBC_PATTERN = Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)");

private static Set<String> keywordsFromString2HashSet(final String keywords) {
return new HashSet(Arrays.asList(keywords.split(",")));
}

public static String escapeDatabaseKeywords(String keyword) {
public static String escapeDatabaseKeyword(String keyword) {
if (databaseKeywords == null) {
if (isOracleMode(compatibleMode)) {
databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS);
@ -57,10 +60,10 @@ public class ObReaderUtils {
return keyword;
}

public static void escapeDatabaseKeywords(List<String> ids) {
public static void escapeDatabaseKeyword(List<String> ids) {
if (ids != null && ids.size() > 0) {
for (int i = 0; i < ids.size(); i++) {
ids.set(i, escapeDatabaseKeywords(ids.get(i)));
ids.set(i, escapeDatabaseKeyword(ids.get(i)));
}
}
}
@ -144,7 +147,7 @@ public class ObReaderUtils {
if (isOracleMode(context.getCompatibleMode())) {
tableName = tableName.toUpperCase();
String schema;
if (tableName.contains(".")) {
if (tableName.contains(TABLE_SCHEMA_DELIMITER)) {
schema = String.format("'%s'", tableName.substring(0, tableName.indexOf(".")));
tableName = tableName.substring(tableName.indexOf(".") + 1);
} else {
@ -170,7 +173,7 @@ public class ObReaderUtils {
while (rs.next()) {
hasPk = true;
String columnName = rs.getString("Column_name");
columnName = escapeDatabaseKeywords(columnName);
columnName = escapeDatabaseKeyword(columnName);
if (!realIndex.contains(columnName)) {
realIndex.add(columnName);
}
@ -462,7 +465,7 @@ public class ObReaderUtils {
if (isOracleMode(compatibleMode)) {
String schema;
tableName = tableName.toUpperCase();
if (tableName.contains(".")) {
if (tableName.contains(TABLE_SCHEMA_DELIMITER)) {
schema = String.format("'%s'", tableName.substring(0, tableName.indexOf(".")));
tableName = tableName.substring(tableName.indexOf(".") + 1);
} else {
@ -513,7 +516,7 @@ public class ObReaderUtils {
Iterator<Map.Entry<String, List<String>>> iterator = allIndex.entrySet().iterator();
while (iterator.hasNext()) {
Map.Entry<String, List<String>> entry = iterator.next();
if (entry.getKey().equals("PRIMARY")) {
if ("PRIMARY".equals(entry.getKey())) {
continue;
}

@ -770,9 +773,7 @@ public class ObReaderUtils {
}

public static String getDbNameFromJdbcUrl(String jdbcUrl) {
final Pattern pattern = Pattern.compile("jdbc:(oceanbase|mysql)://([\\w\\.-]+:\\d+)/([\\w\\.-]+)");

Matcher matcher = pattern.matcher(jdbcUrl);
Matcher matcher = JDBC_PATTERN.matcher(jdbcUrl);
if (matcher.find()) {
return matcher.group(3);
} else {
@ -814,18 +815,52 @@ public class ObReaderUtils {
if (version1 == null || version2 == null) {
throw new RuntimeException("can not compare null version");
}
ObVersion v1 = new ObVersion(version1);
ObVersion v2 = new ObVersion(version2);
return v1.compareTo(v2);
}

String[] ver1Part = version1.split("\\.");
String[] ver2Part = version2.split("\\.");
for (int i = 0; i < ver1Part.length; i++) {
int v1 = Integer.parseInt(ver1Part[i]), v2 = Integer.parseInt(ver2Part[i]);
if (v1 > v2) {
return 1;
} else if (v1 < v2) {
return -1;
/**
*
* @param conn
* @param sql
* @return
*/
public static List<String> getResultsFromSql(Connection conn, String sql) {
List<String> list = new ArrayList();
Statement stmt = null;
ResultSet rs = null;

LOG.info("executing sql: " + sql);

try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql);
while (rs.next()) {
list.add(rs.getString(1));
}
} catch (Exception e) {
LOG.error("error when executing sql: " + e.getMessage());
} finally {
DBUtil.closeDBResources(rs, stmt, null);
}

return 0;
return list;
}

/**
* get obversion, try ob_version first, and then try version if failed
* @param conn
* @return
*/
public static ObVersion getObVersion(Connection conn) {
List<String> results = getResultsFromSql(conn, "select ob_version()");
if (results.size() == 0) {
results = getResultsFromSql(conn, "select version()");
}
ObVersion obVersion = new ObVersion(results.get(0));

LOG.info("obVersion: " + obVersion);
return obVersion;
}
}
@ -0,0 +1,86 @@
package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* @author johnrobbet
*/
public class ObVersion implements Comparable<ObVersion> {

private static final Logger LOG = LoggerFactory.getLogger(ObVersion.class);

private int majorVersion;
private int minorVersion;
private int releaseNumber;
private int patchNumber;

public static final ObVersion V2276 = valueOf("2.2.76");
private static final ObVersion DEFAULT_VERSION =
valueOf(System.getProperty("defaultObVersion","3.2.3.0"));

private static final int VERSION_PART_COUNT = 4;

public ObVersion(String version) {
try {
String[] versionParts = version.split("\\.");
majorVersion = Integer.valueOf(versionParts[0]);
minorVersion = Integer.valueOf(versionParts[1]);
releaseNumber = Integer.valueOf(versionParts[2]);
int tempPatchNum = 0;
if (versionParts.length == VERSION_PART_COUNT) {
try {
tempPatchNum = Integer.valueOf(versionParts[3]);
} catch (Exception e) {
LOG.warn("fail to parse ob version: " + e.getMessage());
}
}
patchNumber = tempPatchNum;
} catch (Exception ex) {
LOG.warn("fail to get ob version, using default {} {}",
DEFAULT_VERSION, ex.getMessage());
majorVersion = DEFAULT_VERSION.majorVersion;
minorVersion = DEFAULT_VERSION.minorVersion;
releaseNumber = DEFAULT_VERSION.releaseNumber;
patchNumber = DEFAULT_VERSION.patchNumber;
}
}

public static ObVersion valueOf(String version) {
return new ObVersion(version);
}

@Override
public int compareTo(ObVersion o) {
if (this.majorVersion > o.majorVersion) {
return 1;
} else if (this.majorVersion < o.majorVersion) {
return -1;
}

if (this.minorVersion > o.minorVersion) {
return 1;
} else if (this.minorVersion < o.minorVersion) {
return -1;
}

if (this.releaseNumber > o.releaseNumber) {
return 1;
} else if (this.releaseNumber < o.releaseNumber) {
return -1;
}

if (this.patchNumber > o.patchNumber) {
return 1;
} else if (this.patchNumber < o.patchNumber) {
return -1;
}

return 0;
}

@Override
public String toString() {
return String.format("%d.%d.%d.%d", majorVersion, minorVersion, releaseNumber, patchNumber);
}
}
@ -5,8 +5,13 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
*/

public enum PartType {
// Non partitioned table
NONPARTITION("NONPARTITION"),

// Partitioned table
PARTITION("PARTITION"),

// Subpartitioned table
SUBPARTITION("SUBPARTITION");

private String typeString;
@ -3,7 +3,6 @@ package com.alibaba.datax.plugin.reader.oceanbasev10reader.util;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import com.alibaba.datax.plugin.rdbms.reader.util.HintUtil;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.reader.oceanbasev10reader.ext.ObReaderKey;
@ -11,8 +10,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

@ -22,12 +19,76 @@ import java.util.List;
public class PartitionSplitUtil {
private static final Logger LOG = LoggerFactory.getLogger(PartitionSplitUtil.class);

private static final String ORACLE_GET_SUBPART_TEMPLATE =
"select subpartition_name "
+ "from dba_tab_subpartitions "
+ "where table_name = '%s' and table_owner = '%s'";

private static final String ORACLE_GET_PART_TEMPLATE =
"select partition_name "
+ "from dba_tab_partitions "
+ "where table_name = '%s' and table_owner = '%s'";

private static final String MYSQL_GET_PART_TEMPLATE =
"select p.part_name "
+ "from oceanbase.__all_part p, oceanbase.%s t, oceanbase.__all_database d "
+ "where p.table_id = t.table_id "
+ "and d.database_id = t.database_id "
+ "and d.database_name = '%s' "
+ "and t.table_name = '%s'";

private static final String MYSQL_GET_SUBPART_TEMPLATE =
"select p.sub_part_name "
+ "from oceanbase.__all_sub_part p, oceanbase.%s t, oceanbase.__all_database d "
+ "where p.table_id = t.table_id "
+ "and d.database_id = t.database_id "
+ "and d.database_name = '%s' "
+ "and t.table_name = '%s'";

/**
* get partition info from data dictionary in ob oracle mode
* @param config
* @param tableName
* @return
*/
public static PartInfo getObOraclePartInfoBySQL(Configuration config, String tableName) {
PartInfo partInfo;
DataBaseType dbType = ObReaderUtils.databaseType;
String jdbcUrl = config.getString(Key.JDBC_URL);
String username = config.getString(Key.USERNAME);
String password = config.getString(Key.PASSWORD);
String dbname = ObReaderUtils.getDbNameFromJdbcUrl(jdbcUrl).toUpperCase();
Connection conn = DBUtil.getConnection(dbType, jdbcUrl, username, password);
tableName = tableName.toUpperCase();

// check if the table has subpartitions or not
String getSubPartSql = String.format(ORACLE_GET_SUBPART_TEMPLATE, tableName, dbname);
List<String> partList = ObReaderUtils.getResultsFromSql(conn, getSubPartSql);
if (partList != null && partList.size() > 0) {
partInfo = new PartInfo(PartType.SUBPARTITION);
partInfo.addPart(partList);
return partInfo;
}

String getPartSql = String.format(ORACLE_GET_PART_TEMPLATE, tableName, dbname);
partList = ObReaderUtils.getResultsFromSql(conn, getPartSql);
if (partList != null && partList.size() > 0) {
partInfo = new PartInfo(PartType.PARTITION);
partInfo.addPart(partList);
return partInfo;
}

// table is not partitioned
partInfo = new PartInfo(PartType.NONPARTITION);
return partInfo;
}

public static List<Configuration> splitByPartition (Configuration configuration) {
List<Configuration> allSlices = new ArrayList<>();
List<Object> conns = configuration.getList(Constant.CONN_MARK, Object.class);
for (int i = 0, len = conns.size(); i < len; i++) {
List<Object> connections = configuration.getList(Constant.CONN_MARK, Object.class);
for (int i = 0, len = connections.size(); i < len; i++) {
Configuration sliceConfig = configuration.clone();
Configuration connConf = Configuration.from(conns.get(i).toString());
Configuration connConf = Configuration.from(connections.get(i).toString());
String jdbcUrl = connConf.getString(Key.JDBC_URL);
sliceConfig.set(Key.JDBC_URL, jdbcUrl);
sliceConfig.remove(Constant.CONN_MARK);
@ -64,7 +125,7 @@ public class PartitionSplitUtil {
slices.add(slice);
}
} else {
LOG.info("fail to get table part info or table is not partitioned, proceed as non-partitioned table.");
LOG.info("table is not partitioned.");

Configuration slice = configuration.clone();
slice.set(Key.QUERY_SQL, ObReaderUtils.buildQuerySql(weakRead, column, table, where));
@ -74,7 +135,16 @@ public class PartitionSplitUtil {
return slices;
}

private static PartInfo getObPartInfoBySQL(Configuration config, String table) {
public static PartInfo getObPartInfoBySQL(Configuration config, String table) {
boolean isOracleMode = config.getString(ObReaderKey.OB_COMPATIBILITY_MODE).equals("ORACLE");
if (isOracleMode) {
return getObOraclePartInfoBySQL(config, table);
} else {
return getObMySQLPartInfoBySQL(config, table);
}
}

public static PartInfo getObMySQLPartInfoBySQL(Configuration config, String table) {
PartInfo partInfo = new PartInfo(PartType.NONPARTITION);
List<String> partList;
Connection conn = null;
@ -86,45 +156,22 @@ public class PartitionSplitUtil {
String allTable = "__all_table";

conn = DBUtil.getConnection(DataBaseType.OceanBase, jdbcUrl, username, password);
String obVersion = getResultsFromSql(conn, "select version()").get(0);

LOG.info("obVersion: " + obVersion);

if (ObReaderUtils.compareObVersion("2.2.76", obVersion) < 0) {
ObVersion obVersion = ObReaderUtils.getObVersion(conn);
if (obVersion.compareTo(ObVersion.V2276) >= 0) {
allTable = "__all_table_v2";
}

String queryPart = String.format(
"select p.part_name " +
"from oceanbase.__all_part p, oceanbase.%s t, oceanbase.__all_database d " +
"where p.table_id = t.table_id " +
"and d.database_id = t.database_id " +
"and d.database_name = '%s' " +
"and t.table_name = '%s'", allTable, dbname, table);
String querySubPart = String.format(
"select p.sub_part_name " +
"from oceanbase.__all_sub_part p, oceanbase.%s t, oceanbase.__all_database d " +
"where p.table_id = t.table_id " +
"and d.database_id = t.database_id " +
"and d.database_name = '%s' " +
"and t.table_name = '%s'", allTable, dbname, table);
if (config.getString(ObReaderKey.OB_COMPATIBILITY_MODE).equals("ORACLE")) {
queryPart = String.format(
"select partition_name from all_tab_partitions where TABLE_OWNER = '%s' and table_name = '%s'",
dbname.toUpperCase(), table.toUpperCase());
querySubPart = String.format(
"select subpartition_name from all_tab_subpartitions where TABLE_OWNER = '%s' and table_name = '%s'",
dbname.toUpperCase(), table.toUpperCase());
}
String querySubPart = String.format(MYSQL_GET_SUBPART_TEMPLATE, allTable, dbname, table);

PartType partType = PartType.SUBPARTITION;

// try subpartition first
partList = getResultsFromSql(conn, querySubPart);
partList = ObReaderUtils.getResultsFromSql(conn, querySubPart);

// if table is not sub-partitioned, the try partition
if (partList.isEmpty()) {
partList = getResultsFromSql(conn, queryPart);
String queryPart = String.format(MYSQL_GET_PART_TEMPLATE, allTable, dbname, table);
partList = ObReaderUtils.getResultsFromSql(conn, queryPart);
partType = PartType.PARTITION;
}

@ -140,26 +187,4 @@ public class PartitionSplitUtil {

return partInfo;
}

private static List<String> getResultsFromSql(Connection conn, String sql) {
List<String> list = new ArrayList();
Statement stmt = null;
ResultSet rs = null;

LOG.info("executing sql: " + sql);

try {
stmt = conn.createStatement();
rs = stmt.executeQuery(sql);
while (rs.next()) {
list.add(rs.getString(1));
}
} catch (Exception e) {
LOG.error("error when executing sql: " + e.getMessage());
} finally {
DBUtil.closeDBResources(rs, stmt, null);
}

return list;
}
}
@ -19,6 +19,15 @@ public class TaskContext {
private boolean weakRead = true;
private String userSavePoint;
private String compatibleMode = ObReaderUtils.OB_COMPATIBLE_MODE_MYSQL;

public String getPartitionName() {
return partitionName;
}

public void setPartitionName(String partitionName) {
this.partitionName = partitionName;
}

private String partitionName;

// 断点续读的保存点
@ -165,12 +174,4 @@ public class TaskContext {
public void setCompatibleMode(String compatibleMode) {
this.compatibleMode = compatibleMode;
}

public String getPartitionName() {
return partitionName;
}

public void setPartitionName(String partitionName) {
this.partitionName = partitionName;
}
}
@ -18,5 +18,7 @@ public class ObReaderUtilsTest {
assert ObReaderUtils.compareObVersion("2.2.70", "2.2.50") == 1;
assert ObReaderUtils.compareObVersion("2.2.70", "3.1.2") == -1;
assert ObReaderUtils.compareObVersion("3.1.2", "3.1.2") == 0;
assert ObReaderUtils.compareObVersion("3.2.3.0", "3.2.3.0") == 0;
assert ObReaderUtils.compareObVersion("3.2.3.0-CE", "3.2.3.0") == 0;
}
}
@ -61,7 +61,7 @@ public class OceanBaseV10Writer extends Writer {
checkCompatibleMode(originalConfig);
//将config中的column和table中的关键字进行转义
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
ObWriterUtils.escapeDatabaseKeywords(columns);
ObWriterUtils.escapeDatabaseKeyword(columns);
originalConfig.set(Key.COLUMN, columns);

List<JSONObject> conns = originalConfig.getList(Constant.CONN_MARK, JSONObject.class);
@ -69,7 +69,7 @@ public class OceanBaseV10Writer extends Writer {
JSONObject conn = conns.get(i);
Configuration connConfig = Configuration.from(conn.toString());
List<String> tables = connConfig.getList(Key.TABLE, String.class);
ObWriterUtils.escapeDatabaseKeywords(tables);
ObWriterUtils.escapeDatabaseKeyword(tables);
originalConfig.set(String.format("%s[%d].%s", Constant.CONN_MARK, i, Key.TABLE), tables);
}
this.commonJob = new CommonRdbmsWriter.Job(DATABASE_TYPE);
@ -25,7 +25,7 @@ public class ObWriterUtils {
return new HashSet(Arrays.asList(keywords.split(",")));
}

public static String escapeDatabaseKeywords(String keyword) {
public static String escapeDatabaseKeyword(String keyword) {
if (databaseKeywords == null) {
if (isOracleMode()) {
databaseKeywords = keywordsFromString2HashSet(ORACLE_KEYWORDS);
@ -40,9 +40,9 @@ public class ObWriterUtils {
return keyword;
}

public static void escapeDatabaseKeywords(List<String> keywords) {
public static void escapeDatabaseKeyword(List<String> keywords) {
for (int i = 0; i < keywords.size(); i++) {
keywords.set(i, escapeDatabaseKeywords(keywords.get(i)));
keywords.set(i, escapeDatabaseKeyword(keywords.get(i)));
}
}
public static Boolean isEscapeMode(String keyword){
@ -159,7 +159,7 @@ public class ObWriterUtils {
while (rs.next()) {
String keyName = rs.getString("Key_name");
String columnName = rs.getString("Column_name");
columnName=escapeDatabaseKeywords(columnName);
columnName= escapeDatabaseKeyword(columnName);
if(!ObWriterUtils.isEscapeMode(columnName)){
columnName = columnName.toUpperCase();
}
@ -3,20 +3,6 @@ package com.alibaba.datax.plugin.reader.odpsreader;
public enum ColumnType {
PARTITION, NORMAL, CONSTANT, UNKNOWN, ;

@Override
public String toString() {
switch (this) {
case PARTITION:
return "partition";
case NORMAL:
return "normal";
case CONSTANT:
return "constant";
default:
return "unknown";
}
}

public static ColumnType asColumnType(String columnTypeString) {
if ("partition".equals(columnTypeString)) {
return PARTITION;
@ -0,0 +1,24 @@
package com.alibaba.datax.plugin.reader.odpsreader;

public class InternalColumnInfo {

private String columnName;

private ColumnType columnType;

public String getColumnName() {
return columnName;
}

public void setColumnName(String columnName) {
this.columnName = columnName;
}

public ColumnType getColumnType() {
return columnType;
}

public void setColumnType(ColumnType columnType) {
this.columnType = columnType;
}
}
@ -15,8 +15,6 @@ import com.aliyun.odps.TableSchema;
import com.aliyun.odps.tunnel.TableTunnel.DownloadSession;
import com.aliyun.odps.type.TypeInfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ -311,7 +309,7 @@ public class OdpsReader extends Reader {
*/
List<String> allPartitionColumns = this.originalConfig.getList(
Constant.PARTITION_COLUMNS, String.class);
List<Pair<String, ColumnType>> parsedColumns = OdpsUtil
List<InternalColumnInfo> parsedColumns = OdpsUtil
.parseColumns(allNormalColumns, allPartitionColumns,
userConfiguredColumns);

@ -320,13 +318,15 @@ public class OdpsReader extends Reader {
StringBuilder sb = new StringBuilder();
sb.append("[ ");
for (int i = 0, len = parsedColumns.size(); i < len; i++) {
Pair<String, ColumnType> pair = parsedColumns.get(i);
sb.append(String.format(" %s : %s", pair.getLeft(),
pair.getRight()));
InternalColumnInfo pair = parsedColumns.get(i);
sb.append(String.format(" %s : %s", pair.getColumnName(),
pair.getColumnType()));
if (i != len - 1) {
sb.append(",");
}
}

sb.append(" ]");
LOG.info("parsed column details: {} .", sb.toString());
}
@ -500,22 +500,11 @@ public class OdpsReader extends Reader {
}

try {
List<Configuration> parsedColumnsTmp = this.readerSliceConf
.getListConfiguration(Constant.PARSED_COLUMNS);
List<Pair<String, ColumnType>> parsedColumns = new ArrayList<Pair<String, ColumnType>>();
for (int i = 0; i < parsedColumnsTmp.size(); i++) {
Configuration eachColumnConfig = parsedColumnsTmp.get(i);
String columnName = eachColumnConfig.getString("left");
ColumnType columnType = ColumnType
.asColumnType(eachColumnConfig.getString("right"));
parsedColumns.add(new MutablePair<String, ColumnType>(
columnName, columnType));

}
List<InternalColumnInfo> parsedColumns = this.readerSliceConf.getListWithJson(Constant.PARSED_COLUMNS,
InternalColumnInfo.class);
ReaderProxy readerProxy = new ReaderProxy(recordSender, downloadSession,
columnTypeMap, parsedColumns, partition, this.isPartitionedTable,
start, count, this.isCompress, this.readerSliceConf);

readerProxy.doRead();
} catch (Exception e) {
throw DataXException.asDataXException(OdpsReaderErrorCode.READ_DATA_FAIL,
@ -17,7 +17,6 @@ import com.aliyun.odps.type.MapTypeInfo;
import com.aliyun.odps.type.TypeInfo;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ -34,7 +33,7 @@ public class ReaderProxy {
private RecordSender recordSender;
private TableTunnel.DownloadSession downloadSession;
private Map<String, TypeInfo> columnTypeMap;
private List<Pair<String, ColumnType>> parsedColumns;
private List<InternalColumnInfo> parsedColumns;
private String partition;
private boolean isPartitionTable;

@ -71,7 +70,7 @@ public class ReaderProxy {

public ReaderProxy(RecordSender recordSender, TableTunnel.DownloadSession downloadSession,
Map<String, TypeInfo> columnTypeMap,
List<Pair<String, ColumnType>> parsedColumns, String partition,
List<InternalColumnInfo> parsedColumns, String partition,
boolean isPartitionTable, long start, long count, boolean isCompress, Configuration taskConfig) {
this.recordSender = recordSender;
this.downloadSession = downloadSession;
@ -136,9 +135,9 @@ public class ReaderProxy {
// warn: for PARTITION||NORMAL columnTypeMap's key
// sets(columnName) is big than parsedColumns's left
// sets(columnName), always contain
for (Pair<String, ColumnType> pair : this.parsedColumns) {
String columnName = pair.getLeft();
switch (pair.getRight()) {
for (InternalColumnInfo pair : this.parsedColumns) {
String columnName = pair.getColumnName();
switch (pair.getColumnType()) {
case PARTITION:
String partitionColumnValue = this
.getPartitionColumnValue(partitionMap,
@ -7,6 +7,7 @@ import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.datax.plugin.reader.odpsreader.ColumnType;
import com.alibaba.datax.plugin.reader.odpsreader.Constant;
import com.alibaba.datax.plugin.reader.odpsreader.InternalColumnInfo;
import com.alibaba.datax.plugin.reader.odpsreader.Key;
import com.alibaba.datax.plugin.reader.odpsreader.OdpsReaderErrorCode;
import com.aliyun.odps.*;
@ -215,19 +216,18 @@ public final class OdpsUtil {
return userConfiguredPartitionClassification;
}

public static List<Pair<String, ColumnType>> parseColumns(
public static List<InternalColumnInfo> parseColumns(
List<String> allNormalColumns, List<String> allPartitionColumns,
List<String> userConfiguredColumns) {
List<Pair<String, ColumnType>> parsededColumns = new ArrayList<Pair<String, ColumnType>>();
List<InternalColumnInfo> parsededColumns = new ArrayList<InternalColumnInfo>();
// warn: upper & lower case
for (String column : userConfiguredColumns) {
MutablePair<String, ColumnType> pair = new MutablePair<String, ColumnType>();

InternalColumnInfo pair = new InternalColumnInfo();
// if constant column
if (OdpsUtil.checkIfConstantColumn(column)) {
// remove first and last '
pair.setLeft(column.substring(1, column.length() - 1));
pair.setRight(ColumnType.CONSTANT);
pair.setColumnName(column.substring(1, column.length() - 1));
pair.setColumnType(ColumnType.CONSTANT);
parsededColumns.add(pair);
continue;
}
@ -236,8 +236,8 @@ public final class OdpsUtil {
// repeated in partitioning columns
int index = OdpsUtil.indexOfIgnoreCase(allNormalColumns, column);
if (0 <= index) {
pair.setLeft(allNormalColumns.get(index));
pair.setRight(ColumnType.NORMAL);
pair.setColumnName(allNormalColumns.get(index));
pair.setColumnType(ColumnType.NORMAL);
parsededColumns.add(pair);
continue;
}
@ -245,8 +245,8 @@ public final class OdpsUtil {
// if partition column
index = OdpsUtil.indexOfIgnoreCase(allPartitionColumns, column);
if (0 <= index) {
pair.setLeft(allPartitionColumns.get(index));
pair.setRight(ColumnType.PARTITION);
pair.setColumnName(allPartitionColumns.get(index));
pair.setColumnType(ColumnType.PARTITION);
parsededColumns.add(pair);
continue;
}
@ -431,13 +431,13 @@ public final class OdpsUtil {
MESSAGE_SOURCE.message("odpsutil.12", tableName), e);
}

public static List<Column> getNormalColumns(List<Pair<String, ColumnType>> parsedColumns,
public static List<Column> getNormalColumns(List<InternalColumnInfo> parsedColumns,
Map<String, TypeInfo> columnTypeMap) {
List<Column> userConfigNormalColumns = new ArrayList<Column>();
Set<String> columnNameSet = new HashSet<String>();
for (Pair<String, ColumnType> columnInfo : parsedColumns) {
if (columnInfo.getValue() == ColumnType.NORMAL) {
String columnName = columnInfo.getKey();
for (InternalColumnInfo columnInfo : parsedColumns) {
if (columnInfo.getColumnType() == ColumnType.NORMAL) {
String columnName = columnInfo.getColumnName();
if (!columnNameSet.contains(columnName)) {
Column column = new Column(columnName, columnTypeMap.get(columnName));
userConfigNormalColumns.add(column);
@ -24,9 +24,6 @@
<httpclient.version>4.5</httpclient.version>
<commons-io.version>2.4</commons-io.version>

<!-- json -->
<fastjson.version>1.2.28</fastjson.version>

<!-- opentsdb -->
<opentsdb.version>2.3.2</opentsdb.version>

@ -94,7 +91,6 @@
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>

<!-- opentsdb -->
@ -6,5 +6,5 @@
"mechanism": "根据时间和 metric 直连底层 HBase 存储,从而 Scan 出符合条件的数据点",
"warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)"
},
"developer": "Benedict Jin"
"developer": "alibaba"
}
@ -41,11 +41,9 @@
</dependency>

<dependency>
<groupId>com.oracle</groupId>
<groupId>oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.3</version>
<scope>system</scope>
<systemPath>${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar</systemPath>
</dependency>

</dependencies>

@ -15,13 +15,6 @@
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/oraclereader</outputDirectory>
</fileSet>
<fileSet>
<directory>src/main/lib</directory>
<includes>
<include>ojdbc6-11.2.0.3.jar</include>
</includes>
<outputDirectory>plugin/reader/oraclereader/libs</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
Binary file not shown.
@ -39,11 +39,9 @@
</dependency>

<dependency>
<groupId>com.oracle</groupId>
<groupId>oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.3</version>
<scope>system</scope>
<systemPath>${basedir}/src/main/lib/ojdbc6-11.2.0.3.jar</systemPath>
</dependency>

</dependencies>

@ -16,13 +16,6 @@
</includes>
<outputDirectory>plugin/writer/oraclewriter</outputDirectory>
</fileSet>
<fileSet>
<directory>src/main/lib</directory>
<includes>
<include>ojdbc6-11.2.0.3.jar</include>
</includes>
<outputDirectory>plugin/writer/oraclewriter/libs</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
Binary file not shown.
@ -2,5 +2,5 @@
"name": "oscarwriter",
"class": "com.alibaba.datax.plugin.writer.oscarwriter.OscarWriter",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. warn: The more you know about the database, the less problems you encounter.",
"developer": "linjiayu"
"developer": "alibaba"
}
@ -1,179 +0,0 @@
#!/bin/usr/env python
#-*- coding: utf-8 -*-

from optparse import OptionParser
import sys
import json
import tabulate
import zlib
from ots2 import *

class ConsoleConfig:
def __init__(self, config_file):
f = open(config_file, 'r')
config = json.loads(f.read())
self.endpoint = str(config['endpoint'])
self.accessid = str(config['accessId'])
self.accesskey = str(config['accessKey'])
self.instance_name = str(config['instanceName'])
self.status_table = str(config['statusTable'])

self.ots = OTSClient(self.endpoint, self.accessid, self.accesskey, self.instance_name)

def describe_job(config, options):
'''
1. get job's description
2. get all job's checkpoints and check if it is done
'''
if not options.stream_id:
print "Error: Should set the stream id using '-s' or '--streamid'."
sys.exit(-1)

if not options.timestamp:
print "Error: Should set the timestamp using '-t' or '--timestamp'."
sys.exit(-1)

pk = [('StreamId', options.stream_id), ('StatusType', 'DataxJobDesc'), ('StatusValue', '%16d' % int(options.timestamp))]
consumed, pk, attrs, next_token = config.ots.get_row(config.status_table, pk, [], None, 1)
if not attrs:
print 'Stream job is not found.'
sys.exit(-1)

job_detail = parse_job_detail(attrs)
print '----------JobDescriptions----------'
print json.dumps(job_detail, indent=2)
print '-----------------------------------'

stream_checkpoints = _list_checkpoints(config, options.stream_id, int(options.timestamp))

cps_headers = ['ShardId', 'SendRecordCount', 'Checkpoint', 'SkipCount', 'Version']
table_content = []
for cp in stream_checkpoints:
table_content.append([cp['ShardId'], cp['SendRecordCount'], cp['Checkpoint'], cp['SkipCount'], cp['Version']])

print tabulate.tabulate(table_content, headers=cps_headers)

# check if stream job has finished
finished = True
if len(job_detail['ShardIds']) != len(stream_checkpoints):
finished = False

for cp in stream_checkpoints:
if cp['Version'] != job_detail['Version']:
finished = False

print '----------JobSummary----------'
print 'ShardsCount:', len(job_detail['ShardIds'])
print 'CheckPointsCount:', len(stream_checkpoints)
print 'JobStatus:', 'Finished' if finished else 'NotFinished'
print '------------------------------'

def _list_checkpoints(config, stream_id, timestamp):
start_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % timestamp)]
end_pk = [('StreamId', stream_id), ('StatusType', 'CheckpointForDataxReader'), ('StatusValue', '%16d' % (timestamp + 1))]

consumed_counter = CapacityUnit(0, 0)
columns_to_get = []
checkpoints = []
range_iter = config.ots.xget_range(
config.status_table, Direction.FORWARD,
start_pk, end_pk,
consumed_counter, columns_to_get, 100,
column_filter=None, max_version=1
)

rows = []
for (primary_key, attrs) in range_iter:
checkpoint = {}
for attr in attrs:
checkpoint[attr[0]] = attr[1]

if not checkpoint.has_key('SendRecordCount'):
checkpoint['SendRecordCount'] = 0
checkpoint['ShardId'] = primary_key[2][1].split('\t')[1]
checkpoints.append(checkpoint)

return checkpoints

def list_job(config, options):
'''
Two options:
1. list all jobs of stream
2. list all jobs and all streams
'''
consumed_counter = CapacityUnit(0, 0)

if options.stream_id:
start_pk = [('StreamId', options.stream_id), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)]
end_pk = [('StreamId', options.stream_id), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)]
else:
start_pk = [('StreamId', INF_MIN), ('StatusType', INF_MIN), ('StatusValue', INF_MIN)]
end_pk = [('StreamId', INF_MAX), ('StatusType', INF_MAX), ('StatusValue', INF_MAX)]

columns_to_get = []
range_iter = config.ots.xget_range(
config.status_table, Direction.FORWARD,
start_pk, end_pk,
consumed_counter, columns_to_get, None,
column_filter=None, max_version=1
)

rows = []
for (primary_key, attrs) in range_iter:
if primary_key[1][1] == 'DataxJobDesc':
job_detail = parse_job_detail(attrs)
rows.append([job_detail['TableName'], job_detail['JobStreamId'], job_detail['EndTime'], job_detail['StartTime'], job_detail['EndTime'], job_detail['Version']])

headers = ['TableName', 'JobStreamId', 'Timestamp', 'StartTime', 'EndTime', 'Version']
print tabulate.tabulate(rows, headers=headers)

def parse_job_detail(attrs):
job_details = {}
shard_ids_content = ''
for attr in attrs:
if attr[0].startswith('ShardIds_'):
shard_ids_content += attr[1]
else:
job_details[attr[0]] = attr[1]

shard_ids = json.loads(zlib.decompress(shard_ids_content))

if not job_details.has_key('Version'):
job_details['Version'] = ''

if not job_details.has_key('SkipCount'):
job_details['SkipCount'] = 0
job_details['ShardIds'] = shard_ids

return job_details

def parse_time(value):
try:
return int(value)
except Exception,e:
return int(time.mktime(time.strptime(value, '%Y-%m-%d %H:%M:%S')))

if __name__ == '__main__':
parser = OptionParser()
parser.add_option('-c', '--config', dest='config_file', help='path of config file', metavar='tablestore_streamreader_config.json')
parser.add_option('-a', '--action', dest='action', help='the action to do', choices = ['describe_job', 'list_job'], metavar='')
parser.add_option('-t', '--timestamp', dest='timestamp', help='the timestamp', metavar='')
parser.add_option('-s', '--streamid', dest='stream_id', help='the id of stream', metavar='')
parser.add_option('-d', '--shardid', dest='shard_id', help='the id of shard', metavar='')

options, args = parser.parse_args()

if not options.config_file:
print "Error: Should set the path of config file using '-c' or '--config'."
sys.exit(-1)

if not options.action:
print "Error: Should set the action using '-a' or '--action'."
sys.exit(-1)

console_config = ConsoleConfig(options.config_file)
if options.action == 'list_job':
list_job(console_config, options)
elif options.action == 'describe_job':
describe_job(console_config, options)
File diff suppressed because it is too large
56
package.xml
56
package.xml
@ -60,13 +60,6 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>db2reader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>postgresqlreader/target/datax/</directory>
<includes>
@ -103,13 +96,13 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>otsstreamreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>otsstreamreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>txtfilereader/target/datax/</directory>
<includes>
@ -215,6 +208,27 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>datahubreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>loghubreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>starrocksreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>

<!-- writer -->
<fileSet>
@ -448,5 +462,19 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>datahubwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>loghubwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
</fileSets>
</assembly>
@ -22,7 +22,8 @@ public enum DataBaseType {
ClickHouse("clickhouse", "ru.yandex.clickhouse.ClickHouseDriver"),
KingbaseES("kingbasees", "com.kingbase8.Driver"),
Oscar("oscar", "com.oscar.Driver"),
OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver");
OceanBase("oceanbase", "com.alipay.oceanbase.jdbc.Driver"),
StarRocks("starrocks", "com.mysql.jdbc.Driver");

private String typeName;
Some files were not shown because too many files have changed in this diff