Merge pull request #1 from alibaba/master

rebase
Chase 2019-07-19 17:28:38 +08:00 committed by GitHub
commit 3c21cc28c8
76 changed files with 5497 additions and 397 deletions

161
.gitignore vendored
View File

@ -1,8 +1,157 @@
/target/
# Created by .ignore support plugin (hsz.mobi)
.classpath
.project
.settings
.DS_Store
/logs/
.AppleDouble
.idea/
.LSOverride
Icon
._*
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
*.class
*.log
*.ctxt
.mtj.tmp/
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar
hs_err_pid*
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/dictionaries
.idea/**/shelf
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
.idea/**/gradle.xml
.idea/**/libraries
cmake-build-debug/
cmake-build-release/
.idea/**/mongoSettings.xml
*.iws
out/
.idea_modules/
atlassian-ide-plugin.xml
.idea/replstate.xml
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
.idea/httpRequests
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
!/.mvn/wrapper/maven-wrapper.jar
.idea
*.iml
out
gen
### Python template
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
*.manifest
*.spec
pip-log.txt
pip-delete-this-directory.txt
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
*.mo
*.pot
*.log
local_settings.py
db.sqlite3
instance/
.webassets-cache
.scrapy
docs/_build/
target/
.ipynb_checkpoints
.python-version
celerybeat-schedule
*.sage.py
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.spyderproject
.spyproject
.ropeproject
/site
.mypy_cache/
.metadata
bin/
tmp/
*.tmp
*.bak
*.swp
*~.nib
local.properties
.settings/
.loadpath
.recommenders
.externalToolBuilders/
*.launch
*.pydevproject
.cproject
.autotools
.factorypath
.buildpath
.target
.tern-project
.texlipse
.springBeans
.recommenders/
.cache-main
.scala_dependencies
.worksheet

View File

@ -48,12 +48,15 @@ DataX目前已经有了比较全面的插件体系主流的RDBMS数据库、N
| | Hbase0.94 | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hbase094xreader/doc/hbase094xreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hbase094xwriter/doc/hbase094xwriter.md)|
| | Hbase1.1 | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hbase11xreader/doc/hbase11xreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hbase11xwriter/doc/hbase11xwriter.md)|
| | Phoenix4.x | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hbase11xsqlreader/doc/hbase11xsqlreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hbase11xsqlwriter/doc/hbase11xsqlwriter.md)|
| | Phoenix5.x | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hbase20xsqlreader/doc/hbase20xsqlreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hbase20xsqlwriter/doc/hbase20xsqlwriter.md)|
| | MongoDB | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/mongoreader/doc/mongoreader.md), [Write](https://github.com/alibaba/DataX/blob/master/mongowriter/doc/mongowriter.md)|
| | Hive | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)|
| Unstructured data storage | TxtFile | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/txtfilereader/doc/txtfilereader.md), [Write](https://github.com/alibaba/DataX/blob/master/txtfilewriter/doc/txtfilewriter.md)|
| | FTP | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/ftpreader/doc/ftpreader.md), [Write](https://github.com/alibaba/DataX/blob/master/ftpwriter/doc/ftpwriter.md)|
| | HDFS | √ | √ |[Read](https://github.com/alibaba/DataX/blob/master/hdfsreader/doc/hdfsreader.md), [Write](https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md)|
| | Elasticsearch | | √ |[Write](https://github.com/alibaba/DataX/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)|
| Time-series databases | OpenTSDB | √ | |[Read](https://github.com/alibaba/DataX/blob/master/opentsdbreader/doc/opentsdbreader.md)|
| | TSDB | | √ |[Write](https://github.com/alibaba/DataX/blob/master/tsdbwriter/doc/tsdbhttpwriter.md)|
# I want to develop a new plugin
Please see the [DataX Plugin Development Guide](https://github.com/alibaba/DataX/blob/master/dataxPluginDev.md)
@ -105,7 +108,7 @@ This software is free to use under the Apache License [Apache license](https://g
9. Experience with big data products, cloud products, or middleware technical solutions is a plus.
````
DingTalk users, please scan the QR code below to join the discussion:
![DataX-OpenSource-Dingding](https://img.alicdn.com/tfs/TB1ZQuhIG6qK1RjSZFmXXX0PFXa-362-501.png)

View File

@ -18,6 +18,8 @@
<phoenix.version>4.11.0-HBase-1.1</phoenix.version>
<hadoop.version>2.7.1</hadoop.version>
<commons-codec.version>1.8</commons-codec.version>
<protobuf.version>3.2.0</protobuf.version>
<httpclient.version>4.4.1</httpclient.version>
</properties>
<dependencies>
@ -47,6 +49,11 @@
<artifactId>phoenix-core</artifactId>
<version>${phoenix.version}</version>
</dependency>
<dependency>
<groupId>org.apache.phoenix</groupId>
<artifactId>phoenix-queryserver-client</artifactId>
<version>${phoenix.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@ -58,6 +65,21 @@
<version>${commons-codec.version}</version>
</dependency>
<!-- httpclient begin -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- httpclient end -->
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
</dependency>
<!-- for test -->
<dependency>
<groupId>junit</groupId>

View File

@ -8,6 +8,7 @@ public final class Constant {
public static final boolean DEFAULT_LAST_COLUMN_IS_VERSION = false; // by default the last column is not a version column
public static final int DEFAULT_BATCH_ROW_COUNT = 256; // by default 256 rows are written per batch
public static final boolean DEFAULT_TRUNCATE = false; // by default the table is not truncated before the job starts
public static final boolean DEFAULT_USE_THIN_CLIENT = false; // by default the thin client is not used
public static final int TYPE_UNSIGNED_TINYINT = 11;
public static final int TYPE_UNSIGNED_SMALLINT = 13;

View File

@ -11,6 +11,7 @@ import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.schema.ColumnNotFoundException;
import org.apache.phoenix.schema.MetaDataClient;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.util.SchemaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -18,7 +19,11 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -28,6 +33,8 @@ import java.util.Map;
public class HbaseSQLHelper {
private static final Logger LOG = LoggerFactory.getLogger(HbaseSQLHelper.class);
public static ThinClientPTable ptable;
/**
* Parse the DataX configuration into the SQL writer configuration
*/
@ -53,6 +60,11 @@ public class HbaseSQLHelper {
return new Pair<String, String>(zkQuorum, znode);
}
public static Map<String, String> getThinConnectConfig(String hbaseCfgString) {
assert hbaseCfgString != null;
return JSON.parseObject(hbaseCfgString, new TypeReference<Map<String, String>>() {});
}
/**
* Validate the configuration
*/
@ -61,12 +73,12 @@ public class HbaseSQLHelper {
Connection conn = getJdbcConnection(cfg);
// Check the table: it must exist and be usable
checkTable(conn, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient());
// The columns given in the metadata configuration must already exist in the target table
PTable schema = null;
try {
schema = getTableSchema(conn, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient());
} catch (SQLException e) {
throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR,
"无法获取目的表" + cfg.getTableName() + "的元数据信息表可能不是SQL表或表名配置错误请检查您的配置 或者 联系 HBase 管理员.", e);
@ -97,7 +109,11 @@ public class HbaseSQLHelper {
Connection conn;
try {
Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
if (cfg.isThinClient()) {
conn = getThinClientJdbcConnection(cfg);
} else {
conn = DriverManager.getConnection(connStr);
}
conn.setAutoCommit(false);
} catch (Throwable e) {
throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR,
@ -107,6 +123,32 @@
return conn;
}
/**
* Create a thin-client JDBC connection
* @param cfg
* @return
* @throws SQLException
*/
public static Connection getThinClientJdbcConnection(HbaseSQLWriterConfig cfg) throws SQLException {
String connStr = cfg.getConnectionString();
LOG.info("Connecting to HBase cluster [" + connStr + "] use thin client ...");
Connection conn = DriverManager.getConnection(connStr, cfg.getUsername(), cfg.getPassword());
String userNamespaceQuery = "use " + cfg.getNamespace();
Statement statement = null;
try {
statement = conn.createStatement();
statement.executeUpdate(userNamespaceQuery);
return conn;
} catch (Exception e) {
throw DataXException.asDataXException(HbaseSQLWriterErrorCode.GET_HBASE_CONNECTION_ERROR,
"无法连接配置的namespace, 请检查配置 或者 联系 HBase 管理员.", e);
} finally {
if (statement != null) {
statement.close();
}
}
}
/**
* Get the metadata of a table
* @param conn the HBase SQL JDBC connection
@ -121,6 +163,70 @@
return mdc.updateCache(schemaName, tableName).getTable();
}
/**
* Get the metadata of a table
* @param conn
* @param namespace
* @param fullTableName
* @param isThinClient whether the thin client is used
* @return the table metadata
* @throws SQLException
*/
public static PTable getTableSchema(Connection conn, String namespace, String fullTableName, boolean isThinClient)
throws
SQLException {
LOG.info("Start to get table schema of namespace=" + namespace + " , fullTableName=" + fullTableName);
if (!isThinClient) {
return getTableSchema(conn, fullTableName);
} else {
if (ptable == null) {
ResultSet result = conn.getMetaData().getColumns(null, namespace, fullTableName, null);
try {
ThinClientPTable retTable = new ThinClientPTable();
retTable.setColTypeMap(parseColType(result));
ptable = retTable;
}finally {
if (result != null) {
result.close();
}
}
}
return ptable;
}
}
/**
* Parse the column types from the metadata result set
* @param rs
* @return
* @throws SQLException
*/
public static Map<String, ThinClientPTable.ThinClientPColumn> parseColType(ResultSet rs) throws SQLException {
Map<String, ThinClientPTable.ThinClientPColumn> cols = new HashMap<String, ThinClientPTable
.ThinClientPColumn>();
ResultSetMetaData md = rs.getMetaData();
int columnCount = md.getColumnCount();
while (rs.next()) {
String colName = null;
PDataType colType = null;
for (int i = 1; i <= columnCount; i++) {
if (md.getColumnLabel(i).equals("TYPE_NAME")) {
colType = PDataType.fromSqlTypeName((String) rs.getObject(i));
} else if (md.getColumnLabel(i).equals("COLUMN_NAME")) {
colName = (String) rs.getObject(i);
}
}
if (colType == null || colName == null) {
throw new SQLException("ColType or colName is null, colType : " + colType + " , colName : " + colName);
}
cols.put(colName, new ThinClientPTable.ThinClientPColumn(colName, colType));
}
return cols;
}
/**
* Truncate the table
*/
@ -148,6 +254,24 @@
}
}
/**
* Check the table
* @param conn
* @param namespace
* @param tableName
* @param isThinClient
* @throws DataXException
*/
public static void checkTable(Connection conn, String namespace, String tableName, boolean isThinClient)
throws DataXException {
if (!isThinClient) {
checkTable(conn, tableName);
} else {
// skip the table check when using the thin client
}
}
/**
* Check the table: it must exist and be enabled
*/

View File

@ -2,6 +2,7 @@ package com.alibaba.datax.plugin.writer.hbase11xsqlwriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.google.common.base.Strings;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Pair;
@ -9,6 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Map;
/**
* HBase SQL writer config
@ -30,6 +32,10 @@ public class HbaseSQLWriterConfig {
private NullModeType nullMode;
private int batchSize; // how many rows are written per batch
private boolean truncate; // whether to truncate the target table before the import starts
private boolean isThinClient;
private String namespace;
private String username;
private String password;
/**
* @return the original DataX configuration
@ -81,6 +87,22 @@ public class HbaseSQLWriterConfig {
return truncate;
}
public boolean isThinClient() {
return isThinClient;
}
public String getNamespace() {
return namespace;
}
public String getPassword() {
return password;
}
public String getUsername() {
return username;
}
/**
* @param dataxCfg
* @return
@ -100,6 +122,7 @@ public class HbaseSQLWriterConfig {
cfg.nullMode = NullModeType.getByTypeName(dataxCfg.getString(Key.NULL_MODE, Constant.DEFAULT_NULL_MODE));
cfg.batchSize = dataxCfg.getInt(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_ROW_COUNT);
cfg.truncate = dataxCfg.getBool(Key.TRUNCATE, Constant.DEFAULT_TRUNCATE);
cfg.isThinClient = dataxCfg.getBool(Key.THIN_CLIENT, Constant.DEFAULT_USE_THIN_CLIENT);
// 4. Log the parsed configuration
LOG.info("HBase SQL writer config parsed:" + cfg.toString());
@ -117,31 +140,52 @@
"读 Hbase 时需要配置hbaseConfig其内容为 Hbase 连接信息,请联系 Hbase PE 获取该信息.");
}
if (dataxCfg.getBool(Key.THIN_CLIENT, Constant.DEFAULT_USE_THIN_CLIENT)) {
Map<String, String> thinConnectConfig = HbaseSQLHelper.getThinConnectConfig(hbaseCfg);
String thinConnectStr = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_URL);
cfg.namespace = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_NAMESPACE);
cfg.username = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_USERNAME);
cfg.password = thinConnectConfig.get(Key.HBASE_THIN_CONNECT_PASSWORD);
if (Strings.isNullOrEmpty(thinConnectStr)) {
throw DataXException.asDataXException(
HbaseSQLWriterErrorCode.ILLEGAL_VALUE,
"thinClient=true的轻客户端模式下HBase的hbase.thin.connect.url配置不能为空请联系HBase PE获取该信息.");
}
if (Strings.isNullOrEmpty(cfg.namespace) || Strings.isNullOrEmpty(cfg.username) || Strings
.isNullOrEmpty(cfg.password)) {
throw DataXException.asDataXException(HbaseSQLWriterErrorCode.ILLEGAL_VALUE,
"thinClient=true的轻客户端模式下HBase的hbase.thin.connect.namespce|username|password配置不能为空请联系HBase "
+ "PE获取该信息.");
}
cfg.connectionString = thinConnectStr;
} else {
// Parse the ZooKeeper quorum and znode information
Pair<String, String> zkCfg;
try {
zkCfg = HbaseSQLHelper.getHbaseConfig(hbaseCfg);
} catch (Throwable t) {
// failed to parse the hbase config
throw DataXException.asDataXException(
HbaseSQLWriterErrorCode.REQUIRED_VALUE,
"解析hbaseConfig出错请确认您配置的hbaseConfig为合法的json数据格式内容正确.");
}
String zkQuorum = zkCfg.getFirst();
String znode = zkCfg.getSecond();
if (zkQuorum == null || zkQuorum.isEmpty()) {
throw DataXException.asDataXException(
HbaseSQLWriterErrorCode.ILLEGAL_VALUE,
"HBase的hbase.zookeeper.quorum配置不能为空请联系HBase PE获取该信息.");
}
if (znode == null || znode.isEmpty()) {
throw DataXException.asDataXException(
HbaseSQLWriterErrorCode.ILLEGAL_VALUE,
"HBase的zookeeper.znode.parent配置不能为空请联系HBase PE获取该信息.");
}
// Build the JDBC connection string used for SQL, format: jdbc:phoenix:zk_quorum:2181:/znode_parent
cfg.connectionString = "jdbc:phoenix:" + zkQuorum + ":2181:" + znode;
}
}
private static void parseTableConfig(HbaseSQLWriterConfig cfg, Configuration dataxCfg) {

View File

@ -157,12 +157,20 @@ public class HbaseSQLWriterTask {
private PreparedStatement createPreparedStatement() throws SQLException {
// Build the comma-separated list of column names: col1,col2,col3,...
StringBuilder columnNamesBuilder = new StringBuilder();
if (cfg.isThinClient()) {
for (String col : cfg.getColumns()) {
// the thin client does not use double quotes
columnNamesBuilder.append(col);
columnNamesBuilder.append(",");
}
} else {
for (String col : cfg.getColumns()) {
// double-quoted column names are not auto upper-cased; the user-configured case is preserved
columnNamesBuilder.append("\"");
columnNamesBuilder.append(col);
columnNamesBuilder.append("\"");
columnNamesBuilder.append(",");
}
}
columnNamesBuilder.setLength(columnNamesBuilder.length() - 1); // remove the trailing comma
String columnNames = columnNamesBuilder.toString();
@ -171,9 +179,13 @@ public class HbaseSQLWriterTask {
// Build the UPSERT template
String tableName = cfg.getTableName();
StringBuilder upsertBuilder = null;
if (cfg.isThinClient()) {
upsertBuilder = new StringBuilder("upsert into " + tableName + " (" + columnNames + " ) values (");
} else {
// a double-quoted table name is not auto upper-cased; the user-configured case is preserved
upsertBuilder = new StringBuilder("upsert into \"" + tableName + "\" (" + columnNames + " ) values (");
}
for (int i = 0; i < cfg.getColumns().size(); i++) {
upsertBuilder.append("?,");
}
@ -191,7 +203,8 @@ public class HbaseSQLWriterTask {
*/
private int[] getColumnSqlType(List<String> columnNames) throws SQLException {
int[] types = new int[numberOfColumnsToWrite];
PTable ptable = HbaseSQLHelper
.getTableSchema(connection, cfg.getNamespace(), cfg.getTableName(), cfg.isThinClient());
for (int i = 0; i < columnNames.size(); i++) {
String name = columnNames.get(i);
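To make the effect of this change concrete: for an illustrative table TEST with columns ID and NAME (placeholder names, not taken from the diff), the thin-client branch above would build a statement of the form `upsert into TEST (ID,NAME ) values (?,?)`, while the heavy-client branch keeps the double quotes and produces `upsert into "TEST" ("ID","NAME" ) values (?,?)`, so Phoenix preserves the user-configured case instead of upper-casing the identifiers.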

View File

@ -10,6 +10,10 @@ public final class Key {
public final static String HBASE_CONFIG = "hbaseConfig";
public final static String HBASE_ZK_QUORUM = HConstants.ZOOKEEPER_QUORUM;
public final static String HBASE_ZNODE_PARENT = HConstants.ZOOKEEPER_ZNODE_PARENT;
public final static String HBASE_THIN_CONNECT_URL = "hbase.thin.connect.url";
public final static String HBASE_THIN_CONNECT_NAMESPACE = "hbase.thin.connect.namespace";
public final static String HBASE_THIN_CONNECT_USERNAME = "hbase.thin.connect.username";
public final static String HBASE_THIN_CONNECT_PASSWORD = "hbase.thin.connect.password";
/**
* Required: name of the table the writer writes to
@ -34,6 +38,9 @@
*/
public static final String TRUNCATE = "truncate";
public static final String THIN_CLIENT = "thinClient";
/**
* Optional: maximum number of rows per batch write, default 100
*/
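For orientation, a minimal writer `parameter` sketch that exercises the thin-client keys defined above could look like the snippet below. This is an assumed example: the plugin name, table, columns, address and credentials are placeholders and are not taken from this commit.
```
{
    "name": "hbase11xsqlwriter",
    "parameter": {
        "table": "TEST_TABLE",
        "column": ["ID", "NAME"],
        "thinClient": true,
        "hbaseConfig": {
            "hbase.thin.connect.url": "jdbc:phoenix:thin:url=http://127.0.0.1:8765;serialization=PROTOBUF",
            "hbase.thin.connect.namespace": "TEST_NS",
            "hbase.thin.connect.username": "datax_user",
            "hbase.thin.connect.password": "datax_password"
        }
    }
}
```
HbaseSQLWriterConfig reads `hbaseConfig` as a JSON map via `getThinConnectConfig` and, when `thinClient` is true, requires `hbase.thin.connect.url`, `hbase.thin.connect.namespace`, `hbase.thin.connect.username` and `hbase.thin.connect.password` to be non-empty.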

View File

@ -0,0 +1,402 @@
package com.alibaba.datax.plugin.writer.hbase11xsqlwriter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.hbase.index.util.KeyValueBuilder;
import org.apache.phoenix.index.IndexMaintainer;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.schema.AmbiguousColumnException;
import org.apache.phoenix.schema.ColumnFamilyNotFoundException;
import org.apache.phoenix.schema.ColumnNotFoundException;
import org.apache.phoenix.schema.PColumn;
import org.apache.phoenix.schema.PColumnFamily;
import org.apache.phoenix.schema.PIndexState;
import org.apache.phoenix.schema.PName;
import org.apache.phoenix.schema.PRow;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.PTableKey;
import org.apache.phoenix.schema.PTableType;
import org.apache.phoenix.schema.RowKeySchema;
import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.types.PDataType;
import java.util.List;
import java.util.Map;
public class ThinClientPTable implements PTable {
private Map<String, ThinClientPColumn> colMap;
public void setColTypeMap(Map<String, ThinClientPColumn> colMap) {
this.colMap = colMap;
}
@Override
public long getTimeStamp() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public long getSequenceNumber() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public long getIndexDisableTimestamp() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getSchemaName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getTableName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getTenantId() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PTableType getType() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getPKName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public List<PColumn> getPKColumns() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public List<PColumn> getColumns() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public List<PColumnFamily> getColumnFamilies() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PColumnFamily getColumnFamily(byte[] bytes) throws ColumnFamilyNotFoundException {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PColumnFamily getColumnFamily(String s) throws ColumnFamilyNotFoundException {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PColumn getColumnForColumnName(String colname) throws ColumnNotFoundException, AmbiguousColumnException {
if (!colMap.containsKey(colname)) {
throw new ColumnNotFoundException("Col " + colname + " not found");
}
return colMap.get(colname);
}
@Override
public PColumn getColumnForColumnQualifier(byte[] bytes, byte[] bytes1)
throws ColumnNotFoundException, AmbiguousColumnException {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PColumn getPKColumn(String s) throws ColumnNotFoundException {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PRow newRow(KeyValueBuilder keyValueBuilder, long l, ImmutableBytesWritable immutableBytesWritable, boolean b,
byte[]... bytes) {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PRow newRow(KeyValueBuilder keyValueBuilder, ImmutableBytesWritable immutableBytesWritable, boolean b,
byte[]... bytes) {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int newKey(ImmutableBytesWritable immutableBytesWritable, byte[][] bytes) {
throw new UnsupportedOperationException("Not implement");
}
@Override
public RowKeySchema getRowKeySchema() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public Integer getBucketNum() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public List<PTable> getIndexes() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PIndexState getIndexState() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getParentName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getParentTableName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getParentSchemaName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public List<PName> getPhysicalNames() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getPhysicalName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isImmutableRows() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean getIndexMaintainers(ImmutableBytesWritable immutableBytesWritable,
PhoenixConnection phoenixConnection) {
throw new UnsupportedOperationException("Not implement");
}
@Override
public IndexMaintainer getIndexMaintainer(PTable pTable, PhoenixConnection phoenixConnection) {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getDefaultFamilyName() {
return null;
}
@Override
public boolean isWALDisabled() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isMultiTenant() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean getStoreNulls() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isTransactional() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public ViewType getViewType() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public String getViewStatement() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public Short getViewIndexId() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PTableKey getKey() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public IndexType getIndexType() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int getBaseColumnCount() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean rowKeyOrderOptimizable() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int getRowTimestampColPos() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public long getUpdateCacheFrequency() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isNamespaceMapped() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public String getAutoPartitionSeqName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isAppendOnlySchema() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public ImmutableStorageScheme getImmutableStorageScheme() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public QualifierEncodingScheme getEncodingScheme() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public EncodedCQCounter getEncodedCQCounter() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean useStatsForParallelization() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int getEstimatedSize() {
throw new UnsupportedOperationException("Not implement");
}
public static class ThinClientPColumn implements PColumn {
private String colName;
private PDataType pDataType;
public ThinClientPColumn(String colName, PDataType pDataType) {
this.colName = colName;
this.pDataType = pDataType;
}
@Override
public PName getName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PName getFamilyName() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int getPosition() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public Integer getArraySize() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public byte[] getViewConstant() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isViewReferenced() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public int getEstimatedSize() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public String getExpressionStr() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isRowTimestamp() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isDynamic() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public byte[] getColumnQualifierBytes() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public boolean isNullable() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public PDataType getDataType() {
return pDataType;
}
@Override
public Integer getMaxLength() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public Integer getScale() {
throw new UnsupportedOperationException("Not implement");
}
@Override
public SortOrder getSortOrder() {
throw new UnsupportedOperationException("Not implement");
}
}
}

View File

@ -0,0 +1,164 @@
# hbase20xsqlreader Plugin Documentation
___
## 1 Quick Introduction
The hbase20xsqlreader plugin reads data from Phoenix (HBase SQL); it targets HBase 2.X and Phoenix 5.X.
## 2 How It Works
In short, hbase20xsqlreader connects to the Phoenix QueryServer through the Phoenix thin client, generates SELECT statements from the user configuration, sends them to the QueryServer to read HBase data, assembles the returned results into DataX's abstract data set using DataX's own data types, and finally hands them to the downstream Writer.
## 3 Function Description
### 3.1 Sample Configuration
* Configure a job that extracts data from Phoenix to local output:
```
{
"job": {
"content": [
{
"reader": {
"name": "hbase20xsqlreader", //set the plugin to hbase20xsqlreader
"parameter": {
"queryServerAddress": "http://127.0.0.1:8765", //address of the Phoenix QueryServer to connect to
"serialization": "PROTOBUF", //serialization format used by the QueryServer
"table": "TEST", //name of the table to read
"column": ["ID", "NAME"], //columns to read
"splitKey": "ID" //split column, must be the table's primary key
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": "3"
}
}
}
}
```
### 3.2 Parameter Description
* **queryServerAddress**
* Description: hbase20xsqlreader connects to the Phoenix QueryServer through the Phoenix thin client, so the address of the QueryServer must be given here.
* Required: yes <br />
* Default: none <br />
* **serialization**
* Description: serialization protocol used by the QueryServer
* Required: no <br />
* Default: PROTOBUF <br />
* **table**
* Description: name of the table to read
* Required: yes <br />
* Default: none <br />
* **schema**
* Description: schema the table belongs to
* Required: no <br />
* Default: none <br />
* **column**
* Description: the set of column names to read from the Phoenix table, given as a JSON array; an empty value means all columns are read.
* Required: no <br />
* Default: all columns <br />
* **splitKey**
* Description: column used to split the table so it can be read in parallel. Two split strategies are supported: 1. split evenly across the configured number of channels based on the column's minimum and maximum values (only integer and string split columns are supported); 2. split according to the configured splitPoints.
* Required: yes <br />
* Default: none <br />
* **splitPoints**
* Description: splitting by the split column's min/max values cannot always avoid data hot spots, so splitKey also lets the user specify split points based on the data distribution (see the illustrative fragment after this list). It is recommended to choose split points from the Regions' startkey and endkey so that every query hits a single Region.
* Required: no <br />
* Default: none <br />
* **where**
* Description: an additional filter condition for the table query; every split carries this condition.
* Required: no <br />
* Default: none <br />
* **querySql**
* Description: one or more query statements may be specified, but they must return the same column types and column count. Users can supply hand-written single-table or multi-table join queries as needed. When this parameter is set, only queryServerAddress is still required; all other parameters are ignored or may be omitted.
* Required: no <br />
* Default: none <br />
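As an illustration only (this fragment is not part of the original document), a reader `parameter` section combining `splitKey`, `splitPoints` and `where` might look like this; the table, columns and split values are made up:
```
"parameter": {
    "queryServerAddress": "http://127.0.0.1:8765",
    "table": "TEST",
    "column": ["ID", "NAME"],
    "splitKey": "ID",
    "splitPoints": [1000, 2000, 3000],
    "where": "NAME IS NOT NULL"
}
```
With these values the reader would issue four range queries (`ID <= 1000`, `ID > 1000 AND ID <= 2000`, `ID > 2000 AND ID <= 3000`, `ID > 3000`), each carrying the `where` filter. Alternatively, `querySql` can be set to hand-written statements such as `SELECT ID, NAME FROM TEST WHERE ID < 1000`, in which case only `queryServerAddress` remains required.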
### 3.3 Type Conversion
hbase20xsqlreader currently supports most Phoenix types, but a few individual types are not supported; please check the types you use.
The type conversion list used by hbase20xsqlreader for Phoenix types is given below:
| DataX internal type | Phoenix data type |
| -------- | ----- |
| String |CHAR, VARCHAR|
| Bytes |BINARY, VARBINARY|
| Bool |BOOLEAN |
| Long |INTEGER, TINYINT, SMALLINT, BIGINT |
| Double |FLOAT, DECIMAL, DOUBLE |
| Date |DATE, TIME, TIMESTAMP |
## 4 Performance Report
## 5 Constraints and Limitations
* Only a single column is supported as the split column, and it must be a primary key of the table
* When splitPoints is not set, automatic splitting is used; in that case only integer and string split columns are supported
* Table names, schema names and column names are case sensitive; keep them consistent with the actual names in Phoenix
* Data can only be read through the Phoenix QueryServer, so the QueryServer service must be started before this plugin can be used
## 6 FAQ
***

110
hbase20xsqlreader/pom.xml Normal file
View File

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hbase20xsqlreader</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<phoenix.version>5.1.0-HBase-2.0.0.2</phoenix.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.aliyun.phoenix</groupId>
<artifactId>ali-phoenix-shaded-thin-client</artifactId>
<version>${phoenix.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<version>2.0.44-beta</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-core</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-service-face</artifactId>
</exclusion>
</exclusions>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>plugin-rdbms-util</artifactId>
<version>0.0.1-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/hbase20xsqlreader</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>hbase20xsqlreader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/hbase20xsqlreader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/hbase20xsqlreader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
public class Constant {
public static final String PK_TYPE = "pkType";
public static final Object PK_TYPE_STRING = "pkTypeString";
public static final Object PK_TYPE_LONG = "pkTypeLong";
public static final String DEFAULT_SERIALIZATION = "PROTOBUF";
public static final String CONNECT_STRING_TEMPLATE = "jdbc:phoenix:thin:url=%s;serialization=%s";
public static final String CONNECT_DRIVER_STRING = "org.apache.phoenix.queryserver.client.Driver";
public static final String SELECT_COLUMNS_TEMPLATE = "SELECT COLUMN_NAME, COLUMN_FAMILY FROM SYSTEM.CATALOG WHERE TABLE_NAME='%s' AND COLUMN_NAME IS NOT NULL";
public static String QUERY_SQL_TEMPLATE_WITHOUT_WHERE = "select %s from %s ";
public static String QUERY_SQL_TEMPLATE = "select %s from %s where (%s)";
public static String QUERY_MIN_MAX_TEMPLATE = "SELECT MIN(%s),MAX(%s) FROM %s";
public static String QUERY_COLUMN_TYPE_TEMPLATE = "SELECT %s FROM %s LIMIT 1";
public static String QUERY_SQL_PER_SPLIT = "querySqlPerSplit";
}
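For illustration, with the sample `queryServerAddress` of `http://127.0.0.1:8765` and the default `PROTOBUF` serialization used in the plugin documentation, `CONNECT_STRING_TEMPLATE` expands to the JDBC URL `jdbc:phoenix:thin:url=http://127.0.0.1:8765;serialization=PROTOBUF`, which is then opened through the driver class named by `CONNECT_DRIVER_STRING`.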

View File

@ -0,0 +1,403 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.RdbmsRangeSplitWrap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigInteger;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
public class HBase20SQLReaderHelper {
private static final Logger LOG = LoggerFactory.getLogger(HBase20SQLReaderHelper.class);
private Configuration configuration;
private Connection connection;
private List<String> querySql;
private String fullTableName;
private List<String> columnNames;
private String splitKey;
private List<Object> splitPoints;
public HBase20SQLReaderHelper (Configuration configuration) {
this.configuration = configuration;
}
/**
* Validate that the configuration parameters are correct
*/
public void validateParameter() {
// the QueryServer address must be configured
String queryServerAddress = configuration.getNecessaryValue(Key.QUERYSERVER_ADDRESS,
HBase20xSQLReaderErrorCode.REQUIRED_VALUE);
String serialization = configuration.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION);
connection = getConnection(queryServerAddress, serialization);
// if querySql is configured, table may be left empty; otherwise table is required
querySql = configuration.getList(Key.QUERY_SQL, String.class);
if (querySql == null || querySql.isEmpty()) {
LOG.info("Split according to splitKey or split points.");
String schema = configuration.getString(Key.SCHEMA, null);
String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLReaderErrorCode.REQUIRED_VALUE);
if (schema != null && !schema.isEmpty()) {
fullTableName = "\"" + schema + "\".\"" + tableName + "\"";
} else {
fullTableName = "\"" + tableName + "\"";
}
// 如果列名未配置默认读取全部列*
columnNames = configuration.getList(Key.COLUMN, String.class);
splitKey = configuration.getString(Key.SPLIT_KEY, null);
splitPoints = configuration.getList(Key.SPLIT_POINT);
checkTable(schema, tableName);
dealWhere();
} else {
// 用户指定querySql切分不做处理根据给定sql读取数据即可
LOG.info("Split according to query sql.");
}
}
public Connection getConnection(String queryServerAddress, String serialization) {
String url = String.format(Constant.CONNECT_STRING_TEMPLATE, queryServerAddress, serialization);
LOG.debug("Connecting to QueryServer [" + url + "] ...");
Connection conn;
try {
Class.forName(Constant.CONNECT_DRIVER_STRING);
conn = DriverManager.getConnection(url);
conn.setAutoCommit(false);
} catch (Throwable e) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_QUERYSERVER_CONNECTION_ERROR,
"无法连接QueryServer配置不正确或服务未启动请检查配置和服务状态或者联系HBase管理员.", e);
}
LOG.debug("Connected to QueryServer successfully.");
return conn;
}
/**
* Check that the table name, column names and split column exist
*/
public void checkTable(String schema, String tableName) {
Statement statement = null;
ResultSet resultSet = null;
try {
statement = connection.createStatement();
String selectSql = String.format(Constant.SELECT_COLUMNS_TEMPLATE, tableName);
// 处理schema不为空情况
if (schema == null || schema.isEmpty()) {
selectSql = selectSql + " AND TABLE_SCHEM IS NULL";
} else {
selectSql = selectSql + " AND TABLE_SCHEM = '" + schema + "'";
}
resultSet = statement.executeQuery(selectSql);
List<String> primaryColumnNames = new ArrayList<String>();
List<String> allColumnName = new ArrayList<String>();
while (resultSet.next()) {
String columnName = resultSet.getString(1);
allColumnName.add(columnName);
// 列族为空表示该列为主键列
if (resultSet.getString(2) == null) {
primaryColumnNames.add(columnName);
}
}
if (columnNames != null && !columnNames.isEmpty()) {
for (String columnName : columnNames) {
if (!allColumnName.contains(columnName)) {
// 用户配置的列名在元数据中不存在
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_VALUE,
"您配置的列" + columnName + "在表" + tableName + "的元数据中不存在请检查您的配置或者联系HBase管理员.");
}
}
} else {
columnNames = allColumnName;
configuration.set(Key.COLUMN, allColumnName);
}
if (splitKey != null) {
// 切分列必须是主键列否则会严重影响读取性能
if (!primaryColumnNames.contains(splitKey)) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_VALUE,
"您配置的切分列" + splitKey + "不是表" + tableName + "的主键请检查您的配置或者联系HBase管理员.");
}
}
} catch (SQLException e) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_PHOENIX_TABLE_ERROR,
"获取表" + tableName + "信息失败请检查您的集群和表状态或者联系HBase管理员.", e);
} finally {
closeJdbc(null, statement, resultSet);
}
}
public void closeJdbc(Connection connection, Statement statement, ResultSet resultSet) {
try {
if (resultSet != null) {
resultSet.close();
}
if (statement != null) {
statement.close();
}
if (connection != null) {
connection.close();
}
} catch (SQLException e) {
LOG.warn("数据库连接关闭异常.", HBase20xSQLReaderErrorCode.CLOSE_PHOENIX_CONNECTION_ERROR, e);
}
}
public void dealWhere() {
String where = configuration.getString(Key.WHERE, null);
if(StringUtils.isNotBlank(where)) {
String whereImprove = where.trim();
if(whereImprove.endsWith(";") || whereImprove.endsWith(";")) {
whereImprove = whereImprove.substring(0,whereImprove.length()-1);
}
configuration.set(Key.WHERE, whereImprove);
}
}
/**
* Split the table into ranges
*/
public List<Configuration> doSplit(int adviceNumber) {
List<Configuration> pluginParams = new ArrayList<Configuration>();
List<String> rangeList;
String where = configuration.getString(Key.WHERE);
boolean hasWhere = StringUtils.isNotBlank(where);
if (querySql == null || querySql.isEmpty()) {
// 如果splitPoints为空则根据splitKey自动切分不过这种切分方式无法保证数据均分且只支持整形和字符型列
if (splitPoints == null || splitPoints.isEmpty()) {
LOG.info("Split according to min and max value of splitColumn...");
Pair<Object, Object> minMaxPK = getPkRange(configuration);
if (null == minMaxPK) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
"根据切分主键切分表失败. DataX仅支持切分主键为一个,并且类型为整数或者字符串类型. " +
"请尝试使用其他的切分主键或者联系 HBase管理员 进行处理.");
}
if (null == minMaxPK.getLeft() || null == minMaxPK.getRight()) {
// 切分后获取到的start/end Null 的情况
pluginParams.add(configuration);
return pluginParams;
}
boolean isStringType = Constant.PK_TYPE_STRING.equals(configuration
.getString(Constant.PK_TYPE));
boolean isLongType = Constant.PK_TYPE_LONG.equals(configuration
.getString(Constant.PK_TYPE));
if (isStringType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(
String.valueOf(minMaxPK.getLeft()),
String.valueOf(minMaxPK.getRight()), adviceNumber,
splitKey, "'", null);
} else if (isLongType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(
new BigInteger(minMaxPK.getLeft().toString()),
new BigInteger(minMaxPK.getRight().toString()),
adviceNumber, splitKey);
} else {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
"您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. " +
"请尝试使用其他的切分主键或者联系HBase管理员进行处理.");
}
} else {
LOG.info("Split according to splitPoints...");
// 根据指定splitPoints进行切分
rangeList = buildSplitRange();
}
String tempQuerySql;
if (null != rangeList && !rangeList.isEmpty()) {
for (String range : rangeList) {
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(columnNames, fullTableName, where)
+ (hasWhere ? " and " : " where ") + range;
LOG.info("Query SQL: " + tempQuerySql);
tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, tempQuerySql);
pluginParams.add(tempConfig);
}
} else {
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(columnNames, fullTableName, where)
+ (hasWhere ? " and " : " where ")
+ String.format(" %s IS NOT NULL", splitKey);
LOG.info("Query SQL: " + tempQuerySql);
tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, tempQuerySql);
pluginParams.add(tempConfig);
}
} else {
// 指定querySql不需要切分
for (String sql : querySql) {
Configuration tempConfig = configuration.clone();
tempConfig.set(Constant.QUERY_SQL_PER_SPLIT, sql);
pluginParams.add(tempConfig);
}
}
return pluginParams;
}
public static String buildQuerySql(List<String> columnNames, String table,
String where) {
String querySql;
StringBuilder columnBuilder = new StringBuilder();
for (String columnName : columnNames) {
columnBuilder.append("\"").append(columnName).append("\",");
}
columnBuilder.setLength(columnBuilder.length() -1);
if (StringUtils.isBlank(where)) {
querySql = String.format(Constant.QUERY_SQL_TEMPLATE_WITHOUT_WHERE,
columnBuilder.toString(), table);
} else {
querySql = String.format(Constant.QUERY_SQL_TEMPLATE, columnBuilder.toString(),
table, where);
}
return querySql;
}
private List<String> buildSplitRange() {
String getSplitKeyTypeSQL = String.format(Constant.QUERY_COLUMN_TYPE_TEMPLATE, splitKey, fullTableName);
Statement statement = null;
ResultSet resultSet = null;
List<String> splitConditions = new ArrayList<String>();
try {
statement = connection.createStatement();
resultSet = statement.executeQuery(getSplitKeyTypeSQL);
ResultSetMetaData rsMetaData = resultSet.getMetaData();
int type = rsMetaData.getColumnType(1);
String symbol = "%s";
switch (type) {
case Types.CHAR:
case Types.VARCHAR:
symbol = "'%s'";
break;
case Types.DATE:
symbol = "TO_DATE('%s')";
break;
case Types.TIME:
symbol = "TO_TIME('%s')";
break;
case Types.TIMESTAMP:
symbol = "TO_TIMESTAMP('%s')";
break;
case Types.BINARY:
case Types.VARBINARY:
case Types.ARRAY:
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
"切分列类型为" + rsMetaData.getColumnTypeName(1) + ",暂不支持该类型字段作为切分列。");
}
String splitCondition = null;
for (int i = 0; i <= splitPoints.size(); i++) {
if (i == 0) {
splitCondition = splitKey + " <= " + String.format(symbol, splitPoints.get(i));
} else if (i == splitPoints.size()) {
splitCondition = splitKey + " > " + String.format(symbol, splitPoints.get(i - 1));
} else {
splitCondition = splitKey + " > " + String.format(symbol, splitPoints.get(i - 1)) +
" AND " + splitKey + " <= " + String.format(symbol, splitPoints.get(i));
}
splitConditions.add(splitCondition);
}
return splitConditions;
} catch (SQLException e) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.GET_TABLE_COLUMNTYPE_ERROR,
"获取切分列类型失败请检查服务或给定表和切分列是否正常或者联系HBase管理员进行处理。", e);
} finally {
closeJdbc(null, statement, resultSet);
}
}
private Pair<Object, Object> getPkRange(Configuration configuration) {
String pkRangeSQL = String.format(Constant.QUERY_MIN_MAX_TEMPLATE, splitKey, splitKey, fullTableName);
String where = configuration.getString(Key.WHERE);
if (StringUtils.isNotBlank(where)) {
pkRangeSQL = String.format("%s WHERE (%s AND %s IS NOT NULL)",
pkRangeSQL, where, splitKey);
}
Statement statement = null;
ResultSet resultSet = null;
Pair<Object, Object> minMaxPK = null;
try {
statement = connection.createStatement();
resultSet = statement.executeQuery(pkRangeSQL);
ResultSetMetaData rsMetaData = resultSet.getMetaData();
if (isPKTypeValid(rsMetaData)) {
if (isStringType(rsMetaData.getColumnType(1))) {
if(configuration != null) {
configuration
.set(Constant.PK_TYPE, Constant.PK_TYPE_STRING);
}
if (resultSet.next()) {
minMaxPK = new ImmutablePair<Object, Object>(
resultSet.getString(1), resultSet.getString(2));
}
} else if (isLongType(rsMetaData.getColumnType(1))) {
if(configuration != null) {
configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_LONG);
}
if (resultSet.next()) {
minMaxPK = new ImmutablePair<Object, Object>(
resultSet.getLong(1), resultSet.getLong(2));
}
} else {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. " +
"DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系HBASE管理员进行处理.");
}
} else {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. " +
"DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系HBASE管理员进行处理.");
}
} catch (SQLException e) {
throw DataXException.asDataXException(HBase20xSQLReaderErrorCode.ILLEGAL_SPLIT_PK, e);
} finally {
closeJdbc(null, statement, resultSet);
}
return minMaxPK;
}
private static boolean isPKTypeValid(ResultSetMetaData rsMetaData) {
boolean ret = false;
try {
int minType = rsMetaData.getColumnType(1);
int maxType = rsMetaData.getColumnType(2);
boolean isNumberType = isLongType(minType);
boolean isStringType = isStringType(minType);
if (minType == maxType && (isNumberType || isStringType)) {
ret = true;
}
} catch (Exception e) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"DataX获取切分主键(splitPk)字段类型失败. 该错误通常是系统底层异常导致. 请联系旺旺:askdatax或者DBA处理.");
}
return ret;
}
private static boolean isLongType(int type) {
boolean isValidLongType = type == Types.BIGINT || type == Types.INTEGER
|| type == Types.SMALLINT || type == Types.TINYINT;
return isValidLongType;
}
private static boolean isStringType(int type) {
return type == Types.CHAR || type == Types.NCHAR
|| type == Types.VARCHAR || type == Types.LONGVARCHAR
|| type == Types.NVARCHAR;
}
}
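As a worked example with assumed values (table TEST, columns ID and NAME, `where` set to `NAME IS NOT NULL`, and a split range of `ID > 1000 AND ID <= 2000`), `buildQuerySql` plus the range appended in `doSplit` yields a per-split statement of the form `select "ID","NAME" from "TEST" where (NAME IS NOT NULL) and ID > 1000 AND ID <= 2000`.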

View File

@ -0,0 +1,53 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import java.util.List;
public class HBase20xSQLReader extends Reader {
public static class Job extends Reader.Job {
private Configuration originalConfig;
private HBase20SQLReaderHelper readerHelper;
@Override
public void init() {
this.originalConfig = this.getPluginJobConf();
this.readerHelper = new HBase20SQLReaderHelper(this.originalConfig);
readerHelper.validateParameter();
}
@Override
public List<Configuration> split(int adviceNumber) {
return readerHelper.doSplit(adviceNumber);
}
@Override
public void destroy() {
// do nothing
}
}
public static class Task extends Reader.Task {
private Configuration readerConfig;
private HBase20xSQLReaderTask hbase20xSQLReaderTask;
@Override
public void init() {
this.readerConfig = super.getPluginJobConf();
hbase20xSQLReaderTask = new HBase20xSQLReaderTask(readerConfig, super.getTaskGroupId(), super.getTaskId());
}
@Override
public void startRead(RecordSender recordSender) {
hbase20xSQLReaderTask.readRecord(recordSender);
}
@Override
public void destroy() {
// do nothing
}
}
}

View File

@ -0,0 +1,39 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
import com.alibaba.datax.common.spi.ErrorCode;
public enum HBase20xSQLReaderErrorCode implements ErrorCode {
REQUIRED_VALUE("Hbasewriter-00", "您缺失了必须填写的参数值."),
ILLEGAL_VALUE("Hbasewriter-01", "您填写的参数值不合法."),
GET_QUERYSERVER_CONNECTION_ERROR("Hbasewriter-02", "获取QueryServer连接时出错."),
GET_PHOENIX_TABLE_ERROR("Hbasewriter-03", "获取 Phoenix table时出错."),
GET_TABLE_COLUMNTYPE_ERROR("Hbasewriter-05", "获取表列类型时出错."),
CLOSE_PHOENIX_CONNECTION_ERROR("Hbasewriter-06", "关闭JDBC连接时时出错."),
ILLEGAL_SPLIT_PK("Hbasewriter-07", "非法splitKey配置."),
PHOENIX_COLUMN_TYPE_CONVERT_ERROR("Hbasewriter-08", "phoenix的列类型转换错误."),
QUERY_DATA_ERROR("Hbasewriter-09", "truncate hbase表时发生异常."),
;
private final String code;
private final String description;
private HBase20xSQLReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s].", this.code, this.description);
}
}

View File

@ -0,0 +1,121 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
import com.alibaba.datax.common.element.*;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.statistics.PerfRecord;
import com.alibaba.datax.common.util.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.sql.*;
public class HBase20xSQLReaderTask {
private static final Logger LOG = LoggerFactory.getLogger(HBase20xSQLReaderTask.class);
private Configuration readerConfig;
private int taskGroupId = -1;
private int taskId=-1;
public HBase20xSQLReaderTask(Configuration config, int taskGroupId, int taskId) {
this.readerConfig = config;
this.taskGroupId = taskGroupId;
this.taskId = taskId;
}
public void readRecord(RecordSender recordSender) {
String querySql = readerConfig.getString(Constant.QUERY_SQL_PER_SPLIT);
LOG.info("Begin to read record by Sql: [{}].", querySql);
HBase20SQLReaderHelper helper = new HBase20SQLReaderHelper(readerConfig);
Connection conn = helper.getConnection(readerConfig.getString(Key.QUERYSERVER_ADDRESS),
readerConfig.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION));
Statement statement = null;
ResultSet resultSet = null;
try {
long rsNextUsedTime = 0;
long lastTime = System.nanoTime();
statement = conn.createStatement();
// 统计查询时间
PerfRecord queryPerfRecord = new PerfRecord(taskGroupId,taskId, PerfRecord.PHASE.SQL_QUERY);
queryPerfRecord.start();
resultSet = statement.executeQuery(querySql);
ResultSetMetaData meta = resultSet.getMetaData();
int columnNum = meta.getColumnCount();
// 统计的result_Next时间
PerfRecord allResultPerfRecord = new PerfRecord(taskGroupId, taskId, PerfRecord.PHASE.RESULT_NEXT_ALL);
allResultPerfRecord.start();
while (resultSet.next()) {
Record record = recordSender.createRecord();
rsNextUsedTime += (System.nanoTime() - lastTime);
for (int i = 1; i <= columnNum; i++) {
Column column = this.convertPhoenixValueToDataxColumn(meta.getColumnType(i), resultSet.getObject(i));
record.addColumn(column);
}
lastTime = System.nanoTime();
recordSender.sendToWriter(record);
}
allResultPerfRecord.end(rsNextUsedTime);
LOG.info("Finished reading records by Sql: [{}].", querySql);
} catch (SQLException e) {
throw DataXException.asDataXException(
HBase20xSQLReaderErrorCode.QUERY_DATA_ERROR, "查询Phoenix数据出现异常请检查服务状态或与HBase管理员联系", e);
} finally {
helper.closeJdbc(conn, statement, resultSet);
}
}
private Column convertPhoenixValueToDataxColumn(int sqlType, Object value) {
Column column;
switch (sqlType) {
case Types.CHAR:
case Types.VARCHAR:
column = new StringColumn((String) value);
break;
case Types.BINARY:
case Types.VARBINARY:
column = new BytesColumn((byte[]) value);
break;
case Types.BOOLEAN:
column = new BoolColumn((Boolean) value);
break;
case Types.INTEGER:
column = new LongColumn((Integer) value);
break;
case Types.TINYINT:
column = new LongColumn(((Byte) value).longValue());
break;
case Types.SMALLINT:
column = new LongColumn(((Short) value).longValue());
break;
case Types.BIGINT:
column = new LongColumn((Long) value);
break;
case Types.FLOAT:
column = new DoubleColumn((Float.valueOf(value.toString())));
break;
case Types.DECIMAL:
column = new DoubleColumn((BigDecimal)value);
break;
case Types.DOUBLE:
column = new DoubleColumn((Double) value);
break;
case Types.DATE:
column = new DateColumn((Date) value);
break;
case Types.TIME:
column = new DateColumn((Time) value);
break;
case Types.TIMESTAMP:
column = new DateColumn((Timestamp) value);
break;
default:
throw DataXException.asDataXException(
HBase20xSQLReaderErrorCode.PHOENIX_COLUMN_TYPE_CONVERT_ERROR, "遇到不可识别的phoenix类型" + "sqlType :" + sqlType);
}
return column;
}
}

View File

@ -0,0 +1,40 @@
package com.alibaba.datax.plugin.reader.hbase20xsqlreader;
public class Key {
/**
* 必选:reader要读取的表的表名
*/
public final static String TABLE = "table";
/**
* 必选:reader要读取哪些列
*/
public final static String COLUMN = "column";
/**
* 必选Phoenix QueryServer服务地址
*/
public final static String QUERYSERVER_ADDRESS = "queryServerAddress";
/**
* 可选序列化格式默认为PROTOBUF
*/
public static final String SERIALIZATION_NAME = "serialization";
/**
* 可选Phoenix表所属schema默认为空
*/
public static final String SCHEMA = "schema";
/**
* 可选读取数据时切分列
*/
public static final String SPLIT_KEY = "splitKey";
/**
* 可选读取数据时切分点
*/
public static final String SPLIT_POINT = "splitPoint";
/**
* 可选读取数据过滤条件配置
*/
public static final String WHERE = "where";
/**
* 可选查询语句配置
*/
public static final String QUERY_SQL = "querySql";
}

View File

@ -0,0 +1,7 @@
{
"name": "hbase20xsqlreader",
"class": "com.alibaba.datax.plugin.reader.hbase20xsqlreader.HBase20xSQLReader",
"description": "useScene: prod. mechanism: read data from phoenix through queryserver.",
"developer": "bake"
}

View File

@ -0,0 +1,13 @@
{
"name": "hbase20xsqlreader",
"parameter": {
"queryServerAddress": "",
"serialization": "PROTOBUF",
"schema": "",
"table": "TABLE1",
"column": ["ID", "NAME"],
"splitKey": "rowkey",
"splitPoint":[],
"where": ""
}
}

View File

@ -0,0 +1,172 @@
# HBase20xsqlwriter Plugin Documentation
## 1. Quick Introduction
HBase20xsqlwriter bulk-loads data into SQL tables (Phoenix) on HBase. Because Phoenix encodes the rowkey, writing directly through the HBase API requires manual data conversion, which is tedious and error-prone. This plugin writes data to Phoenix tables directly via SQL instead.
Under the hood, it executes UPSERT statements against Phoenix through the Phoenix QueryServer thin-client driver.
### 1.1 Supported Features
* Importing into tables with indexes; all index tables are updated along with the data
### 1.2 Limitations
* Requires Phoenix 5.x and HBase 2.x
* Data can only be imported through Phoenix QueryServer, so the QueryServer service must be running on your Phoenix deployment before this plugin can be used
* Truncating existing table data is not supported
* Only tables created through Phoenix are supported; native HBase tables are not
* Importing data with explicit timestamps is not supported
## 2. How It Works
The plugin connects to the Phoenix QueryServer service through the Phoenix thin client and writes rows in batches with UPSERT statements. Because it goes through this upper-layer interface, index tables are kept in sync automatically.
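As a rough illustration, the snippet below shows this write path in isolation. The QueryServer address, table name and column names are placeholders; the driver class and URL format follow the HBase20xSQLHelper code added in this commit.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class UpsertSketch {
    public static void main(String[] args) throws Exception {
        // Phoenix thin-client driver, as used by the writer plugin
        Class.forName("org.apache.phoenix.queryserver.client.Driver");
        String url = "jdbc:phoenix:thin:url=http://127.0.0.1:8765;serialization=PROTOBUF";
        Connection conn = DriverManager.getConnection(url);
        conn.setAutoCommit(false);
        // Quoted identifiers keep the configured case instead of folding to upper case
        PreparedStatement ps = conn.prepareStatement(
                "UPSERT INTO \"TABLE1\" (\"UID\", \"EVENTID\") VALUES (?, ?)");
        ps.setString(1, "u001");
        ps.setString(2, "login");
        ps.addBatch();
        ps.executeBatch();
        conn.commit();   // Phoenix updates any index tables as part of the UPSERT
        ps.close();
        conn.close();
    }
}
```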
## 3. Configuration
### 3.1 Sample Configuration
```json
{
"job": {
"entry": {
"jvm": "-Xms2048m -Xmx2048m"
},
"content": [
{
"reader": {
"name": "txtfilereader",
"parameter": {
"path": "/Users/shf/workplace/datax_test/hbase20xsqlwriter/txt/normal.txt",
"charset": "UTF-8",
"column": [
{
"index": 0,
"type": "String"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
}
],
"fieldDelimiter": ","
}
},
"writer": {
"name": "hbase20xsqlwriter",
"parameter": {
"batchSize": "100",
"column": [
"UID",
"TS",
"EVENTID",
"CONTENT"
],
"queryServerAddress": "http://127.0.0.1:8765",
"nullMode": "skip",
"table": "target Phoenix table name (case sensitive)"
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
```
### 3.2 Parameters
* **name**
    * Description: plugin name, must be `hbase20xsqlwriter`
    * Required: yes
    * Default: none
* **schema**
    * Description: schema the table belongs to
    * Required: no
    * Default: none
* **table**
    * Description: name of the table to import into, case-sensitive; Phoenix table names are usually **upper case**
    * Required: yes
    * Default: none
* **column**
    * Description: column names, case-sensitive; Phoenix column names are usually **upper case**.
    * Note that the column order must match the order of the columns emitted by the reader, one to one.
    * Data types do not need to be specified; column metadata is fetched from Phoenix automatically.
    * Required: yes
    * Default: none
* **queryServerAddress**
    * Description: Phoenix QueryServer address, required, in the form http://${hostName}:${port}, e.g. http://172.16.34.58:8765
    * Required: yes
    * Default: none
* **serialization**
    * Description: serialization protocol used by QueryServer
    * Required: no
    * Default: PROTOBUF
* **batchSize**
    * Description: maximum number of rows per batch write
    * Required: no
    * Default: 256
* **nullMode**
    * Description: how to handle a column whose value read from the source is null. Two modes are supported (see the sketch below):
        * skip: skip the column, i.e. do not write it (if a value already exists for this column in the row, it will be deleted)
        * empty: write an empty value; the empty value is 0 for numeric types and the empty string for varchar
    * Required: no
    * Default: skip
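The following self-contained sketch shows how the two modes translate into JDBC calls for a VARCHAR column. The names are illustrative; the real per-type handling lives in HBase20xSQLWriterTask later in this commit.

```java
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;

public class NullModeSketch {
    static void bindNullValue(PreparedStatement pstmt, int pos, String nullMode) throws SQLException {
        if ("skip".equalsIgnoreCase(nullMode)) {
            // skip: bind SQL NULL, which removes a previously written value for this column
            pstmt.setNull(pos, Types.VARCHAR);
        } else {
            // empty: bind the type-specific empty value (empty string for VARCHAR, 0 for numeric types)
            pstmt.setObject(pos, "");
        }
    }
}
```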
## 4. Performance Report
## 5. Constraints
The column order defined in the writer must match the column order of the reader. The reader's column order defines how columns are laid out in each output row, while the writer's column order defines the order in which the writer expects columns in the data it receives. For example:
reader column order: c1, c2, c3, c4
writer column order: x1, x2, x3, x4
Then reader column c1 is assigned to writer column x1. If the writer column order is instead x1, x2, x4, x3, then c3 is assigned to x4 and c4 to x3.
## 6. FAQ
1. How many concurrent channels are appropriate? Does increasing concurrency help when the job is slow?
The import process uses a default JVM heap of 2GB, and concurrency (the channel count) is implemented with multiple threads. Opening too many threads does not always speed up the import and may even hurt performance through excessively frequent GC. A channel count of 5-10 is generally recommended.
2. What is a reasonable batchSize?
The default is 256, but the most suitable batchSize should be derived from the size of each row. A single batch typically carries about 2MB-4MB of data, so dividing that figure by the row size gives the batchSize.
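As a rough, illustrative calculation (the 2 MB batch target and 1 KB average row size below are assumptions, not measured values):

```java
public class BatchSizeEstimator {
    public static void main(String[] args) {
        long targetBytesPerBatch = 2L * 1024 * 1024; // aim for roughly 2 MB per executeBatch()
        long avgRowSizeBytes = 1024;                 // assumed average serialized row size: 1 KB
        long batchSize = targetBytesPerBatch / avgRowSizeBytes;
        System.out.println("suggested batchSize ~ " + batchSize); // prints 2048
    }
}
```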

106
hbase20xsqlwriter/pom.xml Normal file
View File

@ -0,0 +1,106 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>datax-all</artifactId>
<groupId>com.alibaba.datax</groupId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hbase20xsqlwriter</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<phoenix.version>5.1.0-HBase-2.0.0.2</phoenix.version>
<commons-codec.version>1.8</commons-codec.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.aliyun.phoenix</groupId>
<artifactId>ali-phoenix-shaded-thin-client</artifactId>
<version>${phoenix.version}</version>
</dependency>
<!-- for test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-core</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-service-face</artifactId>
</exclusion>
</exclusions>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/java</directory>
<includes>
<include>**/*.properties</include>
</includes>
</resource>
</resources>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/writer/hbase20xsqlwriter</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>hbase20xsqlwriter-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/hbase20xsqlwriter</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/hbase20xsqlwriter/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,17 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
public final class Constant {
public static final String DEFAULT_NULL_MODE = "skip";
public static final String DEFAULT_SERIALIZATION = "PROTOBUF";
public static final int DEFAULT_BATCH_ROW_COUNT = 256; // 默认一次写256行
public static final int TYPE_UNSIGNED_TINYINT = 11;
public static final int TYPE_UNSIGNED_SMALLINT = 13;
public static final int TYPE_UNSIGNED_INTEGER = 9;
public static final int TYPE_UNSIGNED_LONG = 10;
public static final int TYPE_UNSIGNED_FLOAT = 14;
public static final int TYPE_UNSIGNED_DOUBLE = 15;
public static final int TYPE_UNSIGNED_DATE = 19;
public static final int TYPE_UNSIGNED_TIME = 18;
public static final int TYPE_UNSIGNED_TIMESTAMP = 20;
}

View File

@ -0,0 +1,142 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
public class HBase20xSQLHelper {
private static final Logger LOG = LoggerFactory.getLogger(HBase20xSQLHelper.class);
/**
* phoenix瘦客户端连接前缀
*/
public static final String CONNECT_STRING_PREFIX = "jdbc:phoenix:thin:";
/**
* phoenix驱动名
*/
public static final String CONNECT_DRIVER_STRING = "org.apache.phoenix.queryserver.client.Driver";
/**
* 从系统表查找配置表信息
*/
public static final String SELECT_CATALOG_TABLE_STRING = "SELECT COLUMN_NAME FROM SYSTEM.CATALOG WHERE TABLE_NAME='%s' AND COLUMN_NAME IS NOT NULL";
/**
* 验证配置参数是否正确
*/
public static void validateParameter(com.alibaba.datax.common.util.Configuration originalConfig) {
// 表名和queryserver地址必须配置否则抛异常
String tableName = originalConfig.getNecessaryValue(Key.TABLE, HBase20xSQLWriterErrorCode.REQUIRED_VALUE);
String queryServerAddress = originalConfig.getNecessaryValue(Key.QUERYSERVER_ADDRESS, HBase20xSQLWriterErrorCode.REQUIRED_VALUE);
// 序列化格式可不配置默认PROTOBUF
String serialization = originalConfig.getString(Key.SERIALIZATION_NAME, Constant.DEFAULT_SERIALIZATION);
String connStr = getConnectionUrl(queryServerAddress, serialization);
// 校验jdbc连接是否正常
Connection conn = getThinClientConnection(connStr);
List<String> columnNames = originalConfig.getList(Key.COLUMN, String.class);
if (columnNames == null || columnNames.isEmpty()) {
throw DataXException.asDataXException(
HBase20xSQLWriterErrorCode.ILLEGAL_VALUE, "HBase的columns配置不能为空,请添加目标表的列名配置.");
}
String schema = originalConfig.getString(Key.SCHEMA);
// 检查表以及配置列是否存在
checkTable(conn, schema, tableName, columnNames);
}
/**
* 获取JDBC连接轻量级连接使用完后必须显式close
*/
public static Connection getThinClientConnection(String connStr) {
LOG.debug("Connecting to QueryServer [" + connStr + "] ...");
Connection conn;
try {
Class.forName(CONNECT_DRIVER_STRING);
conn = DriverManager.getConnection(connStr);
conn.setAutoCommit(false);
} catch (Throwable e) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_QUERYSERVER_CONNECTION_ERROR,
"无法连接QueryServer配置不正确或服务未启动请检查配置和服务状态或者联系HBase管理员.", e);
}
LOG.debug("Connected to QueryServer successfully.");
return conn;
}
public static Connection getJdbcConnection(Configuration conf) {
String queryServerAddress = conf.getNecessaryValue(Key.QUERYSERVER_ADDRESS, HBase20xSQLWriterErrorCode.REQUIRED_VALUE);
// 序列化格式可不配置默认PROTOBUF
String serialization = conf.getString(Key.SERIALIZATION_NAME, "PROTOBUF");
String connStr = getConnectionUrl(queryServerAddress, serialization);
return getThinClientConnection(connStr);
}
public static String getConnectionUrl(String queryServerAddress, String serialization) {
String urlFmt = CONNECT_STRING_PREFIX + "url=%s;serialization=%s";
return String.format(urlFmt, queryServerAddress, serialization);
}
public static void checkTable(Connection conn, String schema, String tableName, List<String> columnNames) throws DataXException {
String selectSystemTable = getSelectSystemSQL(schema, tableName);
Statement st = null;
ResultSet rs = null;
try {
st = conn.createStatement();
rs = st.executeQuery(selectSystemTable);
List<String> allColumns = new ArrayList<String>();
if (rs.next()) {
allColumns.add(rs.getString(1));
} else {
LOG.error("{}表不存在,请检查表名是否正确或是否已创建.", tableName);
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_HBASE_TABLE_ERROR,
tableName + "表不存在,请检查表名是否正确或是否已创建.");
}
while (rs.next()) {
allColumns.add(rs.getString(1));
}
for (String columnName : columnNames) {
if (!allColumns.contains(columnName)) {
// 用户配置的列名在元数据中不存在
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"您配置的列" + columnName + "在目的表" + tableName + "的元数据中不存在请检查您的配置或者联系HBase管理员.");
}
}
} catch (SQLException t) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_HBASE_TABLE_ERROR,
"获取表" + tableName + "信息失败请检查您的集群和表状态或者联系HBase管理员.", t);
} finally {
closeJdbc(conn, st, rs);
}
}
private static String getSelectSystemSQL(String schema, String tableName) {
String sql = String.format(SELECT_CATALOG_TABLE_STRING, tableName);
if (schema != null) {
sql = sql + " AND TABLE_SCHEM = '" + schema + "'";
}
return sql;
}
public static void closeJdbc(Connection connection, Statement statement, ResultSet resultSet) {
try {
if (resultSet != null) {
resultSet.close();
}
if (statement != null) {
statement.close();
}
if (connection != null) {
connection.close();
}
} catch (SQLException e) {
LOG.warn("数据库连接关闭异常.", e);
}
}
}

View File

@ -0,0 +1,58 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import java.util.ArrayList;
import java.util.List;
public class HBase20xSQLWriter extends Writer {
public static class Job extends Writer.Job {
private Configuration config = null;
@Override
public void init() {
this.config = this.getPluginJobConf();
HBase20xSQLHelper.validateParameter(this.config);
}
@Override
public List<Configuration> split(int mandatoryNumber) {
List<Configuration> splitResultConfigs = new ArrayList<Configuration>();
for (int j = 0; j < mandatoryNumber; j++) {
splitResultConfigs.add(config.clone());
}
return splitResultConfigs;
}
@Override
public void destroy() {
//doNothing
}
}
public static class Task extends Writer.Task {
private Configuration taskConfig;
private HBase20xSQLWriterTask writerTask;
@Override
public void init() {
this.taskConfig = super.getPluginJobConf();
this.writerTask = new HBase20xSQLWriterTask(this.taskConfig);
}
@Override
public void startWrite(RecordReceiver lineReceiver) {
this.writerTask.startWriter(lineReceiver, super.getTaskPluginCollector());
}
@Override
public void destroy() {
// 不需要close
}
}
}

View File

@ -0,0 +1,37 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
import com.alibaba.datax.common.spi.ErrorCode;
public enum HBase20xSQLWriterErrorCode implements ErrorCode {
REQUIRED_VALUE("Hbasewriter-00", "您缺失了必须填写的参数值."),
ILLEGAL_VALUE("Hbasewriter-01", "您填写的参数值不合法."),
GET_QUERYSERVER_CONNECTION_ERROR("Hbasewriter-02", "获取QueryServer连接时出错."),
GET_HBASE_TABLE_ERROR("Hbasewriter-03", "获取 Hbase table时出错."),
CLOSE_HBASE_CONNECTION_ERROR("Hbasewriter-04", "关闭Hbase连接时出错."),
GET_TABLE_COLUMNTYPE_ERROR("Hbasewriter-05", "获取表列类型时出错."),
PUT_HBASE_ERROR("Hbasewriter-07", "写入hbase时发生IO异常."),
;
private final String code;
private final String description;
private HBase20xSQLWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s].", this.code, this.description);
}
}

View File

@ -0,0 +1,389 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.plugin.TaskPluginCollector;
import com.alibaba.datax.common.util.Configuration;
import com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.sql.*;
import java.util.Arrays;
import java.util.List;
public class HBase20xSQLWriterTask {
private final static Logger LOG = LoggerFactory.getLogger(HBase20xSQLWriterTask.class);
private Configuration configuration;
private TaskPluginCollector taskPluginCollector;
private Connection connection = null;
private PreparedStatement pstmt = null;
// 需要向hbase写入的列的数量,即用户配置的column参数中列的个数。时间戳不包含在内
private int numberOfColumnsToWrite;
// 期待从源头表的Record中拿到多少列
private int numberOfColumnsToRead;
private int[] columnTypes;
private List<String> columns;
private String fullTableName;
private NullModeType nullModeType;
private int batchSize;
public HBase20xSQLWriterTask(Configuration configuration) {
// 这里仅解析配置不访问远端集群配置的合法性检查在writer的init过程中进行
this.configuration = configuration;
}
public void startWriter(RecordReceiver lineReceiver, TaskPluginCollector taskPluginCollector) {
this.taskPluginCollector = taskPluginCollector;
try {
// 准备阶段
initialize();
// 写入数据
writeData(lineReceiver);
} catch (Throwable e) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.PUT_HBASE_ERROR, e);
} finally {
// 关闭jdbc连接
HBase20xSQLHelper.closeJdbc(connection, pstmt, null);
}
}
/**
* 初始化JDBC操作对象及列类型
* @throws SQLException
*/
private void initialize() throws SQLException {
if (connection == null) {
connection = HBase20xSQLHelper.getJdbcConnection(configuration);
connection.setAutoCommit(false);
}
nullModeType = NullModeType.getByTypeName(configuration.getString(Key.NULLMODE, Constant.DEFAULT_NULL_MODE));
batchSize = configuration.getInt(Key.BATCHSIZE, Constant.DEFAULT_BATCH_ROW_COUNT);
String schema = configuration.getString(Key.SCHEMA);
String tableName = configuration.getNecessaryValue(Key.TABLE, HBase20xSQLWriterErrorCode.REQUIRED_VALUE);
fullTableName = "\"" + tableName + "\"";
if (schema != null && !schema.isEmpty()) {
fullTableName = "\"" + schema + "\".\"" + tableName + "\"";
}
columns = configuration.getList(Key.COLUMN, String.class);
if (pstmt == null) {
// 一个Task的生命周期中只使用一个PreparedStatement对象
pstmt = createPreparedStatement();
columnTypes = getColumnSqlType();
}
}
/**
* 生成sql模板并根据模板创建PreparedStatement
*/
private PreparedStatement createPreparedStatement() throws SQLException {
// 生成列名集合列之间用逗号分隔 col1,col2,col3,...
StringBuilder columnNamesBuilder = new StringBuilder();
for (String col : columns) {
// 列名使用双引号则不自动转换为全大写而是保留用户配置的大小写
columnNamesBuilder.append("\"");
columnNamesBuilder.append(col);
columnNamesBuilder.append("\"");
columnNamesBuilder.append(",");
}
// 移除末尾多余的逗号
columnNamesBuilder.setLength(columnNamesBuilder.length() - 1);
String columnNames = columnNamesBuilder.toString();
numberOfColumnsToWrite = columns.size();
numberOfColumnsToRead = numberOfColumnsToWrite; // 开始的时候,要读的列数与要写的列数相等
// 生成UPSERT模板
StringBuilder upsertBuilder =
new StringBuilder("upsert into " + fullTableName + " (" + columnNames + " ) values (");
for (int i = 0; i < numberOfColumnsToWrite; i++) {
upsertBuilder.append("?,");
}
upsertBuilder.setLength(upsertBuilder.length() - 1); // 移除末尾多余的逗号
upsertBuilder.append(")");
String sql = upsertBuilder.toString();
PreparedStatement ps = connection.prepareStatement(sql);
LOG.debug("SQL template generated: " + sql);
return ps;
}
/**
* 根据列名来从数据库元数据中获取这一列对应的SQL类型
*/
private int[] getColumnSqlType() throws SQLException {
int[] types = new int[numberOfColumnsToWrite];
StringBuilder columnNamesBuilder = new StringBuilder();
for (String columnName : columns) {
columnNamesBuilder.append("\"").append(columnName).append("\",");
}
columnNamesBuilder.setLength(columnNamesBuilder.length() - 1);
// 查询一条数据获取表meta信息
String selectSql = "SELECT " + columnNamesBuilder + " FROM " + fullTableName + " LIMIT 1";
Statement statement = null;
try {
statement = connection.createStatement();
ResultSetMetaData meta = statement.executeQuery(selectSql).getMetaData();
for (int i = 0; i < columns.size(); i++) {
String name = columns.get(i);
types[i] = meta.getColumnType(i + 1);
LOG.debug("Column name : " + name + ", sql type = " + types[i] + " " + meta.getColumnTypeName(i + 1));
}
} catch (SQLException e) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.GET_TABLE_COLUMNTYPE_ERROR,
"获取表" + fullTableName + "列类型失败请检查配置和服务状态或者联系HBase管理员.", e);
} finally {
HBase20xSQLHelper.closeJdbc(null, statement, null);
}
return types;
}
/**
* 从接收器中获取每条记录写入Phoenix
*/
private void writeData(RecordReceiver lineReceiver) throws SQLException {
List<Record> buffer = Lists.newArrayListWithExpectedSize(batchSize);
Record record = null;
while ((record = lineReceiver.getFromReader()) != null) {
// 校验列数量是否符合预期
if (record.getColumnNumber() != numberOfColumnsToRead) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"数据源给出的列数量[" + record.getColumnNumber() + "]与您配置中的列数量[" + numberOfColumnsToRead +
"]不同, 请检查您的配置 或者 联系 Hbase 管理员.");
}
buffer.add(record);
if (buffer.size() >= batchSize) {
doBatchUpsert(buffer);
buffer.clear();
}
}
// 处理剩余的record
if (!buffer.isEmpty()) {
doBatchUpsert(buffer);
buffer.clear();
}
}
/**
* 批量提交一组数据如果失败则尝试一行行提交如果仍然失败抛错给用户
*/
private void doBatchUpsert(List<Record> records) throws SQLException {
try {
// 将所有record提交到connection缓存
for (Record r : records) {
setupStatement(r);
pstmt.addBatch();
}
pstmt.executeBatch();
// 将缓存的数据提交到phoenix
connection.commit();
pstmt.clearParameters();
pstmt.clearBatch();
} catch (SQLException e) {
LOG.error("Failed batch committing " + records.size() + " records", e);
// 批量提交失败则一行行重试以确定哪一行出错
connection.rollback();
HBase20xSQLHelper.closeJdbc(null, pstmt, null);
connection.setAutoCommit(true);
pstmt = createPreparedStatement();
doSingleUpsert(records);
} catch (Exception e) {
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.PUT_HBASE_ERROR, e);
}
}
/**
* 单行提交将出错的行记录到脏数据中由脏数据收集模块判断任务是否继续
*/
private void doSingleUpsert(List<Record> records) throws SQLException {
int rowNumber = 0;
for (Record r : records) {
try {
rowNumber ++;
setupStatement(r);
pstmt.executeUpdate();
} catch (SQLException e) {
//出错了记录脏数据
LOG.error("Failed writing to phoenix, rowNumber: " + rowNumber);
this.taskPluginCollector.collectDirtyRecord(r, e);
}
}
}
private void setupStatement(Record record) throws SQLException {
for (int i = 0; i < numberOfColumnsToWrite; i++) {
Column col = record.getColumn(i);
int sqlType = columnTypes[i];
// PreparedStatement中的索引从1开始所以用i+1
setupColumn(i + 1, sqlType, col);
}
}
private void setupColumn(int pos, int sqlType, Column col) throws SQLException {
if (col.getRawData() != null) {
switch (sqlType) {
case Types.CHAR:
case Types.VARCHAR:
pstmt.setString(pos, col.asString());
break;
case Types.BINARY:
case Types.VARBINARY:
pstmt.setBytes(pos, col.asBytes());
break;
case Types.BOOLEAN:
pstmt.setBoolean(pos, col.asBoolean());
break;
case Types.TINYINT:
case Constant.TYPE_UNSIGNED_TINYINT:
pstmt.setByte(pos, col.asLong().byteValue());
break;
case Types.SMALLINT:
case Constant.TYPE_UNSIGNED_SMALLINT:
pstmt.setShort(pos, col.asLong().shortValue());
break;
case Types.INTEGER:
case Constant.TYPE_UNSIGNED_INTEGER:
pstmt.setInt(pos, col.asLong().intValue());
break;
case Types.BIGINT:
case Constant.TYPE_UNSIGNED_LONG:
pstmt.setLong(pos, col.asLong());
break;
case Types.FLOAT:
pstmt.setFloat(pos, col.asDouble().floatValue());
break;
case Types.DOUBLE:
pstmt.setDouble(pos, col.asDouble());
break;
case Types.DECIMAL:
pstmt.setBigDecimal(pos, col.asBigDecimal());
break;
case Types.DATE:
case Constant.TYPE_UNSIGNED_DATE:
pstmt.setDate(pos, new Date(col.asDate().getTime()));
break;
case Types.TIME:
case Constant.TYPE_UNSIGNED_TIME:
pstmt.setTime(pos, new Time(col.asDate().getTime()));
break;
case Types.TIMESTAMP:
case Constant.TYPE_UNSIGNED_TIMESTAMP:
pstmt.setTimestamp(pos, new Timestamp(col.asDate().getTime()));
break;
default:
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 Hbase 管理员.");
}
} else {
// 没有值按空值的配置情况处理
switch (nullModeType){
case Skip:
// 跳过空值则不插入该列,
pstmt.setNull(pos, sqlType);
break;
case Empty:
// 插入"空值"请注意不同类型的空值不同
// 另外对SQL来说空值本身是有值的这与直接操作HBASE Native API时的空值完全不同
pstmt.setObject(pos, getEmptyValue(sqlType));
break;
default:
// nullMode的合法性在初始化配置的时候已经校验过这里一定不会出错
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"Hbasewriter 不支持该 nullMode 类型: " + nullModeType +
", 目前支持的 nullMode 类型是:" + Arrays.asList(NullModeType.values()));
}
}
}
/**
* 根据类型获取"空值"
* 值类型的空值都是0bool是falseString是空字符串
* @param sqlType sql数据类型定义于{@link Types}
*/
private Object getEmptyValue(int sqlType) {
switch (sqlType) {
case Types.VARCHAR:
return "";
case Types.BOOLEAN:
return false;
case Types.TINYINT:
case Constant.TYPE_UNSIGNED_TINYINT:
return (byte) 0;
case Types.SMALLINT:
case Constant.TYPE_UNSIGNED_SMALLINT:
return (short) 0;
case Types.INTEGER:
case Constant.TYPE_UNSIGNED_INTEGER:
return (int) 0;
case Types.BIGINT:
case Constant.TYPE_UNSIGNED_LONG:
return (long) 0;
case Types.FLOAT:
return (float) 0.0;
case Types.DOUBLE:
return (double) 0.0;
case Types.DECIMAL:
return new BigDecimal(0);
case Types.DATE:
case Constant.TYPE_UNSIGNED_DATE:
return new Date(0);
case Types.TIME:
case Constant.TYPE_UNSIGNED_TIME:
return new Time(0);
case Types.TIMESTAMP:
case Constant.TYPE_UNSIGNED_TIMESTAMP:
return new Timestamp(0);
case Types.BINARY:
case Types.VARBINARY:
return new byte[0];
default:
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 Hbase 管理员.");
}
}
}

View File

@ -0,0 +1,36 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
public class Key {
/**
* 必选writer要写入的表的表名
*/
public final static String TABLE = "table";
/**
* 必选writer要写入哪些列
*/
public final static String COLUMN = "column";
/**
* 必选Phoenix QueryServer服务地址
*/
public final static String QUERYSERVER_ADDRESS = "queryServerAddress";
/**
* 可选序列化格式默认为PROTOBUF
*/
public static final String SERIALIZATION_NAME = "serialization";
/**
* 可选,批量写入的最大行数,默认256行
*/
public static final String BATCHSIZE = "batchSize";
/**
* 可选遇到空值默认跳过
*/
public static final String NULLMODE = "nullMode";
/**
* 可选Phoenix表所属schema默认为空
*/
public static final String SCHEMA = "schema";
}

View File

@ -0,0 +1,32 @@
package com.alibaba.datax.plugin.writer.hbase20xsqlwriter;
import com.alibaba.datax.common.exception.DataXException;
import java.util.Arrays;
public enum NullModeType {
Skip("skip"),
Empty("empty")
;
private String mode;
NullModeType(String mode) {
this.mode = mode.toLowerCase();
}
public String getMode() {
return mode;
}
public static NullModeType getByTypeName(String modeName) {
for (NullModeType modeType : values()) {
if (modeType.mode.equalsIgnoreCase(modeName)) {
return modeType;
}
}
throw DataXException.asDataXException(HBase20xSQLWriterErrorCode.ILLEGAL_VALUE,
"Hbasewriter 不支持该 nullMode 类型:" + modeName + ", 目前支持的 nullMode 类型是:" + Arrays.asList(values()));
}
}

View File

@ -0,0 +1,7 @@
{
"name": "hbase20xsqlwriter",
"class": "com.alibaba.datax.plugin.writer.hbase20xsqlwriter.HBase20xSQLWriter",
"description": "useScene: prod. mechanism: use hbase sql UPSERT to put data, index tables will be updated too.",
"developer": "bake"
}

View File

@ -0,0 +1,13 @@
{
"name": "hbase20xsqlwriter",
"parameter": {
"queryServerAddress": "",
"table": "",
"serialization": "PROTOBUF",
"column": [
],
"batchSize": "100",
"nullMode": "skip",
"schema": ""
}
}

View File

@ -0,0 +1,209 @@
# OpenTSDBReader Plugin Documentation
___
## 1 Quick Introduction
The OpenTSDBReader plugin reads data from OpenTSDB, a scalable, distributed time-series database maintained mainly by Yahoo. For the relationship to and differences from Alibaba's in-house TSDB, see the Alibaba Cloud documentation: [Advantages over OpenTSDB](https://help.aliyun.com/document_detail/113368.html).
## 2 How It Works
Under the hood, OpenTSDBReader connects to the OpenTSDB instance over HTTP and uses the `/api/config` endpoint to obtain the connection information of the underlying HBase storage. It then connects to HBase through the AsyncHBase framework and scans the data points out. The whole synchronization is split by metric and time range: migrating the data of one metric within one hour forms one migration task.
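A minimal, standalone sketch of that split strategy is shown below. Metric names and times are placeholders; the actual implementation lives in OpenTSDBReader.Job.split later in this commit and additionally truncates the range to whole hours.

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SplitSketch {
    static class Slice {
        final String metric;
        final long startMs;   // inclusive
        final long endMs;     // exclusive
        Slice(String metric, long startMs, long endMs) {
            this.metric = metric;
            this.startMs = startMs;
            this.endMs = endMs;
        }
    }

    // One task per (metric, one-hour window) pair
    static List<Slice> split(List<String> metrics, long beginMs, long endMs) {
        long hourMs = 3600 * 1000L;
        List<Slice> slices = new ArrayList<Slice>();
        for (String metric : metrics) {
            for (long t = beginMs; t < endMs; t += hourMs) {
                slices.add(new Slice(metric, t, Math.min(t + hourMs, endMs)));
            }
        }
        return slices;
    }

    public static void main(String[] args) {
        long begin = 0L;                         // placeholder epoch millis on an hour boundary
        long end = begin + 3 * 3600 * 1000L;     // three hours later
        System.out.println(split(Arrays.asList("m"), begin, end).size() + " task(s)"); // 3 task(s)
    }
}
```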
## 3 Features
### 3.1 Sample Configuration
* A job that extracts data from an OpenTSDB database and prints it locally:
```json
{
"job": {
"content": [
{
"reader": {
"name": "opentsdbreader",
"parameter": {
"endpoint": "http://localhost:4242",
"column": [
"m"
],
"beginDateTime": "2019-01-01 00:00:00",
"endDateTime": "2019-01-01 03:00:00"
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
### 3.2 Parameters
* **name**
    * Description: name of this plugin
    * Required: yes
    * Default: opentsdbreader
* **parameter**
    * **endpoint**
        * Description: HTTP address of the OpenTSDB instance
        * Required: yes
        * Format: http://IP:Port
        * Default: none
    * **column**
        * Description: list of metrics to be migrated by the job
        * Required: yes
        * Default: none
    * **beginDateTime**
        * Description: used together with endDateTime to specify the time range whose data points should be migrated
        * Required: yes
        * Format: `yyyy-MM-dd HH:mm:ss`
        * Default: none
        * Note: minutes and seconds of the start and end times are ignored and the times are truncated to whole hours; e.g. [3:35, 4:55) on 2019-4-18 becomes [3:00, 4:00)
    * **endDateTime**
        * Description: used together with beginDateTime to specify the time range whose data points should be migrated
        * Required: yes
        * Format: `yyyy-MM-dd HH:mm:ss`
        * Default: none
        * Note: minutes and seconds of the start and end times are ignored and the times are truncated to whole hours; e.g. [3:35, 4:55) on 2019-4-18 becomes [3:00, 4:00)
### 3.3 Type Conversion
| DataX Internal Type | TSDB Data Type |
| ------------------- | ------------------------------------------------------------------------------------ |
| String | Serialized string of a TSDB data point, including timestamp, metric, tags and value |
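For illustration, the snippet below builds one such data point with the DataPoint4TSDB class added in this commit. The JSON in the comment is indicative only; the exact field order produced by fastjson may differ.

```java
import com.alibaba.datax.plugin.reader.conn.DataPoint4TSDB;

import java.util.HashMap;
import java.util.Map;

public class DataPointExample {
    public static void main(String[] args) {
        Map<String, String> tags = new HashMap<String, String>();
        tags.put("zone", "z1");
        tags.put("ip", "ip1");
        DataPoint4TSDB dp = new DataPoint4TSDB(1546272000000L, "m", tags, 42);
        // prints something like:
        // {"metric":"m","tags":{"ip":"ip1","zone":"z1"},"timestamp":1546272000000,"value":42}
        System.out.println(dp.toString());
    }
}
```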
## 4 Performance Report
### 4.1 Environment
#### 4.1.1 Data Characteristics
The workload is described along four dimensions: metric, time series, value, and collection interval.
##### metric
A single fixed metric named `m`.
##### tagkv
The first four tag key/value sets are fully combined, producing `10 * 20 * 100 * 100 = 2000000` time series; the final ip tag maps one-to-one onto these 2000000 time series, counting up from 1.
| **tag_k** | **tag_v** |
| --------- | ------------- |
| zone | z1~z10 |
| cluster | c1~c20 |
| group | g1~g100 |
| app | a1~a100 |
| ip | ip1~ip2000000 |
##### value
Values are random numbers in the range [1, 100].
##### interval
The collection interval is 10 seconds, ingested continuously for 3 hours, for a total of `3 * 60 * 60 / 10 * 2000000 = 2,160,000,000` data points.
#### 4.1.2 Machine Specifications
OpenTSDB Reader machine: 64C256G
HBase machines: 8C16G * 5
#### 4.1.3 DataX JVM Parameters
"-Xms4096m -Xmx4096m"
### 4.2 Test Report
| Channels | DataX Speed (Rec/s) | DataX Throughput (MB/s) |
|--------| --------|--------|
|1| 215428 | 25.65 |
|2| 424994 | 50.60 |
|3| 603132 | 71.81 |
## 5 Constraints
### 5.1 Network connectivity to the underlying storage of OpenTSDB is required
See the FAQ in section 6 for the rationale.
### 5.2 If a single metric carries a very large amount of data within one hour, the JVM memory size may need to be adjusted via the `-j` option
If the downstream writer cannot keep up with the query speed of the OpenTSDB reader, records may pile up in memory, so the JVM parameters should be adjusted accordingly. Taking the "extract data from an OpenTSDB database and print it locally" job as an example, the launch command is:
```bash
python datax/bin/datax.py opentsdb2stream.json -j "-Xms4096m -Xmx4096m"
```
### 5.3 The start and end times are automatically truncated to whole hours
The configured start and end times are automatically truncated to whole hours; e.g. `[3:35, 3:55)` on 2019-4-18 becomes `[3:00, 4:00)`.
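A standalone sketch of this truncation using Joda-Time, which is already a dependency of this module (the plugin itself performs it through its TimeUtils helper):

```java
import org.joda.time.DateTime;

public class HourTruncation {
    public static void main(String[] args) {
        DateTime begin = new DateTime(2019, 4, 18, 3, 35, 0);
        // drop minutes, seconds and milliseconds -> 2019-04-18T03:00:00.000
        DateTime truncated = begin.withMinuteOfHour(0).withSecondOfMinute(0).withMillisOfSecond(0);
        System.out.println(truncated);
    }
}
```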
### 5.4 Only OpenTSDB 2.3.x is currently supported
Compatibility with other versions is not guaranteed for now.
## 6 FAQ
***
**Q: Why connect to the underlying storage of OpenTSDB instead of simply fetching data points through the `/api/query` endpoint?**
A: Internal load testing showed that reading data through OpenTSDB's HTTP interface (`/api/query`) causes its asynchronous framework to report "too many callbacks" under large data volumes. To avoid this, the plugin connects directly to the underlying HBase storage and scans the data points out. In addition, by fixing the metric and time range, the HBase table can be scanned sequentially, which improves query efficiency.

156
opentsdbreader/pom.xml Normal file
View File

@ -0,0 +1,156 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-all</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<artifactId>opentsdbreader</artifactId>
<name>opentsdbreader</name>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- common -->
<commons-lang3.version>3.3.2</commons-lang3.version>
<!-- http -->
<httpclient.version>4.4</httpclient.version>
<commons-io.version>2.4</commons-io.version>
<!-- json -->
<fastjson.version>1.2.28</fastjson.version>
<!-- opentsdb -->
<opentsdb.version>2.3.2</opentsdb.version>
<!-- test -->
<junit4.version>4.12</junit4.version>
<!-- time -->
<joda-time.version>2.9.9</joda-time.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>fastjson</artifactId>
<groupId>com.alibaba</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<!-- common -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang3.version}</version>
</dependency>
<!-- http -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>${commons-io.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- json -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- opentsdb -->
<dependency>
<groupId>net.opentsdb</groupId>
<artifactId>opentsdb</artifactId>
<version>${opentsdb.version}</version>
</dependency>
<!-- time -->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>${joda-time.version}</version>
</dependency>
<!-- test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit4.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/reader/opentsdbreader</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>opentsdbreader-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/reader/opentsdbreader</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/reader/opentsdbreader/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,104 @@
package com.alibaba.datax.plugin.reader.conn;
import net.opentsdb.core.*;
import net.opentsdb.utils.DateTime;
import java.util.ArrayList;
import java.util.HashMap;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionCliQuery
*
* @author Benedict Jin
* @since 2019-04-17
*/
final class CliQuery {
/**
* Parses the query from the command lines.
*
* @param args The command line arguments.
* @param tsdb The TSDB to use.
* @param queries The list in which {@link Query}s will be appended.
*/
static void parseCommandLineQuery(final String[] args,
final TSDB tsdb,
final ArrayList<Query> queries) {
long start_ts = DateTime.parseDateTimeString(args[0], null);
if (start_ts >= 0) {
start_ts /= 1000;
}
long end_ts = -1;
if (args.length > 3) {
// see if we can detect an end time
try {
if (args[1].charAt(0) != '+' && (args[1].indexOf(':') >= 0
|| args[1].indexOf('/') >= 0 || args[1].indexOf('-') >= 0
|| Long.parseLong(args[1]) > 0)) {
end_ts = DateTime.parseDateTimeString(args[1], null);
}
} catch (NumberFormatException ignore) {
// ignore it as it means the third parameter is likely the aggregator
}
}
// temp fixup to seconds from ms until the rest of TSDB supports ms
// Note you can't append this to the DateTime.parseDateTimeString() call as
// it clobbers -1 results
if (end_ts >= 0) {
end_ts /= 1000;
}
int i = end_ts < 0 ? 1 : 2;
while (i < args.length && args[i].charAt(0) == '+') {
i++;
}
while (i < args.length) {
final Aggregator agg = Aggregators.get(args[i++]);
final boolean rate = "rate".equals(args[i]);
RateOptions rate_options = new RateOptions(false, Long.MAX_VALUE,
RateOptions.DEFAULT_RESET_VALUE);
if (rate) {
i++;
long counterMax = Long.MAX_VALUE;
long resetValue = RateOptions.DEFAULT_RESET_VALUE;
if (args[i].startsWith("counter")) {
String[] parts = Tags.splitString(args[i], ',');
if (parts.length >= 2 && parts[1].length() > 0) {
counterMax = Long.parseLong(parts[1]);
}
if (parts.length >= 3 && parts[2].length() > 0) {
resetValue = Long.parseLong(parts[2]);
}
rate_options = new RateOptions(true, counterMax, resetValue);
i++;
}
}
final boolean downsample = "downsample".equals(args[i]);
if (downsample) {
i++;
}
final long interval = downsample ? Long.parseLong(args[i++]) : 0;
final Aggregator sampler = downsample ? Aggregators.get(args[i++]) : null;
final String metric = args[i++];
final HashMap<String, String> tags = new HashMap<String, String>();
while (i < args.length && args[i].indexOf(' ', 1) < 0
&& args[i].indexOf('=', 1) > 0) {
Tags.parse(tags, args[i++]);
}
final Query query = tsdb.newQuery();
query.setStartTime(start_ts);
if (end_ts > 0) {
query.setEndTime(end_ts);
}
query.setTimeSeries(metric, tags, agg, rate, rate_options);
if (downsample) {
query.downsample(interval, sampler);
}
queries.add(query);
}
}
}

View File

@ -0,0 +1,77 @@
package com.alibaba.datax.plugin.reader.conn;
import com.alibaba.datax.common.plugin.RecordSender;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionConnection for TSDB-like databases
*
* @author Benedict Jin
* @since 2019-03-29
*/
public interface Connection4TSDB {
/**
* Get the address of Database.
*
* @return host+ip
*/
String address();
/**
* Get the version of Database.
*
* @return version
*/
String version();
/**
* Get these configurations.
*
* @return configs
*/
String config();
/**
* Get the list of supported version.
*
* @return version list
*/
String[] getSupportVersionPrefix();
/**
* Send data points by metric & start time & end time.
*
* @param metric metric
* @param start startTime
* @param end endTime
* @param recordSender sender
*/
void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception;
/**
* Put data point.
*
* @param dp data point
* @return whether the data point is written successfully
*/
boolean put(DataPoint4TSDB dp);
/**
* Put data points.
*
* @param dps data points
* @return whether the data point is written successfully
*/
boolean put(List<DataPoint4TSDB> dps);
/**
* Whether current version is supported.
*
* @return true: supported; false: not yet!
*/
boolean isSupported();
}

View File

@ -0,0 +1,68 @@
package com.alibaba.datax.plugin.reader.conn;
import com.alibaba.fastjson.JSON;
import java.util.Map;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionDataPoint for TSDB
*
* @author Benedict Jin
* @since 2019-04-10
*/
public class DataPoint4TSDB {
private long timestamp;
private String metric;
private Map<String, String> tags;
private Object value;
public DataPoint4TSDB() {
}
public DataPoint4TSDB(long timestamp, String metric, Map<String, String> tags, Object value) {
this.timestamp = timestamp;
this.metric = metric;
this.tags = tags;
this.value = value;
}
public long getTimestamp() {
return timestamp;
}
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public String getMetric() {
return metric;
}
public void setMetric(String metric) {
this.metric = metric;
}
public Map<String, String> getTags() {
return tags;
}
public void setTags(Map<String, String> tags) {
this.tags = tags;
}
public Object getValue() {
return value;
}
public void setValue(Object value) {
this.value = value;
}
@Override
public String toString() {
return JSON.toJSONString(this);
}
}

View File

@ -0,0 +1,96 @@
package com.alibaba.datax.plugin.reader.conn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.plugin.RecordSender;
import net.opentsdb.core.*;
import net.opentsdb.core.Internal.Cell;
import org.hbase.async.KeyValue;
import org.hbase.async.Scanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionTool to dump the data straight from HBase
*
* @author Benedict Jin
* @since 2019-04-17
*/
final class DumpSeries {
private static final Logger LOG = LoggerFactory.getLogger(DumpSeries.class);
/**
* Dump all data points with special metric and time range, then send them all by {@link RecordSender}.
*/
static void doDump(TSDB tsdb, String[] args, RecordSender sender) throws Exception {
final ArrayList<Query> queries = new ArrayList<Query>();
CliQuery.parseCommandLineQuery(args, tsdb, queries);
List<DataPoint4TSDB> dps = new LinkedList<DataPoint4TSDB>();
for (final Query query : queries) {
final List<Scanner> scanners = Internal.getScanners(query);
for (Scanner scanner : scanners) {
ArrayList<ArrayList<KeyValue>> rows;
while ((rows = scanner.nextRows().join()) != null) {
for (final ArrayList<KeyValue> row : rows) {
final byte[] key = row.get(0).key();
final long baseTime = Internal.baseTime(tsdb, key);
final String metric = Internal.metricName(tsdb, key);
for (final KeyValue kv : row) {
formatKeyValue(dps, tsdb, kv, baseTime, metric);
for (DataPoint4TSDB dp : dps) {
StringColumn tsdbColumn = new StringColumn(dp.toString());
Record record = sender.createRecord();
record.addColumn(tsdbColumn);
sender.sendToWriter(record);
}
dps.clear();
}
}
}
}
}
}
/**
* Parse KeyValue into data points.
*/
private static void formatKeyValue(final List<DataPoint4TSDB> dps, final TSDB tsdb,
final KeyValue kv, final long baseTime, final String metric) {
Map<String, String> tagKVs = Internal.getTags(tsdb, kv.key());
final byte[] qualifier = kv.qualifier();
final int q_len = qualifier.length;
if (!AppendDataPoints.isAppendDataPoints(qualifier) && q_len % 2 != 0) {
// custom data object, not a data point
if (LOG.isDebugEnabled()) {
LOG.debug("Not a data point");
}
} else if (q_len == 2 || q_len == 4 && Internal.inMilliseconds(qualifier)) {
// regular data point
final Cell cell = Internal.parseSingleValue(kv);
if (cell == null) {
throw new IllegalDataException("Unable to parse row: " + kv);
}
dps.add(new DataPoint4TSDB(cell.absoluteTimestamp(baseTime), metric, tagKVs, cell.parseValue()));
} else {
final Collection<Cell> cells;
if (q_len == 3) {
// append data points
cells = new AppendDataPoints().parseKeyValue(tsdb, kv);
} else {
// compacted column
cells = Internal.extractDataPoints(kv);
}
for (Cell cell : cells) {
dps.add(new DataPoint4TSDB(cell.absoluteTimestamp(baseTime), metric, tagKVs, cell.parseValue()));
}
}
}
}

View File

@ -0,0 +1,78 @@
package com.alibaba.datax.plugin.reader.conn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.plugin.reader.util.TSDBUtils;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionOpenTSDB Connection
*
* @author Benedict Jin
* @since 2019-03-29
*/
public class OpenTSDBConnection implements Connection4TSDB {
private String address;
public OpenTSDBConnection(String address) {
this.address = address;
}
@Override
public String address() {
return address;
}
@Override
public String version() {
return TSDBUtils.version(address);
}
@Override
public String config() {
return TSDBUtils.config(address);
}
@Override
public String[] getSupportVersionPrefix() {
return new String[]{"2.3"};
}
@Override
public void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception {
OpenTSDBDump.dump(this, metric, start, end, recordSender);
}
@Override
public boolean put(DataPoint4TSDB dp) {
return false;
}
@Override
public boolean put(List<DataPoint4TSDB> dps) {
return false;
}
@Override
public boolean isSupported() {
String versionJson = version();
if (StringUtils.isBlank(versionJson)) {
throw new RuntimeException("Cannot get the version!");
}
String version = JSON.parseObject(versionJson).getString("version");
if (StringUtils.isBlank(version)) {
return false;
}
for (String prefix : getSupportVersionPrefix()) {
if (version.startsWith(prefix)) {
return true;
}
}
return false;
}
}

View File

@ -0,0 +1,48 @@
package com.alibaba.datax.plugin.reader.conn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.fastjson.JSON;
import net.opentsdb.core.TSDB;
import net.opentsdb.utils.Config;
import java.util.Map;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionOpenTSDB Dump
*
* @author Benedict Jin
* @since 2019-04-15
*/
final class OpenTSDBDump {
private static TSDB TSDB_INSTANCE;
private OpenTSDBDump() {
}
static void dump(OpenTSDBConnection conn, String metric, Long start, Long end, RecordSender sender) throws Exception {
DumpSeries.doDump(getTSDB(conn), new String[]{start + "", end + "", "none", metric}, sender);
}
private static TSDB getTSDB(OpenTSDBConnection conn) {
if (TSDB_INSTANCE == null) {
synchronized (TSDB.class) {
if (TSDB_INSTANCE == null) {
try {
Config config = new Config(false);
Map configurations = JSON.parseObject(conn.config(), Map.class);
for (Object key : configurations.keySet()) {
config.overrideConfig(key.toString(), configurations.get(key.toString()).toString());
}
TSDB_INSTANCE = new TSDB(config);
} catch (Exception e) {
throw new RuntimeException("Cannot init OpenTSDB connection!", e);
}
}
}
}
return TSDB_INSTANCE;
}
}

View File

@ -0,0 +1,14 @@
package com.alibaba.datax.plugin.reader.opentsdbreader;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionKey
*
* @author Benedict Jin
* @since 2019-04-18
*/
public final class Constant {
static final String DEFAULT_DATA_FORMAT = "yyyy-MM-dd HH:mm:ss";
}

View File

@ -0,0 +1,17 @@
package com.alibaba.datax.plugin.reader.opentsdbreader;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionKey
*
* @author Benedict Jin
* @since 2019-04-18
*/
public class Key {
static final String ENDPOINT = "endpoint";
static final String COLUMN = "column";
static final String BEGIN_DATE_TIME = "beginDateTime";
static final String END_DATE_TIME = "endDateTime";
}

View File

@ -0,0 +1,207 @@
package com.alibaba.datax.plugin.reader.opentsdbreader;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.conn.OpenTSDBConnection;
import com.alibaba.datax.plugin.reader.util.TimeUtils;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionKey
*
* @author Benedict Jin
* @since 2019-04-18
*/
@SuppressWarnings("unused")
public class OpenTSDBReader extends Reader {
public static class Job extends Reader.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Configuration originalConfig;
@Override
public void init() {
this.originalConfig = super.getPluginJobConf();
String address = originalConfig.getString(Key.ENDPOINT);
if (StringUtils.isBlank(address)) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.REQUIRED_VALUE,
"The parameter [" + Key.ENDPOINT + "] is not set.");
}
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
if (columns == null || columns.isEmpty()) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.REQUIRED_VALUE,
"The parameter [" + Key.COLUMN + "] is not set.");
}
SimpleDateFormat format = new SimpleDateFormat(Constant.DEFAULT_DATA_FORMAT);
String startTime = originalConfig.getString(Key.BEGIN_DATE_TIME);
Long startDate;
if (startTime == null || startTime.trim().length() == 0) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.REQUIRED_VALUE,
"The parameter [" + Key.BEGIN_DATE_TIME + "] is not set.");
} else {
try {
startDate = format.parse(startTime).getTime();
} catch (ParseException e) {
throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE,
"The parameter [" + Key.BEGIN_DATE_TIME +
"] needs to conform to the [" + Constant.DEFAULT_DATA_FORMAT + "] format.");
}
}
String endTime = originalConfig.getString(Key.END_DATE_TIME);
Long endDate;
if (endTime == null || endTime.trim().length() == 0) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.REQUIRED_VALUE,
"The parameter [" + Key.END_DATE_TIME + "] is not set.");
} else {
try {
endDate = format.parse(endTime).getTime();
} catch (ParseException e) {
throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE,
"The parameter [" + Key.END_DATE_TIME +
"] needs to conform to the [" + Constant.DEFAULT_DATA_FORMAT + "] format.");
}
}
if (startDate >= endDate) {
throw DataXException.asDataXException(OpenTSDBReaderErrorCode.ILLEGAL_VALUE,
"The parameter [" + Key.BEGIN_DATE_TIME +
"] should be less than the parameter [" + Key.END_DATE_TIME + "].");
}
}
@Override
public void prepare() {
}
@Override
public List<Configuration> split(int adviceNumber) {
List<Configuration> configurations = new ArrayList<Configuration>();
// get metrics
List<String> columns = originalConfig.getList(Key.COLUMN, String.class);
// get time range
SimpleDateFormat format = new SimpleDateFormat(Constant.DEFAULT_DATA_FORMAT);
long startTime;
try {
startTime = format.parse(originalConfig.getString(Key.BEGIN_DATE_TIME)).getTime();
} catch (ParseException e) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.BEGIN_DATE_TIME + "]失败.", e);
}
long endTime;
try {
endTime = format.parse(originalConfig.getString(Key.END_DATE_TIME)).getTime();
} catch (ParseException e) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "解析[" + Key.END_DATE_TIME + "]失败.", e);
}
if (TimeUtils.isSecond(startTime)) {
startTime *= 1000;
}
if (TimeUtils.isSecond(endTime)) {
endTime *= 1000;
}
DateTime startDateTime = new DateTime(TimeUtils.getTimeInHour(startTime));
DateTime endDateTime = new DateTime(TimeUtils.getTimeInHour(endTime));
// split by metric
for (String column : columns) {
// split by time in hour
while (startDateTime.isBefore(endDateTime)) {
Configuration clone = this.originalConfig.clone();
clone.set(Key.COLUMN, Collections.singletonList(column));
clone.set(Key.BEGIN_DATE_TIME, startDateTime.getMillis());
startDateTime = startDateTime.plusHours(1);
// Make sure the time interval is [start, end).
// Because net.opentsdb.core.Query.setEndTime means less than or equal to the end time.
clone.set(Key.END_DATE_TIME, startDateTime.getMillis() - 1);
configurations.add(clone);
LOG.info("Configuration: {}", JSON.toJSONString(clone));
}
}
return configurations;
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
public static class Task extends Reader.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private List<String> columns;
private OpenTSDBConnection conn;
private Long startTime;
private Long endTime;
@Override
public void init() {
Configuration readerSliceConfig = super.getPluginJobConf();
LOG.info("getPluginJobConf: {}", JSON.toJSONString(readerSliceConfig));
this.columns = readerSliceConfig.getList(Key.COLUMN, String.class);
String address = readerSliceConfig.getString(Key.ENDPOINT);
conn = new OpenTSDBConnection(address);
this.startTime = readerSliceConfig.getLong(Key.BEGIN_DATE_TIME);
this.endTime = readerSliceConfig.getLong(Key.END_DATE_TIME);
}
@Override
public void prepare() {
}
@Override
public void startRead(RecordSender recordSender) {
try {
for (String column : columns) {
conn.sendDPs(column, this.startTime, this.endTime, recordSender);
}
} catch (Exception e) {
throw DataXException.asDataXException(
OpenTSDBReaderErrorCode.ILLEGAL_VALUE, "获取或发送数据点的过程中出错!", e);
}
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
}

View File

@ -0,0 +1,40 @@
package com.alibaba.datax.plugin.reader.opentsdbreader;
import com.alibaba.datax.common.spi.ErrorCode;
/**
* Copyright @ 2019 alibaba.com
* All right reserved.
* FunctionOpenTSDB Reader Error Code
*
* @author Benedict Jin
* @since 2019-04-18
*/
public enum OpenTSDBReaderErrorCode implements ErrorCode {
REQUIRED_VALUE("OpenTSDBReader-00", "Missing the necessary value"),
ILLEGAL_VALUE("OpenTSDBReader-01", "Illegal value");
private final String code;
private final String description;
OpenTSDBReaderErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code, this.description);
}
}

View File

@ -0,0 +1,68 @@
package com.alibaba.datax.plugin.reader.util;
import com.alibaba.fastjson.JSON;
import org.apache.http.client.fluent.Content;
import org.apache.http.client.fluent.Request;
import org.apache.http.entity.ContentType;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: HttpUtils
*
* @author Benedict Jin
* @since 2019-03-29
*/
public final class HttpUtils {
public final static Charset UTF_8 = Charset.forName("UTF-8");
public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60);
public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60);
private HttpUtils() {
}
public static String get(String url) throws Exception {
Content content = Request.Get(url)
.connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL)
.socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL)
.execute()
.returnContent();
if (content == null) {
return null;
}
return content.asString(UTF_8);
}
public static String post(String url, Map<String, Object> params) throws Exception {
return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL);
}
public static String post(String url, String params) throws Exception {
return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL);
}
public static String post(String url, Map<String, Object> params,
int connectTimeoutInMill, int socketTimeoutInMill) throws Exception {
return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill);
}
public static String post(String url, String params,
int connectTimeoutInMill, int socketTimeoutInMill) throws Exception {
Content content = Request.Post(url)
.connectTimeout(connectTimeoutInMill)
.socketTimeout(socketTimeoutInMill)
.addHeader("Content-Type", "application/json")
.bodyString(params, ContentType.APPLICATION_JSON)
.execute()
.returnContent();
if (content == null) {
return null;
}
return content.asString(UTF_8);
}
}

View File

@ -0,0 +1,68 @@
package com.alibaba.datax.plugin.reader.util;
import com.alibaba.datax.plugin.reader.conn.DataPoint4TSDB;
import com.alibaba.fastjson.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Utils
*
* @author Benedict Jin
* @since 2019-03-29
*/
public final class TSDBUtils {
private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class);
private TSDBUtils() {
}
public static String version(String address) {
String url = String.format("%s/api/version", address);
String rsp;
try {
rsp = HttpUtils.get(url);
} catch (Exception e) {
throw new RuntimeException(e);
}
return rsp;
}
public static String config(String address) {
String url = String.format("%s/api/config", address);
String rsp;
try {
rsp = HttpUtils.get(url);
} catch (Exception e) {
throw new RuntimeException(e);
}
return rsp;
}
public static boolean put(String address, List<DataPoint4TSDB> dps) {
return put(address, JSON.toJSON(dps));
}
public static boolean put(String address, DataPoint4TSDB dp) {
return put(address, JSON.toJSON(dp));
}
private static boolean put(String address, Object o) {
String url = String.format("%s/api/put", address);
String rsp;
try {
rsp = HttpUtils.post(url, o.toString());
// If successful, the returned content should be null.
assert rsp == null;
} catch (Exception e) {
LOG.error("Address: {}, DataPoints: {}", url, o);
throw new RuntimeException(e);
}
return true;
}
}

View File

@ -0,0 +1,38 @@
package com.alibaba.datax.plugin.reader.util;
import java.util.concurrent.TimeUnit;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TimeUtils
*
* @author Benedict Jin
* @since 2019-04-22
*/
public final class TimeUtils {
private TimeUtils() {
}
private static final long SECOND_MASK = 0xFFFFFFFF00000000L;
private static final long HOUR_IN_MILL = TimeUnit.HOURS.toMillis(1);
/**
* Whether the timestamp is in seconds.
*
* @param ts timestamp
*/
public static boolean isSecond(long ts) {
return (ts & SECOND_MASK) == 0;
}
/**
* Truncate the time to the start of the hour.
*
* @param ms time in millisecond
*/
public static long getTimeInHour(long ms) {
return ms - ms % HOUR_IN_MILL;
}
}

View File

@ -0,0 +1,10 @@
{
"name": "opentsdbreader",
"class": "com.alibaba.datax.plugin.reader.opentsdbreader.OpenTSDBReader",
"description": {
"useScene": "从 OpenTSDB 中摄取数据点",
"mechanism": "根据时间和 metric 直连底层 HBase 存储,从而 Scan 出符合条件的数据点",
"warn": "指定起止时间会自动忽略分钟和秒,转为整点时刻,例如 2019-4-18 的 [3:35, 4:55) 会被转为 [3:00, 4:00)"
},
"developer": "Benedict Jin"
}

View File

@ -0,0 +1,11 @@
{
"name": "opentsdbreader",
"parameter": {
"endpoint": "http://localhost:8242",
"column": [
"m"
],
"startTime": "2019-01-01 00:00:00",
"endTime": "2019-01-01 01:00:00"
}
}

View File

@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.reader.conn;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: OpenTSDB Connection4TSDB Test
*
* @author Benedict Jin
* @since 2019-03-29
*/
@Ignore
public class OpenTSDBConnectionTest {
private static final String OPENTSDB_ADDRESS = "http://localhost:8242";
@Test
public void testVersion() {
String version = new OpenTSDBConnection(OPENTSDB_ADDRESS).version();
Assert.assertNotNull(version);
}
@Test
public void testIsSupported() {
Assert.assertTrue(new OpenTSDBConnection(OPENTSDB_ADDRESS).isSupported());
}
}

View File

@ -0,0 +1,18 @@
package com.alibaba.datax.plugin.reader.util;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: Const
*
* @author Benedict Jin
* @since 2019-03-29
*/
final class Const {
private Const() {
}
static final String OPENTSDB_ADDRESS = "http://localhost:8242";
static final String TSDB_ADDRESS = "http://localhost:8240";
}

View File

@ -0,0 +1,39 @@
package com.alibaba.datax.plugin.reader.util;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: HttpUtils Test
*
* @author Benedict Jin
* @since 2019-03-29
*/
@Ignore
public class HttpUtilsTest {
@Test
public void testSimpleCase() throws Exception {
String url = "https://httpbin.org/post";
Map<String, Object> params = new HashMap<String, Object>();
params.put("foo", "bar");
String rsp = HttpUtils.post(url, params);
System.out.println(rsp);
Assert.assertNotNull(rsp);
}
@Test
public void testGet() throws Exception {
String url = String.format("%s/api/version", Const.OPENTSDB_ADDRESS);
String rsp = HttpUtils.get(url);
System.out.println(rsp);
Assert.assertNotNull(rsp);
}
}

View File

@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.reader.util;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Test
*
* @author Benedict Jin
* @since 2019-04-11
*/
@Ignore
public class TSDBTest {
@Test
public void testVersion() {
String version = TSDBUtils.version(Const.TSDB_ADDRESS);
Assert.assertNotNull(version);
System.out.println(version);
version = TSDBUtils.version(Const.OPENTSDB_ADDRESS);
Assert.assertNotNull(version);
System.out.println(version);
}
}

View File

@ -0,0 +1,33 @@
package com.alibaba.datax.plugin.reader.util;
import org.junit.Assert;
import org.junit.Test;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: com.alibaba.datax.common.util
*
* @author Benedict Jin
* @since 2019-04-22
*/
public class TimeUtilsTest {
@Test
public void testIsSecond() {
Assert.assertFalse(TimeUtils.isSecond(System.currentTimeMillis()));
Assert.assertTrue(TimeUtils.isSecond(System.currentTimeMillis() / 1000));
}
@Test
public void testGetTimeInHour() throws ParseException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date date = sdf.parse("2019-04-18 15:32:33");
long timeInHour = TimeUtils.getTimeInHour(date.getTime());
Assert.assertEquals("2019-04-18 15:00:00", sdf.format(timeInHour));
}
}

View File

@ -159,6 +159,13 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>opentsdbreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<!-- writer -->
<fileSet>
@ -308,5 +315,26 @@
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>hbase20xsqlreader/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>hbase20xsqlwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
<fileSet>
<directory>tsdbwriter/target/datax/</directory>
<includes>
<include>**/*.*</include>
</includes>
<outputDirectory>datax</outputDirectory>
</fileSet>
</fileSets>
</assembly>

View File

@ -1,359 +0,0 @@
package com.alibaba.datax.plugin.rdbms.util;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Set;
import java.util.StringTokenizer;
// TODO delete it
public class SqlFormatUtil {
private static final Set<String> BEGIN_CLAUSES = new HashSet<String>();
private static final Set<String> END_CLAUSES = new HashSet<String>();
private static final Set<String> LOGICAL = new HashSet<String>();
private static final Set<String> QUANTIFIERS = new HashSet<String>();
private static final Set<String> DML = new HashSet<String>();
private static final Set<String> MISC = new HashSet<String>();
private static final String WHITESPACE = " \n\r\f\t";
static {
BEGIN_CLAUSES.add("left");
BEGIN_CLAUSES.add("right");
BEGIN_CLAUSES.add("inner");
BEGIN_CLAUSES.add("outer");
BEGIN_CLAUSES.add("group");
BEGIN_CLAUSES.add("order");
END_CLAUSES.add("where");
END_CLAUSES.add("set");
END_CLAUSES.add("having");
END_CLAUSES.add("join");
END_CLAUSES.add("from");
END_CLAUSES.add("by");
END_CLAUSES.add("join");
END_CLAUSES.add("into");
END_CLAUSES.add("union");
LOGICAL.add("and");
LOGICAL.add("or");
LOGICAL.add("when");
LOGICAL.add("else");
LOGICAL.add("end");
QUANTIFIERS.add("in");
QUANTIFIERS.add("all");
QUANTIFIERS.add("exists");
QUANTIFIERS.add("some");
QUANTIFIERS.add("any");
DML.add("insert");
DML.add("update");
DML.add("delete");
MISC.add("select");
MISC.add("on");
}
static final String indentString = " ";
static final String initial = "\n ";
public static String format(String source) {
return new FormatProcess(source).perform();
}
private static class FormatProcess {
boolean beginLine = true;
boolean afterBeginBeforeEnd = false;
boolean afterByOrSetOrFromOrSelect = false;
boolean afterValues = false;
boolean afterOn = false;
boolean afterBetween = false;
boolean afterInsert = false;
int inFunction = 0;
int parensSinceSelect = 0;
private LinkedList<Integer> parenCounts = new LinkedList<Integer>();
private LinkedList<Boolean> afterByOrFromOrSelects = new LinkedList<Boolean>();
int indent = 1;
StringBuilder result = new StringBuilder();
StringTokenizer tokens;
String lastToken;
String token;
String lcToken;
public FormatProcess(String sql) {
tokens = new StringTokenizer(sql, "()+*/-=<>'`\"[]," + WHITESPACE,
true);
}
public String perform() {
result.append(initial);
while (tokens.hasMoreTokens()) {
token = tokens.nextToken();
lcToken = token.toLowerCase();
if ("'".equals(token)) {
String t;
do {
t = tokens.nextToken();
token += t;
} while (!"'".equals(t) && tokens.hasMoreTokens()); // cannot
// handle
// single
// quotes
} else if ("\"".equals(token)) {
String t;
do {
t = tokens.nextToken();
token += t;
} while (!"\"".equals(t));
}
if (afterByOrSetOrFromOrSelect && ",".equals(token)) {
commaAfterByOrFromOrSelect();
} else if (afterOn && ",".equals(token)) {
commaAfterOn();
}
else if ("(".equals(token)) {
openParen();
} else if (")".equals(token)) {
closeParen();
}
else if (BEGIN_CLAUSES.contains(lcToken)) {
beginNewClause();
}
else if (END_CLAUSES.contains(lcToken)) {
endNewClause();
}
else if ("select".equals(lcToken)) {
select();
}
else if (DML.contains(lcToken)) {
updateOrInsertOrDelete();
}
else if ("values".equals(lcToken)) {
values();
}
else if ("on".equals(lcToken)) {
on();
}
else if (afterBetween && lcToken.equals("and")) {
misc();
afterBetween = false;
}
else if (LOGICAL.contains(lcToken)) {
logical();
}
else if (isWhitespace(token)) {
white();
}
else {
misc();
}
if (!isWhitespace(token)) {
lastToken = lcToken;
}
}
return result.toString();
}
private void commaAfterOn() {
out();
indent--;
newline();
afterOn = false;
afterByOrSetOrFromOrSelect = true;
}
private void commaAfterByOrFromOrSelect() {
out();
newline();
}
private void logical() {
if ("end".equals(lcToken)) {
indent--;
}
newline();
out();
beginLine = false;
}
private void on() {
indent++;
afterOn = true;
newline();
out();
beginLine = false;
}
private void misc() {
out();
if ("between".equals(lcToken)) {
afterBetween = true;
}
if (afterInsert) {
newline();
afterInsert = false;
} else {
beginLine = false;
if ("case".equals(lcToken)) {
indent++;
}
}
}
private void white() {
if (!beginLine) {
result.append(" ");
}
}
private void updateOrInsertOrDelete() {
out();
indent++;
beginLine = false;
if ("update".equals(lcToken)) {
newline();
}
if ("insert".equals(lcToken)) {
afterInsert = true;
}
}
private void select() {
out();
indent++;
newline();
parenCounts.addLast(Integer.valueOf(parensSinceSelect));
afterByOrFromOrSelects.addLast(Boolean
.valueOf(afterByOrSetOrFromOrSelect));
parensSinceSelect = 0;
afterByOrSetOrFromOrSelect = true;
}
private void out() {
result.append(token);
}
private void endNewClause() {
if (!afterBeginBeforeEnd) {
indent--;
if (afterOn) {
indent--;
afterOn = false;
}
newline();
}
out();
if (!"union".equals(lcToken)) {
indent++;
}
newline();
afterBeginBeforeEnd = false;
afterByOrSetOrFromOrSelect = "by".equals(lcToken)
|| "set".equals(lcToken) || "from".equals(lcToken);
}
private void beginNewClause() {
if (!afterBeginBeforeEnd) {
if (afterOn) {
indent--;
afterOn = false;
}
indent--;
newline();
}
out();
beginLine = false;
afterBeginBeforeEnd = true;
}
private void values() {
indent--;
newline();
out();
indent++;
newline();
afterValues = true;
}
private void closeParen() {
parensSinceSelect--;
if (parensSinceSelect < 0) {
indent--;
parensSinceSelect = parenCounts.removeLast().intValue();
afterByOrSetOrFromOrSelect = afterByOrFromOrSelects
.removeLast().booleanValue();
}
if (inFunction > 0) {
inFunction--;
out();
} else {
if (!afterByOrSetOrFromOrSelect) {
indent--;
newline();
}
out();
}
beginLine = false;
}
private void openParen() {
if (isFunctionName(lastToken) || inFunction > 0) {
inFunction++;
}
beginLine = false;
if (inFunction > 0) {
out();
} else {
out();
if (!afterByOrSetOrFromOrSelect) {
indent++;
newline();
beginLine = true;
}
}
parensSinceSelect++;
}
private static boolean isFunctionName(String tok) {
final char begin = tok.charAt(0);
final boolean isIdentifier = Character.isJavaIdentifierStart(begin)
|| '"' == begin;
return isIdentifier && !LOGICAL.contains(tok)
&& !END_CLAUSES.contains(tok) && !QUANTIFIERS.contains(tok)
&& !DML.contains(tok) && !MISC.contains(tok);
}
private static boolean isWhitespace(String token) {
return WHITESPACE.indexOf(token) >= 0;
}
private void newline() {
result.append("\n");
for (int i = 0; i < indent; i++) {
result.append(indentString);
}
beginLine = true;
}
}
}

View File

@ -62,6 +62,7 @@
<module>rdbmsreader</module>
<module>hbase11xreader</module>
<module>hbase094xreader</module>
<module>opentsdbreader</module>
<!-- writer -->
<module>mysqlwriter</module>
@ -85,10 +86,13 @@
<module>hbase11xsqlwriter</module>
<module>hbase11xsqlreader</module>
<module>elasticsearchwriter</module>
<module>tsdbwriter</module>
<!-- common support module -->
<module>plugin-rdbms-util</module>
<module>plugin-unstructured-storage-util</module>
<module>hbase20xsqlreader</module>
<module>hbase20xsqlwriter</module>
</modules>
<dependencyManagement>

View File

@ -0,0 +1,187 @@
# TSDBWriter Plugin Documentation
___
## 1 Quick Introduction
The TSDBWriter plugin writes data points into TSDB, Alibaba's in-house time series database (hereinafter referred to as TSDB).
Time Series Database (TSDB) is a high-performance, low-cost, stable and reliable online time series database service. It provides efficient reads and writes, high-compression storage, and interpolation and aggregation of time series data, and is widely used in scenarios such as IoT device monitoring, enterprise energy management systems (EMS), production safety monitoring, and power inspection systems. TSDB supports second-level writes of millions of time series data points, high-compression low-cost storage, pre-downsampling, interpolation, multi-dimensional aggregation, and visualization of query results, addressing the high storage cost and the low write and query efficiency caused by the huge number of collection points and the high sampling frequency of devices. For more about TSDB, see the [Alibaba Cloud TSDB website](https://help.aliyun.com/product/54825.html).
## 2 Implementation
The plugin connects to a TSDB instance over HTTP and writes data points through the `/api/put` interface. For details about the write interface, see TSDB's [API documentation](https://help.aliyun.com/document_detail/59939.html).
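As a rough illustration (not this plugin's actual write path), the `DataPoint4TSDB` and `TSDBUtils` classes added in this PR can be combined to push a single point through the same `/api/put` interface; the endpoint, tag and value below are placeholders:
```java
import com.alibaba.datax.plugin.writer.conn.DataPoint4TSDB;
import com.alibaba.datax.plugin.writer.util.TSDBUtils;

import java.util.Collections;

public class PutExample {
    public static void main(String[] args) {
        // Placeholder endpoint; replace with a real TSDB instance address.
        String address = "http://localhost:8242";
        // Build one data point: timestamp (ms), metric, tags and value.
        DataPoint4TSDB dp = new DataPoint4TSDB(
                System.currentTimeMillis(), "m",
                Collections.singletonMap("zone", "z1"), 42);
        // TSDBUtils.put posts the JSON-serialized point to {address}/api/put.
        boolean ok = TSDBUtils.put(address, dp);
        System.out.println("put succeeded: " + ok);
    }
}
```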
## 3 Features
### 3.1 Configuration Example
* A job that synchronizes data from an OpenTSDB database into TSDB:
```json
{
"job": {
"content": [
{
"reader": {
"name": "opentsdbreader",
"parameter": {
"endpoint": "http://localhost:4242",
"column": [
"m"
],
"startTime": "2019-01-01 00:00:00",
"endTime": "2019-01-01 03:00:00"
}
},
"writer": {
"name": "tsdbhttpwriter",
"parameter": {
"endpoint": "http://localhost:8242"
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
### 3.2 Parameter Description
* **name**
* Description: the name of this plugin
* Required: yes
* Default: tsdbwriter
* **parameter**
* **endpoint**
* Description: the HTTP endpoint of the TSDB instance
* Required: yes
* Format: http://IP:Port
* Default: none
* **batchSize**
* Description: the number of data points written per batch
* Required: no
* Format: int, must be greater than 0
* Default: 100
* **maxRetryTime**
* Description: the number of retries after a write failure
* Required: no
* Format: int, must be greater than 1
* Default: 3
* **ignoreWriteError**
* Description: if set to true, write errors are ignored and writing continues; otherwise the write task is terminated when writing still fails after multiple retries
* Required: no
* Format: bool
* Default: false
### 3.3 Type Conversion
| DataX Internal Type | TSDB Data Type |
| ------------------- | ------------------------------------------------------------------------------- |
| String | A serialized TSDB data point string, including timestamp, metric, tags and value |
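For illustration, each column handed to the writer is expected to already be such a serialized data point. A sketch of what that string looks like, using the `DataPoint4TSDB` class from this PR (the timestamp, tag and value are made up):
```java
import com.alibaba.datax.plugin.writer.conn.DataPoint4TSDB;

import java.util.Collections;

public class SerializedPointExample {
    public static void main(String[] args) {
        DataPoint4TSDB dp = new DataPoint4TSDB(
                1546272000000L, "m",
                Collections.singletonMap("zone", "z1"), 42);
        // DataPoint4TSDB.toString() serializes the point with fastjson, e.g.:
        // {"metric":"m","tags":{"zone":"z1"},"timestamp":1546272000000,"value":42}
        System.out.println(dp.toString());
    }
}
```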
## 4 Performance Report
### 4.1 Environment
#### 4.1.1 Data Characteristics
Described in terms of metric, time series, value, and collection interval:
##### metric
A single fixed metric named `m`.
##### tagkv
The first four tag key-value pairs are fully permuted, producing `10 * 20 * 100 * 100 = 2000000` time series; the last tag, ip, is auto-incremented from 1 so that each of the 2000000 time series gets its own value.
| **tag_k** | **tag_v** |
| --------- | ------------- |
| zone | z1~z10 |
| cluster | c1~c20 |
| group | g1~g100 |
| app | a1~a100 |
| ip | ip1~ip2000000 |
##### value
Values are random numbers in the range [1, 100].
##### interval
The collection interval is 10 seconds and ingestion lasts 3 hours, for a total of `3 * 60 * 60 / 10 * 2000000 = 2,160,000,000` data points.
#### 4.1.2 Machine Specifications
TSDB Writer machine: 64C256G
HBase machines: 8C16G * 5
#### 4.1.3 DataX JVM Parameters
"-Xms4096m -Xmx4096m"
### 4.2 Test Report
| Channels | DataX Throughput (Rec/s) | DataX Traffic (MB/s) |
| ------ | ------------------ | ----------------- |
| 1 | 129753 | 15.45 |
| 2 | 284953 | 33.70 |
| 3 | 385868 | 45.71 |
## 5 Constraints and Limitations
### 5.1 Only TSDB 2.4.x and above are currently supported
Compatibility with other versions is not guaranteed for now.
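A minimal sketch of how compatibility could be checked up front with the `TSDBConnection` class from this PR (the endpoint is a placeholder); `isSupported()` compares the instance's reported version against the supported prefixes, currently 2.4.1 and 2.4.2:
```java
import com.alibaba.datax.plugin.writer.conn.TSDBConnection;

public class VersionCheckExample {
    public static void main(String[] args) {
        // Placeholder endpoint; replace with a real TSDB instance address.
        TSDBConnection conn = new TSDBConnection("http://localhost:8242");
        // version() calls {address}/api/version; isSupported() checks the version prefix list.
        System.out.println("version: " + conn.version());
        System.out.println("supported: " + conn.isSupported());
    }
}
```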
## 6 FAQ

136
tsdbwriter/pom.xml Normal file
View File

@ -0,0 +1,136 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-all</artifactId>
<version>0.0.1-SNAPSHOT</version>
</parent>
<artifactId>tsdbwriter</artifactId>
<name>tsdbwriter</name>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- common -->
<commons-lang3.version>3.3.2</commons-lang3.version>
<!-- http -->
<httpclient.version>4.4</httpclient.version>
<commons-io.version>2.4</commons-io.version>
<!-- json -->
<fastjson.version>1.2.28</fastjson.version>
<!-- test -->
<junit4.version>4.12</junit4.version>
</properties>
<dependencies>
<dependency>
<groupId>com.alibaba.datax</groupId>
<artifactId>datax-common</artifactId>
<version>${datax-project-version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-log4j12</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>fastjson</artifactId>
<groupId>com.alibaba</groupId>
</exclusion>
<exclusion>
<artifactId>commons-math3</artifactId>
<groupId>org.apache.commons</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</dependency>
<!-- common -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${commons-lang3.version}</version>
</dependency>
<!-- http -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>${commons-io.version}</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- json -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- test -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit4.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- compiler plugin -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>${project-sourceEncoding}</encoding>
</configuration>
</plugin>
<!-- assembly plugin -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/package.xml</descriptor>
</descriptors>
<finalName>datax</finalName>
</configuration>
<executions>
<execution>
<id>dwzip</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,35 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<id></id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>src/main/resources</directory>
<includes>
<include>plugin.json</include>
<include>plugin_job_template.json</include>
</includes>
<outputDirectory>plugin/writer/tsdbwriter</outputDirectory>
</fileSet>
<fileSet>
<directory>target/</directory>
<includes>
<include>tsdbwriter-0.0.1-SNAPSHOT.jar</include>
</includes>
<outputDirectory>plugin/writer/tsdbwriter</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>plugin/writer/tsdbwriter/libs</outputDirectory>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,85 @@
package com.alibaba.datax.plugin.writer.conn;
import com.alibaba.datax.common.plugin.RecordSender;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: Connection for TSDB-like databases
*
* @author Benedict Jin
* @since 2019-03-29
*/
public interface Connection4TSDB {
/**
* Get the address of Database.
*
* @return host + port
*/
String address();
/**
* Get the version of Database.
*
* @return version
*/
String version();
/**
* Get these configurations.
*
* @return configs
*/
String config();
/**
* Get the list of supported version.
*
* @return version list
*/
String[] getSupportVersionPrefix();
/**
* Send data points by metric & start time & end time.
*
* @param metric metric
* @param start startTime
* @param end endTime
* @param recordSender sender
*/
void sendDPs(String metric, Long start, Long end, RecordSender recordSender) throws Exception;
/**
* Put data point.
*
* @param dp data point
* @return whether the data point is written successfully
*/
boolean put(DataPoint4TSDB dp);
/**
* Put data points.
*
* @param dps data points
* @return whether the data point is written successfully
*/
boolean put(List<DataPoint4TSDB> dps);
/**
* Put data points.
*
* @param dps data points
* @return whether the data point is written successfully
*/
boolean put(String dps);
/**
* Whether current version is supported.
*
* @return true: supported; false: not yet!
*/
boolean isSupported();
}

View File

@ -0,0 +1,68 @@
package com.alibaba.datax.plugin.writer.conn;
import com.alibaba.fastjson.JSON;
import java.util.Map;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: DataPoint for TSDB
*
* @author Benedict Jin
* @since 2019-04-10
*/
public class DataPoint4TSDB {
private long timestamp;
private String metric;
private Map<String, String> tags;
private Object value;
public DataPoint4TSDB() {
}
public DataPoint4TSDB(long timestamp, String metric, Map<String, String> tags, Object value) {
this.timestamp = timestamp;
this.metric = metric;
this.tags = tags;
this.value = value;
}
public long getTimestamp() {
return timestamp;
}
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public String getMetric() {
return metric;
}
public void setMetric(String metric) {
this.metric = metric;
}
public Map<String, String> getTags() {
return tags;
}
public void setTags(Map<String, String> tags) {
this.tags = tags;
}
public Object getValue() {
return value;
}
public void setValue(Object value) {
this.value = value;
}
@Override
public String toString() {
return JSON.toJSONString(this);
}
}

View File

@ -0,0 +1,86 @@
package com.alibaba.datax.plugin.writer.conn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.plugin.writer.util.TSDBUtils;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Connection
*
* @author Benedict Jin
* @since 2019-03-29
*/
public class TSDBConnection implements Connection4TSDB {
private String address;
public TSDBConnection(String address) {
if (StringUtils.isBlank(address)) {
throw new RuntimeException("TSDBConnection init failed because address is blank!");
}
this.address = address;
}
@Override
public String address() {
return address;
}
@Override
public String version() {
return TSDBUtils.version(address);
}
@Override
public String config() {
return TSDBUtils.config(address);
}
@Override
public String[] getSupportVersionPrefix() {
return new String[]{"2.4.1", "2.4.2"};
}
@Override
public void sendDPs(String metric, Long start, Long end, RecordSender recordSender) {
throw new RuntimeException("Not support yet!");
}
@Override
public boolean put(DataPoint4TSDB dp) {
return TSDBUtils.put(address, dp);
}
@Override
public boolean put(List<DataPoint4TSDB> dps) {
return TSDBUtils.put(address, dps);
}
@Override
public boolean put(String dps) {
return TSDBUtils.put(address, dps);
}
@Override
public boolean isSupported() {
String versionJson = version();
if (StringUtils.isBlank(versionJson)) {
throw new RuntimeException("Cannot get the version!");
}
String version = JSON.parseObject(versionJson).getString("version");
if (StringUtils.isBlank(version)) {
return false;
}
for (String prefix : getSupportVersionPrefix()) {
if (version.startsWith(prefix)) {
return true;
}
}
return false;
}
}

View File

@ -0,0 +1,16 @@
package com.alibaba.datax.plugin.writer.tsdbwriter;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: Constant
*
* @author Benedict Jin
* @since 2019-04-18
*/
public final class Constant {
static final int DEFAULT_BATCH_SIZE = 100;
static final int DEFAULT_TRY_SIZE = 3;
static final boolean DEFAULT_IGNORE_WRITE_ERROR = false;
}

View File

@ -0,0 +1,17 @@
package com.alibaba.datax.plugin.writer.tsdbwriter;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: Key
*
* @author Benedict Jin
* @since 2019-04-18
*/
public class Key {
static final String ENDPOINT = "endpoint";
static final String BATCH_SIZE = "batchSize";
static final String MAX_RETRY_TIME = "maxRetryTime";
static final String IGNORE_WRITE_ERROR = "ignoreWriteError";
}

View File

@ -0,0 +1,171 @@
package com.alibaba.datax.plugin.writer.tsdbwriter;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.datax.plugin.writer.conn.TSDBConnection;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Http Writer
*
* @author Benedict Jin
* @since 2019-04-18
*/
@SuppressWarnings("unused")
public class TSDBWriter extends Writer {
public static class Job extends Writer.Job {
private static final Logger LOG = LoggerFactory.getLogger(Job.class);
private Configuration originalConfig;
@Override
public void init() {
this.originalConfig = super.getPluginJobConf();
String address = this.originalConfig.getString(Key.ENDPOINT);
if (StringUtils.isBlank(address)) {
throw DataXException.asDataXException(TSDBWriterErrorCode.REQUIRED_VALUE,
"The parameter [" + Key.ENDPOINT + "] is not set.");
}
Integer batchSize = this.originalConfig.getInt(Key.BATCH_SIZE);
if (batchSize == null || batchSize < 1) {
originalConfig.set(Key.BATCH_SIZE, Constant.DEFAULT_BATCH_SIZE);
LOG.info("The parameter [" + Key.BATCH_SIZE +
"] will be default value: " + Constant.DEFAULT_BATCH_SIZE);
}
Integer retrySize = this.originalConfig.getInt(Key.MAX_RETRY_TIME);
if (retrySize == null || retrySize < 0) {
originalConfig.set(Key.MAX_RETRY_TIME, Constant.DEFAULT_TRY_SIZE);
LOG.info("The parameter [" + Key.MAX_RETRY_TIME +
"] will be default value: " + Constant.DEFAULT_TRY_SIZE);
}
Boolean ignoreWriteError = this.originalConfig.getBool(Key.IGNORE_WRITE_ERROR);
if (ignoreWriteError == null) {
originalConfig.set(Key.IGNORE_WRITE_ERROR, Constant.DEFAULT_IGNORE_WRITE_ERROR);
LOG.info("The parameter [" + Key.IGNORE_WRITE_ERROR +
"] will be default value: " + Constant.DEFAULT_IGNORE_WRITE_ERROR);
}
}
@Override
public void prepare() {
}
@Override
public List<Configuration> split(int mandatoryNumber) {
ArrayList<Configuration> configurations = new ArrayList<Configuration>(mandatoryNumber);
for (int i = 0; i < mandatoryNumber; i++) {
configurations.add(this.originalConfig.clone());
}
return configurations;
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
public static class Task extends Writer.Task {
private static final Logger LOG = LoggerFactory.getLogger(Task.class);
private TSDBConnection conn;
private int batchSize;
private int retrySize;
private boolean ignoreWriteError;
@Override
public void init() {
Configuration writerSliceConfig = getPluginJobConf();
String address = writerSliceConfig.getString(Key.ENDPOINT);
this.conn = new TSDBConnection(address);
this.batchSize = writerSliceConfig.getInt(Key.BATCH_SIZE);
this.retrySize = writerSliceConfig.getInt(Key.MAX_RETRY_TIME);
this.ignoreWriteError = writerSliceConfig.getBool(Key.IGNORE_WRITE_ERROR);
}
@Override
public void prepare() {
}
@Override
public void startWrite(RecordReceiver recordReceiver) {
try {
Record lastRecord = null;
Record record;
int count = 0;
StringBuilder dps = new StringBuilder();
while ((record = recordReceiver.getFromReader()) != null) {
final int recordLength = record.getColumnNumber();
for (int i = 0; i < recordLength; i++) {
dps.append(record.getColumn(i).asString());
dps.append(",");
count++;
if (count == batchSize) {
count = 0;
batchPut(record, "[" + dps.substring(0, dps.length() - 1) + "]");
dps = new StringBuilder();
}
}
lastRecord = record;
}
if (StringUtils.isNotBlank(dps.toString())) {
batchPut(lastRecord, "[" + dps.substring(0, dps.length() - 1) + "]");
}
} catch (Exception e) {
throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION, e);
}
}
private void batchPut(final Record record, final String dps) {
try {
RetryUtil.executeWithRetry(new Callable<Integer>() {
@Override
public Integer call() {
if (!conn.put(dps)) {
getTaskPluginCollector().collectDirtyRecord(record, "Put data points failed!");
throw DataXException.asDataXException(TSDBWriterErrorCode.RUNTIME_EXCEPTION,
"Put data points failed!");
}
return 0;
}
}, retrySize, 60000L, true);
} catch (Exception e) {
if (ignoreWriteError) {
LOG.warn("Ignore write exceptions and continue writing.");
} else {
throw DataXException.asDataXException(TSDBWriterErrorCode.RETRY_WRITER_EXCEPTION, e);
}
}
}
@Override
public void post() {
}
@Override
public void destroy() {
}
}
}

View File

@ -0,0 +1,41 @@
package com.alibaba.datax.plugin.writer.tsdbwriter;
import com.alibaba.datax.common.spi.ErrorCode;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Http Writer Error Code
*
* @author Benedict Jin
* @since 2019-04-18
*/
public enum TSDBWriterErrorCode implements ErrorCode {
REQUIRED_VALUE("TSDBWriter-00", "Missing the necessary value"),
RUNTIME_EXCEPTION("TSDBWriter-01", "Runtime exception"),
RETRY_WRITER_EXCEPTION("TSDBWriter-02", "After repeated attempts, the write still fails");
private final String code;
private final String description;
TSDBWriterErrorCode(String code, String description) {
this.code = code;
this.description = description;
}
@Override
public String getCode() {
return this.code;
}
@Override
public String getDescription() {
return this.description;
}
@Override
public String toString() {
return String.format("Code:[%s], Description:[%s]. ", this.code, this.description);
}
}

View File

@ -0,0 +1,68 @@
package com.alibaba.datax.plugin.writer.util;
import com.alibaba.fastjson.JSON;
import org.apache.http.client.fluent.Content;
import org.apache.http.client.fluent.Request;
import org.apache.http.entity.ContentType;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: HttpUtils
*
* @author Benedict Jin
* @since 2019-03-29
*/
public final class HttpUtils {
public final static Charset UTF_8 = Charset.forName("UTF-8");
public final static int CONNECT_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60);
public final static int SOCKET_TIMEOUT_DEFAULT_IN_MILL = (int) TimeUnit.SECONDS.toMillis(60);
private HttpUtils() {
}
public static String get(String url) throws Exception {
Content content = Request.Get(url)
.connectTimeout(CONNECT_TIMEOUT_DEFAULT_IN_MILL)
.socketTimeout(SOCKET_TIMEOUT_DEFAULT_IN_MILL)
.execute()
.returnContent();
if (content == null) {
return null;
}
return content.asString(UTF_8);
}
public static String post(String url, Map<String, Object> params) throws Exception {
return post(url, JSON.toJSONString(params), CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL);
}
public static String post(String url, String params) throws Exception {
return post(url, params, CONNECT_TIMEOUT_DEFAULT_IN_MILL, SOCKET_TIMEOUT_DEFAULT_IN_MILL);
}
public static String post(String url, Map<String, Object> params,
int connectTimeoutInMill, int socketTimeoutInMill) throws Exception {
return post(url, JSON.toJSONString(params), connectTimeoutInMill, socketTimeoutInMill);
}
public static String post(String url, String params,
int connectTimeoutInMill, int socketTimeoutInMill) throws Exception {
Content content = Request.Post(url)
.connectTimeout(connectTimeoutInMill)
.socketTimeout(socketTimeoutInMill)
.addHeader("Content-Type", "application/json")
.bodyString(params, ContentType.APPLICATION_JSON)
.execute()
.returnContent();
if (content == null) {
return null;
}
return content.asString(UTF_8);
}
}

View File

@ -0,0 +1,72 @@
package com.alibaba.datax.plugin.writer.util;
import com.alibaba.datax.plugin.writer.conn.DataPoint4TSDB;
import com.alibaba.fastjson.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Utils
*
* @author Benedict Jin
* @since 2019-03-29
*/
public final class TSDBUtils {
private static final Logger LOG = LoggerFactory.getLogger(TSDBUtils.class);
private TSDBUtils() {
}
public static String version(String address) {
String url = String.format("%s/api/version", address);
String rsp;
try {
rsp = HttpUtils.get(url);
} catch (Exception e) {
throw new RuntimeException(e);
}
return rsp;
}
public static String config(String address) {
String url = String.format("%s/api/config", address);
String rsp;
try {
rsp = HttpUtils.get(url);
} catch (Exception e) {
throw new RuntimeException(e);
}
return rsp;
}
public static boolean put(String address, List<DataPoint4TSDB> dps) {
return put(address, JSON.toJSON(dps));
}
public static boolean put(String address, DataPoint4TSDB dp) {
return put(address, JSON.toJSON(dp));
}
private static boolean put(String address, Object o) {
return put(address, o.toString());
}
public static boolean put(String address, String s) {
String url = String.format("%s/api/put", address);
String rsp;
try {
rsp = HttpUtils.post(url, s);
// If successful, the returned content should be null.
assert rsp == null;
} catch (Exception e) {
LOG.error("Address: {}, DataPoints: {}", url, s);
throw new RuntimeException(e);
}
return true;
}
}

View File

@ -0,0 +1,10 @@
{
"name": "tsdbwriter",
"class": "com.alibaba.datax.plugin.writer.tsdbwriter.TSDBWriter",
"description": {
"useScene": "往 TSDB 中摄入数据点",
"mechanism": "调用 TSDB 的 /api/put 接口,实现数据点的写入",
"warn": ""
},
"developer": "Benedict Jin"
}

View File

@ -0,0 +1,6 @@
{
"name": "tsdbwriter",
"parameter": {
"endpoint": "http://localhost:8242"
}
}

View File

@ -0,0 +1,30 @@
package com.alibaba.datax.plugin.writer.conn;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDBConnection Test
*
* @author Benedict Jin
* @since 2019-03-29
*/
@Ignore
public class TSDBConnectionTest {
private static final String TSDB_ADDRESS = "http://localhost:8240";
@Test
public void testVersion() {
String version = new TSDBConnection(TSDB_ADDRESS).version();
Assert.assertNotNull(version);
}
@Test
public void testIsSupported() {
Assert.assertTrue(new TSDBConnection(TSDB_ADDRESS).isSupported());
}
}

View File

@ -0,0 +1,18 @@
package com.alibaba.datax.plugin.writer.util;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: Const
*
* @author Benedict Jin
* @since 2019-03-29
*/
final class Const {
private Const() {
}
static final String OPENTSDB_ADDRESS = "http://localhost:8242";
static final String TSDB_ADDRESS = "http://localhost:8240";
}

View File

@ -0,0 +1,39 @@
package com.alibaba.datax.plugin.writer.util;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: HttpUtils Test
*
* @author Benedict Jin
* @since 2019-03-29
*/
@Ignore
public class HttpUtilsTest {
@Test
public void testSimpleCase() throws Exception {
String url = "https://httpbin.org/post";
Map<String, Object> params = new HashMap<String, Object>();
params.put("foo", "bar");
String rsp = HttpUtils.post(url, params);
System.out.println(rsp);
Assert.assertNotNull(rsp);
}
@Test
public void testGet() throws Exception {
String url = String.format("%s/api/version", Const.OPENTSDB_ADDRESS);
String rsp = HttpUtils.get(url);
System.out.println(rsp);
Assert.assertNotNull(rsp);
}
}

View File

@ -0,0 +1,28 @@
package com.alibaba.datax.plugin.writer.util;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
/**
* Copyright @ 2019 alibaba.com
* All rights reserved.
* Function: TSDB Test
*
* @author Benedict Jin
* @since 2019-04-11
*/
@Ignore
public class TSDBTest {
@Test
public void testVersion() {
String version = TSDBUtils.version(Const.TSDB_ADDRESS);
Assert.assertNotNull(version);
System.out.println(version);
version = TSDBUtils.version(Const.OPENTSDB_ADDRESS);
Assert.assertNotNull(version);
System.out.println(version);
}
}