mirror of
https://github.com/alibaba/DataX.git
synced 2025-05-02 04:59:51 +08:00
support csv format import
support csv format import
This commit is contained in:
parent
79db76dd8f
commit
6c41fe832b
@ -1,5 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
||||||
<!--
|
<!--
|
||||||
Licensed to the Apache Software Foundation (ASF) under one
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
or more contributor license agreements. See the NOTICE file
|
or more contributor license agreements. See the NOTICE file
|
||||||
@ -18,12 +17,10 @@ KIND, either express or implied. See the License for the
|
|||||||
specific language governing permissions and limitations
|
specific language governing permissions and limitations
|
||||||
under the License.
|
under the License.
|
||||||
-->
|
-->
|
||||||
|
<assembly xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
<assembly
|
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
||||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
|
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<id/>
|
||||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
|
|
||||||
<id></id>
|
|
||||||
<formats>
|
<formats>
|
||||||
<format>dir</format>
|
<format>dir</format>
|
||||||
</formats>
|
</formats>
|
||||||
@ -45,7 +42,6 @@ under the License.
|
|||||||
<outputDirectory>plugin/writer/doriswriter</outputDirectory>
|
<outputDirectory>plugin/writer/doriswriter</outputDirectory>
|
||||||
</fileSet>
|
</fileSet>
|
||||||
</fileSets>
|
</fileSets>
|
||||||
|
|
||||||
<dependencySets>
|
<dependencySets>
|
||||||
<dependencySet>
|
<dependencySet>
|
||||||
<useProjectArtifact>false</useProjectArtifact>
|
<useProjectArtifact>false</useProjectArtifact>
|
||||||
|
@ -22,16 +22,17 @@ import com.alibaba.datax.common.element.DateColumn;
|
|||||||
import com.alibaba.datax.common.element.Record;
|
import com.alibaba.datax.common.element.Record;
|
||||||
import org.apache.commons.lang3.time.DateFormatUtils;
|
import org.apache.commons.lang3.time.DateFormatUtils;
|
||||||
|
|
||||||
|
import java.time.ZoneId;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
|
|
||||||
public abstract class DorisCodec {
|
public abstract class DorisCodec {
|
||||||
protected static String timeZone = "GMT+8";
|
protected final TimeZone timeZone;
|
||||||
protected static TimeZone timeZoner = TimeZone.getTimeZone(timeZone);
|
|
||||||
protected final List<String> fieldNames;
|
protected final List<String> fieldNames;
|
||||||
|
|
||||||
public DorisCodec(final List<String> fieldNames) {
|
public DorisCodec(final List<String> fieldNames, final String timeZone) {
|
||||||
this.fieldNames = fieldNames;
|
this.fieldNames = fieldNames;
|
||||||
|
this.timeZone = TimeZone.getTimeZone(ZoneId.of(timeZone));
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract String serialize(Record row);
|
public abstract String serialize(Record row);
|
||||||
@ -60,9 +61,9 @@ public abstract class DorisCodec {
|
|||||||
final DateColumn.DateType dateType = ((DateColumn) col).getSubType();
|
final DateColumn.DateType dateType = ((DateColumn) col).getSubType();
|
||||||
switch (dateType) {
|
switch (dateType) {
|
||||||
case DATE:
|
case DATE:
|
||||||
return DateFormatUtils.format(col.asDate(), "yyyy-MM-dd", timeZoner);
|
return DateFormatUtils.format(col.asDate(), "yyyy-MM-dd", timeZone);
|
||||||
case DATETIME:
|
case DATETIME:
|
||||||
return DateFormatUtils.format(col.asDate(), "yyyy-MM-dd HH:mm:ss", timeZoner);
|
return DateFormatUtils.format(col.asDate(), "yyyy-MM-dd HH:mm:ss", timeZone);
|
||||||
default:
|
default:
|
||||||
return col.asString();
|
return col.asString();
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,49 @@
|
|||||||
|
// Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing,
|
||||||
|
// software distributed under the License is distributed on an
|
||||||
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
// KIND, either express or implied. See the License for the
|
||||||
|
// specific language governing permissions and limitations
|
||||||
|
// under the License.
|
||||||
|
|
||||||
|
package com.alibaba.datax.plugin.writer.doriswriter;
|
||||||
|
|
||||||
|
import com.alibaba.datax.common.element.Record;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class DorisCsvCodec extends DorisCodec {
|
||||||
|
|
||||||
|
private final String columnSeparator;
|
||||||
|
|
||||||
|
public DorisCsvCodec(final List<String> fieldNames, String columnSeparator, String timeZone) {
|
||||||
|
super(fieldNames, timeZone);
|
||||||
|
this.columnSeparator = EscapeHandler.escapeString(columnSeparator);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String serialize(final Record row) {
|
||||||
|
if (null == this.fieldNames) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
List<String> list = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < this.fieldNames.size(); i++) {
|
||||||
|
Object value = this.convertColumn(row.getColumn(i));
|
||||||
|
list.add(value != null ? value.toString() : "\\N");
|
||||||
|
}
|
||||||
|
|
||||||
|
return String.join(columnSeparator, list);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -17,15 +17,20 @@
|
|||||||
|
|
||||||
package com.alibaba.datax.plugin.writer.doriswriter;
|
package com.alibaba.datax.plugin.writer.doriswriter;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
// A wrapper class to hold a batch of loaded rows
|
// A wrapper class to hold a batch of loaded rows
|
||||||
public class DorisFlushBatch {
|
public class DorisFlushBatch {
|
||||||
private String lineDelimiter;
|
private final String format;
|
||||||
|
private final String lineDelimiter;
|
||||||
private String label;
|
private String label;
|
||||||
private long rows = 0;
|
private long byteSize = 0;
|
||||||
private StringBuilder data = new StringBuilder();
|
private List<String> data = new ArrayList<>();
|
||||||
|
|
||||||
public DorisFlushBatch(String lineDelimiter) {
|
public DorisFlushBatch(String lineDelimiter, String format) {
|
||||||
this.lineDelimiter = lineDelimiter;
|
this.lineDelimiter = EscapeHandler.escapeString(lineDelimiter);
|
||||||
|
this.format = format;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLabel(String label) {
|
public void setLabel(String label) {
|
||||||
@ -37,22 +42,25 @@ public class DorisFlushBatch {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public long getRows() {
|
public long getRows() {
|
||||||
return rows;
|
return data.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void putData(String row) {
|
public void putData(String row) {
|
||||||
if (data.length() > 0) {
|
data.add(row);
|
||||||
data.append(lineDelimiter);
|
byteSize += row.getBytes().length;
|
||||||
}
|
|
||||||
data.append(row);
|
|
||||||
rows++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public StringBuilder getData() {
|
public String getData() {
|
||||||
return data;
|
String result;
|
||||||
|
if (Key.DEFAULT_FORMAT_CSV.equalsIgnoreCase(format)) {
|
||||||
|
result = String.join(this.lineDelimiter, data);
|
||||||
|
} else {
|
||||||
|
result = data.toString();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getSize() {
|
public long getSize() {
|
||||||
return data.length();
|
return byteSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,8 +28,8 @@ import java.util.Map;
|
|||||||
public class DorisJsonCodec extends DorisCodec {
|
public class DorisJsonCodec extends DorisCodec {
|
||||||
private Map<String, Object> rowMap;
|
private Map<String, Object> rowMap;
|
||||||
|
|
||||||
public DorisJsonCodec(final List<String> fieldNames) {
|
public DorisJsonCodec(final List<String> fieldNames, final String timeZone) {
|
||||||
super(fieldNames);
|
super(fieldNames, timeZone);
|
||||||
this.rowMap = new HashMap<>(this.fieldNames.size());
|
this.rowMap = new HashMap<>(this.fieldNames.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,6 +48,7 @@ public class DorisWriter extends Writer {
|
|||||||
private Key keys;
|
private Key keys;
|
||||||
private DorisCodec rowCodec;
|
private DorisCodec rowCodec;
|
||||||
private int batchNum = 0;
|
private int batchNum = 0;
|
||||||
|
private String labelPrefix;
|
||||||
|
|
||||||
public Task() {
|
public Task() {
|
||||||
}
|
}
|
||||||
@ -55,7 +56,12 @@ public class DorisWriter extends Writer {
|
|||||||
@Override
|
@Override
|
||||||
public void init() {
|
public void init() {
|
||||||
this.keys = new Key(super.getPluginJobConf());
|
this.keys = new Key(super.getPluginJobConf());
|
||||||
this.rowCodec = new DorisJsonCodec(this.keys.getColumns());
|
if (Key.DEFAULT_FORMAT_CSV.equalsIgnoreCase(this.keys.getFormat())) {
|
||||||
|
this.rowCodec = new DorisCsvCodec(this.keys.getColumns(), this.keys.getColumnSeparator(), this.keys.getTimeZone());
|
||||||
|
} else {
|
||||||
|
this.rowCodec = new DorisJsonCodec(this.keys.getColumns(), this.keys.getTimeZone());
|
||||||
|
}
|
||||||
|
this.labelPrefix = this.keys.getLabelPrefix() + UUID.randomUUID();
|
||||||
this.dorisWriterEmitter = new DorisWriterEmitter(keys);
|
this.dorisWriterEmitter = new DorisWriterEmitter(keys);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,7 +72,7 @@ public class DorisWriter extends Writer {
|
|||||||
@Override
|
@Override
|
||||||
public void startWrite(RecordReceiver recordReceiver) {
|
public void startWrite(RecordReceiver recordReceiver) {
|
||||||
String lineDelimiter = this.keys.getLineDelimiter();
|
String lineDelimiter = this.keys.getLineDelimiter();
|
||||||
DorisFlushBatch flushBatch = new DorisFlushBatch(lineDelimiter);
|
DorisFlushBatch flushBatch = new DorisFlushBatch(lineDelimiter, this.keys.getFormat());
|
||||||
long batchCount = 0;
|
long batchCount = 0;
|
||||||
long batchByteSize = 0L;
|
long batchByteSize = 0L;
|
||||||
Record record;
|
Record record;
|
||||||
@ -93,7 +99,7 @@ public class DorisWriter extends Writer {
|
|||||||
// clear buffer
|
// clear buffer
|
||||||
batchCount = 0;
|
batchCount = 0;
|
||||||
batchByteSize = 0L;
|
batchByteSize = 0L;
|
||||||
flushBatch = new DorisFlushBatch(lineDelimiter);
|
flushBatch = new DorisFlushBatch(lineDelimiter, this.keys.getFormat());
|
||||||
}
|
}
|
||||||
} // end of while
|
} // end of while
|
||||||
|
|
||||||
@ -103,14 +109,12 @@ public class DorisWriter extends Writer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void flush(DorisFlushBatch flushBatch) {
|
private void flush(DorisFlushBatch flushBatch) {
|
||||||
final String label = getStreamLoadLabel();
|
flushBatch.setLabel(getStreamLoadLabel());
|
||||||
flushBatch.setLabel(label);
|
dorisWriterEmitter.emit(flushBatch);
|
||||||
dorisWriterEmitter.doStreamLoad(flushBatch);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getStreamLoadLabel() {
|
private String getStreamLoadLabel() {
|
||||||
String labelPrefix = this.keys.getLabelPrefix();
|
return labelPrefix + "_" + (batchNum++);
|
||||||
return labelPrefix + UUID.randomUUID().toString() + "_" + (batchNum++);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -23,6 +23,7 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
|
|||||||
import com.alibaba.fastjson.JSON;
|
import com.alibaba.fastjson.JSON;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.http.HttpEntity;
|
import org.apache.http.HttpEntity;
|
||||||
import org.apache.http.HttpHeaders;
|
import org.apache.http.HttpHeaders;
|
||||||
import org.apache.http.HttpRequest;
|
import org.apache.http.HttpRequest;
|
||||||
@ -47,12 +48,11 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.HttpURLConnection;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URL;
|
|
||||||
import java.util.Base64;
|
import java.util.Base64;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
// Used to load batch of rows to Doris using stream load
|
// Used to load batch of rows to Doris using stream load
|
||||||
public class DorisWriterEmitter {
|
public class DorisWriterEmitter {
|
||||||
@ -88,13 +88,28 @@ public class DorisWriterEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void emit(final DorisFlushBatch flushData) {
|
||||||
|
String host = this.getAvailableHost();
|
||||||
|
for (int i = 0; i <= this.keys.getMaxRetries(); i++) {
|
||||||
|
try {
|
||||||
|
doStreamLoad(flushData, host);
|
||||||
|
return;
|
||||||
|
} catch (DataXException ex) {
|
||||||
|
if (i >= this.keys.getMaxRetries()) {
|
||||||
|
throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, ex);
|
||||||
|
}
|
||||||
|
LOG.error("StreamLoad error, switch host {} and retry: ", host, ex);
|
||||||
|
host = this.getAvailableHost();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* execute doris stream load
|
* execute doris stream load
|
||||||
*/
|
*/
|
||||||
public void doStreamLoad(final DorisFlushBatch flushData) {
|
private void doStreamLoad(final DorisFlushBatch flushData, String host) {
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
final String host = this.getAvailableHost();
|
if (StringUtils.isEmpty(host)) {
|
||||||
if (null == host) {
|
|
||||||
throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, "None of the load url can be connected.");
|
throw DataXException.asDataXException(DBUtilErrorCode.WRITE_DATA_ERROR, "None of the load url can be connected.");
|
||||||
}
|
}
|
||||||
final String loadUrl = host + "/api/" + this.keys.getDatabase() + "/" + this.keys.getTable() + "/_stream_load";
|
final String loadUrl = host + "/api/" + this.keys.getDatabase() + "/" + this.keys.getTable() + "/_stream_load";
|
||||||
@ -130,28 +145,12 @@ public class DorisWriterEmitter {
|
|||||||
while (this.hostPos < targetHosts.size()) {
|
while (this.hostPos < targetHosts.size()) {
|
||||||
final String host = targetHosts.get(hostPos);
|
final String host = targetHosts.get(hostPos);
|
||||||
++this.hostPos;
|
++this.hostPos;
|
||||||
if (this.tryHttpConnection(host)) {
|
return host;
|
||||||
return host;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean tryHttpConnection(final String host) {
|
|
||||||
try {
|
|
||||||
final URL url = new URL(host);
|
|
||||||
final HttpURLConnection co = (HttpURLConnection) url.openConnection();
|
|
||||||
co.setConnectTimeout(1000);
|
|
||||||
co.connect();
|
|
||||||
co.disconnect();
|
|
||||||
return true;
|
|
||||||
} catch (Exception e) {
|
|
||||||
LOG.warn("Failed to connect to address:{} , Exception ={}", host, e);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, Object> doHttpPut(final String loadUrl, final DorisFlushBatch flushBatch) throws IOException {
|
private Map<String, Object> doHttpPut(final String loadUrl, final DorisFlushBatch flushBatch) throws IOException {
|
||||||
LOG.info(String.format("Executing stream load to: '%s', size: %s, rows: %d",
|
LOG.info(String.format("Executing stream load to: '%s', size: %s, rows: %d",
|
||||||
loadUrl, flushBatch.getSize(), flushBatch.getRows()));
|
loadUrl, flushBatch.getSize(), flushBatch.getRows()));
|
||||||
@ -181,10 +180,12 @@ public class DorisWriterEmitter {
|
|||||||
final HttpPut httpPut = new HttpPut(loadUrl);
|
final HttpPut httpPut = new HttpPut(loadUrl);
|
||||||
final List<String> cols = this.keys.getColumns();
|
final List<String> cols = this.keys.getColumns();
|
||||||
if (null != cols && !cols.isEmpty()) {
|
if (null != cols && !cols.isEmpty()) {
|
||||||
httpPut.setHeader("columns", String.join(",", cols));
|
httpPut.setHeader("columns", String.join(",", cols.stream().map(item -> String.format("`%s`", item.trim().replace("`", ""))).collect(Collectors.toList())));
|
||||||
}
|
}
|
||||||
|
|
||||||
// put loadProps to http header
|
//set default header
|
||||||
|
setDefaultHeader(httpPut);
|
||||||
|
// put custom loadProps to http header
|
||||||
final Map<String, Object> loadProps = this.keys.getLoadProps();
|
final Map<String, Object> loadProps = this.keys.getLoadProps();
|
||||||
if (null != loadProps) {
|
if (null != loadProps) {
|
||||||
for (final Map.Entry<String, Object> entry : loadProps.entrySet()) {
|
for (final Map.Entry<String, Object> entry : loadProps.entrySet()) {
|
||||||
@ -196,14 +197,9 @@ public class DorisWriterEmitter {
|
|||||||
httpPut.setHeader(HttpHeaders.EXPECT, "100-continue");
|
httpPut.setHeader(HttpHeaders.EXPECT, "100-continue");
|
||||||
httpPut.setHeader(HttpHeaders.AUTHORIZATION, this.getBasicAuthHeader(this.keys.getUsername(), this.keys.getPassword()));
|
httpPut.setHeader(HttpHeaders.AUTHORIZATION, this.getBasicAuthHeader(this.keys.getUsername(), this.keys.getPassword()));
|
||||||
httpPut.setHeader("label", flushBatch.getLabel());
|
httpPut.setHeader("label", flushBatch.getLabel());
|
||||||
httpPut.setHeader("format", "json");
|
|
||||||
httpPut.setHeader("line_delimiter", this.keys.getLineDelimiterDesc());
|
|
||||||
httpPut.setHeader("read_json_by_line", "true");
|
|
||||||
httpPut.setHeader("fuzzy_parse", "true");
|
|
||||||
|
|
||||||
// Use ByteArrayEntity instead of StringEntity to handle Chinese correctly
|
// Use ByteArrayEntity instead of StringEntity to handle Chinese correctly
|
||||||
httpPut.setEntity(new ByteArrayEntity(flushBatch.getData().toString().getBytes()));
|
httpPut.setEntity(new ByteArrayEntity(flushBatch.getData().getBytes()));
|
||||||
|
|
||||||
httpPut.setConfig(requestConfig);
|
httpPut.setConfig(requestConfig);
|
||||||
|
|
||||||
try (final CloseableHttpResponse resp = httpclient.execute(httpPut)) {
|
try (final CloseableHttpResponse resp = httpclient.execute(httpPut)) {
|
||||||
@ -222,6 +218,21 @@ public class DorisWriterEmitter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set default request headers in json and csv formats.
|
||||||
|
* csv default delimiters are \x01 and \x02
|
||||||
|
*/
|
||||||
|
private void setDefaultHeader(HttpPut httpPut) {
|
||||||
|
if (Key.DEFAULT_FORMAT_CSV.equalsIgnoreCase(this.keys.getFormat())) {
|
||||||
|
httpPut.setHeader("line_delimiter", this.keys.getLineDelimiter());
|
||||||
|
httpPut.setHeader("column_separator", this.keys.getColumnSeparator());
|
||||||
|
} else {
|
||||||
|
httpPut.setHeader("format", "json");
|
||||||
|
httpPut.setHeader("strip_outer_array", "true");
|
||||||
|
httpPut.setHeader("fuzzy_parse", "true");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private String getBasicAuthHeader(final String username, final String password) {
|
private String getBasicAuthHeader(final String username, final String password) {
|
||||||
final String auth = username + ":" + password;
|
final String auth = username + ":" + password;
|
||||||
final byte[] encodedAuth = Base64.getEncoder().encode(auth.getBytes());
|
final byte[] encodedAuth = Base64.getEncoder().encode(auth.getBytes());
|
||||||
@ -231,44 +242,51 @@ public class DorisWriterEmitter {
|
|||||||
// for test
|
// for test
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
String json = "{\n" +
|
String json = "{\n" +
|
||||||
" \"feLoadUrl\": [\"127.0.0.1:8030\"],\n" +
|
" \"beLoadUrl\": [\"127.0.0.1:8040\"],\n" +
|
||||||
" \"column\": [\"k1\", \"k2\", \"k3\"],\n" +
|
" \"column\": [\"name\", \"age\", \"cdate\", \"cdatetime\"],\n" +
|
||||||
" \"database\": \"db1\",\n" +
|
" \"database\": \"test\",\n" +
|
||||||
" \"jdbcUrl\": \"jdbc:mysql://127.0.0.1:9030/\",\n" +
|
" \"jdbcUrl\": \"jdbc:mysql://127.0.0.1:9030/\",\n" +
|
||||||
" \"loadProps\": {\n" +
|
" \"loadProps\": {\n" +
|
||||||
|
// " \"line_delimiter\": \"\\\\x03\",\n" +
|
||||||
|
// " \"column_separator\": \"\\\\x04\",\n" +
|
||||||
" },\n" +
|
" },\n" +
|
||||||
" \"password\": \"12345\",\n" +
|
" \"format\": \"csv\",\n" +
|
||||||
|
" \"password\": \"\",\n" +
|
||||||
" \"postSql\": [],\n" +
|
" \"postSql\": [],\n" +
|
||||||
" \"preSql\": [],\n" +
|
" \"preSql\": [],\n" +
|
||||||
" \"table\": \"t1\",\n" +
|
" \"table\": \"test_datax\",\n" +
|
||||||
|
" \"maxRetries\": \"0\",\n" +
|
||||||
" \"username\": \"root\"\n" +
|
" \"username\": \"root\"\n" +
|
||||||
" }";
|
" }";
|
||||||
Configuration configuration = Configuration.from(json);
|
Configuration configuration = Configuration.from(json);
|
||||||
Key key = new Key(configuration);
|
Key key = new Key(configuration);
|
||||||
|
|
||||||
DorisWriterEmitter emitter = new DorisWriterEmitter(key);
|
DorisWriterEmitter emitter = new DorisWriterEmitter(key);
|
||||||
DorisFlushBatch flushBatch = new DorisFlushBatch("\n");
|
DorisFlushBatch flushBatch = new DorisFlushBatch(key.getLineDelimiter(), key.getFormat());
|
||||||
flushBatch.setLabel("test4");
|
|
||||||
Map<String, String> row1 = Maps.newHashMap();
|
Map<String, String> row1 = Maps.newHashMap();
|
||||||
row1.put("k1", "2021-02-02");
|
row1.put("cdate", "2021-02-02");
|
||||||
row1.put("k2", "2021-02-02 00:00:00");
|
row1.put("cdatetime", "2021-02-02 00:00:00");
|
||||||
row1.put("k3", "3");
|
row1.put("name", "zhangsan");
|
||||||
String rowStr1 = JSON.toJSONString(row1);
|
row1.put("age", "18");
|
||||||
System.out.println("rows1: " + rowStr1);
|
|
||||||
flushBatch.putData(rowStr1);
|
|
||||||
|
|
||||||
Map<String, String> row2 = Maps.newHashMap();
|
Map<String, String> row2 = Maps.newHashMap();
|
||||||
row2.put("k1", "2021-02-03");
|
row2.put("cdate", "2022-02-02");
|
||||||
row2.put("k2", "2021-02-03 00:00:00");
|
row2.put("cdatetime", "2022-02-02 10:00:00");
|
||||||
row2.put("k3", "4");
|
row2.put("name", "lisi");
|
||||||
|
row2.put("age", "180");
|
||||||
|
String rowStr1 = JSON.toJSONString(row1);
|
||||||
String rowStr2 = JSON.toJSONString(row2);
|
String rowStr2 = JSON.toJSONString(row2);
|
||||||
|
if ("csv".equals(key.getFormat())) {
|
||||||
|
rowStr1 = String.join(EscapeHandler.escapeString(key.getColumnSeparator()), "2021-02-02", "2021-02-02 00:00:00", "zhangsan", "18");
|
||||||
|
rowStr2 = String.join(EscapeHandler.escapeString(key.getColumnSeparator()), "2022-02-02", "2022-02-02 10:00:00", "lisi", "180");
|
||||||
|
}
|
||||||
|
System.out.println("rows1: " + rowStr1);
|
||||||
System.out.println("rows2: " + rowStr2);
|
System.out.println("rows2: " + rowStr2);
|
||||||
flushBatch.putData(rowStr2);
|
|
||||||
|
|
||||||
for (int i = 0; i < 500000; ++i) {
|
for (int i = 0; i < 1; ++i) {
|
||||||
|
flushBatch.putData(rowStr1);
|
||||||
flushBatch.putData(rowStr2);
|
flushBatch.putData(rowStr2);
|
||||||
}
|
}
|
||||||
|
emitter.emit(flushBatch);
|
||||||
emitter.doStreamLoad(flushBatch);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,42 @@
|
|||||||
|
// Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
// or more contributor license agreements. See the NOTICE file
|
||||||
|
// distributed with this work for additional information
|
||||||
|
// regarding copyright ownership. The ASF licenses this file
|
||||||
|
// to you under the Apache License, Version 2.0 (the
|
||||||
|
// "License"); you may not use this file except in compliance
|
||||||
|
// with the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing,
|
||||||
|
// software distributed under the License is distributed on an
|
||||||
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
// KIND, either express or implied. See the License for the
|
||||||
|
// specific language governing permissions and limitations
|
||||||
|
// under the License.
|
||||||
|
|
||||||
|
package com.alibaba.datax.plugin.writer.doriswriter;
|
||||||
|
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handler for escape in properties.
|
||||||
|
*/
|
||||||
|
public class EscapeHandler {
|
||||||
|
public static final String ESCAPE_DELIMITERS_FLAGS = "\\x";
|
||||||
|
public static final Pattern ESCAPE_PATTERN = Pattern.compile("\\\\x([0-9|a-f|A-F]{2})");
|
||||||
|
|
||||||
|
public static String escapeString(String source) {
|
||||||
|
if (source.startsWith(ESCAPE_DELIMITERS_FLAGS)) {
|
||||||
|
Matcher m = ESCAPE_PATTERN.matcher(source);
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
while (m.find()) {
|
||||||
|
m.appendReplacement(buf, String.format("%s", (char) Integer.parseInt(m.group(1), 16)));
|
||||||
|
}
|
||||||
|
m.appendTail(buf);
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
}
|
@ -23,6 +23,8 @@ import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
|
|||||||
import com.google.common.base.Strings;
|
import com.google.common.base.Strings;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@ -34,6 +36,7 @@ public class Key implements Serializable {
|
|||||||
public static final String DATABASE = "database";
|
public static final String DATABASE = "database";
|
||||||
public static final String TABLE = "table";
|
public static final String TABLE = "table";
|
||||||
public static final String COLUMN = "column";
|
public static final String COLUMN = "column";
|
||||||
|
public static final String TIME_ZONE = "timeZone";
|
||||||
|
|
||||||
public static final String USERNAME = "username";
|
public static final String USERNAME = "username";
|
||||||
public static final String PASSWORD = "password";
|
public static final String PASSWORD = "password";
|
||||||
@ -42,28 +45,36 @@ public class Key implements Serializable {
|
|||||||
public static final String POST_SQL = "postSql";
|
public static final String POST_SQL = "postSql";
|
||||||
|
|
||||||
public static final String LOAD_PROPS = "loadProps";
|
public static final String LOAD_PROPS = "loadProps";
|
||||||
|
public static final String LOAD_PROPS_LINE_DELIMITER = "line_delimiter";
|
||||||
|
public static final String LOAD_PROPS_COLUMN_SEPARATOR = "column_separator";
|
||||||
|
|
||||||
public static final String MAX_BATCH_ROWS = "maxBatchRows";
|
public static final String MAX_BATCH_ROWS = "maxBatchRows";
|
||||||
public static final String MAX_BATCH_BYTE_SIZE = "maxBatchByteSize";
|
public static final String MAX_BATCH_BYTE_SIZE = "maxBatchByteSize";
|
||||||
|
public static final String MAX_RETRIES = "maxRetries";
|
||||||
public static final String LABEL_PREFIX = "labelPrefix";
|
public static final String LABEL_PREFIX = "labelPrefix";
|
||||||
public static final String LINE_DELIMITER = "lineDelimiter";
|
public static final String FORMAT = "format";
|
||||||
public static final String CONNECT_TIMEOUT = "connectTimeout";
|
public static final String CONNECT_TIMEOUT = "connectTimeout";
|
||||||
private final Configuration options;
|
private final Configuration options;
|
||||||
private final String lineDelimiterDesc;
|
|
||||||
|
|
||||||
private static final long DEFAULT_MAX_BATCH_ROWS = 50_0000;
|
private static final long DEFAULT_MAX_BATCH_ROWS = 50_0000;
|
||||||
private static final long DEFAULT_MAX_BATCH_BYTE_SIZE = 100 * 1024 * 1024; // 100MB
|
private static final long DEFAULT_MAX_BATCH_BYTE_SIZE = 90 * 1024 * 1024; // 90MB
|
||||||
|
private static final int DEFAULT_MAX_RETRIES = 0;
|
||||||
|
|
||||||
private static final String DEFAULT_LABEL_PREFIX = "datax_doris_writer_";
|
private static final String DEFAULT_LABEL_PREFIX = "datax_doris_writer_";
|
||||||
private static final String DEFAULT_LINE_DELIMITER = "\n";
|
private static final String DEFAULT_COLUMN_SEPARATOR = "\\x01";
|
||||||
|
private static final String DEFAULT_LINE_DELIMITER = "\\x02";
|
||||||
|
public static final String DEFAULT_FORMAT_CSV = "csv";
|
||||||
|
private static final String DEFAULT_TIME_ZONE = "+08:00";
|
||||||
private static final int DEFAULT_CONNECT_TIMEOUT = -1;
|
private static final int DEFAULT_CONNECT_TIMEOUT = -1;
|
||||||
|
|
||||||
public Key(final Configuration options) {
|
public Key(final Configuration options) {
|
||||||
this.options = options;
|
this.options = options;
|
||||||
this.lineDelimiterDesc = parseHexReadable(this.getLineDelimiter());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void doPretreatment() {
|
public void doPretreatment() {
|
||||||
this.validateRequired();
|
this.validateRequired();
|
||||||
this.validateStreamLoadUrl();
|
this.validateStreamLoadUrl();
|
||||||
|
this.validateFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getJdbcUrl() {
|
public String getJdbcUrl() {
|
||||||
@ -98,6 +109,10 @@ public class Key implements Serializable {
|
|||||||
return this.options.getList(COLUMN, String.class);
|
return this.options.getList(COLUMN, String.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTimeZone() {
|
||||||
|
return this.options.getString(TIME_ZONE, DEFAULT_TIME_ZONE);
|
||||||
|
}
|
||||||
|
|
||||||
public List<String> getPreSqlList() {
|
public List<String> getPreSqlList() {
|
||||||
return this.options.getList(PRE_SQL, String.class);
|
return this.options.getList(PRE_SQL, String.class);
|
||||||
}
|
}
|
||||||
@ -107,7 +122,7 @@ public class Key implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> getLoadProps() {
|
public Map<String, Object> getLoadProps() {
|
||||||
return this.options.getMap(LOAD_PROPS);
|
return this.options.getMap(LOAD_PROPS, new HashMap<>());
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getBatchRows() {
|
public long getBatchRows() {
|
||||||
@ -118,22 +133,30 @@ public class Key implements Serializable {
|
|||||||
return this.options.getLong(MAX_BATCH_BYTE_SIZE, DEFAULT_MAX_BATCH_BYTE_SIZE);
|
return this.options.getLong(MAX_BATCH_BYTE_SIZE, DEFAULT_MAX_BATCH_BYTE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getMaxRetries() {
|
||||||
|
return this.options.getInt(MAX_RETRIES, DEFAULT_MAX_RETRIES);
|
||||||
|
}
|
||||||
|
|
||||||
public String getLabelPrefix() {
|
public String getLabelPrefix() {
|
||||||
return this.options.getString(LABEL_PREFIX, DEFAULT_LABEL_PREFIX);
|
return this.options.getString(LABEL_PREFIX, DEFAULT_LABEL_PREFIX);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLineDelimiter() {
|
public String getLineDelimiter() {
|
||||||
return this.options.getString(LINE_DELIMITER, DEFAULT_LINE_DELIMITER);
|
return getLoadProps().getOrDefault(LOAD_PROPS_LINE_DELIMITER, DEFAULT_LINE_DELIMITER).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFormat() {
|
||||||
|
return this.options.getString(FORMAT, DEFAULT_FORMAT_CSV);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnSeparator() {
|
||||||
|
return getLoadProps().getOrDefault(LOAD_PROPS_COLUMN_SEPARATOR, DEFAULT_COLUMN_SEPARATOR).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getConnectTimeout() {
|
public int getConnectTimeout() {
|
||||||
return this.options.getInt(CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT);
|
return this.options.getInt(CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLineDelimiterDesc() {
|
|
||||||
return lineDelimiterDesc;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void validateStreamLoadUrl() {
|
private void validateStreamLoadUrl() {
|
||||||
List<String> urlList = this.getBeLoadUrlList();
|
List<String> urlList = this.getBeLoadUrlList();
|
||||||
if (urlList == null) {
|
if (urlList == null) {
|
||||||
@ -152,14 +175,11 @@ public class Key implements Serializable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String parseHexReadable(String s) {
|
private void validateFormat() {
|
||||||
byte[] separatorBytes = s.getBytes();
|
String format = this.getFormat();
|
||||||
StringBuilder desc = new StringBuilder();
|
if (!Arrays.asList("csv", "json").contains(format.toLowerCase())) {
|
||||||
|
throw DataXException.asDataXException(DBUtilErrorCode.CONF_ERROR, "format only supports csv or json");
|
||||||
for (byte separatorByte : separatorBytes) {
|
|
||||||
desc.append(String.format("\\x%02x", separatorByte));
|
|
||||||
}
|
}
|
||||||
return desc.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateRequired() {
|
private void validateRequired() {
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
"database": "",
|
"database": "",
|
||||||
"table": "",
|
"table": "",
|
||||||
"column": [],
|
"column": [],
|
||||||
|
"timeZone": "",
|
||||||
"preSql": [],
|
"preSql": [],
|
||||||
"postSql": [],
|
"postSql": [],
|
||||||
"jdbcUrl": "",
|
"jdbcUrl": "",
|
||||||
|
Loading…
Reference in New Issue
Block a user