Migrate datax.py to Python 3 while remaining compatible with Python 2

This commit is contained in:
dingbo 2021-11-19 16:38:55 +08:00
parent bb0d715eae
commit 17c39b11ba
2 changed files with 65 additions and 51 deletions

View File

@ -1,23 +1,26 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import os
import signal
import subprocess
import time
import re
import socket
import json
from optparse import OptionParser
from optparse import OptionGroup
from string import Template
import codecs
import json
import os
import platform
import re
import signal
import socket
import subprocess
import sys
import time
from optparse import OptionGroup
from optparse import OptionParser
from string import Template
ispy2 = sys.version_info.major == 2
def isWindows():
    """Return True when platform.system() reports 'Windows', else False."""
    system_name = platform.system()
    return system_name == 'Windows'
DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATAX_VERSION = 'DATAX-OPENSOURCE-3.0'
@ -52,13 +55,19 @@ def getLocalIp():
def suicide(signum, e):
    """Report an unexpected signal on stderr, stop the child process and exit.

    The (signum, e) signature matches what the ``signal`` module passes to a
    handler; presumably this function is registered with ``signal.signal``
    elsewhere in the file -- confirm against the registration code.

    :param signum: number of the received signal (formatted with ``%d``).
    :param e: second handler argument; not used here.
    :raises SystemExit: always, with exit status ``RET_STATE["KILL"]``.
    """
    global child_process
    # Write straight to sys.stderr instead of branching on the interpreter
    # version: ``print(msg, sys.stderr)`` on Python 3 prints the message AND
    # the repr of the stream to stdout, so nothing ever reached stderr.
    # ``sys.stderr.write`` behaves identically on Python 2 and Python 3.
    sys.stderr.write("[Error] DataX receive unexpected signal %d, starts to suicide.\n" % signum)

    if child_process:
        # Ask the child to quit first, give it one second, then force-kill it.
        child_process.send_signal(signal.SIGQUIT)
        time.sleep(1)
        child_process.kill()
    # NOTE(review): the source view had its indentation flattened; this message
    # is placed at function level (printed whether or not a child exists).
    sys.stderr.write("DataX Process was killed ! you did ?\n")
    sys.exit(RET_STATE["KILL"])
@ -92,10 +101,10 @@ def getOptionParser():
'if you have mutiple parameters: -p"-DtableName=your-table-name -DcolumnName=your-column-name".'
'Note: you should config in you job tableName with ${tableName}.')
prodEnvOptionGroup.add_option("-r", "--reader", metavar="<parameter used in view job config[reader] template>",
action="store", dest="reader",type="string",
action="store", dest="reader", type="string",
help='View job config[reader] template, eg: mysqlreader,streamreader')
prodEnvOptionGroup.add_option("-w", "--writer", metavar="<parameter used in view job config[writer] template>",
action="store", dest="writer",type="string",
action="store", dest="writer", type="string",
help='View job config[writer] template, eg: mysqlwriter,streamwriter')
parser.add_option_group(prodEnvOptionGroup)
@ -108,14 +117,17 @@ def getOptionParser():
parser.add_option_group(devEnvOptionGroup)
return parser
def generateJobConfigTemplate(reader, writer):
readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (reader,reader,reader)
writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (writer,writer,writer)
print readerRef
print writerRef
readerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n" % (
reader, reader, reader)
writerRef = "Please refer to the %s document:\n https://github.com/alibaba/DataX/blob/master/%s/doc/%s.md \n " % (
writer, writer, writer)
print(readerRef)
print(writerRef)
jobGuid = 'Please save the following configuration as a json file and use\n python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json \nto run the job.\n'
print jobGuid
jobTemplate={
print(jobGuid)
jobTemplate = {
"job": {
"setting": {
"speed": {
@ -130,24 +142,26 @@ def generateJobConfigTemplate(reader, writer):
]
}
}
readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME,reader)
writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME,writer)
readerTemplatePath = "%s/plugin/reader/%s/plugin_job_template.json" % (DATAX_HOME, reader)
writerTemplatePath = "%s/plugin/writer/%s/plugin_job_template.json" % (DATAX_HOME, writer)
try:
readerPar = readPluginTemplate(readerTemplatePath);
except Exception, e:
print "Read reader[%s] template error: can\'t find file %s" % (reader,readerTemplatePath)
readerPar = readPluginTemplate(readerTemplatePath)
except:
print("Read reader[%s] template error: can\'t find file %s" % (reader, readerTemplatePath))
try:
writerPar = readPluginTemplate(writerTemplatePath);
except Exception, e:
print "Read writer[%s] template error: : can\'t find file %s" % (writer,writerTemplatePath)
jobTemplate['job']['content'][0]['reader'] = readerPar;
jobTemplate['job']['content'][0]['writer'] = writerPar;
print json.dumps(jobTemplate, indent=4, sort_keys=True)
writerPar = readPluginTemplate(writerTemplatePath)
except:
print("Read writer[%s] template error: : can\'t find file %s" % (writer, writerTemplatePath))
jobTemplate['job']['content'][0]['reader'] = readerPar
jobTemplate['job']['content'][0]['writer'] = writerPar
print(json.dumps(jobTemplate, indent=4, sort_keys=True))
def readPluginTemplate(plugin):
    """Parse the JSON file at path ``plugin`` and return the decoded object.

    The file is opened explicitly as UTF-8 rather than with the interpreter's
    locale-dependent default, so templates containing non-ASCII text load the
    same way on every platform. ``io.open`` accepts ``encoding`` on both
    Python 2 and Python 3.

    :param plugin: filesystem path of the JSON template to read.
    :return: whatever ``json.load`` produces for the file's content.
    :raises IOError/OSError: if the file cannot be opened.
    :raises ValueError: if the content is not valid JSON.
    """
    import io  # local import: keeps this block self-contained

    with io.open(plugin, 'r', encoding='utf-8') as f:
        return json.load(f)
def isUrl(path):
if not path:
return False
@ -168,7 +182,7 @@ def buildStartCommand(options, args):
if options.remoteDebug:
tempJVMCommand = tempJVMCommand + " " + REMOTE_DEBUG_CONFIG
print 'local ip: ', getLocalIp()
print('local ip: ', getLocalIp())
if options.loglevel:
tempJVMCommand = tempJVMCommand + " " + ("-Dloglevel=%s" % (options.loglevel))
@ -198,11 +212,11 @@ def buildStartCommand(options, args):
def printCopyright():
print '''
print('''
DataX (%s), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
''' % DATAX_VERSION
''' % DATAX_VERSION)
sys.stdout.flush()
@ -211,7 +225,7 @@ if __name__ == "__main__":
parser = getOptionParser()
options, args = parser.parse_args(sys.argv[1:])
if options.reader is not None and options.writer is not None:
generateJobConfigTemplate(options.reader,options.writer)
generateJobConfigTemplate(options.reader, options.writer)
sys.exit(RET_STATE['OK'])
if len(args) != 1:
parser.print_help()

View File

@ -10,7 +10,7 @@ DataX本身作为数据同步框架将不同数据源的同步抽象为从源
- Linux
- [JDK(1.8以上推荐1.8) ](http://www.oracle.com/technetwork/cn/java/javase/downloads/index.html)
- [Python(推荐Python2.6.X) ](https://www.python.org/downloads/)
- [Python(2或3都可以) ](https://www.python.org/downloads/)
- [Apache Maven 3.x](https://maven.apache.org/download.cgi) (Compile DataX)
# Quick Start