5
0
mirror of https://github.com/apache/sqoop.git synced 2025-05-10 05:09:41 +08:00

SQOOP-1709: Column Type enhancements for complex types

(Veena Basavaraj via Jarek Jarcec Cecho)
This commit is contained in:
Jarek Jarcec Cecho 2014-11-12 17:59:18 -08:00
parent 9a07675c37
commit 100810be41
23 changed files with 438 additions and 234 deletions

View File

@ -18,12 +18,14 @@
package org.apache.sqoop.json.util;
import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.schema.type.AbstractComplexType;
import org.apache.sqoop.schema.type.AbstractComplexListType;
import org.apache.sqoop.schema.type.AbstractPrimitiveType;
import org.apache.sqoop.schema.type.AbstractString;
import org.apache.sqoop.schema.type.Column;
import org.apache.sqoop.schema.type.Array;
import org.apache.sqoop.schema.type.Binary;
import org.apache.sqoop.schema.type.Bit;
import org.apache.sqoop.schema.type.Column;
import org.apache.sqoop.schema.type.ColumnType;
import org.apache.sqoop.schema.type.Date;
import org.apache.sqoop.schema.type.DateTime;
import org.apache.sqoop.schema.type.Decimal;
@ -34,7 +36,6 @@
import org.apache.sqoop.schema.type.Set;
import org.apache.sqoop.schema.type.Text;
import org.apache.sqoop.schema.type.Time;
import org.apache.sqoop.schema.type.ColumnType;
import org.apache.sqoop.schema.type.Unknown;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
@ -44,15 +45,27 @@
*/
public class SchemaSerialization {
// common attributes of all column types
private static final String NAME = "name";
private static final String CREATION_DATE = "created";
private static final String NOTE = "note";
private static final String COLUMNS = "columns";
private static final String TYPE = "type";
private static final String NULLABLE = "nullable";
// size attribute is relevant to String and Array type only
private static final String SIZE = "size";
// maps and enum attributes
private static final String MAP = "map";
private static final String KEY = "key";
private static final String VALUE = "value";
private static final String SIZE = "size";
// arrays and set attribute
private static final String LIST = "list";
private static final String LIST_TYPE = "listType";
// number attribute
private static final String BYTE_SIZE = "byteSize";
// string attribute
private static final String CHAR_SIZE = "charSize";
private static final String FRACTION = "fraction";
private static final String TIMEZONE = "timezone";
private static final String PRECISION = "precision";
@ -81,9 +94,7 @@ public static Schema restoreSchema(JSONObject jsonObject) {
String note = (String) jsonObject.get(NOTE);
java.util.Date date = new java.util.Date((Long) jsonObject.get(CREATION_DATE));
Schema schema = new Schema(name)
.setNote(note)
.setCreationDate(date);
Schema schema = new Schema(name).setNote(note).setCreationDate(date);
JSONArray columnsArray = (JSONArray) jsonObject.get(COLUMNS);
for (Object obj : columnsArray) {
@ -103,15 +114,26 @@ private static JSONObject extractColumn(Column column) {
switch (column.getType()) {
case MAP:
ret.put(VALUE, extractColumn(((Map)column).getValue()));
case ARRAY:
JSONObject map = new JSONObject();
ret.put(MAP, map);
map.put(KEY, extractColumn(((Map) column).getKey()));
map.put(VALUE, extractColumn(((Map) column).getValue()));
break;
case ENUM:
case SET:
ret.put(KEY, extractColumn(((AbstractComplexType) column).getKey()));
JSONObject list = new JSONObject();
ret.put(LIST, list);
list.put(LIST_TYPE, extractColumn(((AbstractComplexListType) column).getListType()));
break;
case ARRAY:
JSONObject arrayList = new JSONObject();
ret.put(LIST, arrayList);
arrayList.put(SIZE, ((Array) column).getSize());
arrayList.put(LIST_TYPE, extractColumn(((Array) column).getListType()));
break;
case BINARY:
case TEXT:
ret.put(SIZE, ((AbstractString)column).getLength());
ret.put(CHAR_SIZE, ((AbstractString) column).getCharSize());
break;
case DATE_TIME:
ret.put(FRACTION, ((DateTime) column).getFraction());
@ -122,11 +144,11 @@ private static JSONObject extractColumn(Column column) {
ret.put(SCALE, ((Decimal) column).getScale());
break;
case FIXED_POINT:
ret.put(SIZE, ((FixedPoint) column).getByteSize());
ret.put(BYTE_SIZE, ((FixedPoint) column).getByteSize());
ret.put(UNSIGNED, ((FixedPoint) column).getUnsigned());
break;
case FLOATING_POINT:
ret.put(SIZE, ((FloatingPoint) column).getByteSize());
ret.put(BYTE_SIZE, ((FloatingPoint) column).getByteSize());
break;
case TIME:
ret.put(FRACTION, ((Time) column).getFraction());
@ -145,35 +167,42 @@ private static JSONObject extractColumn(Column column) {
return ret;
}
private static Column restoreColumn(JSONObject obj) {
String name = (String) obj.get(NAME);
Boolean nullable = (Boolean) obj.get(NULLABLE);
Column key = null;
if(obj.containsKey(KEY)) {
key = restoreColumn((JSONObject) obj.get(KEY));
}
AbstractPrimitiveType key = null;
Column value = null;
if(obj.containsKey(VALUE)) {
value = restoreColumn((JSONObject) obj.get(VALUE));
Long arraySize = null;
Column listType = null;
// complex type attribute
if (obj.containsKey(MAP)) {
JSONObject map = (JSONObject) obj.get(MAP);
if (map.containsKey(KEY)) {
key = (AbstractPrimitiveType) restoreColumn((JSONObject) map.get(KEY));
}
if (map.containsKey(VALUE)) {
value = restoreColumn((JSONObject) map.get(VALUE));
}
}
if (obj.containsKey(LIST)) {
JSONObject list = (JSONObject) obj.get(LIST);
if (list.containsKey(LIST_TYPE)) {
listType = restoreColumn((JSONObject) list.get(LIST_TYPE));
}
arraySize = (Long) list.get(SIZE);
}
Long size = (Long)obj.get(SIZE);
Boolean fraction = (Boolean)obj.get(FRACTION);
Boolean timezone = (Boolean)obj.get(TIMEZONE);
Long precision = (Long)obj.get(PRECISION);
Long scale = (Long)obj.get(SCALE);
Boolean unsigned = (Boolean)obj.get(UNSIGNED);
Long jdbcType = (Long)obj.get(JDBC_TYPE);
ColumnType type = ColumnType.valueOf((String) obj.get(TYPE));
Column output = null;
switch (type) {
case ARRAY:
output = new Array(key);
output = new Array(listType).setSize(arraySize);
break;
case BINARY:
output = new Binary().setLength(size);
Long charSize = (Long) obj.get(CHAR_SIZE);
output = new Binary().setCharSize(charSize);
break;
case BIT:
output = new Bit();
@ -182,33 +211,43 @@ private static Column restoreColumn(JSONObject obj) {
output = new Date();
break;
case DATE_TIME:
Boolean fraction = (Boolean) obj.get(FRACTION);
Boolean timezone = (Boolean) obj.get(TIMEZONE);
output = new DateTime().setFraction(fraction).setTimezone(timezone);
break;
case DECIMAL:
Long precision = (Long) obj.get(PRECISION);
Long scale = (Long) obj.get(SCALE);
output = new Decimal().setPrecision(precision).setScale(scale);
break;
case ENUM:
output = new Enum(key);
output = new Enum(listType);
break;
case FIXED_POINT:
output = new FixedPoint().setByteSize(size).setUnsigned(unsigned);
Boolean unsigned = (Boolean) obj.get(UNSIGNED);
Long fixedPointByteSize = (Long) obj.get(BYTE_SIZE);
output = new FixedPoint().setByteSize(fixedPointByteSize).setUnsigned(unsigned);
break;
case FLOATING_POINT:
output = new FloatingPoint().setByteSize(size);
Long floatingPointByteSize = (Long) obj.get(BYTE_SIZE);
output = new FloatingPoint().setByteSize(floatingPointByteSize);
break;
case MAP:
output = new Map(key, value);
break;
case SET:
output = new Set(key);
output = new Set(listType);
break;
case TEXT:
output = new Text().setLength(size);
charSize = (Long) obj.get(CHAR_SIZE);
output = new Text().setCharSize(charSize);
break;
case TIME:
output = new Time().setFraction(fraction);
Boolean timeFraction = (Boolean) obj.get(FRACTION);
output = new Time().setFraction(timeFraction);
break;
case UNKNOWN:
Long jdbcType = (Long) obj.get(JDBC_TYPE);
output = new Unknown().setJdbcType(jdbcType);
break;
default:

View File

@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.schema.type;
/**
 * Base class for complex column types whose nested data is a list of
 * elements that all share a single column type.
 */
public abstract class AbstractComplexListType extends AbstractComplexType {

  // column type shared by every element held in the list
  Column listType;

  /**
   * Creates a list type without a column name.
   *
   * @param listType column type of the list elements (asserted to be non-null)
   */
  public AbstractComplexListType(Column listType) {
    super();
    setListType(listType);
  }

  /**
   * Creates a named list type.
   *
   * @param name name of the column
   * @param listType column type of the list elements (asserted to be non-null)
   */
  public AbstractComplexListType(String name, Column listType) {
    super(name);
    setListType(listType);
  }

  /**
   * Creates a named list type with an explicit nullable flag.
   *
   * @param name name of the column
   * @param nullable nullable flag handed to the parent constructor
   * @param listType column type of the list elements (asserted to be non-null)
   */
  public AbstractComplexListType(String name, Boolean nullable, Column listType) {
    super(name, nullable);
    setListType(listType);
  }

  // Shared by all constructors; the null check is only active when
  // assertions are enabled (-ea).
  private void setListType(Column listType) {
    assert listType != null;
    this.listType = listType;
  }

  /**
   * @return column type of the list elements
   */
  public Column getListType() {
    return listType;
  }

  /**
   * @return the parent's string form followed by {@code ,listType=<element type>}
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder(super.toString());
    text.append(",listType=");
    text.append(listType);
    return text.toString();
  }

  /**
   * Two list types are equal when the parent state is equal and the
   * element types are equal (or both null).
   */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof AbstractComplexListType) || !super.equals(o)) {
      return false;
    }

    Column otherListType = ((AbstractComplexListType) o).listType;
    if (listType == null) {
      return otherListType == null;
    }
    return listType.equals(otherListType);
  }

  /**
   * Combines the parent's hash with the element type's hash (0 when null).
   */
  @Override
  public int hashCode() {
    int elementHash = (listType == null) ? 0 : listType.hashCode();
    return 31 * super.hashCode() + elementHash;
  }
}

View File

@ -18,63 +18,24 @@
package org.apache.sqoop.schema.type;
/**
* Complex types that are incorporating primitive types.
* Complex types that can have nested data as a map or list structure
*/
public abstract class AbstractComplexType extends Column {
/**
* Incorporated type
*/
private Column key;
public AbstractComplexType(Column key) {
setKey(key);
public AbstractComplexType() {
super();
}
public AbstractComplexType(String name, Column key) {
public AbstractComplexType(String name) {
super(name);
setKey(key);
}
public AbstractComplexType(String name, Boolean nullable, Column key) {
public AbstractComplexType(String name, Boolean nullable) {
super(name, nullable);
setKey(key);
}
public Column getKey() {
return key;
public AbstractComplexType(String name, Boolean nullable, long size) {
super(name, nullable);
}
public void setKey(Column key) {
assert key != null;
this.key = key;
}
@Override
public String toString() {
return new StringBuilder(super.toString())
.append(",key=").append(key.toString())
.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof AbstractComplexType)) return false;
if (!super.equals(o)) return false;
AbstractComplexType that = (AbstractComplexType) o;
if (key != null ? !key.equals(that.key) : that.key != null) return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (key != null ? key.hashCode() : 0);
return result;
}
}

View File

@ -23,6 +23,7 @@
public abstract class AbstractDateTime extends Column {
protected AbstractDateTime() {
super();
}
protected AbstractDateTime(String name) {

View File

@ -20,9 +20,10 @@
/**
* Any type related to number.
*/
public abstract class AbstractNumber extends Column {
public abstract class AbstractNumber extends AbstractPrimitiveType {
protected AbstractNumber() {
super();
}
protected AbstractNumber(String name) {

View File

@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.schema.type;
/**
 * Base class for primitive column types. It adds no state of its own;
 * every constructor simply forwards to the matching {@link Column}
 * constructor.
 */
public abstract class AbstractPrimitiveType extends Column {

  /**
   * Creates a primitive type without a column name.
   */
  protected AbstractPrimitiveType() {
    super();
  }

  /**
   * Creates a named primitive type.
   *
   * @param name name of the column
   */
  protected AbstractPrimitiveType(String name) {
    super(name);
  }

  /**
   * Creates a named primitive type with an explicit nullable flag.
   *
   * @param name name of the column
   * @param nullable nullable flag handed to the parent constructor
   */
  protected AbstractPrimitiveType(String name, Boolean nullable) {
    super(name, nullable);
  }
}

View File

@ -20,44 +20,50 @@
/**
* Any type that is encoding character (or byte) array.
*/
public abstract class AbstractString extends Column {
public abstract class AbstractString extends AbstractPrimitiveType {
private Long length;
/**
* Represents the size for the column type and will be handy for connectors
* to map this info to the native data sources they represent
* https://issues.apache.org/jira/secure/attachment/12589331/Sqoop2Datatypes.pdf
*/
private Long charSize;
protected AbstractString() {
super();
}
protected AbstractString(String name) {
super(name);
}
protected AbstractString(String name, Long length) {
protected AbstractString(String name, Long size) {
super(name);
this.length = length;
this.charSize = size;
}
protected AbstractString(String name, Boolean nullable) {
super(name, nullable);
}
protected AbstractString(String name, Boolean nullable, Long length) {
protected AbstractString(String name, Boolean nullable, Long size) {
super(name, nullable);
this.length = length;
this.charSize = size;
}
public Long getLength() {
return length;
public Long getCharSize() {
return charSize;
}
public AbstractString setLength(Long length) {
this.length = length;
public AbstractString setCharSize(Long size) {
this.charSize = size;
return this;
}
@Override
public String toString() {
return new StringBuilder(super.toString())
.append(",length=").append(length)
.append(",charSize=").append(charSize)
.toString();
}
@ -69,7 +75,7 @@ public boolean equals(Object o) {
AbstractString that = (AbstractString) o;
if (length != null ? !length.equals(that.length) : that.length != null)
if (charSize != null ? !charSize.equals(that.charSize) : that.charSize != null)
return false;
return true;
@ -78,7 +84,7 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (length != null ? length.hashCode() : 0);
result = 31 * result + (charSize != null ? charSize.hashCode() : 0);
return result;
}
}

View File

@ -22,18 +22,35 @@
*
* JDBC Types: array
*/
public class Array extends AbstractComplexType {
public class Array extends AbstractComplexListType {
public Array(Column key) {
super(key);
/**
* Represents the size for the column type and will be handy for connectors to
* map this info to the native data sources they represent
* https://issues.apache.org/jira/secure/attachment/12589331/Sqoop2Datatypes.pdf
* NOTE : only certain data sources such as Postgres support size attribute for arrays
*/
private Long size;
public Array(Column listType) {
super(listType);
}
public Array(String name, Column key) {
super(name, key);
public Array(String name, Column listType) {
super(name, listType);
}
public Array(String name, Boolean nullable, Column key) {
super(name, nullable, key);
public Array(String name, Boolean nullable, Column listType) {
super(name, nullable, listType);
}
public Long getSize() {
return size;
}
public Array setSize(Long size) {
this.size = size;
return this;
}
@Override
@ -43,10 +60,33 @@ public ColumnType getType() {
@Override
public String toString() {
return new StringBuilder("Array{")
.append(super.toString())
.append("}")
.toString();
return new StringBuilder("Array{").
append(super.toString()).
append("}").toString();
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (!(o instanceof Array))
return false;
if (!super.equals(o))
return false;
Array that = (Array) o;
if (size != null ? !size.equals(that.size) : that.size != null)
return false;
return true;
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (size != null ? size.hashCode() : 0);
return result;
}
}

View File

@ -25,13 +25,14 @@
public class Binary extends AbstractString {
public Binary() {
super();
}
public Binary(String name) {
super(name);
}
public Binary(String name, Long length) {
public Binary(String name, Long size) {
super(name);
}
@ -40,8 +41,8 @@ public Binary(String name, Boolean nullable) {
super(name, nullable);
}
public Binary(String name, Boolean nullable, Long length) {
super(name, nullable, length);
public Binary(String name, Boolean nullable, Long size) {
super(name, nullable, size);
}
@Override

View File

@ -25,6 +25,7 @@
public class Bit extends Column {
public Bit() {
super();
}
public Bit(String name) {

View File

@ -23,7 +23,7 @@
public abstract class Column {
/**
* Name of the column.
* Name of the column. It is optional
*/
String name;
@ -104,8 +104,4 @@ public int hashCode() {
return result;
}
public boolean validate(Object o) {
// TODO(SQOOP-1707)
return true;
}
}

View File

@ -25,6 +25,7 @@
public class Date extends AbstractDateTime {
public Date() {
super();
}
public Date(String name) {

View File

@ -35,6 +35,7 @@ public class DateTime extends AbstractDateTime {
private Boolean timezone;
public DateTime() {
super();
}
public DateTime(String name) {

View File

@ -35,6 +35,7 @@ public class Decimal extends AbstractNumber {
private Long scale;
public Decimal() {
super();
}
public Decimal(String name) {

View File

@ -18,22 +18,23 @@
package org.apache.sqoop.schema.type;
/**
* Enum can contain one value from predefined list.
* Enum is a set of predefined values of its own type
*
* JDBC Types: enum
*/
public class Enum extends AbstractComplexType {
public Enum(Column key) {
super(key);
public class Enum extends AbstractComplexListType {
public Enum(Column listType) {
super(listType);
}
public Enum(String name, Column key) {
super(name, key);
public Enum(String name, Column listType) {
super(name, listType);
}
public Enum(String name, Boolean nullable, Column key) {
super(name, nullable, key);
public Enum(String name, Boolean nullable, Column listType) {
super(name, nullable, listType);
}
@Override
@ -45,7 +46,6 @@ public ColumnType getType() {
public String toString() {
return new StringBuilder("Enum{")
.append(super.toString())
.append("}")
.toString();
.append("}").toString();
}
}

View File

@ -24,11 +24,22 @@
*/
public class FixedPoint extends AbstractNumber {
/**
This field will come in handy for connectors that need to use the
size information to do additional type mappings in their data source,
for example in Hive:
Default: bigint
if size < 1 then tinyint
if size < 2 then smallint
if size < 4 then int
Read more: https://issues.apache.org/jira/secure/attachment/12589331/Sqoop2Datatypes.pdf
*/
private Long byteSize;
private Boolean unsigned;
public FixedPoint() {
super();
}
public FixedPoint(String name) {

View File

@ -24,9 +24,15 @@
*/
public class FloatingPoint extends AbstractNumber {
/**
This field will come in handy for connectors that need to use the
size information on the schema object to do additional type mappings in their data source.
Read more information: https://issues.apache.org/jira/secure/attachment/12589331/Sqoop2Datatypes.pdf
*/
private Long byteSize;
public FloatingPoint() {
super();
}
public FloatingPoint(String name) {

View File

@ -24,20 +24,30 @@
*/
public class Map extends AbstractComplexType {
// The key can be either a string or a number
private AbstractPrimitiveType key;
// The value inside the map can be either a primitive or a complex column type
private Column value;
public Map(Column key, Column value) {
super(key);
this.value = value;
public Map(AbstractPrimitiveType key, Column value) {
super();
setKeyValue(key, value);
}
public Map(String name, Column key, Column value) {
super(name, key);
this.value = value;
public Map(String name, AbstractPrimitiveType key, Column value) {
super(name);
setKeyValue(key, value);
}
public Map(String name, Boolean nullable, Column key, Column value) {
super(name, nullable, key);
public Map(String name, Boolean nullable, AbstractPrimitiveType key, Column value) {
super(name, nullable);
setKeyValue(key, value);
}
private void setKeyValue(AbstractPrimitiveType key, Column value) {
assert key != null;
assert value != null;
this.key = key;
this.value = value;
}
@ -46,27 +56,34 @@ public ColumnType getType() {
return ColumnType.MAP;
}
public AbstractPrimitiveType getKey() {
return key;
}
public Column getValue() {
return value;
}
@Override
public String toString() {
return new StringBuilder("Map{")
.append(super.toString())
.append(",value=").append(value)
.append("}")
.toString();
return new StringBuilder("Map{").append(super.toString()).append(",key=").append(key)
.append(",value=").append(value).append("}").toString();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Map)) return false;
if (!super.equals(o)) return false;
if (this == o)
return true;
if (!(o instanceof Map))
return false;
if (!super.equals(o))
return false;
Map map = (Map) o;
if (key != null ? !key.equals(map.key) : map.key != null)
return false;
if (value != null ? !value.equals(map.value) : map.value != null)
return false;
@ -76,6 +93,7 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + (key != null ? key.hashCode() : 0);
result = 31 * result + (value != null ? value.hashCode() : 0);
return result;
}

View File

@ -18,22 +18,22 @@
package org.apache.sqoop.schema.type;
/**
* Unique values of the same type.
* Set contains unique values in a collection of a given type
*
* JDBC Types: set
*/
public class Set extends AbstractComplexType {
public class Set extends AbstractComplexListType {
public Set(Column key) {
super(key);
public Set(Column listType) {
super(listType);
}
public Set(String name, Column key) {
super(name, key);
public Set(String name, Column listType) {
super(name, listType);
}
public Set(String name, Boolean nullable, Column key) {
super(name, nullable, key);
public Set(String name, Boolean nullable, Column listType) {
super(name, nullable, listType);
}
@Override
@ -43,9 +43,8 @@ public ColumnType getType() {
@Override
public String toString() {
return new StringBuilder("Set{")
.append(super.toString())
.append("}")
.toString();
return new StringBuilder("Set{").
append(super.toString()).
append("}").toString();
}
}

View File

@ -25,14 +25,15 @@
public class Text extends AbstractString {
public Text() {
super();
}
public Text(String name) {
super(name);
}
public Text(String name, Long length) {
super(name, length);
public Text(String name, Long size) {
super(name, size);
}
public Text(String name, Boolean nullable) {

View File

@ -27,6 +27,7 @@ public class Time extends AbstractDateTime {
private Boolean fraction;
public Time() {
super();
}
public Time(String name) {

View File

@ -43,6 +43,7 @@ public Unknown setJdbcType(Long jdbcType) {
}
public Unknown() {
super();
}
public Unknown(Long jdbcType) {

View File

@ -45,7 +45,8 @@ public class TestSchemaSerialization {
@Test
public void testArray() {
Schema array = new Schema("array").addColumn(new Array("a", new Decimal()));
// create an array type containing decimals
Schema array = new Schema("array").addColumn(new Array("a", new Decimal()).setSize(1L));
transferAndAssert(array);
}
@ -122,7 +123,7 @@ public void testTime() {
}
@Test
public void testUnsupported() {
public void testUnknown() {
Schema t = new Schema("t").addColumn(new Unknown("u", 4L));
transferAndAssert(t);
}
@ -156,7 +157,7 @@ public void testAllTypes() {
@Test
public void testComplex() {
Schema complex = new Schema("complex")
.addColumn(new Map(new Array(new Enum(new Text())), new Set(new Array(new Text()))).setName("a"))
.addColumn(new Map(new Text(), new Set(new Array(new Text()))).setName("a"))
;
transferAndAssert(complex);
}