
SQOOP-379 Migrate lib and io packages to new name space

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1190430 13f79535-47bb-0310-9956-ffa450edef68
Bilung Lee 2011-10-28 16:32:43 +00:00
parent a8cdad50d2
commit 315fff82b9
47 changed files with 5119 additions and 3756 deletions

View File

@ -808,7 +808,6 @@
<arg value="+%Y" />
</exec>
<javadoc
packagenames="com.cloudera.sqoop.lib.*"
destdir="${build.javadoc}"
author="true"
version="true"
@ -818,6 +817,7 @@
bottom="Copyright &amp;copy; ${year} The Apache Software Foundation">
<packageset dir="${src.dir}">
<include name="com/cloudera/sqoop/lib/**" />
<include name="org/apache/sqoop/lib/**" />
</packageset>
<classpath>
<path refid="compile.classpath" />

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,22 +15,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
*
* @deprecated use org.apache.sqoop.io.CodecMap instead.
* @see org.apache.sqoop.io.CodecMap
*/
public final class CodecMap {
@ -40,33 +34,10 @@ public final class CodecMap {
// Note: do not add more values here, since codecs are discovered using the
// standard Hadoop mechanism (io.compression.codecs). See
// CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
public static final String LZOP = "lzop";
private static Map<String, String> codecNames;
static {
codecNames = new TreeMap<String, String>();
// Register the names of codecs we know about.
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
codecNames.put(LZOP, "com.hadoop.compression.lzo.LzopCodec");
// add more from Hadoop CompressionCodecFactory
for (Class<? extends CompressionCodec> cls
: CompressionCodecFactory.getCodecClasses(new Configuration())) {
String simpleName = cls.getSimpleName();
String codecName = simpleName;
if (simpleName.endsWith("Codec")) {
codecName = simpleName.substring(0, simpleName.length()
- "Codec".length());
}
codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
}
}
public static final String NONE = org.apache.sqoop.io.CodecMap.NONE;
public static final String DEFLATE = org.apache.sqoop.io.CodecMap.DEFLATE;
public static final String LZO = org.apache.sqoop.io.CodecMap.LZO;
public static final String LZOP = org.apache.sqoop.io.CodecMap.LZOP;
private CodecMap() {
}
@ -79,11 +50,7 @@ private CodecMap() {
*/
public static String getCodecClassName(String codecName)
throws UnsupportedCodecException {
if (!codecNames.containsKey(codecName)) {
throw new UnsupportedCodecException(codecName);
}
return codecNames.get(codecName);
return org.apache.sqoop.io.CodecMap.getCodecClassName(codecName);
}
/**
@ -94,79 +61,13 @@ public static String getCodecClassName(String codecName)
*/
public static CompressionCodec getCodec(String codecName,
Configuration conf) throws UnsupportedCodecException {
// Try standard Hadoop mechanism first
CompressionCodec codec = getCodecByName(codecName, conf);
if (codec != null) {
return codec;
}
// Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
if (null == codecClassName) {
return null;
}
Class<? extends CompressionCodec> codecClass =
(Class<? extends CompressionCodec>)
conf.getClassByName(codecClassName);
return (CompressionCodec) ReflectionUtils.newInstance(
codecClass, conf);
} catch (ClassNotFoundException cnfe) {
throw new UnsupportedCodecException("Cannot find codec class "
+ codecClassName + " for codec " + codecName);
}
}
/**
* Find the relevant compression codec for the codec's canonical class name
* or by codec alias.
* <p>
* Codec aliases are case insensitive.
* <p>
* The codec alias is the short class name (without the package name).
* If the short class name ends with 'Codec', then there are two aliases for
* the codec, the complete short class name and the short class name without
* the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
* aliases are 'gzip' and 'gzipcodec'.
* <p>
* Note: When HADOOP-7323 is available this method can be replaced with a call
* to CompressionCodecFactory.
* @param classname the canonical class name of the codec or the codec alias
* @return the codec object or null if none matching the name were found
*/
private static CompressionCodec getCodecByName(String codecName,
Configuration conf) {
List<Class<? extends CompressionCodec>> codecs =
CompressionCodecFactory.getCodecClasses(conf);
for (Class<? extends CompressionCodec> cls : codecs) {
if (codecMatches(cls, codecName)) {
return ReflectionUtils.newInstance(cls, conf);
}
}
return null;
}
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0, simpleName.length()
- "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
return org.apache.sqoop.io.CodecMap.getCodec(codecName, conf);
}
/**
* Return the set of available codec names.
*/
public static Set<String> getCodecNames() {
return codecNames.keySet();
return org.apache.sqoop.io.CodecMap.getCodecNames();
}
}
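
A minimal usage sketch (not part of this commit; the CodecMapUsage class name and the "gzip" alias choice are for illustration) showing how a caller resolves a codec through the deprecated com.cloudera.sqoop.io.CodecMap, which after this change simply forwards to org.apache.sqoop.io.CodecMap:

import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import com.cloudera.sqoop.io.CodecMap;
import com.cloudera.sqoop.io.UnsupportedCodecException;

public class CodecMapUsage {
  public static void main(String[] args) throws UnsupportedCodecException {
    Configuration conf = new Configuration();
    // Per the javadoc above, "gzip" and "gzipcodec" are case-insensitive
    // aliases for GzipCodec.
    String className = CodecMap.getCodecClassName("gzip");
    CompressionCodec codec = CodecMap.getCodec("gzip", conf);
    Set<String> known = CodecMap.getCodecNames();
    System.out.println(className + " -> " + codec.getClass() + " " + known);
    // An unrecognized codec name throws UnsupportedCodecException instead.
  }
}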

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,76 +15,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.InputStream;
import java.io.IOException;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.io.input.CountingInputStream;
import org.apache.commons.io.input.ProxyInputStream;
/**
* Provides an InputStream that can consume a fixed maximum number of bytes
* from an underlying stream. Closing the FixedLengthInputStream does not
* close the underlying stream. After reading the maximum number of available
* bytes this acts as though EOF has been reached.
*
* @deprecated use org.apache.sqoop.io.FixedLengthInputStream instead.
* @see org.apache.sqoop.io.FixedLengthInputStream
*/
public class FixedLengthInputStream extends ProxyInputStream {
public class FixedLengthInputStream
extends org.apache.sqoop.io.FixedLengthInputStream {
private CountingInputStream countingIn;
private long maxBytes;
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(new CountingInputStream(new CloseShieldInputStream(stream)));
// Save a correctly-typed reference to the underlying stream.
this.countingIn = (CountingInputStream) this.in;
this.maxBytes = maxLen;
}
/** @return the number of bytes already consumed by the client. */
private long consumed() {
return countingIn.getByteCount();
}
/**
* @return number of bytes remaining to be read before the limit
* is reached.
*/
private long toLimit() {
return maxBytes - consumed();
}
@Override
public int available() throws IOException {
return (int) Math.min(toLimit(), countingIn.available());
}
@Override
public int read() throws IOException {
if (toLimit() > 0) {
return super.read();
} else {
return -1; // EOF.
}
}
@Override
public int read(byte [] buf) throws IOException {
return read(buf, 0, buf.length);
}
@Override
public int read(byte [] buf, int start, int count) throws IOException {
long limit = toLimit();
if (limit == 0) {
return -1; // EOF.
} else {
return super.read(buf, start, (int) Math.min(count, limit));
}
}
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(stream, maxLen);
}
}
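
An illustrative sketch (not part of this commit; the class name and the 16/64 byte sizes are arbitrary) of the behavior described in the javadoc above: reads stop at the configured limit, and closing the wrapper leaves the underlying stream open.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import com.cloudera.sqoop.io.FixedLengthInputStream;

public class FixedLengthExample {
  public static void main(String[] args) throws IOException {
    InputStream base = new ByteArrayInputStream(new byte[64]);
    FixedLengthInputStream in = new FixedLengthInputStream(base, 16);
    int count = 0;
    while (in.read() != -1) {
      count++;                // stops after 16 bytes despite 64 being available
    }
    in.close();               // 'base' is close-shielded and stays open
    System.out.println("read " + count + " bytes");
  }
}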

File diff suppressed because it is too large.

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,19 +15,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
/**
* A cache of open LobFile.Reader objects.
@ -38,17 +30,13 @@
* instances, it is most useful to have a single global cache. This cache is
* internally synchronized; only one thread can insert or retrieve a reader
* from the cache at a time.
*
* @deprecated use org.apache.sqoop.io.LobReaderCache instead.
* @see org.apache.sqoop.io.LobReaderCache
*/
public final class LobReaderCache {
public final class LobReaderCache extends org.apache.sqoop.io.LobReaderCache {
public static final Log LOG = LogFactory.getLog(
LobReaderCache.class.getName());
private Map<Path, LobFile.Reader> readerMap;
private LobReaderCache() {
this.readerMap = new TreeMap<Path, LobFile.Reader>();
}
public static final Log LOG = org.apache.sqoop.io.LobReaderCache.LOG;
private static final LobReaderCache CACHE;
static {
@ -71,79 +59,7 @@ public static LobReaderCache getCache() {
*/
public static Path qualify(Path path, Configuration conf)
throws IOException {
if (null == path) {
return null;
}
FileSystem fs = path.getFileSystem(conf);
if (null == fs) {
fs = FileSystem.get(conf);
}
return path.makeQualified(fs);
}
/**
* Open a LobFile for read access, returning a cached reader if one is
* available, or a new reader otherwise.
* @param path the path to the LobFile to open
* @param conf the configuration to use to access the FS.
* @throws IOException if there's an error opening the file.
*/
public LobFile.Reader get(Path path, Configuration conf)
throws IOException {
LobFile.Reader reader = null;
Path canonicalPath = qualify(path, conf);
// Look up an entry in the cache.
synchronized(this) {
reader = readerMap.remove(canonicalPath);
}
if (null != reader && !reader.isClosed()) {
// Cache hit. return it.
LOG.debug("Using cached reader for " + canonicalPath);
return reader;
}
// Cache miss; open the file.
LOG.debug("No cached reader available for " + canonicalPath);
return LobFile.open(path, conf);
}
/**
* Return a reader back to the cache. If there's already a reader for
* this path, then the current reader is closed.
* @param reader the opened reader. Any record-specific subreaders should be
* closed.
* @throws IOException if there's an error accessing the path's filesystem.
*/
public void recycle(LobFile.Reader reader) throws IOException {
Path canonicalPath = reader.getPath();
// Check if the cache has a reader for this path already. If not, add this.
boolean cached = false;
synchronized(this) {
if (readerMap.get(canonicalPath) == null) {
LOG.debug("Caching reader for path: " + canonicalPath);
readerMap.put(canonicalPath, reader);
cached = true;
}
}
if (!cached) {
LOG.debug("Reader already present for path: " + canonicalPath
+ "; closing.");
reader.close();
}
}
@Override
protected synchronized void finalize() throws Throwable {
for (LobFile.Reader r : readerMap.values()) {
r.close();
}
super.finalize();
return org.apache.sqoop.io.LobReaderCache.qualify(path, conf);
}
}
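
A brief usage sketch for the deprecated cache (illustrative only; the path is made up). The static qualify() helper now delegates to org.apache.sqoop.io.LobReaderCache, and get()/recycle() are inherited from it.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobReaderCache;

public class LobReaderCacheUsage {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Canonicalize a (possibly relative) path against its FileSystem.
    Path qualified = LobReaderCache.qualify(new Path("_lob/large_obj_0.lob"), conf);
    // One process-wide cache instance; readers are fetched with get() and
    // returned with recycle(), as in the code above.
    LobReaderCache cache = LobReaderCache.getCache();
    System.out.println(qualified + " via " + cache);
  }
}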

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,82 +15,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Logger;
/**
* A named FIFO channel.
*
* @deprecated use org.apache.sqoop.io.NamedFifo instead.
* @see org.apache.sqoop.io.NamedFifo
*/
public class NamedFifo {
public class NamedFifo extends org.apache.sqoop.io.NamedFifo {
private static final Logger LOG = Logger.getLogger(NamedFifo.class);
private File fifoFile;
/** Create a named FIFO object at the local fs path given by 'pathname'. */
public NamedFifo(String pathname) {
this.fifoFile = new File(pathname);
super(pathname);
}
/** Create a named FIFO object at the local fs path given by the 'fifo' File
* object. */
public NamedFifo(File fifo) {
this.fifoFile = fifo;
}
/**
* Return the File object representing the FIFO.
*/
public File getFile() {
return this.fifoFile;
}
/**
* Create a named FIFO object.
* The pipe will be created with permissions 0600.
* @throws IOException on failure.
*/
public void create() throws IOException {
create(0600);
}
/**
* Create a named FIFO object with the specified fs permissions.
* This depends on the 'mknod' or 'mkfifo' (Mac OS X) system utility
* existing. (for example, provided by Linux coreutils). This object
* will be deleted when the process exits.
* @throws IOException on failure.
*/
public void create(int permissions) throws IOException {
String filename = fifoFile.toString();
// Format permissions as a mode string in base 8.
String modeStr = Integer.toString(permissions, 8);
// Create the FIFO itself.
try {
String output = Shell.execCommand("mknod", "--mode=0" + modeStr,
filename, "p");
LOG.info("mknod output:\n"+output);
} catch (IOException ex) {
LOG.info("IO error running mknod: " + ex.getMessage());
LOG.debug("IO error running mknod", ex);
}
if (!this.fifoFile.exists()) {
LOG.info("mknod failed, falling back to mkfifo");
String output = Shell.execCommand("mkfifo", "-m", "0" + modeStr,
filename);
LOG.info("mkfifo output:\n"+output);
}
// Schedule the FIFO to be cleaned up when we exit.
this.fifoFile.deleteOnExit();
super(fifo);
}
}
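
An illustrative sketch (not part of this commit; the /tmp path is arbitrary) of creating a FIFO through the deprecated wrapper. As the original code above notes, this relies on the mknod or mkfifo system utilities, and the pipe is removed when the JVM exits.

import java.io.File;
import java.io.IOException;
import com.cloudera.sqoop.io.NamedFifo;

public class NamedFifoExample {
  public static void main(String[] args) throws IOException {
    NamedFifo fifo = new NamedFifo("/tmp/sqoop-example.fifo");
    fifo.create();                       // default permissions 0600
    File f = fifo.getFile();
    System.out.println("FIFO created at " + f.getAbsolutePath());
  }
}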

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -20,56 +18,27 @@
package com.cloudera.sqoop.io;
import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.io.IOException;
import org.apache.sqoop.io.SplittingOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* A BufferedWriter implementation that wraps around a SplittingOutputStream
* and allows splitting of the underlying stream.
* Splits occur at allowSplit() calls, or newLine() calls.
*
* @deprecated use org.apache.sqoop.io.SplittableBufferedWriter instead.
* @see org.apache.sqoop.io.SplittableBufferedWriter
*/
public class SplittableBufferedWriter extends BufferedWriter {
public static final Log LOG = LogFactory.getLog(
SplittableBufferedWriter.class.getName());
private SplittingOutputStream splitOutputStream;
private boolean alwaysFlush;
public class SplittableBufferedWriter
extends org.apache.sqoop.io.SplittableBufferedWriter {
public SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = false;
super(splitOutputStream);
}
/** For testing. */
SplittableBufferedWriter(final SplittingOutputStream splitOutputStream,
final boolean alwaysFlush) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = alwaysFlush;
}
public void newLine() throws IOException {
super.newLine();
this.allowSplit();
}
public void allowSplit() throws IOException {
if (alwaysFlush) {
this.flush();
}
if (this.splitOutputStream.wouldSplit()) {
LOG.debug("Starting new split");
this.flush();
this.splitOutputStream.allowSplit();
}
super(splitOutputStream, alwaysFlush);
}
}

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,19 +15,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Formatter;
import org.apache.commons.io.output.CountingOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
@ -37,127 +28,18 @@
* An output stream that writes to an underlying filesystem, opening
* a new file after a specified number of bytes have been written to the
* current one.
*
* @deprecated use org.apache.sqoop.io.SplittingOutputStream instead.
* @see org.apache.sqoop.io.SplittingOutputStream
*/
public class SplittingOutputStream extends OutputStream {
public class SplittingOutputStream
extends org.apache.sqoop.io.SplittingOutputStream {
public static final Log LOG = LogFactory.getLog(
SplittingOutputStream.class.getName());
public static final Log LOG = org.apache.sqoop.io.SplittingOutputStream.LOG;
private OutputStream writeStream;
private CountingOutputStream countingFilterStream;
private Configuration conf;
private Path destDir;
private String filePrefix;
private long cutoffBytes;
private CompressionCodec codec;
private int fileNum;
/**
* Create a new SplittingOutputStream.
* @param conf the Configuration to use to interface with HDFS
* @param destDir the directory where the files will go (should already
* exist).
* @param filePrefix the first part of the filename, which will be appended
* by a number. This file will be placed inside destDir.
* @param cutoff the approximate number of bytes to use per file
* @param doGzip if true, then output files will be gzipped and have a .gz
* suffix.
*/
public SplittingOutputStream(final Configuration conf, final Path destDir,
final String filePrefix, final long cutoff, final CompressionCodec codec)
throws IOException {
this.conf = conf;
this.destDir = destDir;
this.filePrefix = filePrefix;
this.cutoffBytes = cutoff;
if (this.cutoffBytes < 0) {
this.cutoffBytes = 0; // splitting disabled.
}
this.codec = codec;
this.fileNum = 0;
openNextFile();
}
/** Initialize the OutputStream to the next file to write to.
*/
private void openNextFile() throws IOException {
FileSystem fs = FileSystem.get(conf);
StringBuffer sb = new StringBuffer();
Formatter fmt = new Formatter(sb);
fmt.format("%05d", this.fileNum++);
String filename = filePrefix + fmt.toString();
if (codec != null) {
filename = filename + codec.getDefaultExtension();
}
Path destFile = new Path(destDir, filename);
LOG.debug("Opening next output file: " + destFile);
if (fs.exists(destFile)) {
Path canonicalDest = destFile.makeQualified(fs);
throw new IOException("Destination file " + canonicalDest
+ " already exists");
}
OutputStream fsOut = fs.create(destFile);
// Count how many actual bytes hit HDFS.
this.countingFilterStream = new CountingOutputStream(fsOut);
if (codec != null) {
// Wrap that in a compressing stream.
this.writeStream = codec.createOutputStream(this.countingFilterStream);
} else {
// Write to the counting stream directly.
this.writeStream = this.countingFilterStream;
}
}
/**
* @return true if allowSplit() would actually cause a split.
*/
public boolean wouldSplit() {
return this.cutoffBytes > 0
&& this.countingFilterStream.getByteCount() >= this.cutoffBytes;
}
/** If we've written more to the disk than the user's split size,
* open the next file.
*/
private void checkForNextFile() throws IOException {
if (wouldSplit()) {
LOG.debug("Starting new split");
this.writeStream.flush();
this.writeStream.close();
openNextFile();
}
}
/** Defines a point in the stream when it is acceptable to split to a new
file; e.g., the end of a record.
*/
public void allowSplit() throws IOException {
checkForNextFile();
}
public void close() throws IOException {
this.writeStream.close();
}
public void flush() throws IOException {
this.writeStream.flush();
}
public void write(byte [] b) throws IOException {
this.writeStream.write(b);
}
public void write(byte [] b, int off, int len) throws IOException {
this.writeStream.write(b, off, len);
}
public void write(int b) throws IOException {
this.writeStream.write(b);
super(conf, destDir, filePrefix, cutoff, codec);
}
}
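
A combined usage sketch for the two splitting classes above (illustrative only; the directory, prefix, and 1 MB cutoff are made-up values, and the destination directory is assumed to already exist). Splits can only happen at newLine()/allowSplit() boundaries, so records are never broken across part files.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.SplittableBufferedWriter;
import com.cloudera.sqoop.io.SplittingOutputStream;

public class SplitWriterUsage {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Roll to a new numbered part file roughly every 1 MB; null codec = no compression.
    SplittingOutputStream out = new SplittingOutputStream(
        conf, new Path("/tmp/sqoop-split-demo"), "part-", 1024 * 1024, null);
    SplittableBufferedWriter writer = new SplittableBufferedWriter(out);
    for (int i = 0; i < 100000; i++) {
      writer.write("record-" + i);
      writer.newLine();                  // record boundary; a split may occur here
    }
    writer.close();
  }
}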

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,15 +15,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.io;
import java.io.IOException;
/**
* Thrown when a compression codec cannot be recognized.
*
* @deprecated use org.apache.sqoop.io.UnsupportedCodecException instead.
* @see org.apache.sqoop.io.UnsupportedCodecException
*/
public class UnsupportedCodecException extends IOException {
public class UnsupportedCodecException
extends org.apache.sqoop.io.UnsupportedCodecException {
public UnsupportedCodecException() {
super("UnsupportedCodecException");
}

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,7 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.DataInput;
@ -26,8 +23,6 @@
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.hadoop.io.Text;
/**
* Serialize BigDecimal classes to/from DataInput and DataOutput objects.
*
@ -43,43 +38,24 @@
* [int: scale][boolean: b == true][string: BigInt-part.toString()]
*
* TODO(aaron): Get this to work with Hadoop's Serializations framework.
*
* @deprecated use org.apache.sqoop.lib.BigDecimalSerializer instead.
* @see org.apache.sqoop.lib.BigDecimalSerializer
*/
public final class BigDecimalSerializer {
private BigDecimalSerializer() { }
static final BigInteger LONG_MAX_AS_BIGINT =
BigInteger.valueOf(Long.MAX_VALUE);
org.apache.sqoop.lib.BigDecimalSerializer.LONG_MAX_AS_BIGINT;
static final BigInteger LONG_MIN_AS_BIGINT =
BigInteger.valueOf(Long.MIN_VALUE);
org.apache.sqoop.lib.BigDecimalSerializer.LONG_MIN_AS_BIGINT;
public static void write(BigDecimal d, DataOutput out) throws IOException {
int scale = d.scale();
BigInteger bigIntPart = d.unscaledValue();
boolean fastpath = bigIntPart.compareTo(LONG_MAX_AS_BIGINT) < 0
&& bigIntPart .compareTo(LONG_MIN_AS_BIGINT) > 0;
out.writeInt(scale);
out.writeBoolean(fastpath);
if (fastpath) {
out.writeLong(bigIntPart.longValue());
} else {
Text.writeString(out, bigIntPart.toString());
}
org.apache.sqoop.lib.BigDecimalSerializer.write(d, out);
}
public static BigDecimal readFields(DataInput in) throws IOException {
int scale = in.readInt();
boolean fastpath = in.readBoolean();
BigInteger unscaledIntPart;
if (fastpath) {
long unscaledValue = in.readLong();
unscaledIntPart = BigInteger.valueOf(unscaledValue);
} else {
String unscaledValueStr = Text.readString(in);
unscaledIntPart = new BigInteger(unscaledValueStr);
}
return new BigDecimal(unscaledIntPart, scale);
return org.apache.sqoop.lib.BigDecimalSerializer.readFields(in);
}
}
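
A round-trip sketch (illustrative; the literal value is arbitrary) of the serialization format described above: the scale is written first, then the unscaled value either as a long (fast path) or as a string.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import com.cloudera.sqoop.lib.BigDecimalSerializer;

public class BigDecimalRoundTrip {
  public static void main(String[] args) throws IOException {
    BigDecimal original = new BigDecimal("12345.6789");
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    BigDecimalSerializer.write(original, new DataOutputStream(buf));
    BigDecimal copy = BigDecimalSerializer.readFields(
        new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
    System.out.println(original.equals(copy));       // true
  }
}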

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,37 +15,27 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Matcher;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.io.LobFile;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* BlobRef is a wrapper that holds a BLOB either directly, or a
* reference to a file that holds the BLOB data.
*
* @deprecated use org.apache.sqoop.lib.BlobRef instead.
* @see org.apache.sqoop.lib.BlobRef
*/
public class BlobRef extends LobRef<byte[], BytesWritable, InputStream> {
public class BlobRef extends org.apache.sqoop.lib.BlobRef {
public static final Log LOG = LogFactory.getLog(BlobRef.class.getName());
public static final Log LOG = org.apache.sqoop.lib.BlobRef.LOG;
public BlobRef() {
super();
}
public BlobRef(byte [] bytes) {
super(new BytesWritable(bytes));
super(bytes);
}
/**
@ -60,45 +48,6 @@ public BlobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected InputStream getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readBlobRecord();
}
@Override
protected InputStream getInternalSource(BytesWritable data) {
return new ByteArrayInputStream(data.getBytes(), 0, data.getLength());
}
@Override
protected byte [] getInternalData(BytesWritable data) {
return Arrays.copyOf(data.getBytes(), data.getLength());
}
@Override
protected BytesWritable deepCopyData(BytesWritable data) {
return new BytesWritable(Arrays.copyOf(data.getBytes(), data.getLength()));
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored BLOBs, the data is a BytesWritable
// containing the actual data.
BytesWritable data = getDataObj();
if (null == data) {
data = new BytesWritable();
}
data.readFields(in);
setDataObj(data);
}
@Override
public void writeInternal(DataOutput out) throws IOException {
getDataObj().write(out);
}
/**
* Create a BlobRef based on parsed data from a line of text.
@ -110,24 +59,7 @@ public void writeInternal(DataOutput out) throws IOException {
* an empty BlobRef if the data to be parsed is actually inline.
*/
public static BlobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external BLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline BLOB, which we don't support parsing of.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new BlobRef(m.group(1), Long.valueOf(m.group(2)),
Long.valueOf(m.group(3)));
} else {
// This is inline BLOB string data.
LOG.warn(
"Reparsing inline BLOB data is not supported; use SequenceFiles.");
return new BlobRef();
}
return org.apache.sqoop.lib.BlobRef.parse(inputString);
}
}
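
A short sketch (illustrative; the file name, offset, and length are invented) of the two forms a BlobRef can take: data held inline, or the 'externalLob(lf,file,offset,length)' text form that parse() recognizes for data stored in a LobFile.

import com.cloudera.sqoop.lib.BlobRef;

public class BlobRefUsage {
  public static void main(String[] args) {
    // Inline BLOB: the bytes are carried directly in the record.
    BlobRef inline = new BlobRef(new byte[] {1, 2, 3});
    // External BLOB: a reference into a LobFile, as produced by LargeObjectLoader.
    BlobRef external = BlobRef.parse("externalLob(lf,_lob/large_obj_0.lob,100,5242880)");
    System.out.println(inline + " / " + external);
  }
}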

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -23,6 +21,8 @@
/**
* Parse string representations of boolean values into boolean
* scalar types.
* @deprecated use org.apache.sqoop.lib.BooleanParser instead.
* @see org.apache.sqoop.lib.BooleanParser
*/
public final class BooleanParser {
private BooleanParser() {
@ -37,9 +37,7 @@ private BooleanParser() {
* <p>All comparisons are case-insensitive.</p>
*/
public static boolean valueOf(final String s) {
return s != null && ("true".equalsIgnoreCase(s) || "t".equalsIgnoreCase(s)
|| "1".equals(s) || "on".equalsIgnoreCase(s)
|| "yes".equalsIgnoreCase(s));
return org.apache.sqoop.lib.BooleanParser.valueOf(s);
}
}
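
A quick sketch of the parsing rule stated above (illustrative only): "true", "t", "1", "on", and "yes" map to true, case-insensitively; everything else, including null, maps to false.

import com.cloudera.sqoop.lib.BooleanParser;

public class BooleanParserUsage {
  public static void main(String[] args) {
    System.out.println(BooleanParser.valueOf("YES"));   // true
    System.out.println(BooleanParser.valueOf("t"));     // true
    System.out.println(BooleanParser.valueOf("0"));     // false
    System.out.println(BooleanParser.valueOf(null));    // false
  }
}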

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -20,21 +18,14 @@
package com.cloudera.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Matcher;
import org.apache.hadoop.io.Text;
import com.cloudera.sqoop.io.LobFile;
/**
* ClobRef is a wrapper that holds a CLOB either directly, or a
* reference to a file that holds the CLOB data.
*
* @deprecated use org.apache.sqoop.lib.ClobRef instead.
* @see org.apache.sqoop.lib.ClobRef
*/
public class ClobRef extends LobRef<String, String, Reader> {
public class ClobRef extends org.apache.sqoop.lib.ClobRef {
public ClobRef() {
super();
@ -54,60 +45,13 @@ public ClobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected Reader getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readClobRecord();
}
@Override
protected Reader getInternalSource(String data) {
return new StringReader(data);
}
@Override
protected String deepCopyData(String data) {
return data;
}
@Override
protected String getInternalData(String data) {
return data;
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored clobs, the data is written as UTF8 Text.
setDataObj(Text.readString(in));
}
@Override
public void writeInternal(DataOutput out) throws IOException {
Text.writeString(out, getDataObj());
}
/**
* Create a ClobRef based on parsed data from a line of text.
* @param inputString the text-based input data to parse.
* @return a ClobRef to the given data.
*/
public static ClobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external CLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline CLOB, which we read as-is.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new ClobRef(m.group(1), Long.valueOf(m.group(2)),
Long.valueOf(m.group(3)));
} else {
// This is inline CLOB string data.
return new ClobRef(inputString);
}
return org.apache.sqoop.lib.ClobRef.parse(inputString);
}
}
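
The CLOB counterpart, sketched for contrast (illustrative; the values are invented): unlike BlobRef, inline character data can be re-parsed directly, while the externalLob(...) form again points into a LobFile.

import com.cloudera.sqoop.lib.ClobRef;

public class ClobRefUsage {
  public static void main(String[] args) {
    ClobRef inline = ClobRef.parse("some inline text");
    ClobRef external = ClobRef.parse("externalLob(lf,_lob/large_obj_0.lob,0,1024)");
    System.out.println(inline + " / " + external);
  }
}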

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,33 +15,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
/**
* Encapsulates a set of delimiters used to encode a record.
* @deprecated use org.apache.sqoop.lib.DelimiterSet instead.
* @see org.apache.sqoop.lib.DelimiterSet
*/
public class DelimiterSet implements Cloneable {
public class DelimiterSet extends org.apache.sqoop.lib.DelimiterSet {
public static final char NULL_CHAR = '\000';
private char fieldDelim; // fields terminated by this.
private char recordDelim; // records terminated by this.
// If these next two fields are '\000', then they are ignored.
private char enclosedBy;
private char escapedBy;
// If true, then the enclosed-by character is applied to every
// field, not just ones containing embedded delimiters.
private boolean encloseRequired;
public static final char NULL_CHAR =
org.apache.sqoop.lib.DelimiterSet.NULL_CHAR;
/**
* Create a delimiter set with the default delimiters
* (comma for fields, newline for records).
*/
public DelimiterSet() {
this(',', '\n', NULL_CHAR, NULL_CHAR, false);
super();
}
/**
@ -57,152 +46,7 @@ public DelimiterSet() {
*/
public DelimiterSet(char field, char record, char enclose, char escape,
boolean isEncloseRequired) {
this.fieldDelim = field;
this.recordDelim = record;
this.enclosedBy = enclose;
this.escapedBy = escape;
this.encloseRequired = isEncloseRequired;
}
/**
* Sets the fields-terminated-by character.
*/
public void setFieldsTerminatedBy(char f) {
this.fieldDelim = f;
}
/**
* @return the fields-terminated-by character.
*/
public char getFieldsTerminatedBy() {
return this.fieldDelim;
}
/**
* Sets the end-of-record lines-terminated-by character.
*/
public void setLinesTerminatedBy(char r) {
this.recordDelim = r;
}
/**
* @return the end-of-record (lines-terminated-by) character.
*/
public char getLinesTerminatedBy() {
return this.recordDelim;
}
/**
* Sets the enclosed-by character.
* @param e the enclosed-by character, or '\000' for no enclosing character.
*/
public void setEnclosedBy(char e) {
this.enclosedBy = e;
}
/**
* @return the enclosed-by character, or '\000' for none.
*/
public char getEnclosedBy() {
return this.enclosedBy;
}
/**
* Sets the escaped-by character.
* @param e the escaped-by character, or '\000' for no escape character.
*/
public void setEscapedBy(char e) {
this.escapedBy = e;
}
/**
* @return the escaped-by character, or '\000' for none.
*/
public char getEscapedBy() {
return this.escapedBy;
}
/**
* Set whether the enclosed-by character must be applied to all fields,
* or only fields with embedded delimiters.
*/
public void setEncloseRequired(boolean required) {
this.encloseRequired = required;
}
/**
* @return true if the enclosed-by character must be applied to all fields,
* or false if it's only used for fields with embedded delimiters.
*/
public boolean isEncloseRequired() {
return this.encloseRequired;
}
@Override
/**
* @return a string representation of the delimiters.
*/
public String toString() {
return "fields=" + this.fieldDelim
+ " records=" + this.recordDelim
+ " escape=" + this.escapedBy
+ " enclose=" + this.enclosedBy
+ " required=" + this.encloseRequired;
}
/**
* Format this set of delimiters as a call to the constructor for
* this object, that would generate identical delimiters.
* @return a String that can be embedded in generated code that
* provides this set of delimiters.
*/
public String formatConstructor() {
return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
+ "(char) " + (int) this.recordDelim + ", "
+ "(char) " + (int) this.enclosedBy + ", "
+ "(char) " + (int) this.escapedBy + ", "
+ this.encloseRequired + ")";
}
@Override
/**
* @return a hash code for this set of delimiters.
*/
public int hashCode() {
return (int) this.fieldDelim
+ (((int) this.recordDelim) << 4)
+ (((int) this.escapedBy) << 8)
+ (((int) this.enclosedBy) << 12)
+ (((int) this.recordDelim) << 16)
+ (this.encloseRequired ? 0xFEFE : 0x7070);
}
@Override
/**
* @return true if this delimiter set is the same as another set of
* delimiters.
*/
public boolean equals(Object other) {
if (null == other) {
return false;
} else if (!other.getClass().equals(getClass())) {
return false;
}
DelimiterSet set = (DelimiterSet) other;
return this.fieldDelim == set.fieldDelim
&& this.recordDelim == set.recordDelim
&& this.escapedBy == set.escapedBy
&& this.enclosedBy == set.enclosedBy
&& this.encloseRequired == set.encloseRequired;
}
@Override
/**
* @return a new copy of this same set of delimiters.
*/
public Object clone() throws CloneNotSupportedException {
return super.clone();
super(field, record, enclose, escape, isEncloseRequired);
}
/**

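A brief usage sketch for the deprecated DelimiterSet wrapper above (illustrative; the chosen delimiters are arbitrary, and toString()/formatConstructor() are assumed to be inherited unchanged from org.apache.sqoop.lib.DelimiterSet):

import com.cloudera.sqoop.lib.DelimiterSet;

public class DelimiterSetUsage {
  public static void main(String[] args) {
    // Fields split on ',', records on '\n', optional '"' enclosing, '\' escaping.
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    System.out.println(delims);                       // human-readable summary
    System.out.println(delims.formatConstructor());   // form used in generated code
  }
}
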
View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,11 +15,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
/**
* Static helper class that will help format data with quotes and escape chars.
*
* @deprecated use org.apache.sqoop.lib.FieldFormatter instead.
* @see org.apache.sqoop.lib.FieldFormatter
*/
public final class FieldFormatter {
@ -35,7 +35,8 @@ private FieldFormatter() { }
*/
public static String hiveStringDropDelims(String str,
DelimiterSet delimiters) {
return hiveStringReplaceDelims(str, "", delimiters);
return org.apache.sqoop.lib.FieldFormatter.hiveStringDropDelims(
str, delimiters);
}
/**
@ -47,8 +48,8 @@ public static String hiveStringDropDelims(String str,
*/
public static String hiveStringReplaceDelims(String str, String replacement,
DelimiterSet delimiters) {
String droppedDelims = str.replaceAll("\\n|\\r|\01", replacement);
return escapeAndEnclose(droppedDelims, delimiters);
return org.apache.sqoop.lib.FieldFormatter.hiveStringReplaceDelims(
str, replacement, delimiters);
}
/**
@ -73,68 +74,7 @@ public static String hiveStringReplaceDelims(String str, String replacement,
* @return the escaped, enclosed version of 'str'.
*/
public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
char escape = delimiters.getEscapedBy();
char enclose = delimiters.getEnclosedBy();
boolean encloseRequired = delimiters.isEncloseRequired();
// true if we can use an escape character.
boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
String withEscapes;
if (null == str) {
return null;
}
if (escapingLegal) {
// escaping is legal. Escape any instances of the escape char itself.
withEscapes = str.replace("" + escape, "" + escape + escape);
} else {
// no need to double-escape
withEscapes = str;
}
if (DelimiterSet.NULL_CHAR == enclose) {
// The enclose-with character was left unset, so we can't enclose items.
if (escapingLegal) {
// If the user has used the fields-terminated-by or
// lines-terminated-by characters in the string, escape them if we
// have an escape character.
String fields = "" + delimiters.getFieldsTerminatedBy();
String lines = "" + delimiters.getLinesTerminatedBy();
withEscapes = withEscapes.replace(fields, "" + escape + fields);
withEscapes = withEscapes.replace(lines, "" + escape + lines);
}
// No enclosing possible, so now return this.
return withEscapes;
}
// if we have an enclosing character, and escaping is legal, then the
// encloser must always be escaped.
if (escapingLegal) {
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
}
boolean actuallyDoEnclose = encloseRequired;
if (!actuallyDoEnclose) {
// check if the string requires enclosing.
char [] mustEncloseFor = new char[2];
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
for (char reason : mustEncloseFor) {
if (str.indexOf(reason) != -1) {
actuallyDoEnclose = true;
break;
}
}
}
if (actuallyDoEnclose) {
return "" + enclose + withEscapes + enclose;
} else {
return withEscapes;
}
return org.apache.sqoop.lib.FieldFormatter.escapeAndEnclose(
str, delimiters);
}
}
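
A small sketch of the escaping rules above (illustrative; the strings and delimiters are invented): fields containing a delimiter get enclosed, and the Hive helpers additionally drop or replace \n, \r, and \01.

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;

public class FieldFormatterUsage {
  public static void main(String[] args) {
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    // Embedded ',' forces enclosing: prints "Doe, John" wrapped in double quotes.
    System.out.println(FieldFormatter.escapeAndEnclose("Doe, John", delims));
    // Hive variant strips the embedded newline before escaping/enclosing.
    System.out.println(FieldFormatter.hiveStringDropDelims("line1\nline2", delims));
  }
}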

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,24 +15,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.IOException;
/**
* Interface implemented by classes that process FieldMappable objects.
*
* @deprecated use org.apache.sqoop.lib.FieldMapProcessor instead.
* @see org.apache.sqoop.lib.FieldMapProcessor
*/
public interface FieldMapProcessor {
/**
* Allow arbitrary processing of a FieldMappable object.
* @param record an object which can emit a map of its field names to values.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
void accept(FieldMappable record) throws IOException, ProcessingException;
public interface FieldMapProcessor
extends org.apache.sqoop.lib.FieldMapProcessor {
}

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,22 +15,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.util.Map;
/**
* Interface describing a class capable of returning a map of the fields
* of the object to their values.
*
* @deprecated use org.apache.sqoop.lib.FieldMappable instead.
* @see org.apache.sqoop.lib.FieldMappable
*/
public interface FieldMappable {
public interface FieldMappable extends org.apache.sqoop.lib.FieldMappable {
/**
* Returns a map containing all fields of this record.
* @return a map from column names to the object-based values for
* this record. The map may not be null, though it may be empty.
*/
Map<String, Object> getFieldMap();
}

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,10 +15,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import org.apache.hadoop.io.BytesWritable;
import java.math.BigDecimal;
import java.sql.Date;
import java.sql.PreparedStatement;
@ -29,228 +25,163 @@
import java.sql.Time;
import java.sql.Timestamp;
import org.apache.hadoop.io.BytesWritable;
/**
* Contains a set of methods which can read db columns from a ResultSet into
* Java types, and do serialization of these types to/from DataInput/DataOutput
* for use with Hadoop's Writable implementation. This supports null values
* for all types.
*
* @deprecated use org.apache.sqoop.lib.JdbcWritableBridge instead.
* @see org.apache.sqoop.lib.JdbcWritableBridge
*/
public final class JdbcWritableBridge {
// Currently, cap BLOB/CLOB objects at 16 MB until we can use external
// storage.
public static final long MAX_BLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_CLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_BLOB_LENGTH =
org.apache.sqoop.lib.JdbcWritableBridge.MAX_BLOB_LENGTH;
public static final long MAX_CLOB_LENGTH =
org.apache.sqoop.lib.JdbcWritableBridge.MAX_CLOB_LENGTH;
private JdbcWritableBridge() {
}
public static Integer readInteger(int colNum, ResultSet r)
throws SQLException {
int val;
val = r.getInt(colNum);
if (r.wasNull()) {
return null;
} else {
return Integer.valueOf(val);
}
return org.apache.sqoop.lib.JdbcWritableBridge.readInteger(colNum, r);
}
public static Long readLong(int colNum, ResultSet r) throws SQLException {
long val;
val = r.getLong(colNum);
if (r.wasNull()) {
return null;
} else {
return Long.valueOf(val);
}
return org.apache.sqoop.lib.JdbcWritableBridge.readLong(colNum, r);
}
public static String readString(int colNum, ResultSet r) throws SQLException {
return r.getString(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readString(colNum, r);
}
public static Float readFloat(int colNum, ResultSet r) throws SQLException {
float val;
val = r.getFloat(colNum);
if (r.wasNull()) {
return null;
} else {
return Float.valueOf(val);
}
return org.apache.sqoop.lib.JdbcWritableBridge.readFloat(colNum, r);
}
public static Double readDouble(int colNum, ResultSet r) throws SQLException {
double val;
val = r.getDouble(colNum);
if (r.wasNull()) {
return null;
} else {
return Double.valueOf(val);
}
return org.apache.sqoop.lib.JdbcWritableBridge.readDouble(colNum, r);
}
public static Boolean readBoolean(int colNum, ResultSet r)
throws SQLException {
boolean val;
val = r.getBoolean(colNum);
if (r.wasNull()) {
return null;
} else {
return Boolean.valueOf(val);
}
return org.apache.sqoop.lib.JdbcWritableBridge.readBoolean(colNum, r);
}
public static Time readTime(int colNum, ResultSet r) throws SQLException {
return r.getTime(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readTime(colNum, r);
}
public static Timestamp readTimestamp(int colNum, ResultSet r)
throws SQLException {
return r.getTimestamp(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readTimestamp(colNum, r);
}
public static Date readDate(int colNum, ResultSet r) throws SQLException {
return r.getDate(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readDate(colNum, r);
}
public static BytesWritable readBytesWritable(int colNum, ResultSet r)
throws SQLException {
byte [] bytes = r.getBytes(colNum);
return bytes == null ? null : new BytesWritable(bytes);
return org.apache.sqoop.lib.JdbcWritableBridge.readBytesWritable(colNum, r);
}
public static BigDecimal readBigDecimal(int colNum, ResultSet r)
throws SQLException {
return r.getBigDecimal(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readBigDecimal(colNum, r);
}
public static BlobRef readBlobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of BLOBs is delayed; handled by LargeObjectLoader.
return null;
return org.apache.sqoop.lib.JdbcWritableBridge.readBlobRef(colNum, r);
}
public static ClobRef readClobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of CLOBs is delayed; handled by LargeObjectLoader.
return null;
return org.apache.sqoop.lib.JdbcWritableBridge.readClobRef(colNum, r);
}
public static void writeInteger(Integer val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setInt(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeInteger(
val, paramIdx, sqlType, s);
}
public static void writeLong(Long val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setLong(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeLong(
val, paramIdx, sqlType, s);
}
public static void writeDouble(Double val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDouble(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeDouble(
val, paramIdx, sqlType, s);
}
public static void writeBoolean(Boolean val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBoolean(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeBoolean(
val, paramIdx, sqlType, s);
}
public static void writeFloat(Float val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setFloat(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeFloat(
val, paramIdx, sqlType, s);
}
public static void writeString(String val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setString(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeString(
val, paramIdx, sqlType, s);
}
public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTimestamp(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeTimestamp(
val, paramIdx, sqlType, s);
}
public static void writeTime(Time val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTime(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeTime(
val, paramIdx, sqlType, s);
}
public static void writeDate(Date val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDate(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeDate(
val, paramIdx, sqlType, s);
}
public static void writeBytesWritable(BytesWritable val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
// val.getBytes() is only valid in [0, len)
byte [] rawBytes = val.getBytes();
int len = val.getLength();
byte [] outBytes = new byte[len];
System.arraycopy(rawBytes, 0, outBytes, 0, len);
s.setBytes(paramIdx, outBytes);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeBytesWritable(
val, paramIdx, sqlType, s);
}
public static void writeBigDecimal(BigDecimal val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBigDecimal(paramIdx, val);
}
org.apache.sqoop.lib.JdbcWritableBridge.writeBigDecimal(
val, paramIdx, sqlType, s);
}
public static void writeBlobRef(BlobRef val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export BLOB data");
org.apache.sqoop.lib.JdbcWritableBridge.writeBlobRef(
val, paramIdx, sqlType, s);
}
public static void writeClobRef(ClobRef val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export CLOB data");
org.apache.sqoop.lib.JdbcWritableBridge.writeClobRef(
val, paramIdx, sqlType, s);
}
}
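
A hedged sketch of how the bridge is typically used (illustrative; the column layout and the copyRow helper are invented, and the ResultSet/PreparedStatement are assumed to come from the caller): columns are read into boxed Java types so SQL NULLs survive, and written back with the column's java.sql.Types code.

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import com.cloudera.sqoop.lib.JdbcWritableBridge;

public class JdbcBridgeUsage {
  // Copies the first two columns of the current row into an INSERT statement.
  public static void copyRow(ResultSet rs, PreparedStatement insert)
      throws SQLException {
    Integer id = JdbcWritableBridge.readInteger(1, rs);     // null if SQL NULL
    String name = JdbcWritableBridge.readString(2, rs);
    JdbcWritableBridge.writeInteger(id, 1, Types.INTEGER, insert);
    JdbcWritableBridge.writeString(name, 2, Types.VARCHAR, insert);
  }
}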

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,26 +15,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.util.TaskId;
/**
* Contains a set of methods which can read db columns from a ResultSet into
@ -47,26 +31,18 @@
* This is a singleton instance class; only one may exist at a time.
* However, its lifetime is limited to the current TaskInputOutputContext's
* life.
*
* @deprecated use org.apache.sqoop.lib.LargeObjectLoader instead.
* @see org.apache.sqoop.lib.LargeObjectLoader
*/
public class LargeObjectLoader implements Closeable {
public class LargeObjectLoader extends org.apache.sqoop.lib.LargeObjectLoader {
// Spill to external storage for BLOB/CLOB objects > 16 MB.
public static final long DEFAULT_MAX_LOB_LENGTH = 16 * 1024 * 1024;
public static final long DEFAULT_MAX_LOB_LENGTH =
org.apache.sqoop.lib.LargeObjectLoader.DEFAULT_MAX_LOB_LENGTH;
public static final String MAX_INLINE_LOB_LEN_KEY =
"sqoop.inline.lob.length.max";
private Configuration conf;
private Path workPath;
private FileSystem fs;
// Handles to the open BLOB / CLOB file writers.
private LobFile.Writer curBlobWriter;
private LobFile.Writer curClobWriter;
// Counter that is used with the current task attempt id to
// generate unique LOB file names.
private long nextLobFileId = 0;
org.apache.sqoop.lib.LargeObjectLoader.MAX_INLINE_LOB_LEN_KEY;
/**
* Create a new LargeObjectLoader.
@ -75,246 +51,6 @@ public class LargeObjectLoader implements Closeable {
*/
public LargeObjectLoader(Configuration conf, Path workPath)
throws IOException {
this.conf = conf;
this.workPath = workPath;
this.fs = FileSystem.get(conf);
this.curBlobWriter = null;
this.curClobWriter = null;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
@Override
public void close() throws IOException {
if (null != curBlobWriter) {
curBlobWriter.close();
curBlobWriter = null;
}
if (null != curClobWriter) {
curClobWriter.close();
curClobWriter = null;
}
}
/**
* @return a filename to use to put an external LOB in.
*/
private String getNextLobFileName() {
String file = "_lob/large_obj_" + TaskId.get(conf, "unknown_task_id")
+ nextLobFileId + ".lob";
nextLobFileId++;
return file;
}
/**
* Calculates a path to a new LobFile object, creating any
* missing directories.
* @return a Path to a LobFile to write
*/
private Path getNextLobFilePath() throws IOException {
Path p = new Path(workPath, getNextLobFileName());
Path parent = p.getParent();
if (!fs.exists(parent)) {
fs.mkdirs(parent);
}
return p;
}
/**
* @return the current LobFile writer for BLOBs, creating one if necessary.
*/
private LobFile.Writer getBlobWriter() throws IOException {
if (null == this.curBlobWriter) {
this.curBlobWriter = LobFile.create(getNextLobFilePath(), conf, false);
}
return this.curBlobWriter;
}
/**
* @return the current LobFile writer for CLOBs, creating one if necessary.
*/
private LobFile.Writer getClobWriter() throws IOException {
if (null == this.curClobWriter) {
this.curClobWriter = LobFile.create(getNextLobFilePath(), conf, true);
}
return this.curClobWriter;
}
/**
* Returns the path being written to by a given LobFile.Writer, relative
* to the working directory of this LargeObjectLoader.
* @param w the LobFile.Writer whose path should be examined.
* @return the path this is writing to, relative to the current working dir.
*/
private String getRelativePath(LobFile.Writer w) {
Path writerPath = w.getPath();
String writerPathStr = writerPath.toString();
String workPathStr = workPath.toString();
if (!workPathStr.endsWith(File.separator)) {
workPathStr = workPathStr + File.separator;
}
if (writerPathStr.startsWith(workPathStr)) {
return writerPathStr.substring(workPathStr.length());
}
// Outside the working dir; return the whole thing.
return writerPathStr;
}
/**
* Copies all character data from the provided Reader to the provided
* Writer. Does not close handles when it's done.
* @param reader data source
* @param writer data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(Reader reader, Writer writer) throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
char [] buf = new char[bufferSize];
while (true) {
int charsRead = reader.read(buf);
if (-1 == charsRead) {
break; // no more stream to read.
}
writer.write(buf, 0, charsRead);
}
}
/**
* Copies all byte data from the provided InputStream to the provided
* OutputStream. Does not close handles when it's done.
* @param input data source
* @param output data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(InputStream input, OutputStream output)
throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
byte [] buf = new byte[bufferSize];
while (true) {
int bytesRead = input.read(buf, 0, bufferSize);
if (-1 == bytesRead) {
break; // no more stream to read.
}
output.write(buf, 0, bytesRead);
}
}
/**
* Actually read a BlobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a BlobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public BlobRef readBlobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Blob b = r.getBlob(colNum);
if (null == b) {
return null;
} else if (b.length() > maxInlineLobLen) {
// Deserialize very large BLOBs into separate files.
long len = b.length();
LobFile.Writer lobWriter = getBlobWriter();
long recordOffset = lobWriter.tell();
InputStream is = null;
OutputStream os = lobWriter.writeBlobRecord(len);
try {
is = b.getBinaryStream();
copyAll(is, os);
} finally {
if (null != os) {
os.close();
}
if (null != is) {
is.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new BlobRef(getRelativePath(curBlobWriter), recordOffset, len);
} else {
// Blob.getBytes() positions are 1-based.
return new BlobRef(b.getBytes(1, (int) b.length()));
}
}
/**
* Actually read a ClobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a ClobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public ClobRef readClobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Clob c = r.getClob(colNum);
if (null == c) {
return null;
} else if (c.length() > maxInlineLobLen) {
// Deserialize large CLOB into separate file.
long len = c.length();
LobFile.Writer lobWriter = getClobWriter();
long recordOffset = lobWriter.tell();
Reader reader = null;
Writer w = lobWriter.writeClobRecord(len);
try {
reader = c.getCharacterStream();
copyAll(reader, w);
} finally {
if (null != w) {
w.close();
}
if (null != reader) {
reader.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new ClobRef(getRelativePath(lobWriter), recordOffset, len);
} else {
// Clob.getSubString() positions are 1-based.
return new ClobRef(c.getSubString(1, (int) c.length()));
}
super(conf, workPath);
}
}
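A minimal usage sketch, not part of this change: driving the loader above against a JDBC ResultSet. The JDBC URL, table name, and work directory are hypothetical, and the (Configuration, Path) constructor is assumed from the super(conf, workPath) call in the deprecated shim.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class BlobColumnSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical work directory where oversized LOBs are spilled.
    com.cloudera.sqoop.lib.LargeObjectLoader loader =
        new com.cloudera.sqoop.lib.LargeObjectLoader(
            conf, new Path("/tmp/sqoop-lob-work"));
    // Hypothetical JDBC source; any driver on the classpath will do.
    Connection conn = DriverManager.getConnection("jdbc:hsqldb:mem:demo");
    Statement stmt = conn.createStatement();
    ResultSet rs = stmt.executeQuery("SELECT id, payload FROM demo_table");
    while (rs.next()) {
      // Column 2 is the BLOB: values under the configured inline limit stay
      // in memory; larger ones are written to a LobFile under the work dir.
      System.out.println(rs.getLong(1) + " -> " + loader.readBlobRef(2, rs));
    }
    rs.close();
    stmt.close();
    conn.close();
  }
}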

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,28 +15,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.io.LobReaderCache;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Abstract base class that holds a reference to a Blob or a Clob.
@ -46,284 +27,28 @@
* CONTAINERTYPE is the type used to hold this data (e.g., BytesWritable).
* ACCESSORTYPE is the type used to access this data in a streaming fashion
* (either an InputStream or a Reader).
*
* @deprecated use org.apache.sqoop.lib.LobRef instead.
* @see org.apache.sqoop.lib.LobRef
*/
public abstract class LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>
implements Closeable, Writable {
extends org.apache.sqoop.lib.LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> {
public static final Log LOG = LogFactory.getLog(LobRef.class.getName());
public static final Log LOG = org.apache.sqoop.lib.LobRef.LOG;
protected LobRef() {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = null;
super();
}
protected LobRef(CONTAINERTYPE container) {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = container;
super(container);
}
protected LobRef(String file, long offset, long length) {
this.fileName = file;
this.offset = offset;
this.length = length;
this.realData = null;
super(file, offset, length);
}
// If the data is 'small', it's held directly, here.
private CONTAINERTYPE realData;
/** Internal API to retrieve the data object. */
protected CONTAINERTYPE getDataObj() {
return realData;
}
/** Internal API to set the data object. */
protected void setDataObj(CONTAINERTYPE data) {
this.realData = data;
}
// If the data is too large to materialize fully, it's written into a file
// whose path (relative to the rest of the dataset) is recorded here. This
// takes precedence if the value of fileName is non-null. These records are
// currently written into LobFile-formatted files, which hold multiple
// records. The starting offset and length of the record are recorded here
// as well.
private String fileName;
private long offset;
private long length;
// If we've opened a LobFile object, track our reference to it here.
private LobFile.Reader lobReader;
@Override
@SuppressWarnings("unchecked")
/**
* Clone the current reference object. data is deep-copied; any open
* file handle remains with the original only.
*/
public Object clone() throws CloneNotSupportedException {
LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> r =
(LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>) super.clone();
r.lobReader = null; // Reference to opened reader is not duplicated.
if (null != realData) {
r.realData = deepCopyData(realData);
}
return r;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
// Discard any open LobReader.
if (null != this.lobReader) {
LobReaderCache.getCache().recycle(this.lobReader);
}
}
/**
* @return true if the LOB data is in an external file; false if
* it materialized inline.
*/
public boolean isExternal() {
return fileName != null;
}
/**
* Convenience method to access #getDataStream(Configuration, Path)
* from within a map task that read this LobRef from a file-based
* InputSplit.
* @param mapContext the Mapper.Context instance that encapsulates
* the current map task.
* @return an object that lazily streams the record to the client.
* @throws IllegalArgumentException if it cannot find the source
* path for this LOB based on the MapContext.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Mapper.Context mapContext)
throws IOException {
InputSplit split = mapContext.getInputSplit();
if (split instanceof FileSplit) {
Path basePath = ((FileSplit) split).getPath().getParent();
return getDataStream(mapContext.getConfiguration(),
basePath);
} else {
throw new IllegalArgumentException(
"Could not ascertain LOB base path from MapContext.");
}
}
/**
* Get access to the LOB data itself.
* This method returns a lazy reader of the LOB data, accessing the
* filesystem for external LOB storage as necessary.
* @param conf the Configuration used to access the filesystem
* @param basePath the base directory where the table records are
* stored.
* @return an object that lazily streams the record to the client.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Configuration conf, Path basePath)
throws IOException {
if (isExternal()) {
// Read from external storage.
Path pathToRead = LobReaderCache.qualify(
new Path(basePath, fileName), conf);
LOG.debug("Retreving data stream from external path: " + pathToRead);
if (lobReader != null) {
// We already have a reader open to a LobFile. Is it the correct file?
if (!pathToRead.equals(lobReader.getPath())) {
// No. Close this.lobReader and get the correct one.
LOG.debug("Releasing previous external reader for "
+ lobReader.getPath());
LobReaderCache.getCache().recycle(lobReader);
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
} else {
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
// We now have a LobFile.Reader associated with the correct file. Get to
// the correct offset and return an InputStream/Reader to the user.
if (lobReader.tell() != offset) {
LOG.debug("Seeking to record start offset " + offset);
lobReader.seek(offset);
}
if (!lobReader.next()) {
throw new IOException("Could not locate record at " + pathToRead
+ ":" + offset);
}
return getExternalSource(lobReader);
} else {
// This data is already materialized in memory; wrap it and return.
return getInternalSource(realData);
}
}
/**
* Using the LobFile reader, get an accessor InputStream or Reader to the
* underlying data.
*/
protected abstract ACCESSORTYPE getExternalSource(LobFile.Reader reader)
throws IOException;
/**
* Wrap the materialized data in an InputStream or Reader.
*/
protected abstract ACCESSORTYPE getInternalSource(CONTAINERTYPE data);
/**
* @return the materialized data itself.
*/
protected abstract DATATYPE getInternalData(CONTAINERTYPE data);
/**
* Make a copy of the materialized data.
*/
protected abstract CONTAINERTYPE deepCopyData(CONTAINERTYPE data);
public DATATYPE getData() {
if (isExternal()) {
throw new RuntimeException(
"External LOBs must be read via getDataStream()");
}
return getInternalData(realData);
}
@Override
public String toString() {
if (isExternal()) {
return "externalLob(lf," + fileName + "," + Long.toString(offset)
+ "," + Long.toString(length) + ")";
} else {
return realData.toString();
}
}
@Override
public void readFields(DataInput in) throws IOException {
// The serialization format for this object is:
// boolean isExternal
// if true, then:
// a string identifying the external storage type
// and external-storage-specific data.
// if false, then we use readFieldsInternal() to allow BlobRef/ClobRef
// to serialize as it sees fit.
//
// Currently the only external storage supported is LobFile, identified
// by the string "lf". This serializes with the filename (as a string),
// followed by a long-valued offset and a long-valued length.
boolean isExternal = in.readBoolean();
if (isExternal) {
this.realData = null;
String storageType = Text.readString(in);
if (!storageType.equals("lf")) {
throw new IOException("Unsupported external LOB storage code: "
+ storageType);
}
// Storage type "lf" is LobFile: filename, offset, length.
this.fileName = Text.readString(in);
this.offset = in.readLong();
this.length = in.readLong();
} else {
readFieldsInternal(in);
this.fileName = null;
this.offset = 0;
this.length = 0;
}
}
/**
* Perform the readFields() operation on a fully-materializable record.
* @param in the DataInput to deserialize from.
*/
protected abstract void readFieldsInternal(DataInput in) throws IOException;
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(isExternal());
if (isExternal()) {
Text.writeString(out, "lf"); // storage type "lf" for LobFile.
Text.writeString(out, fileName);
out.writeLong(offset);
out.writeLong(length);
} else {
writeInternal(out);
}
}
/**
* Perform the write() operation on a fully-materializable record.
* @param out the DataOutput to serialize to.
*/
protected abstract void writeInternal(DataOutput out) throws IOException;
protected static final ThreadLocal<Matcher> EXTERNAL_MATCHER =
new ThreadLocal<Matcher>() {
@Override protected Matcher initialValue() {
Pattern externalPattern = Pattern.compile(
"externalLob\\(lf,(.*),([0-9]+),([0-9]+)\\)");
return externalPattern.matcher("");
}
};
org.apache.sqoop.lib.LobRef.EXTERNAL_MATCHER;
}
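A hedged sketch of resolving an externally stored LOB through getDataStream(Configuration, Path); the LobFile name, offset, length, and base path below are made up for illustration.
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class LobStreamSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical reference: record at offset 100, length 4096, inside the
    // LobFile "_lobs/largeobject_0.lob" relative to the dataset directory.
    com.cloudera.sqoop.lib.BlobRef blob = new com.cloudera.sqoop.lib.BlobRef(
        "_lobs/largeobject_0.lob", 100, 4096);
    Configuration conf = new Configuration();
    InputStream in = blob.getDataStream(conf, new Path("/user/demo/table1"));
    try {
      byte[] buf = new byte[4096];
      long total = 0;
      int n;
      while ((n = in.read(buf)) != -1) {
        total += n;
      }
      System.out.println("streamed " + total + " bytes");
    } finally {
      in.close();
      blob.close();   // returns the underlying LobFile reader to the cache
    }
  }
}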

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,7 +15,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.DataInput;
@ -26,6 +23,9 @@
/**
* Serialize LOB classes to/from DataInput and DataOutput objects.
*
* @deprecated use org.apache.sqoop.lib.LobSerializer instead.
* @see org.apache.sqoop.lib.LobSerializer
*/
public final class LobSerializer {
@ -33,23 +33,19 @@ private LobSerializer() { }
public static void writeClob(ClobRef clob, DataOutput out)
throws IOException {
clob.write(out);
org.apache.sqoop.lib.LobSerializer.writeClob(clob, out);
}
public static void writeBlob(BlobRef blob, DataOutput out)
throws IOException {
blob.write(out);
org.apache.sqoop.lib.LobSerializer.writeBlob(blob, out);
}
public static ClobRef readClobFields(DataInput in) throws IOException {
ClobRef clob = new ClobRef();
clob.readFields(in);
return clob;
return org.apache.sqoop.lib.LobSerializer.readClobFields(in);
}
public static BlobRef readBlobFields(DataInput in) throws IOException {
BlobRef blob = new BlobRef();
blob.readFields(in);
return blob;
return org.apache.sqoop.lib.LobSerializer.readBlobFields(in);
}
}
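A small serialize/deserialize round trip through LobSerializer, assuming the ClobRef(String) convenience constructor used by LargeObjectLoader above.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class LobSerializerSketch {
  public static void main(String[] args) throws Exception {
    com.cloudera.sqoop.lib.ClobRef clob =
        new com.cloudera.sqoop.lib.ClobRef("hello, world");
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    com.cloudera.sqoop.lib.LobSerializer.writeClob(clob, out);
    out.flush();

    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    com.cloudera.sqoop.lib.ClobRef copy =
        com.cloudera.sqoop.lib.LobSerializer.readClobFields(in);
    System.out.println(copy.getData());   // prints the inline CLOB text
  }
}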

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,14 +15,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
/**
* General error during processing of a SqoopRecord.
*
* @deprecated use org.apache.sqoop.lib.ProcessingException instead.
* @see org.apache.sqoop.lib.ProcessingException
*/
@SuppressWarnings("serial")
public class ProcessingException extends Exception {
public class ProcessingException
extends org.apache.sqoop.lib.ProcessingException {
public ProcessingException() {
super("ProcessingException");
@ -41,10 +42,4 @@ public ProcessingException(final Throwable cause) {
public ProcessingException(final String message, final Throwable cause) {
super(message, cause);
}
@Override
public String toString() {
String msg = getMessage();
return (null == msg) ? "ProcessingException" : msg;
}
}

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,18 +15,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
/**
* Parses a record containing one or more fields. Fields are separated
@ -53,26 +42,25 @@
* The fields parsed by RecordParser are backed by an internal buffer
* which is cleared when the next call to parseRecord() is made. If
* the buffer is required to be preserved, you must copy it yourself.
*
* @deprecated use org.apache.sqoop.lib.RecordParser instead.
* @see org.apache.sqoop.lib.RecordParser
*/
public final class RecordParser {
public final class RecordParser extends org.apache.sqoop.lib.RecordParser {
public static final Log LOG = LogFactory.getLog(RecordParser.class.getName());
private enum ParseState {
FIELD_START,
ENCLOSED_FIELD,
UNENCLOSED_FIELD,
ENCLOSED_ESCAPE,
ENCLOSED_EXPECT_DELIMITER,
UNENCLOSED_ESCAPE
}
public static final Log LOG = org.apache.sqoop.lib.RecordParser.LOG;
/**
* An error thrown when parsing fails.
*
* @deprecated use org.apache.sqoop.lib.RecordParser.ParseError instead.
* @see org.apache.sqoop.lib.RecordParser.ParseError
*/
public static class ParseError extends Exception {
public static class ParseError
extends org.apache.sqoop.lib.RecordParser.ParseError {
public ParseError() {
super("ParseError");
super();
}
public ParseError(final String msg) {
@ -88,273 +76,7 @@ public ParseError(final Throwable cause) {
}
}
private DelimiterSet delimiters;
private ArrayList<String> outputs;
public RecordParser(final DelimiterSet delimitersIn) {
this.delimiters = delimitersIn.copy();
this.outputs = new ArrayList<String>();
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharSequence input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(Text input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
// TODO(aaron): The parser should be able to handle UTF-8 strings
// as well, to avoid this transcode operation.
return parseRecord(input.toString());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(byte [] input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(ByteBuffer.wrap(input).asCharBuffer());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(char [] input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
public List<String> parseRecord(ByteBuffer input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(input.asCharBuffer());
}
// TODO(aaron): Refactor this method to be much shorter.
// CHECKSTYLE:OFF
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharBuffer input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
/*
This method implements the following state machine to perform
parsing.
Note that there are no restrictions on whether particular characters
(e.g., field-sep, record-sep, etc) are distinct or the same. The
state transitions are processed in the order seen in this comment.
Starting state is FIELD_START
encloser -> ENCLOSED_FIELD
escape char -> UNENCLOSED_ESCAPE
field delim -> FIELD_START (for a new field)
record delim -> stops processing
all other letters get added to current field, -> UNENCLOSED_FIELD
ENCLOSED_FIELD state:
escape char goes to ENCLOSED_ESCAPE
encloser goes to ENCLOSED_EXPECT_DELIMITER
field sep or record sep gets added to the current string
normal letters get added to the current string
ENCLOSED_ESCAPE state:
any character seen here is added literally, back to ENCLOSED_FIELD
ENCLOSED_EXPECT_DELIMITER state:
field sep goes to FIELD_START
record sep halts processing.
all other characters are errors.
UNENCLOSED_FIELD state:
ESCAPE char goes to UNENCLOSED_ESCAPE
FIELD_SEP char goes to FIELD_START
RECORD_SEP char halts processing
normal chars or the enclosing char get added to the current string
UNENCLOSED_ESCAPE:
add character literal to current string, return to UNENCLOSED_FIELD
*/
char curChar = DelimiterSet.NULL_CHAR;
ParseState state = ParseState.FIELD_START;
int len = input.length();
StringBuilder sb = null;
outputs.clear();
char enclosingChar = delimiters.getEnclosedBy();
char fieldDelim = delimiters.getFieldsTerminatedBy();
char recordDelim = delimiters.getLinesTerminatedBy();
char escapeChar = delimiters.getEscapedBy();
boolean enclosingRequired = delimiters.isEncloseRequired();
for (int pos = 0; pos < len; pos++) {
curChar = input.get();
switch (state) {
case FIELD_START:
// ready to start processing a new field.
if (null != sb) {
// We finished processing a previous field. Add to the list.
outputs.add(sb.toString());
}
sb = new StringBuilder();
if (enclosingChar == curChar) {
// got an opening encloser.
state = ParseState.ENCLOSED_FIELD;
} else if (escapeChar == curChar) {
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we have a zero-length field. This is a no-op.
continue;
} else if (recordDelim == curChar) {
// we have a zero-length field, that ends processing.
pos = len;
} else {
// current char is part of the field.
state = ParseState.UNENCLOSED_FIELD;
sb.append(curChar);
if (enclosingRequired) {
throw new ParseError(
"Opening field-encloser expected at position " + pos);
}
}
break;
case ENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.ENCLOSED_ESCAPE;
} else if (enclosingChar == curChar) {
// we're at the end of the enclosing field. Expect an EOF or EOR char.
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
} else {
// this is a regular char, or an EOF / EOR inside an encloser. Add to
// the current field string, and remain in this state.
sb.append(curChar);
}
break;
case UNENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we're at the end of this field; may be the start of another one.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
pos = len; // terminate processing immediately.
} else {
// this is a regular char. Add to the current field string,
// and remain in this state.
sb.append(curChar);
}
break;
case ENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// enclosed field processing.
sb.append(curChar);
state = ParseState.ENCLOSED_FIELD;
break;
case ENCLOSED_EXPECT_DELIMITER:
// We were in an enclosed field, but got the final encloser. Now we
// expect either an end-of-field or an end-of-record.
if (fieldDelim == curChar) {
// end of one field is the beginning of the next.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
// stop processing.
pos = len;
} else {
// Don't know what to do with this character.
throw new ParseError("Expected delimiter at position " + pos);
}
break;
case UNENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// non-enclosed field processing.
sb.append(curChar);
state = ParseState.UNENCLOSED_FIELD;
break;
default:
throw new ParseError("Unexpected parser state: " + state);
}
}
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
// we hit an EOF/EOR as the last legal character and we need to mark
// that string as recorded. This if block is outside the for-loop since
// we don't have a physical 'epsilon' token in our string.
if (null != sb) {
outputs.add(sb.toString());
sb = new StringBuilder();
}
}
if (null != sb) {
// There was a field that terminated by running out of chars or an EOR
// character. Add to the list.
outputs.add(sb.toString());
}
return outputs;
}
// CHECKSTYLE:ON
public boolean isEnclosingRequired() {
return delimiters.isEncloseRequired();
}
@Override
public String toString() {
return "RecordParser[" + delimiters.toString() + "]";
}
@Override
public int hashCode() {
return this.delimiters.hashCode();
super(delimitersIn);
}
}
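An illustrative parse of one delimited line. The DelimiterSet constructor argument order shown here (fields-terminated-by, lines-terminated-by, enclosed-by, escaped-by, enclose-required) is an assumption; it is not confirmed by this diff.
import java.util.List;

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.RecordParser;

public class RecordParserSketch {
  public static void main(String[] args) throws Exception {
    // Assumed argument order: field delim, record delim, enclosed-by,
    // escaped-by, enclose-required.
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    RecordParser parser = new RecordParser(delims);
    List<String> fields = parser.parseRecord("1,\"Doe, Jane\",engineer\n");
    for (String field : fields) {
      System.out.println(field);   // 1, then Doe, Jane, then engineer
    }
  }
}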

View File

@ -1,6 +1,4 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,136 +15,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.sqoop.lib;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
/**
* Interface implemented by the classes generated by sqoop's orm.ClassWriter.
*
* @deprecated use org.apache.sqoop.lib.SqoopRecord instead.
* @see org.apache.sqoop.lib.SqoopRecord
*/
public abstract class SqoopRecord implements Cloneable, DBWritable,
FieldMappable, Writable {
public abstract class SqoopRecord extends org.apache.sqoop.lib.SqoopRecord {
public SqoopRecord() {
}
public abstract void parse(CharSequence s) throws RecordParser.ParseError;
public abstract void parse(Text s) throws RecordParser.ParseError;
public abstract void parse(byte [] s) throws RecordParser.ParseError;
public abstract void parse(char [] s) throws RecordParser.ParseError;
public abstract void parse(ByteBuffer s) throws RecordParser.ParseError;
public abstract void parse(CharBuffer s) throws RecordParser.ParseError;
public abstract void loadLargeObjects(LargeObjectLoader objLoader)
throws SQLException, IOException, InterruptedException;
/**
* Inserts the data in this object into the PreparedStatement, starting
* at parameter 'offset'.
* @return the number of fields written to the statement.
*/
public abstract int write(PreparedStatement stmt, int offset)
throws SQLException;
/**
* Format output data according to the specified delimiters.
*/
public abstract String toString(DelimiterSet delimiters);
/**
* Use the default delimiters, but only append an end-of-record delimiter
* if useRecordDelim is true.
*/
public String toString(boolean useRecordDelim) {
// Method body should be overridden by generated classes in 1.3.0+
if (useRecordDelim) {
// This is the existing functionality.
return toString();
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(useRecordDelim=false) requires a newer SqoopRecord. "
+ "Please regenerate your record class to use this function.");
}
}
/**
* Format the record according to the specified delimiters. An end-of-record
* delimiter is optional, and only used if useRecordDelim is true. For
* use with TextOutputFormat, calling this with useRecordDelim=false may
* make more sense.
*/
public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
if (useRecordDelim) {
return toString(delimiters);
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(delimiters, useRecordDelim=false) requires a newer "
+ "SqoopRecord. Please regenerate your record class to use this "
+ "function.");
}
}
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
/**
* Returns an integer specifying which API format version the
* generated class conforms to. Used by internal APIs for backwards
* compatibility.
* @return the API version this class was generated against.
*/
public abstract int getClassFormatVersion();
/**
* Use the delegate pattern to allow arbitrary processing of the
* fields of this record.
* @param processor A delegate that operates on this object.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
public void delegate(FieldMapProcessor processor)
throws IOException, ProcessingException {
processor.accept(this);
}
@Override
/**
* {@inheritDoc}
* @throws RuntimeException if used with a record that was generated
* before this capability was added (1.1.0).
*/
public Map<String, Object> getFieldMap() {
// Default implementation does not support field iteration.
// ClassWriter should provide an overriding version.
throw new RuntimeException(
"Got null field map from record. Regenerate your record class.");
}
/**
* Allows an arbitrary field to be set programmatically to the
* specified value object. The value object must match the
* type expected for the particular field or a RuntimeException
* will result.
* @throws RuntimeException if the specified field name does not exist.
*/
public void setField(String fieldName, Object fieldVal) {
throw new RuntimeException("This SqoopRecord does not support setField(). "
+ "Regenerate your record class.");
}
}

View File

@ -0,0 +1,170 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
*/
public final class CodecMap {
// Supported codec map values
// Note: do not add more values here, since codecs are discovered using the
// standard Hadoop mechanism (io.compression.codecs). See
// CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
public static final String LZOP = "lzop";
private static Map<String, String> codecNames;
static {
codecNames = new TreeMap<String, String>();
// Register the names of codecs we know about.
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
codecNames.put(LZOP, "com.hadoop.compression.lzo.LzopCodec");
// add more from Hadoop CompressionCodecFactory
for (Class<? extends CompressionCodec> cls
: CompressionCodecFactory.getCodecClasses(new Configuration())) {
String simpleName = cls.getSimpleName();
String codecName = simpleName;
if (simpleName.endsWith("Codec")) {
codecName = simpleName.substring(0, simpleName.length()
- "Codec".length());
}
codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
}
}
private CodecMap() {
}
/**
* Given a codec name, return the name of the concrete class
* that implements it (or 'null' in the case of the "none" codec).
* @throws com.cloudera.sqoop.io.UnsupportedCodecException if a codec cannot
* be found with the supplied name.
*/
public static String getCodecClassName(String codecName)
throws com.cloudera.sqoop.io.UnsupportedCodecException {
if (!codecNames.containsKey(codecName)) {
throw new com.cloudera.sqoop.io.UnsupportedCodecException(codecName);
}
return codecNames.get(codecName);
}
/**
* Given a codec name, instantiate the concrete implementation
* class that implements it.
* @throws com.cloudera.sqoop.io.UnsupportedCodecException if a codec cannot
* be found with the supplied name.
*/
public static CompressionCodec getCodec(String codecName,
Configuration conf) throws com.cloudera.sqoop.io.UnsupportedCodecException {
// Try standard Hadoop mechanism first
CompressionCodec codec = getCodecByName(codecName, conf);
if (codec != null) {
return codec;
}
// Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
if (null == codecClassName) {
return null;
}
Class<? extends CompressionCodec> codecClass =
(Class<? extends CompressionCodec>)
conf.getClassByName(codecClassName);
return (CompressionCodec) ReflectionUtils.newInstance(
codecClass, conf);
} catch (ClassNotFoundException cnfe) {
throw new com.cloudera.sqoop.io.UnsupportedCodecException(
"Cannot find codec class "
+ codecClassName + " for codec " + codecName);
}
}
/**
* Return the set of available codec names.
*/
public static Set<String> getCodecNames() {
return codecNames.keySet();
}
/**
* Find the relevant compression codec for the codec's canonical class name
* or by codec alias.
* <p>
* Codec aliases are case insensitive.
* <p>
* The codec alias is the short class name (without the package name).
* If the short class name ends with 'Codec', then there are two aliases for
* the codec: the complete short class name and the short class name without
* the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
* aliases are 'gzip' and 'gzipcodec'.
* <p>
* Note: When HADOOP-7323 is available this method can be replaced with a call
* to CompressionCodecFactory.
* @param codecName the canonical class name of the codec or a codec alias
* @return the codec object or null if none matching the name were found
*/
private static CompressionCodec getCodecByName(String codecName,
Configuration conf) {
List<Class<? extends CompressionCodec>> codecs =
CompressionCodecFactory.getCodecClasses(conf);
for (Class<? extends CompressionCodec> cls : codecs) {
if (codecMatches(cls, codecName)) {
return ReflectionUtils.newInstance(cls, conf);
}
}
return null;
}
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0, simpleName.length()
- "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
}
}
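A short sketch of looking up a codec by alias with the class above; it assumes GzipCodec is registered through the standard io.compression.codecs mechanism, which is the Hadoop default.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.sqoop.io.CodecMap;

public class CodecMapSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Aliases are case-insensitive; "gzip" and "gzipcodec" resolve to the
    // same Hadoop codec class.
    CompressionCodec codec = CodecMap.getCodec("gzip", conf);
    System.out.println(codec.getClass().getName());
    // The static alias table still answers for the built-in names.
    System.out.println(CodecMap.getCodecClassName("deflate"));
    System.out.println(CodecMap.getCodecNames());
  }
}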

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.io.input.CountingInputStream;
import org.apache.commons.io.input.ProxyInputStream;
/**
* Provides an InputStream that can consume a fixed maximum number of bytes
* from an underlying stream. Closing the FixedLengthInputStream does not
* close the underlying stream. After reading the maximum number of available
* bytes this acts as though EOF has been reached.
*/
public class FixedLengthInputStream extends ProxyInputStream {
private CountingInputStream countingIn;
private long maxBytes;
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(new CountingInputStream(new CloseShieldInputStream(stream)));
// Save a correctly-typed reference to the underlying stream.
this.countingIn = (CountingInputStream) this.in;
this.maxBytes = maxLen;
}
/** @return the number of bytes already consumed by the client. */
private long consumed() {
return countingIn.getByteCount();
}
/**
* @return number of bytes remaining to be read before the limit
* is reached.
*/
private long toLimit() {
return maxBytes - consumed();
}
@Override
public int available() throws IOException {
return (int) Math.min(toLimit(), countingIn.available());
}
@Override
public int read() throws IOException {
if (toLimit() > 0) {
return super.read();
} else {
return -1; // EOF.
}
}
@Override
public int read(byte [] buf) throws IOException {
return read(buf, 0, buf.length);
}
@Override
public int read(byte [] buf, int start, int count) throws IOException {
long limit = toLimit();
if (limit == 0) {
return -1; // EOF.
} else {
return super.read(buf, start, (int) Math.min(count, limit));
}
}
}
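A self-contained sketch showing that the stream caps reads at the limit and leaves the underlying stream open and positioned just after the consumed bytes.
import java.io.ByteArrayInputStream;
import java.io.InputStream;

import org.apache.sqoop.io.FixedLengthInputStream;

public class FixedLengthSketch {
  public static void main(String[] args) throws Exception {
    byte[] data = "0123456789".getBytes("UTF-8");
    InputStream base = new ByteArrayInputStream(data);
    // Expose only the first 4 bytes of the underlying stream.
    FixedLengthInputStream in = new FixedLengthInputStream(base, 4);
    int b;
    while ((b = in.read()) != -1) {
      System.out.print((char) b);           // prints 0123
    }
    in.close();                             // does not close 'base'
    System.out.println();
    System.out.println((char) base.read()); // underlying stream continues at 4
  }
}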

File diff suppressed because it is too large

View File

@ -0,0 +1,134 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
/**
* A cache of open LobFile.Reader objects.
* This maps from filenames to the open Reader, if any. This uses the
* Singleton pattern. While nothing prevents multiple LobReaderCache
* instances, it is most useful to have a single global cache. This cache is
* internally synchronized; only one thread can insert or retrieve a reader
* from the cache at a time.
*/
public class LobReaderCache {
public static final Log LOG =
LogFactory.getLog(LobReaderCache.class.getName());
private Map<Path, LobFile.Reader> readerMap;
/**
* Open a LobFile for read access, returning a cached reader if one is
* available, or a new reader otherwise.
* @param path the path to the LobFile to open
* @param conf the configuration to use to access the FS.
* @throws IOException if there's an error opening the file.
*/
public LobFile.Reader get(Path path, Configuration conf)
throws IOException {
LobFile.Reader reader = null;
Path canonicalPath = qualify(path, conf);
// Look up an entry in the cache.
synchronized(this) {
reader = readerMap.remove(canonicalPath);
}
if (null != reader && !reader.isClosed()) {
// Cache hit. return it.
LOG.debug("Using cached reader for " + canonicalPath);
return reader;
}
// Cache miss; open the file.
LOG.debug("No cached reader available for " + canonicalPath);
return LobFile.open(path, conf);
}
/**
* Return a reader back to the cache. If there's already a reader for
* this path, then the current reader is closed.
* @param reader the opened reader. Any record-specific subreaders should be
* closed.
* @throws IOException if there's an error accessing the path's filesystem.
*/
public void recycle(LobFile.Reader reader) throws IOException {
Path canonicalPath = reader.getPath();
// Check if the cache has a reader for this path already. If not, add this.
boolean cached = false;
synchronized(this) {
if (readerMap.get(canonicalPath) == null) {
LOG.debug("Caching reader for path: " + canonicalPath);
readerMap.put(canonicalPath, reader);
cached = true;
}
}
if (!cached) {
LOG.debug("Reader already present for path: " + canonicalPath
+ "; closing.");
reader.close();
}
}
@Override
protected synchronized void finalize() throws Throwable {
for (LobFile.Reader r : readerMap.values()) {
r.close();
}
super.finalize();
}
protected LobReaderCache() {
this.readerMap = new TreeMap<Path, LobFile.Reader>();
}
/**
* Create a fully-qualified path object.
* @param path the path to fully-qualify with its fs URI.
* @param conf the current Hadoop FS configuration.
* @return a new path representing the same location as the input 'path',
* but with a fully-qualified URI.
*/
public static Path qualify(Path path, Configuration conf)
throws IOException {
if (null == path) {
return null;
}
FileSystem fs = path.getFileSystem(conf);
if (null == fs) {
fs = FileSystem.get(conf);
}
return path.makeQualified(fs);
}
}
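A hedged sketch of checking a reader out of the cache and returning it. The LobFile path is hypothetical, and the getCache() singleton accessor on the deprecated com.cloudera.sqoop.io.LobReaderCache is assumed to remain available, as the code above still calls it.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.io.LobReaderCache;

public class LobReaderCacheSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    LobReaderCache cache = LobReaderCache.getCache();   // assumed singleton accessor
    Path lobPath = LobReaderCache.qualify(
        new Path("/user/demo/table1/_lobs/largeobject_0.lob"), conf);
    LobFile.Reader reader = cache.get(lobPath, conf);   // cache hit or fresh open
    try {
      if (reader.next()) {                              // position at the first record
        System.out.println("record starts at " + reader.tell());
      }
    } finally {
      cache.recycle(reader);                            // hand it back for reuse
    }
  }
}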

View File

@ -0,0 +1,94 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Logger;
/**
* A named FIFO channel.
*/
public class NamedFifo {
private static final Logger LOG = Logger.getLogger(NamedFifo.class);
private File fifoFile;
/** Create a named FIFO object at the local fs path given by 'pathname'. */
public NamedFifo(String pathname) {
this.fifoFile = new File(pathname);
}
/** Create a named FIFO object at the local fs path given by the 'fifo' File
* object. */
public NamedFifo(File fifo) {
this.fifoFile = fifo;
}
/**
* Return the File object representing the FIFO.
*/
public File getFile() {
return this.fifoFile;
}
/**
* Create a named FIFO object.
* The pipe will be created with permissions 0600.
* @throws IOException on failure.
*/
public void create() throws IOException {
create(0600);
}
/**
* Create a named FIFO object with the specified fs permissions.
* This depends on the 'mknod' (for example, provided by Linux coreutils)
* or 'mkfifo' (Mac OS X) system utility being available. The FIFO file
* will be deleted when the process exits.
* @throws IOException on failure.
*/
public void create(int permissions) throws IOException {
String filename = fifoFile.toString();
// Format permissions as a mode string in base 8.
String modeStr = Integer.toString(permissions, 8);
// Create the FIFO itself.
try {
String output = Shell.execCommand("mknod", "--mode=0" + modeStr,
filename, "p");
LOG.info("mknod output:\n"+output);
} catch (IOException ex) {
LOG.info("IO error running mknod: " + ex.getMessage());
LOG.debug("IO error running mknod", ex);
}
if (!this.fifoFile.exists()) {
LOG.info("mknod failed, falling back to mkfifo");
String output = Shell.execCommand("mkfifo", "-m", "0" + modeStr,
filename);
LOG.info("mkfifo output:\n"+output);
}
// Schedule the FIFO to be cleaned up when we exit.
this.fifoFile.deleteOnExit();
}
}
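A sketch of creating and writing to a FIFO; the path is hypothetical, and the write blocks until some other process opens the pipe for reading.
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

import org.apache.sqoop.io.NamedFifo;

public class NamedFifoSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical pipe location; needs mknod or mkfifo on the local system.
    NamedFifo fifo = new NamedFifo("/tmp/sqoop-demo.fifo");
    fifo.create(0600);                       // removed again at JVM exit
    File pipe = fifo.getFile();
    // Blocks until another process opens the pipe for reading.
    OutputStream out = new FileOutputStream(pipe);
    out.write("hello through the fifo\n".getBytes("UTF-8"));
    out.close();
  }
}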

View File

@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* A BufferedWriter implementation that wraps around a SplittingOutputStream
* and allows splitting of the underlying stream.
* Splits occur at allowSplit() calls, or newLine() calls.
*/
public class SplittableBufferedWriter extends BufferedWriter {
public static final Log LOG = LogFactory.getLog(
SplittableBufferedWriter.class.getName());
private SplittingOutputStream splitOutputStream;
private boolean alwaysFlush;
public SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = false;
}
/** For testing. */
protected SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream, final boolean alwaysFlush) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = alwaysFlush;
}
public void newLine() throws IOException {
super.newLine();
this.allowSplit();
}
public void allowSplit() throws IOException {
if (alwaysFlush) {
this.flush();
}
if (this.splitOutputStream.wouldSplit()) {
LOG.debug("Starting new split");
this.flush();
this.splitOutputStream.allowSplit();
}
}
}
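A sketch of record-oriented writing where splits can only occur at newLine() boundaries; the output directory and cutoff size are hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.sqoop.io.SplittableBufferedWriter;
import org.apache.sqoop.io.SplittingOutputStream;

public class SplitWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("/tmp/split-writer-demo");
    FileSystem.get(conf).mkdirs(dir);        // destination dir must exist
    // Roll to a new part file after roughly 1 MB of uncompressed output.
    SplittingOutputStream out = new SplittingOutputStream(
        conf, dir, "data-", 1024 * 1024, null);
    SplittableBufferedWriter writer = new SplittableBufferedWriter(out);
    for (int i = 0; i < 100000; i++) {
      writer.write("record " + i);
      writer.newLine();                      // splits only happen here
    }
    writer.close();
  }
}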

View File

@ -0,0 +1,159 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Formatter;
import org.apache.commons.io.output.CountingOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
/**
* An output stream that writes to an underlying filesystem, opening
* a new file after a specified number of bytes have been written to the
* current one.
*/
public class SplittingOutputStream extends OutputStream {
public static final Log LOG = LogFactory.getLog(
SplittingOutputStream.class.getName());
private OutputStream writeStream;
private CountingOutputStream countingFilterStream;
private Configuration conf;
private Path destDir;
private String filePrefix;
private long cutoffBytes;
private CompressionCodec codec;
private int fileNum;
/**
* Create a new SplittingOutputStream.
* @param conf the Configuration to use to interface with HDFS
* @param destDir the directory where the files will go (should already
* exist).
* @param filePrefix the first part of the filename, which will be appended
* by a number. This file will be placed inside destDir.
* @param cutoff the approximate number of bytes to use per file
* @param codec if non-null, the compression codec applied to each output
* file; file names are given the codec's default extension.
*/
public SplittingOutputStream(final Configuration conf, final Path destDir,
final String filePrefix, final long cutoff, final CompressionCodec codec)
throws IOException {
this.conf = conf;
this.destDir = destDir;
this.filePrefix = filePrefix;
this.cutoffBytes = cutoff;
if (this.cutoffBytes < 0) {
this.cutoffBytes = 0; // splitting disabled.
}
this.codec = codec;
this.fileNum = 0;
openNextFile();
}
/** Initialize the OutputStream to the next file to write to.
*/
private void openNextFile() throws IOException {
FileSystem fs = FileSystem.get(conf);
StringBuffer sb = new StringBuffer();
Formatter fmt = new Formatter(sb);
fmt.format("%05d", this.fileNum++);
String filename = filePrefix + fmt.toString();
if (codec != null) {
filename = filename + codec.getDefaultExtension();
}
Path destFile = new Path(destDir, filename);
LOG.debug("Opening next output file: " + destFile);
if (fs.exists(destFile)) {
Path canonicalDest = destFile.makeQualified(fs);
throw new IOException("Destination file " + canonicalDest
+ " already exists");
}
OutputStream fsOut = fs.create(destFile);
// Count how many actual bytes hit HDFS.
this.countingFilterStream = new CountingOutputStream(fsOut);
if (codec != null) {
// Wrap that in a compressing stream.
this.writeStream = codec.createOutputStream(this.countingFilterStream);
} else {
// Write to the counting stream directly.
this.writeStream = this.countingFilterStream;
}
}
/**
* @return true if allowSplit() would actually cause a split.
*/
public boolean wouldSplit() {
return this.cutoffBytes > 0
&& this.countingFilterStream.getByteCount() >= this.cutoffBytes;
}
/** If we've written more to the disk than the user's split size,
* open the next file.
*/
private void checkForNextFile() throws IOException {
if (wouldSplit()) {
LOG.debug("Starting new split");
this.writeStream.flush();
this.writeStream.close();
openNextFile();
}
}
/** Defines a point in the stream when it is acceptable to split to a new
file; e.g., the end of a record.
*/
public void allowSplit() throws IOException {
checkForNextFile();
}
public void close() throws IOException {
this.writeStream.close();
}
public void flush() throws IOException {
this.writeStream.flush();
}
public void write(byte [] b) throws IOException {
this.writeStream.write(b);
}
public void write(byte [] b, int off, int len) throws IOException {
this.writeStream.write(b, off, len);
}
public void write(int b) throws IOException {
this.writeStream.write(b);
}
}
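A byte-oriented sketch of the same splitting behavior, calling allowSplit() after each record; the 4 KB cutoff and output directory are arbitrary choices for illustration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.sqoop.io.SplittingOutputStream;

public class SplittingStreamSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("/tmp/splitting-stream-demo");
    FileSystem.get(conf).mkdirs(dir);            // destination dir must exist
    // Files part-00000, part-00001, ... each holding roughly 4 KB.
    SplittingOutputStream out =
        new SplittingOutputStream(conf, dir, "part-", 4096, null);
    byte[] record = "0123456789abcdef\n".getBytes("UTF-8");
    for (int i = 0; i < 1000; i++) {
      out.write(record);
      out.allowSplit();                          // record boundary: OK to roll
    }
    out.close();
  }
}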

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
/**
* Thrown when a compression codec cannot be recognized.
*/
public class UnsupportedCodecException extends IOException {
public UnsupportedCodecException() {
super("UnsupportedCodecException");
}
public UnsupportedCodecException(String msg) {
super(msg);
}
public UnsupportedCodecException(Throwable cause) {
super(cause);
}
}

View File

@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.hadoop.io.Text;
/**
* Serialize BigDecimal classes to/from DataInput and DataOutput objects.
*
* A BigDecimal is composed of a BigInteger plus an integer 'scale' field.
* The BigDecimal/BigInteger can also return itself as a 'long' value.
*
* We serialize in one of two formats:
*
* First, check whether the BigInt can fit in a long:
* boolean b = BigIntegerPart &gt; LONG_MAX || BigIntegerPart &lt; LONG_MIN
*
* [int: scale][boolean: b == false][long: BigInt-part]
* [int: scale][boolean: b == true][string: BigInt-part.toString()]
*
* TODO(aaron): Get this to work with Hadoop's Serializations framework.
*/
public final class BigDecimalSerializer {
private BigDecimalSerializer() { }
public static final BigInteger LONG_MAX_AS_BIGINT =
BigInteger.valueOf(Long.MAX_VALUE);
public static final BigInteger LONG_MIN_AS_BIGINT =
BigInteger.valueOf(Long.MIN_VALUE);
public static void write(BigDecimal d, DataOutput out) throws IOException {
int scale = d.scale();
BigInteger bigIntPart = d.unscaledValue();
boolean fastpath = bigIntPart.compareTo(LONG_MAX_AS_BIGINT) < 0
&& bigIntPart .compareTo(LONG_MIN_AS_BIGINT) > 0;
out.writeInt(scale);
out.writeBoolean(fastpath);
if (fastpath) {
out.writeLong(bigIntPart.longValue());
} else {
Text.writeString(out, bigIntPart.toString());
}
}
public static BigDecimal readFields(DataInput in) throws IOException {
int scale = in.readInt();
boolean fastpath = in.readBoolean();
BigInteger unscaledIntPart;
if (fastpath) {
long unscaledValue = in.readLong();
unscaledIntPart = BigInteger.valueOf(unscaledValue);
} else {
String unscaledValueStr = Text.readString(in);
unscaledIntPart = new BigInteger(unscaledValueStr);
}
return new BigDecimal(unscaledIntPart, scale);
}
}
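A quick round trip through the serializer; a value whose unscaled part fits in a long takes the fast path described above.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.math.BigDecimal;

import org.apache.sqoop.lib.BigDecimalSerializer;

public class BigDecimalRoundTrip {
  public static void main(String[] args) throws Exception {
    BigDecimal original = new BigDecimal("12345.6789");
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    BigDecimalSerializer.write(original, out);   // small unscaled value: long fast path
    out.flush();

    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    BigDecimal copy = BigDecimalSerializer.readFields(in);
    System.out.println(original.equals(copy));   // true
  }
}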

View File

@ -0,0 +1,130 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Matcher;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.io.LobFile;
/**
* BlobRef is a wrapper that holds a BLOB either directly, or a
* reference to a file that holds the BLOB data.
*/
public class BlobRef extends
com.cloudera.sqoop.lib.LobRef<byte[], BytesWritable, InputStream> {
public static final Log LOG = LogFactory.getLog(BlobRef.class.getName());
public BlobRef() {
super();
}
public BlobRef(byte [] bytes) {
super(new BytesWritable(bytes));
}
/**
* Initialize a BlobRef to an external BLOB.
* @param file the filename to the BLOB. May be relative to the job dir.
* @param offset the offset (in bytes) into the LobFile for this record.
* @param length the length of the record in bytes.
*/
public BlobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected InputStream getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readBlobRecord();
}
@Override
protected InputStream getInternalSource(BytesWritable data) {
return new ByteArrayInputStream(data.getBytes(), 0, data.getLength());
}
@Override
protected byte [] getInternalData(BytesWritable data) {
return Arrays.copyOf(data.getBytes(), data.getLength());
}
@Override
protected BytesWritable deepCopyData(BytesWritable data) {
return new BytesWritable(Arrays.copyOf(data.getBytes(), data.getLength()));
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored BLOBs, the data is a BytesWritable
// containing the actual data.
BytesWritable data = getDataObj();
if (null == data) {
data = new BytesWritable();
}
data.readFields(in);
setDataObj(data);
}
@Override
public void writeInternal(DataOutput out) throws IOException {
getDataObj().write(out);
}
/**
* Create a BlobRef based on parsed data from a line of text.
* This only operates correctly on external blobs; inline blobs are simply
* returned as null. You should store BLOB data in SequenceFile format
* if reparsing is necessary.
* @param inputString the text-based input data to parse.
* @return a new BlobRef containing a reference to an external BLOB, or
* an empty BlobRef if the data to be parsed is actually inline.
*/
public static com.cloudera.sqoop.lib.BlobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external BLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline BLOB, which we don't support parsing of.
Matcher m = org.apache.sqoop.lib.LobRef.EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new com.cloudera.sqoop.lib.BlobRef(m.group(1),
Long.valueOf(m.group(2)), Long.valueOf(m.group(3)));
} else {
// This is inline BLOB string data.
LOG.warn(
"Reparsing inline BLOB data is not supported; use SequenceFiles.");
return new com.cloudera.sqoop.lib.BlobRef();
}
}
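// A hedged usage sketch (file name invented for illustration): parsing a
// text field of the form produced by toString() on an external BLOB, e.g.
//   com.cloudera.sqoop.lib.BlobRef ref =
//       BlobRef.parse("externalLob(lf,_lob/large_obj_0.lob,100,5011)");
//   ref.isExternal();   // true: data lives at offset 100, length 5011
// Any other input, such as BlobRef.parse("inline bytes"), logs a warning and
// yields an empty BlobRef, because inline BLOB text cannot be reparsed.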
}

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Parse string representations of boolean values into boolean
* scalar types.
*/
public final class BooleanParser {
/**
* Return a boolean based on the value contained in the string.
*
* <p>The following values are considered true:
* "true", "t", "yes", "on", "1".</p>
* <p>All other values, including 'null', are false.</p>
* <p>All comparisons are case-insensitive.</p>
*/
public static boolean valueOf(final String s) {
return s != null && ("true".equalsIgnoreCase(s) || "t".equalsIgnoreCase(s)
|| "1".equals(s) || "on".equalsIgnoreCase(s)
|| "yes".equalsIgnoreCase(s));
}
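// Illustrative examples of the mapping above: valueOf("Yes"), valueOf("T"),
// valueOf("on") and valueOf("1") all return true, while valueOf("0"),
// valueOf("false") and valueOf(null) return false.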
private BooleanParser() { }
}

View File

@ -0,0 +1,113 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Matcher;
import org.apache.hadoop.io.Text;
import com.cloudera.sqoop.io.LobFile;
/**
* ClobRef is a wrapper that holds a CLOB either directly, or a
* reference to a file that holds the CLOB data.
*/
public class ClobRef
extends com.cloudera.sqoop.lib.LobRef<String, String, Reader> {
public ClobRef() {
super();
}
public ClobRef(String chars) {
super(chars);
}
/**
* Initialize a clobref to an external CLOB.
* @param file the file holding the CLOB data. May be relative to the job dir.
* @param offset the offset (in bytes) into the LobFile for this record.
* @param length the length of the record in characters.
*/
public ClobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected Reader getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readClobRecord();
}
@Override
protected Reader getInternalSource(String data) {
return new StringReader(data);
}
@Override
protected String deepCopyData(String data) {
return data;
}
@Override
protected String getInternalData(String data) {
return data;
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored clobs, the data is written as UTF8 Text.
setDataObj(Text.readString(in));
}
@Override
public void writeInternal(DataOutput out) throws IOException {
Text.writeString(out, getDataObj());
}
/**
* Create a ClobRef based on parsed data from a line of text.
* @param inputString the text-based input data to parse.
* @return a ClobRef to the given data.
*/
public static com.cloudera.sqoop.lib.ClobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external CLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline CLOB, which we read as-is.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new com.cloudera.sqoop.lib.ClobRef(m.group(1),
Long.valueOf(m.group(2)), Long.valueOf(m.group(3)));
} else {
// This is inline CLOB string data.
return new com.cloudera.sqoop.lib.ClobRef(inputString);
}
}
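// A hedged usage sketch (file name invented for illustration):
//   ClobRef.parse("externalLob(lf,_lob/large_obj_0.lob,0,512)")
// yields an external reference to 512 characters in that LobFile, while
//   ClobRef.parse("plain text")
// simply wraps the string as inline CLOB data.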
}

View File

@ -0,0 +1,205 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Encapsulates a set of delimiters used to encode a record.
*/
public class DelimiterSet implements Cloneable {
public static final char NULL_CHAR = '\000';
private char fieldDelim; // fields terminated by this.
private char recordDelim; // records terminated by this.
// If these next two fields are '\000', then they are ignored.
private char enclosedBy;
private char escapedBy;
// If true, then the enclosed-by character is applied to every
// field, not just ones containing embedded delimiters.
private boolean encloseRequired;
/**
* Create a delimiter set with the default delimiters
* (comma for fields, newline for records).
*/
public DelimiterSet() {
this(',', '\n', NULL_CHAR, NULL_CHAR, false);
}
/**
* Create a delimiter set with the specified delimiters.
* @param field the fields-terminated-by delimiter
* @param record the lines-terminated-by delimiter
* @param enclose the enclosed-by character
* @param escape the escaped-by character
* @param isEncloseRequired If true, enclosed-by is applied to all
* fields. If false, only applied to fields that embed delimiters.
*/
public DelimiterSet(char field, char record, char enclose, char escape,
boolean isEncloseRequired) {
this.fieldDelim = field;
this.recordDelim = record;
this.enclosedBy = enclose;
this.escapedBy = escape;
this.encloseRequired = isEncloseRequired;
}
/**
* Sets the fields-terminated-by character.
*/
public void setFieldsTerminatedBy(char f) {
this.fieldDelim = f;
}
/**
* @return the fields-terminated-by character.
*/
public char getFieldsTerminatedBy() {
return this.fieldDelim;
}
/**
* Sets the end-of-record lines-terminated-by character.
*/
public void setLinesTerminatedBy(char r) {
this.recordDelim = r;
}
/**
* @return the end-of-record (lines-terminated-by) character.
*/
public char getLinesTerminatedBy() {
return this.recordDelim;
}
/**
* Sets the enclosed-by character.
* @param e the enclosed-by character, or '\000' for no enclosing character.
*/
public void setEnclosedBy(char e) {
this.enclosedBy = e;
}
/**
* @return the enclosed-by character, or '\000' for none.
*/
public char getEnclosedBy() {
return this.enclosedBy;
}
/**
* Sets the escaped-by character.
* @param e the escaped-by character, or '\000' for no escape character.
*/
public void setEscapedBy(char e) {
this.escapedBy = e;
}
/**
* @return the escaped-by character, or '\000' for none.
*/
public char getEscapedBy() {
return this.escapedBy;
}
/**
* Set whether the enclosed-by character must be applied to all fields,
* or only fields with embedded delimiters.
*/
public void setEncloseRequired(boolean required) {
this.encloseRequired = required;
}
/**
* @return true if the enclosed-by character must be applied to all fields,
* or false if it's only used for fields with embedded delimiters.
*/
public boolean isEncloseRequired() {
return this.encloseRequired;
}
@Override
/**
* @return a string representation of the delimiters.
*/
public String toString() {
return "fields=" + this.fieldDelim
+ " records=" + this.recordDelim
+ " escape=" + this.escapedBy
+ " enclose=" + this.enclosedBy
+ " required=" + this.encloseRequired;
}
/**
* Format this set of delimiters as a call to the constructor for
* this object, that would generate identical delimiters.
* @return a String that can be embedded in generated code that
* provides this set of delimiters.
*/
public String formatConstructor() {
return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
+ "(char) " + (int) this.recordDelim + ", "
+ "(char) " + (int) this.enclosedBy + ", "
+ "(char) " + (int) this.escapedBy + ", "
+ this.encloseRequired + ")";
}
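// Illustrative example: the default DelimiterSet (',' fields, '\n' records,
// no enclosing or escaping) formats as
//   new DelimiterSet((char) 44, (char) 10, (char) 0, (char) 0, false)
// which, when embedded in generated code, reconstructs an identical set.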
@Override
/**
* @return a hash code for this set of delimiters.
*/
public int hashCode() {
return (int) this.fieldDelim
+ (((int) this.recordDelim) << 4)
+ (((int) this.escapedBy) << 8)
+ (((int) this.enclosedBy) << 12)
+ (((int) this.recordDelim) << 16)
+ (this.encloseRequired ? 0xFEFE : 0x7070);
}
@Override
/**
* @return true if this delimiter set is the same as another set of
* delimiters.
*/
public boolean equals(Object other) {
if (null == other) {
return false;
} else if (!other.getClass().equals(getClass())) {
return false;
}
DelimiterSet set = (DelimiterSet) other;
return this.fieldDelim == set.fieldDelim
&& this.recordDelim == set.recordDelim
&& this.escapedBy == set.escapedBy
&& this.enclosedBy == set.enclosedBy
&& this.encloseRequired == set.encloseRequired;
}
@Override
/**
* @return a new copy of this same set of delimiters.
*/
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
}

View File

@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Static helper class that will help format data with quotes and escape chars.
*/
public final class FieldFormatter {
/**
* Only pass fields that are strings when the --hive-drop-delims option is on.
* @param str the field value to format.
* @param delimiters the escape and enclose characters to apply afterwards.
* @return the value with Hive's delimiter characters (\n, \r, \01) removed,
* then escaped and enclosed.
*/
public static String hiveStringDropDelims(String str,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
return hiveStringReplaceDelims(str, "", delimiters);
}
/**
* Replace Hive delimiters with a user-defined string passed to the
* --hive-delims-replacement option.
* @param str the field value to format.
* @param replacement the string substituted for embedded Hive delimiters.
* @param delimiters the escape and enclose characters to apply afterwards.
* @return the value with Hive's delimiter characters (\n, \r, \01) replaced,
* then escaped and enclosed.
*/
public static String hiveStringReplaceDelims(String str, String replacement,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
String droppedDelims = str.replaceAll("\\n|\\r|\01", replacement);
return escapeAndEnclose(droppedDelims, delimiters);
}
/**
* Takes an input string representing the value of a field, encloses it in
* enclosing chars, and escapes any occurrences of such characters in the
* middle. The escape character itself is also escaped if it appears in the
* text of the field. If there is no enclosing character, then any
* delimiters present in the field body are escaped instead.
*
* The field is enclosed only if:
* enclose != '\000', and:
* encloseRequired is true, or
* one of the fields-terminated-by or lines-terminated-by characters is
* present in the string.
*
* Escaping is not performed if the escape char is '\000'.
*
* @param str - The user's string to escape and enclose
* @param delimiters - The DelimiterSet to use identifying the escape and
* enclose semantics. If the specified escape or enclose characters are
* '\000', those operations are not performed.
* @return the escaped, enclosed version of 'str'.
*/
public static String escapeAndEnclose(String str,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
char escape = delimiters.getEscapedBy();
char enclose = delimiters.getEnclosedBy();
boolean encloseRequired = delimiters.isEncloseRequired();
// true if we can use an escape character.
boolean escapingLegal =
com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR != escape;
String withEscapes;
if (null == str) {
return null;
}
if (escapingLegal) {
// escaping is legal. Escape any instances of the escape char itself.
withEscapes = str.replace("" + escape, "" + escape + escape);
} else {
// no need to double-escape
withEscapes = str;
}
if (com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR == enclose) {
// The enclose-with character was left unset, so we can't enclose items.
if (escapingLegal) {
// If the user has used the fields-terminated-by or
// lines-terminated-by characters in the string, escape them if we
// have an escape character.
String fields = "" + delimiters.getFieldsTerminatedBy();
String lines = "" + delimiters.getLinesTerminatedBy();
withEscapes = withEscapes.replace(fields, "" + escape + fields);
withEscapes = withEscapes.replace(lines, "" + escape + lines);
}
// No enclosing possible, so now return this.
return withEscapes;
}
// if we have an enclosing character, and escaping is legal, then the
// encloser must always be escaped.
if (escapingLegal) {
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
}
boolean actuallyDoEnclose = encloseRequired;
if (!actuallyDoEnclose) {
// check if the string requires enclosing.
char [] mustEncloseFor = new char[2];
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
for (char reason : mustEncloseFor) {
if (str.indexOf(reason) != -1) {
actuallyDoEnclose = true;
break;
}
}
}
if (actuallyDoEnclose) {
return "" + enclose + withEscapes + enclose;
} else {
return withEscapes;
}
}
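// A worked example, assuming fields terminated by ',', enclosed by '"',
// escaped by '\' and encloseRequired=false: the value
//   he said "hi", twice
// first has its encloser escaped, giving  he said \"hi\", twice , and is
// then enclosed (because it embeds the field delimiter), producing
//   "he said \"hi\", twice"
// A value containing none of the delimiters is returned unchanged.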
private FieldFormatter() { }
}

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.IOException;
import com.cloudera.sqoop.lib.FieldMappable;
import com.cloudera.sqoop.lib.ProcessingException;
/**
* Interface implemented by classes that process FieldMappable objects.
*/
public interface FieldMapProcessor {
/**
* Allow arbitrary processing of a FieldMappable object.
* @param record an object which can emit a map of its field names to values.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
void accept(FieldMappable record) throws IOException, ProcessingException;
}

View File

@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.util.Map;
/**
* Interface describing a class capable of returning a map of the fields
* of the object to their values.
*/
public interface FieldMappable {
/**
* Returns a map containing all fields of this record.
* @return a map from column names to the object-based values for
* this record. The map may not be null, though it may be empty.
*/
Map<String, Object> getFieldMap();
}

View File

@ -0,0 +1,256 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.math.BigDecimal;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
/**
* Contains a set of methods which can read db columns from a ResultSet into
* Java types, and write those types back out to PreparedStatement parameters,
* for use by the generated Writable record classes. This supports null values
* for all types.
*/
public final class JdbcWritableBridge {
// Currently, cap BLOB/CLOB objects at 16 MB until we can use external
// storage.
public static final long MAX_BLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_CLOB_LENGTH = 16 * 1024 * 1024;
private JdbcWritableBridge() {
}
public static Integer readInteger(int colNum, ResultSet r)
throws SQLException {
int val;
val = r.getInt(colNum);
if (r.wasNull()) {
return null;
} else {
return Integer.valueOf(val);
}
}
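// Illustrative sketch (identifiers invented for the example): generated
// record classes populate their fields through these helpers, e.g.
//   this.id = JdbcWritableBridge.readInteger(1, __dbResults);
// where a SQL NULL in column 1 yields a null Integer rather than 0.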
public static Long readLong(int colNum, ResultSet r) throws SQLException {
long val;
val = r.getLong(colNum);
if (r.wasNull()) {
return null;
} else {
return Long.valueOf(val);
}
}
public static String readString(int colNum, ResultSet r) throws SQLException {
return r.getString(colNum);
}
public static Float readFloat(int colNum, ResultSet r) throws SQLException {
float val;
val = r.getFloat(colNum);
if (r.wasNull()) {
return null;
} else {
return Float.valueOf(val);
}
}
public static Double readDouble(int colNum, ResultSet r) throws SQLException {
double val;
val = r.getDouble(colNum);
if (r.wasNull()) {
return null;
} else {
return Double.valueOf(val);
}
}
public static Boolean readBoolean(int colNum, ResultSet r)
throws SQLException {
boolean val;
val = r.getBoolean(colNum);
if (r.wasNull()) {
return null;
} else {
return Boolean.valueOf(val);
}
}
public static Time readTime(int colNum, ResultSet r) throws SQLException {
return r.getTime(colNum);
}
public static Timestamp readTimestamp(int colNum, ResultSet r)
throws SQLException {
return r.getTimestamp(colNum);
}
public static Date readDate(int colNum, ResultSet r) throws SQLException {
return r.getDate(colNum);
}
public static BytesWritable readBytesWritable(int colNum, ResultSet r)
throws SQLException {
byte [] bytes = r.getBytes(colNum);
return bytes == null ? null : new BytesWritable(bytes);
}
public static BigDecimal readBigDecimal(int colNum, ResultSet r)
throws SQLException {
return r.getBigDecimal(colNum);
}
public static BlobRef readBlobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of BLOBs is delayed; handled by LargeObjectLoader.
return null;
}
public static ClobRef readClobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of CLOBs is delayed; handled by LargeObjectLoader.
return null;
}
public static void writeInteger(Integer val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setInt(paramIdx, val);
}
}
public static void writeLong(Long val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setLong(paramIdx, val);
}
}
public static void writeDouble(Double val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDouble(paramIdx, val);
}
}
public static void writeBoolean(Boolean val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBoolean(paramIdx, val);
}
}
public static void writeFloat(Float val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setFloat(paramIdx, val);
}
}
public static void writeString(String val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setString(paramIdx, val);
}
}
public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTimestamp(paramIdx, val);
}
}
public static void writeTime(Time val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTime(paramIdx, val);
}
}
public static void writeDate(Date val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDate(paramIdx, val);
}
}
public static void writeBytesWritable(BytesWritable val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
// val.getBytes() is only valid in [0, len)
byte [] rawBytes = val.getBytes();
int len = val.getLength();
byte [] outBytes = new byte[len];
System.arraycopy(rawBytes, 0, outBytes, 0, len);
s.setBytes(paramIdx, outBytes);
}
}
public static void writeBigDecimal(BigDecimal val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBigDecimal(paramIdx, val);
}
}
public static void writeBlobRef(com.cloudera.sqoop.lib.BlobRef val,
int paramIdx, int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export BLOB data");
}
public static void writeClobRef(com.cloudera.sqoop.lib.ClobRef val,
int paramIdx, int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export CLOB data");
}
}

View File

@ -0,0 +1,322 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.util.TaskId;
/**
* Reads large object (BLOB/CLOB) columns from a ResultSet and materializes
* the data either inline or in external LobFile storage, for use with
* Hadoop's Writable implementation. This supports null values for all types.
*
* This is a singleton instance class; only one may exist at a time.
* However, its lifetime is limited to the current TaskInputOutputContext's
* life.
*/
public class LargeObjectLoader implements Closeable {
// Spill to external storage for BLOB/CLOB objects > 16 MB.
public static final long DEFAULT_MAX_LOB_LENGTH = 16 * 1024 * 1024;
public static final String MAX_INLINE_LOB_LEN_KEY =
"sqoop.inline.lob.length.max";
private Configuration conf;
private Path workPath;
private FileSystem fs;
// Handles to the open BLOB / CLOB file writers.
private LobFile.Writer curBlobWriter;
private LobFile.Writer curClobWriter;
// Counter that is used with the current task attempt id to
// generate unique LOB file names.
private long nextLobFileId = 0;
/**
* Create a new LargeObjectLoader.
* @param conf the Configuration to use
* @param workPath the HDFS working directory for this task.
*/
public LargeObjectLoader(Configuration conf, Path workPath)
throws IOException {
this.conf = conf;
this.workPath = workPath;
this.fs = FileSystem.get(conf);
this.curBlobWriter = null;
this.curClobWriter = null;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
@Override
public void close() throws IOException {
if (null != curBlobWriter) {
curBlobWriter.close();
curBlobWriter = null;
}
if (null != curClobWriter) {
curClobWriter.close();
curClobWriter = null;
}
}
/**
* @return a filename to use to put an external LOB in.
*/
private String getNextLobFileName() {
String file = "_lob/large_obj_" + TaskId.get(conf, "unknown_task_id")
+ nextLobFileId + ".lob";
nextLobFileId++;
return file;
}
/**
* Calculates a path to a new LobFile object, creating any
* missing directories.
* @return a Path to a LobFile to write
*/
private Path getNextLobFilePath() throws IOException {
Path p = new Path(workPath, getNextLobFileName());
Path parent = p.getParent();
if (!fs.exists(parent)) {
fs.mkdirs(parent);
}
return p;
}
/**
* @return the current LobFile writer for BLOBs, creating one if necessary.
*/
private LobFile.Writer getBlobWriter() throws IOException {
if (null == this.curBlobWriter) {
this.curBlobWriter = LobFile.create(getNextLobFilePath(), conf, false);
}
return this.curBlobWriter;
}
/**
* @return the current LobFile writer for CLOBs, creating one if necessary.
*/
private LobFile.Writer getClobWriter() throws IOException {
if (null == this.curClobWriter) {
this.curClobWriter = LobFile.create(getNextLobFilePath(), conf, true);
}
return this.curClobWriter;
}
/**
* Returns the path being written to by a given LobFile.Writer, relative
* to the working directory of this LargeObjectLoader.
* @param w the LobFile.Writer whose path should be examined.
* @return the path this is writing to, relative to the current working dir.
*/
private String getRelativePath(LobFile.Writer w) {
Path writerPath = w.getPath();
String writerPathStr = writerPath.toString();
String workPathStr = workPath.toString();
if (!workPathStr.endsWith(File.separator)) {
workPathStr = workPathStr + File.separator;
}
if (writerPathStr.startsWith(workPathStr)) {
return writerPathStr.substring(workPathStr.length());
}
// Outside the working dir; return the whole thing.
return writerPathStr;
}
/**
* Copies all character data from the provided Reader to the provided
* Writer. Does not close handles when it's done.
* @param reader data source
* @param writer data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(Reader reader, Writer writer) throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
char [] buf = new char[bufferSize];
while (true) {
int charsRead = reader.read(buf);
if (-1 == charsRead) {
break; // no more stream to read.
}
writer.write(buf, 0, charsRead);
}
}
/**
* Copies all byte data from the provided InputStream to the provided
* OutputStream. Does not close handles when it's done.
* @param input data source
* @param output data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(InputStream input, OutputStream output)
throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
byte [] buf = new byte[bufferSize];
while (true) {
int bytesRead = input.read(buf, 0, bufferSize);
if (-1 == bytesRead) {
break; // no more stream to read.
}
output.write(buf, 0, bytesRead);
}
}
/**
* Actually read a BlobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a BlobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public com.cloudera.sqoop.lib.BlobRef readBlobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Blob b = r.getBlob(colNum);
if (null == b) {
return null;
} else if (b.length() > maxInlineLobLen) {
// Deserialize very large BLOBs into separate files.
long len = b.length();
LobFile.Writer lobWriter = getBlobWriter();
long recordOffset = lobWriter.tell();
InputStream is = null;
OutputStream os = lobWriter.writeBlobRecord(len);
try {
is = b.getBinaryStream();
copyAll(is, os);
} finally {
if (null != os) {
os.close();
}
if (null != is) {
is.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new com.cloudera.sqoop.lib.BlobRef(
getRelativePath(curBlobWriter), recordOffset, len);
} else {
// Blob.getBytes() uses 1-based positions.
return new com.cloudera.sqoop.lib.BlobRef(
b.getBytes(1, (int) b.length()));
}
}
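// Illustrative behavior: with sqoop.inline.lob.length.max left at its 16 MB
// default, a 4 KB BLOB is returned as an inline BlobRef over the raw bytes,
// while a 100 MB BLOB is streamed into an external LobFile and the returned
// BlobRef records only the file's relative path, offset, and length.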
/**
* Actually read a ClobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a ClobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public com.cloudera.sqoop.lib.ClobRef readClobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Clob c = r.getClob(colNum);
if (null == c) {
return null;
} else if (c.length() > maxInlineLobLen) {
// Deserialize large CLOB into separate file.
long len = c.length();
LobFile.Writer lobWriter = getClobWriter();
long recordOffset = lobWriter.tell();
Reader reader = null;
Writer w = lobWriter.writeClobRecord(len);
try {
reader = c.getCharacterStream();
copyAll(reader, w);
} finally {
if (null != w) {
w.close();
}
if (null != reader) {
reader.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new com.cloudera.sqoop.lib.ClobRef(
getRelativePath(lobWriter), recordOffset, len);
} else {
// Clob.getSubString() uses 1-based positions.
return new com.cloudera.sqoop.lib.ClobRef(
c.getSubString(1, (int) c.length()));
}
}
}

View File

@ -0,0 +1,329 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.io.LobReaderCache;
/**
* Abstract base class that holds a reference to a Blob or a Clob.
* DATATYPE is the type being held (e.g., a byte array).
* CONTAINERTYPE is the type used to hold this data (e.g., BytesWritable).
* ACCESSORTYPE is the type used to access this data in a streaming fashion
* (either an InputStream or a Reader).
*/
public abstract class LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>
implements Closeable, Writable {
public static final Log LOG = LogFactory.getLog(LobRef.class.getName());
protected LobRef() {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = null;
}
protected LobRef(CONTAINERTYPE container) {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = container;
}
protected LobRef(String file, long offset, long length) {
this.fileName = file;
this.offset = offset;
this.length = length;
this.realData = null;
}
// If the data is 'small', it's held directly, here.
private CONTAINERTYPE realData;
/** Internal API to retrieve the data object. */
protected CONTAINERTYPE getDataObj() {
return realData;
}
/** Internal API to set the data object. */
protected void setDataObj(CONTAINERTYPE data) {
this.realData = data;
}
// If the data is too large to materialize fully, it's written into a file
// whose path (relative to the rest of the dataset) is recorded here. This
// takes precedence if the value of fileName is non-null. These records are
// currently written into LobFile-formatted files, which hold multiple
// records. The starting offset and length of the record are recorded here
// as well.
private String fileName;
private long offset;
private long length;
// If we've opened a LobFile object, track our reference to it here.
private LobFile.Reader lobReader;
@Override
@SuppressWarnings("unchecked")
/**
* Clone the current reference object. data is deep-copied; any open
* file handle remains with the original only.
*/
public Object clone() throws CloneNotSupportedException {
LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> r =
(LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>) super.clone();
r.lobReader = null; // Reference to opened reader is not duplicated.
if (null != realData) {
r.realData = deepCopyData(realData);
}
return r;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
// Discard any open LobReader.
if (null != this.lobReader) {
LobReaderCache.getCache().recycle(this.lobReader);
}
}
/**
* @return true if the LOB data is in an external file; false if
* it is materialized inline.
*/
public boolean isExternal() {
return fileName != null;
}
/**
* Convenience method to access #getDataStream(Configuration, Path)
* from within a map task that read this LobRef from a file-based
* InputSplit.
* @param mapContext the Mapper.Context instance that encapsulates
* the current map task.
* @return an object that lazily streams the record to the client.
* @throws IllegalArgumentException if it cannot find the source
* path for this LOB based on the MapContext.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Mapper.Context mapContext)
throws IOException {
InputSplit split = mapContext.getInputSplit();
if (split instanceof FileSplit) {
Path basePath = ((FileSplit) split).getPath().getParent();
return getDataStream(mapContext.getConfiguration(),
basePath);
} else {
throw new IllegalArgumentException(
"Could not ascertain LOB base path from MapContext.");
}
}
/**
* Get access to the LOB data itself.
* This method returns a lazy reader of the LOB data, accessing the
* filesystem for external LOB storage as necessary.
* @param conf the Configuration used to access the filesystem
* @param basePath the base directory where the table records are
* stored.
* @return an object that lazily streams the record to the client.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Configuration conf, Path basePath)
throws IOException {
if (isExternal()) {
// Read from external storage.
Path pathToRead = LobReaderCache.qualify(
new Path(basePath, fileName), conf);
LOG.debug("Retreving data stream from external path: " + pathToRead);
if (lobReader != null) {
// We already have a reader open to a LobFile. Is it the correct file?
if (!pathToRead.equals(lobReader.getPath())) {
// No. Close this.lobReader and get the correct one.
LOG.debug("Releasing previous external reader for "
+ lobReader.getPath());
LobReaderCache.getCache().recycle(lobReader);
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
} else {
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
// We now have a LobFile.Reader associated with the correct file. Get to
// the correct offset and return an InputStream/Reader to the user.
if (lobReader.tell() != offset) {
LOG.debug("Seeking to record start offset " + offset);
lobReader.seek(offset);
}
if (!lobReader.next()) {
throw new IOException("Could not locate record at " + pathToRead
+ ":" + offset);
}
return getExternalSource(lobReader);
} else {
// This data is already materialized in memory; wrap it and return.
return getInternalSource(realData);
}
}
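// A hedged usage sketch (variable names invented for the example):
//   InputStream is = blobRef.getDataStream(conf, basePath);
// where basePath is the directory holding the imported records; for external
// LOBs the reader comes from LobReaderCache and is positioned at the
// record's offset before the accessor is returned.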
/**
* Using the LobFile reader, get an accessor InputStream or Reader to the
* underlying data.
*/
protected abstract ACCESSORTYPE getExternalSource(LobFile.Reader reader)
throws IOException;
/**
* Wrap the materialized data in an InputStream or Reader.
*/
protected abstract ACCESSORTYPE getInternalSource(CONTAINERTYPE data);
/**
* @return the materialized data itself.
*/
protected abstract DATATYPE getInternalData(CONTAINERTYPE data);
/**
* Make a copy of the materialized data.
*/
protected abstract CONTAINERTYPE deepCopyData(CONTAINERTYPE data);
public DATATYPE getData() {
if (isExternal()) {
throw new RuntimeException(
"External LOBs must be read via getDataStream()");
}
return getInternalData(realData);
}
@Override
public String toString() {
if (isExternal()) {
return "externalLob(lf," + fileName + "," + Long.toString(offset)
+ "," + Long.toString(length) + ")";
} else {
return realData.toString();
}
}
@Override
public void readFields(DataInput in) throws IOException {
// The serialization format for this object is:
// boolean isExternal
// if true, then:
// a string identifying the external storage type
// and external-storage-specific data.
// if false, then we use readFieldsInternal() to allow BlobRef/ClobRef
// to serialize as it sees fit.
//
// Currently the only external storage supported is LobFile, identified
// by the string "lf". This serializes with the filename (as a string),
// followed by a long-valued offset and a long-valued length.
boolean isExternal = in.readBoolean();
if (isExternal) {
this.realData = null;
String storageType = Text.readString(in);
if (!storageType.equals("lf")) {
throw new IOException("Unsupported external LOB storage code: "
+ storageType);
}
// Storage type "lf" is LobFile: filename, offset, length.
this.fileName = Text.readString(in);
this.offset = in.readLong();
this.length = in.readLong();
} else {
readFieldsInternal(in);
this.fileName = null;
this.offset = 0;
this.length = 0;
}
}
/**
* Perform the readFields() operation on a fully-materializable record.
* @param in the DataInput to deserialize from.
*/
protected abstract void readFieldsInternal(DataInput in) throws IOException;
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(isExternal());
if (isExternal()) {
Text.writeString(out, "lf"); // storage type "lf" for LobFile.
Text.writeString(out, fileName);
out.writeLong(offset);
out.writeLong(length);
} else {
writeInternal(out);
}
}
/**
* Perform the write() operation on a fully-materializable record.
* @param out the DataOutput to serialize to.
*/
protected abstract void writeInternal(DataOutput out) throws IOException;
protected static final ThreadLocal<Matcher> EXTERNAL_MATCHER =
new ThreadLocal<Matcher>() {
@Override protected Matcher initialValue() {
Pattern externalPattern = Pattern.compile(
"externalLob\\(lf,(.*),([0-9]+),([0-9]+)\\)");
return externalPattern.matcher("");
}
};
}

View File

@ -0,0 +1,54 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* Serialize LOB classes to/from DataInput and DataOutput objects.
*/
public final class LobSerializer {
private LobSerializer() { }
public static void writeClob(
com.cloudera.sqoop.lib.ClobRef clob, DataOutput out) throws IOException {
clob.write(out);
}
public static void writeBlob(
com.cloudera.sqoop.lib.BlobRef blob, DataOutput out) throws IOException {
blob.write(out);
}
public static com.cloudera.sqoop.lib.ClobRef readClobFields(
DataInput in) throws IOException {
com.cloudera.sqoop.lib.ClobRef clob = new com.cloudera.sqoop.lib.ClobRef();
clob.readFields(in);
return clob;
}
public static com.cloudera.sqoop.lib.BlobRef readBlobFields(
DataInput in) throws IOException {
com.cloudera.sqoop.lib.BlobRef blob = new com.cloudera.sqoop.lib.BlobRef();
blob.readFields(in);
return blob;
}
}

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* General error during processing of a SqoopRecord.
*/
@SuppressWarnings("serial")
public class ProcessingException extends Exception {
public ProcessingException() {
super("ProcessingException");
}
public ProcessingException(final String message) {
super(message);
}
public ProcessingException(final Throwable cause) {
super(cause);
}
public ProcessingException(final String message, final Throwable cause) {
super(message, cause);
}
@Override
public String toString() {
String msg = getMessage();
return (null == msg) ? "ProcessingException" : msg;
}
}

View File

@ -0,0 +1,371 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
/**
* Parses a record containing one or more fields. Fields are separated
* by some FIELD_DELIMITER character, e.g. a comma or a ^A character.
* Records are terminated by a RECORD_DELIMITER character, e.g., a newline.
*
* Fields may be (optionally or mandatorily) enclosed by a quoting char
* e.g., '\"'
*
* Fields may contain escaped characters. An escape character may be, e.g.,
* the '\\' character. Any character following an escape character
* is treated literally. e.g., '\n' is recorded as an 'n' character, not a
* newline.
*
* Unexpected results may occur if the enclosing character escapes itself;
* e.g., this cannot parse SQL-style quoting, where the single-quote
* character ['] is escaped by doubling it to [''].
*
* This class is not synchronized. Multiple threads must use separate
* instances of RecordParser.
*
* The fields parsed by RecordParser are backed by an internal buffer
* which is cleared when the next call to parseRecord() is made. If
* the buffer is required to be preserved, you must copy it yourself.
*/
public class RecordParser {
public static final Log LOG = LogFactory.getLog(RecordParser.class.getName());
private enum ParseState {
FIELD_START,
ENCLOSED_FIELD,
UNENCLOSED_FIELD,
ENCLOSED_ESCAPE,
ENCLOSED_EXPECT_DELIMITER,
UNENCLOSED_ESCAPE
}
/**
* An error thrown when parsing fails.
*/
public static class ParseError extends Exception {
public ParseError() {
super("ParseError");
}
public ParseError(final String msg) {
super(msg);
}
public ParseError(final String msg, final Throwable cause) {
super(msg, cause);
}
public ParseError(final Throwable cause) {
super(cause);
}
}
private com.cloudera.sqoop.lib.DelimiterSet delimiters;
private ArrayList<String> outputs;
public RecordParser(final com.cloudera.sqoop.lib.DelimiterSet delimitersIn) {
this.delimiters = delimitersIn.copy();
this.outputs = new ArrayList<String>();
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharSequence input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
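// Illustrative example: with the default DelimiterSet (',' fields, '\n'
// records, no enclosing or escaping),
//   parser.parseRecord("1,hello,3.14\n")
// returns the fields ["1", "hello", "3.14"]. The returned List is backed by
// an internal buffer that the next parseRecord() call overwrites.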
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(Text input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
// TODO(aaron): The parser should be able to handle UTF-8 strings
// as well, to avoid this transcode operation.
return parseRecord(input.toString());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(byte [] input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(ByteBuffer.wrap(input).asCharBuffer());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(char [] input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
public List<String> parseRecord(ByteBuffer input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(input.asCharBuffer());
}
// TODO(aaron): Refactor this method to be much shorter.
// CHECKSTYLE:OFF
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharBuffer input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
/*
This method implements the following state machine to perform
parsing.
Note that there are no restrictions on whether particular characters
(e.g., field-sep, record-sep, etc) are distinct or the same. The
state transitions are processed in the order seen in this comment.
Starting state is FIELD_START
encloser -> ENCLOSED_FIELD
escape char -> UNENCLOSED_ESCAPE
field delim -> FIELD_START (for a new field)
record delim -> stops processing
all other letters get added to current field, -> UNENCLOSED FIELD
ENCLOSED_FIELD state:
escape char goes to ENCLOSED_ESCAPE
encloser goes to ENCLOSED_EXPECT_DELIMITER
field sep or record sep gets added to the current string
normal letters get added to the current string
ENCLOSED_ESCAPE state:
any character seen here is added literally, back to ENCLOSED_FIELD
ENCLOSED_EXPECT_DELIMITER state:
field sep goes to FIELD_START
record sep halts processing.
all other characters are errors.
UNENCLOSED_FIELD state:
ESCAPE char goes to UNENCLOSED_ESCAPE
FIELD_SEP char goes to FIELD_START
RECORD_SEP char halts processing
normal chars or the enclosing char get added to the current string
UNENCLOSED_ESCAPE:
add character literal to current string, return to UNENCLOSED_FIELD
*/
char curChar = com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
ParseState state = ParseState.FIELD_START;
int len = input.length();
StringBuilder sb = null;
outputs.clear();
char enclosingChar = delimiters.getEnclosedBy();
char fieldDelim = delimiters.getFieldsTerminatedBy();
char recordDelim = delimiters.getLinesTerminatedBy();
char escapeChar = delimiters.getEscapedBy();
boolean enclosingRequired = delimiters.isEncloseRequired();
for (int pos = 0; pos < len; pos++) {
curChar = input.get();
switch (state) {
case FIELD_START:
// ready to start processing a new field.
if (null != sb) {
// We finished processing a previous field. Add to the list.
outputs.add(sb.toString());
}
sb = new StringBuilder();
if (enclosingChar == curChar) {
// got an opening encloser.
state = ParseState.ENCLOSED_FIELD;
} else if (escapeChar == curChar) {
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we have a zero-length field. This is a no-op.
continue;
} else if (recordDelim == curChar) {
// we have a zero-length field, that ends processing.
pos = len;
} else {
// current char is part of the field.
state = ParseState.UNENCLOSED_FIELD;
sb.append(curChar);
if (enclosingRequired) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Opening field-encloser expected at position " + pos);
}
}
break;
case ENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.ENCLOSED_ESCAPE;
} else if (enclosingChar == curChar) {
// we're at the end of the enclosing field. Expect an EOF or EOR char.
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
} else {
// this is a regular char, or an EOF / EOR inside an encloser. Add to
// the current field string, and remain in this state.
sb.append(curChar);
}
break;
case UNENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we're at the end of this field; may be the start of another one.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
pos = len; // terminate processing immediately.
} else {
// this is a regular char. Add to the current field string,
// and remain in this state.
sb.append(curChar);
}
break;
case ENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// enclosed field processing.
sb.append(curChar);
state = ParseState.ENCLOSED_FIELD;
break;
case ENCLOSED_EXPECT_DELIMITER:
// We were in an enclosed field, but got the final encloser. Now we
// expect either an end-of-field or an end-of-record.
if (fieldDelim == curChar) {
// end of one field is the beginning of the next.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
// stop processing.
pos = len;
} else {
// Don't know what to do with this character.
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Expected delimiter at position " + pos);
}
break;
case UNENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// non-enclosed field processing.
sb.append(curChar);
state = ParseState.UNENCLOSED_FIELD;
break;
default:
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Unexpected parser state: " + state);
}
}
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
// we hit an EOF/EOR as the last legal character and we need to mark
// that string as recorded. This if block is outside the for-loop since
// we don't have a physical 'epsilon' token in our string.
if (null != sb) {
outputs.add(sb.toString());
sb = new StringBuilder();
}
}
if (null != sb) {
// There was a field that terminated by running out of chars or an EOR
// character. Add to the list.
outputs.add(sb.toString());
}
return outputs;
}
// CHECKSTYLE:ON
public boolean isEnclosingRequired() {
return delimiters.isEncloseRequired();
}
@Override
public String toString() {
return "RecordParser[" + delimiters.toString() + "]";
}
@Override
public int hashCode() {
return this.delimiters.hashCode();
}
}

View File

@ -0,0 +1,159 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
/**
* Interface implemented by the classes generated by sqoop's orm.ClassWriter.
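 *
 * Concrete record classes are generated per table and supply the field
 * storage, parsing and serialization behavior declared abstractly here.
 *
 * A hedged usage sketch; the record class, field name, sample data and
 * delimiters below are hypothetical, not part of this API:
 *
 * <pre>
 *   MyTableRecord rec = new MyTableRecord();
 *   rec.parse(new Text("1,Alice"));          // populate fields from delimited text
 *   rec.setField("name", "Bob");             // programmatic field access
 *   String out = rec.toString(delimiters);   // re-serialize with a DelimiterSet
 * </pre>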
*/
public abstract class SqoopRecord implements Cloneable, DBWritable,
com.cloudera.sqoop.lib.FieldMappable, Writable {
public SqoopRecord() {
}
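  /**
   * Parses a delimited text representation of this record and populates its
   * fields; the overloads below accept the same data in other common text
   * and byte-oriented containers.
   */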
public abstract void parse(CharSequence s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(Text s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(byte [] s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(char [] s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(ByteBuffer s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(CharBuffer s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
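  /**
   * Loads any large-object (BLOB/CLOB) columns of this record through the
   * supplied LargeObjectLoader, which may hold each value in memory or
   * spill it to external storage depending on its size.
   */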
public abstract void loadLargeObjects(
com.cloudera.sqoop.lib.LargeObjectLoader objLoader)
throws SQLException, IOException, InterruptedException;
/**
* Inserts the data in this object into the PreparedStatement, starting
* at parameter 'offset'.
* @return the number of fields written to the statement.
*/
public abstract int write(PreparedStatement stmt, int offset)
throws SQLException;
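  // A hedged sketch of how an export path might use write(); the SQL text,
  // table and batching below are illustrative assumptions, not part of this
  // class:
  //
  //   PreparedStatement stmt = conn.prepareStatement(
  //       "INSERT INTO mytable (a, b, c) VALUES (?, ?, ?)");
  //   record.write(stmt, 0);   // bind all columns, starting at the first parameter
  //   stmt.addBatch();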
/**
* Format output data according to the specified delimiters.
*/
public abstract String toString(
com.cloudera.sqoop.lib.DelimiterSet delimiters);
/**
* Use the default delimiters, but only append an end-of-record delimiter
* if useRecordDelim is true.
*/
public String toString(boolean useRecordDelim) {
// Method body should be overridden by generated classes in 1.3.0+
if (useRecordDelim) {
// This is the existing functionality.
return toString();
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(useRecordDelim=false) requires a newer SqoopRecord. "
+ "Please regenerate your record class to use this function.");
}
}
/**
* Format the record according to the specified delimiters. An end-of-record
* delimiter is optional, and only used if useRecordDelim is true. For
* use with TextOutputFormat, calling this with useRecordDelim=false may
* make more sense.
*/
public String toString(
com.cloudera.sqoop.lib.DelimiterSet delimiters, boolean useRecordDelim) {
if (useRecordDelim) {
return toString(delimiters);
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(delimiters, useRecordDelim=false) requires a newer "
+ "SqoopRecord. Please regenerate your record class to use this "
+ "function.");
}
}
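  // A hedged sketch of the TextOutputFormat case mentioned above; the mapper
  // context and key/value choice are illustrative assumptions:
  //
  //   context.write(new Text(record.toString(delimiters, false)),
  //       NullWritable.get());
  //
  // TextOutputFormat appends its own line terminator, so suppressing the
  // record delimiter here avoids writing it twice.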
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
/**
* Returns an integer specifying which API format version the
* generated class conforms to. Used by internal APIs for backwards
* compatibility.
* @return the API version this class was generated against.
*/
public abstract int getClassFormatVersion();
/**
* Use the delegate pattern to allow arbitrary processing of the
* fields of this record.
* @param processor A delegate that operates on this object.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws com.cloudera.sqoop.lib.ProcessingException if the FieldMapProcessor
* encounters a general processing error when operating on this object.
*/
public void delegate(com.cloudera.sqoop.lib.FieldMapProcessor processor)
throws IOException, com.cloudera.sqoop.lib.ProcessingException {
processor.accept(this);
}
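  // A hedged sketch of the delegate pattern described above; the processor
  // class is hypothetical and its accept() signature is assumed from the
  // call made in delegate():
  //
  //   public class AuditProcessor
  //       implements com.cloudera.sqoop.lib.FieldMapProcessor {
  //     public void accept(com.cloudera.sqoop.lib.FieldMappable record)
  //         throws IOException, com.cloudera.sqoop.lib.ProcessingException {
  //       // e.g. inspect record.getFieldMap() here
  //     }
  //   }
  //   record.delegate(new AuditProcessor());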
  /**
   * {@inheritDoc}
   * @throws RuntimeException if used with a record that was generated
   * before this capability was added (1.1.0).
   */
  @Override
public Map<String, Object> getFieldMap() {
// Default implementation does not support field iteration.
// ClassWriter should provide an overriding version.
    throw new RuntimeException(
        "This SqoopRecord does not support getFieldMap(). "
        + "Regenerate your record class.");
}
/**
* Allows an arbitrary field to be set programmatically to the
* specified value object. The value object must match the
* type expected for the particular field or a RuntimeException
* will result.
* @throws RuntimeException if the specified field name does not exist.
*/
public void setField(String fieldName, Object fieldVal) {
throw new RuntimeException("This SqoopRecord does not support setField(). "
+ "Regenerate your record class.");
}
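  // Illustrative only; the field name and value type below are hypothetical.
  // A generated subclass overrides setField() so that a caller can do:
  //
  //   record.setField("price", Double.valueOf(9.99));
  //
  // where the value's runtime type must match the column's mapped Java type,
  // as described in the Javadoc above.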
}