
SQOOP-379 Migrate lib and io packages to new name space

git-svn-id: https://svn.apache.org/repos/asf/incubator/sqoop/trunk@1190430 13f79535-47bb-0310-9956-ffa450edef68
Bilung Lee 2011-10-28 16:32:43 +00:00
parent a8cdad50d2
commit 315fff82b9
47 changed files with 5119 additions and 3756 deletions

View File: build.xml

@ -808,7 +808,6 @@
<arg value="+%Y" /> <arg value="+%Y" />
</exec> </exec>
<javadoc <javadoc
packagenames="com.cloudera.sqoop.lib.*"
destdir="${build.javadoc}" destdir="${build.javadoc}"
author="true" author="true"
version="true" version="true"
@ -818,6 +817,7 @@
bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"> bottom="Copyright &amp;copy; ${year} The Apache Software Foundation">
<packageset dir="${src.dir}"> <packageset dir="${src.dir}">
<include name="com/cloudera/sqoop/lib/**" /> <include name="com/cloudera/sqoop/lib/**" />
<include name="org/apache/sqoop/lib/**" />
</packageset> </packageset>
<classpath> <classpath>
<path refid="compile.classpath" /> <path refid="compile.classpath" />

View File: com/cloudera/sqoop/io/CodecMap.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,22 +15,18 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/** /**
* Provides a mapping from codec names to concrete implementation class names. * Provides a mapping from codec names to concrete implementation class names.
*
* @deprecated use org.apache.sqoop.io.CodecMap instead.
* @see org.apache.sqoop.io.CodecMap
*/ */
public final class CodecMap { public final class CodecMap {
@ -40,33 +34,10 @@ public final class CodecMap {
// Note: do not add more values here, since codecs are discovered using the // Note: do not add more values here, since codecs are discovered using the
// standard Hadoop mechanism (io.compression.codecs). See // standard Hadoop mechanism (io.compression.codecs). See
// CompressionCodecFactory. // CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
public static final String LZOP = "lzop";
public static final String NONE = org.apache.sqoop.io.CodecMap.NONE;
public static final String DEFLATE = org.apache.sqoop.io.CodecMap.DEFLATE;
public static final String LZO = org.apache.sqoop.io.CodecMap.LZO;
public static final String LZOP = org.apache.sqoop.io.CodecMap.LZOP;
private static Map<String, String> codecNames;
static {
codecNames = new TreeMap<String, String>();
// Register the names of codecs we know about.
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
codecNames.put(LZOP, "com.hadoop.compression.lzo.LzopCodec");
// add more from Hadoop CompressionCodecFactory
for (Class<? extends CompressionCodec> cls
: CompressionCodecFactory.getCodecClasses(new Configuration())) {
String simpleName = cls.getSimpleName();
String codecName = simpleName;
if (simpleName.endsWith("Codec")) {
codecName = simpleName.substring(0, simpleName.length()
- "Codec".length());
}
codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
}
}
private CodecMap() { private CodecMap() {
} }
@ -79,11 +50,7 @@ private CodecMap() {
*/ */
public static String getCodecClassName(String codecName) public static String getCodecClassName(String codecName)
throws UnsupportedCodecException { throws UnsupportedCodecException {
if (!codecNames.containsKey(codecName)) {
return org.apache.sqoop.io.CodecMap.getCodecClassName(codecName);
throw new UnsupportedCodecException(codecName);
}
return codecNames.get(codecName);
} }
/** /**
@ -94,79 +61,13 @@ public static String getCodecClassName(String codecName)
*/ */
public static CompressionCodec getCodec(String codecName, public static CompressionCodec getCodec(String codecName,
Configuration conf) throws UnsupportedCodecException { Configuration conf) throws UnsupportedCodecException {
// Try standard Hadoop mechanism first
return org.apache.sqoop.io.CodecMap.getCodec(codecName, conf);
CompressionCodec codec = getCodecByName(codecName, conf);
if (codec != null) {
return codec;
}
// Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
if (null == codecClassName) {
return null;
}
Class<? extends CompressionCodec> codecClass =
(Class<? extends CompressionCodec>)
conf.getClassByName(codecClassName);
return (CompressionCodec) ReflectionUtils.newInstance(
codecClass, conf);
} catch (ClassNotFoundException cnfe) {
throw new UnsupportedCodecException("Cannot find codec class "
+ codecClassName + " for codec " + codecName);
}
}
/**
* Find the relevant compression codec for the codec's canonical class name
* or by codec alias.
* <p>
* Codec aliases are case insensitive.
* <p>
* The code alias is the short class name (without the package name).
* If the short class name ends with 'Codec', then there are two aliases for
* the codec, the complete short class name and the short class name without
* the 'Codec' ending. For example for the 'GzipCodec' codec class name the
* alias are 'gzip' and 'gzipcodec'.
* <p>
* Note: When HADOOP-7323 is available this method can be replaced with a call
* to CompressionCodecFactory.
* @param classname the canonical class name of the codec or the codec alias
* @return the codec object or null if none matching the name were found
*/
private static CompressionCodec getCodecByName(String codecName,
Configuration conf) {
List<Class<? extends CompressionCodec>> codecs =
CompressionCodecFactory.getCodecClasses(conf);
for (Class<? extends CompressionCodec> cls : codecs) {
if (codecMatches(cls, codecName)) {
return ReflectionUtils.newInstance(cls, conf);
}
}
return null;
}
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0, simpleName.length()
- "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
} }
/** /**
* Return the set of available codec names. * Return the set of available codec names.
*/ */
public static Set<String> getCodecNames() { public static Set<String> getCodecNames() {
return codecNames.keySet();
return org.apache.sqoop.io.CodecMap.getCodecNames();
} }
} }
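
The deprecated com.cloudera.sqoop.io.CodecMap keeps its public surface but now forwards every lookup to org.apache.sqoop.io.CodecMap, so existing call sites keep compiling. A minimal usage sketch, not part of this commit; the "gzip" alias is an assumption that Hadoop's default codec list (io.compression.codecs) is in effect:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;

public class CodecMapExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Legacy call site: the deprecated class delegates to the new namespace.
    CompressionCodec viaOldName =
        com.cloudera.sqoop.io.CodecMap.getCodec("gzip", conf);
    // New call site: same lookup, performed directly.
    CompressionCodec viaNewName =
        org.apache.sqoop.io.CodecMap.getCodec("gzip", conf);
    System.out.println(viaOldName.getClass() + " / " + viaNewName.getClass());
  }
}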

View File: com/cloudera/sqoop/io/FixedLengthInputStream.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,76 +15,24 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.InputStream; import java.io.InputStream;
import java.io.IOException;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.io.input.CountingInputStream;
import org.apache.commons.io.input.ProxyInputStream;
/** /**
* Provides an InputStream that can consume a fixed maximum number of bytes * Provides an InputStream that can consume a fixed maximum number of bytes
* from an underlying stream. Closing the FixedLengthInputStream does not * from an underlying stream. Closing the FixedLengthInputStream does not
* close the underlying stream. After reading the maximum number of available * close the underlying stream. After reading the maximum number of available
* bytes this acts as though EOF has been reached. * bytes this acts as though EOF has been reached.
*
* @deprecated use org.apache.sqoop.io.FixedLengthInputStream instead.
* @see org.apache.sqoop.io.FixedLengthInputStream
*/ */
public class FixedLengthInputStream extends ProxyInputStream {
private CountingInputStream countingIn;
private long maxBytes;
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(new CountingInputStream(new CloseShieldInputStream(stream)));
// Save a correctly-typed reference to the underlying stream.
this.countingIn = (CountingInputStream) this.in;
this.maxBytes = maxLen;
}
public class FixedLengthInputStream
extends org.apache.sqoop.io.FixedLengthInputStream {
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(stream, maxLen);
}
/** @return the number of bytes already consumed by the client. */
private long consumed() {
return countingIn.getByteCount();
}
/**
* @return number of bytes remaining to be read before the limit
* is reached.
*/
private long toLimit() {
return maxBytes - consumed();
}
@Override
public int available() throws IOException {
return (int) Math.min(toLimit(), countingIn.available());
}
@Override
public int read() throws IOException {
if (toLimit() > 0) {
return super.read();
} else {
return -1; // EOF.
}
}
@Override
public int read(byte [] buf) throws IOException {
return read(buf, 0, buf.length);
}
@Override
public int read(byte [] buf, int start, int count) throws IOException {
long limit = toLimit();
if (limit == 0) {
return -1; // EOF.
} else {
return super.read(buf, start, (int) Math.min(count, limit));
}
}
} }
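
The wrapper now inherits the entire implementation and keeps only the constructor. A small sketch of the documented contract, namely that reads stop after maxLen bytes and that closing the wrapper leaves the underlying stream open (illustrative only; the sample data is made up):

import java.io.ByteArrayInputStream;
import com.cloudera.sqoop.io.FixedLengthInputStream;

public class FixedLengthExample {
  public static void main(String[] args) throws Exception {
    ByteArrayInputStream src =
        new ByteArrayInputStream("hello world".getBytes("UTF-8"));
    FixedLengthInputStream in = new FixedLengthInputStream(src, 5);
    int b;
    while ((b = in.read()) != -1) {
      System.out.print((char) b);   // prints "hello", then acts as EOF
    }
    in.close();                     // 'src' itself is left open
  }
}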

File diff suppressed because it is too large.

View File: com/cloudera/sqoop/io/LobReaderCache.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,19 +15,13 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
/** /**
* A cache of open LobFile.Reader objects. * A cache of open LobFile.Reader objects.
@ -38,17 +30,13 @@
* instances, it is most useful to have a single global cache. This cache is * instances, it is most useful to have a single global cache. This cache is
* internally synchronized; only one thread can insert or retrieve a reader * internally synchronized; only one thread can insert or retrieve a reader
* from the cache at a time. * from the cache at a time.
*
* @deprecated use org.apache.sqoop.io.LobReaderCache instead.
* @see org.apache.sqoop.io.LobReaderCache
*/ */
public final class LobReaderCache {
public static final Log LOG = LogFactory.getLog(
LobReaderCache.class.getName());
private Map<Path, LobFile.Reader> readerMap;
private LobReaderCache() {
this.readerMap = new TreeMap<Path, LobFile.Reader>();
}
public final class LobReaderCache extends org.apache.sqoop.io.LobReaderCache {
public static final Log LOG = org.apache.sqoop.io.LobReaderCache.LOG;
private static final LobReaderCache CACHE; private static final LobReaderCache CACHE;
static { static {
@ -71,79 +59,7 @@ public static LobReaderCache getCache() {
*/ */
public static Path qualify(Path path, Configuration conf) public static Path qualify(Path path, Configuration conf)
throws IOException { throws IOException {
if (null == path) {
return org.apache.sqoop.io.LobReaderCache.qualify(path, conf);
return null;
}
FileSystem fs = path.getFileSystem(conf);
if (null == fs) {
fs = FileSystem.get(conf);
}
return path.makeQualified(fs);
}
/**
* Open a LobFile for read access, returning a cached reader if one is
* available, or a new reader otherwise.
* @param path the path to the LobFile to open
* @param conf the configuration to use to access the FS.
* @throws IOException if there's an error opening the file.
*/
public LobFile.Reader get(Path path, Configuration conf)
throws IOException {
LobFile.Reader reader = null;
Path canonicalPath = qualify(path, conf);
// Look up an entry in the cache.
synchronized(this) {
reader = readerMap.remove(canonicalPath);
}
if (null != reader && !reader.isClosed()) {
// Cache hit. return it.
LOG.debug("Using cached reader for " + canonicalPath);
return reader;
}
// Cache miss; open the file.
LOG.debug("No cached reader available for " + canonicalPath);
return LobFile.open(path, conf);
}
/**
* Return a reader back to the cache. If there's already a reader for
* this path, then the current reader is closed.
* @param reader the opened reader. Any record-specific subreaders should be
* closed.
* @throws IOException if there's an error accessing the path's filesystem.
*/
public void recycle(LobFile.Reader reader) throws IOException {
Path canonicalPath = reader.getPath();
// Check if the cache has a reader for this path already. If not, add this.
boolean cached = false;
synchronized(this) {
if (readerMap.get(canonicalPath) == null) {
LOG.debug("Caching reader for path: " + canonicalPath);
readerMap.put(canonicalPath, reader);
cached = true;
}
}
if (!cached) {
LOG.debug("Reader already present for path: " + canonicalPath
+ "; closing.");
reader.close();
}
}
@Override
protected synchronized void finalize() throws Throwable {
for (LobFile.Reader r : readerMap.values()) {
r.close();
}
super.finalize();
} }
} }
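
qualify() and getCache() remain on the deprecated class, while get() and recycle() are now inherited from org.apache.sqoop.io.LobReaderCache. A hedged sketch of obtaining the cache and qualifying a path (the path is hypothetical and no file is opened):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobReaderCache;

public class LobReaderCacheExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Static passthrough to org.apache.sqoop.io.LobReaderCache.qualify().
    Path lobPath = LobReaderCache.qualify(new Path("/tmp/example.lob"), conf);
    // Process-wide cache; get(path, conf) and recycle(reader) are inherited.
    LobReaderCache cache = LobReaderCache.getCache();
    System.out.println("Qualified path: " + lobPath + ", cache: " + cache);
  }
}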

View File: com/cloudera/sqoop/io/NamedFifo.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,82 +15,24 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.File; import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Logger;
/** /**
* A named FIFO channel. * A named FIFO channel.
*
* @deprecated use org.apache.sqoop.io.NamedFifo instead.
* @see org.apache.sqoop.io.NamedFifo
*/ */
public class NamedFifo {
public class NamedFifo extends org.apache.sqoop.io.NamedFifo {
private static final Logger LOG = Logger.getLogger(NamedFifo.class);
private File fifoFile;
/** Create a named FIFO object at the local fs path given by 'pathname'. */
public NamedFifo(String pathname) { public NamedFifo(String pathname) {
this.fifoFile = new File(pathname);
super(pathname);
} }
/** Create a named FIFO object at the local fs path given by the 'fifo' File
* object. */
public NamedFifo(File fifo) { public NamedFifo(File fifo) {
this.fifoFile = fifo;
super(fifo);
}
/**
* Return the File object representing the FIFO.
*/
public File getFile() {
return this.fifoFile;
}
/**
* Create a named FIFO object.
* The pipe will be created with permissions 0600.
* @throws IOException on failure.
*/
public void create() throws IOException {
create(0600);
}
/**
* Create a named FIFO object with the specified fs permissions.
* This depends on the 'mknod' or 'mkfifo' (Mac OS X) system utility
* existing. (for example, provided by Linux coreutils). This object
* will be deleted when the process exits.
* @throws IOException on failure.
*/
public void create(int permissions) throws IOException {
String filename = fifoFile.toString();
// Format permissions as a mode string in base 8.
String modeStr = Integer.toString(permissions, 8);
// Create the FIFO itself.
try {
String output = Shell.execCommand("mknod", "--mode=0" + modeStr,
filename, "p");
LOG.info("mknod output:\n"+output);
} catch (IOException ex) {
LOG.info("IO error running mknod: " + ex.getMessage());
LOG.debug("IO error running mknod", ex);
}
if (!this.fifoFile.exists()) {
LOG.info("mknod failed, falling back to mkfifo");
String output = Shell.execCommand("mkfifo", "-m", "0" + modeStr,
filename);
LOG.info("mkfifo output:\n"+output);
}
// Schedule the FIFO to be cleaned up when we exit.
this.fifoFile.deleteOnExit();
} }
} }
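
Both constructors survive as thin super() calls; FIFO creation itself now happens in org.apache.sqoop.io.NamedFifo. A sketch of the usual flow, assuming a Unix system with mknod or mkfifo available, a made-up path, and create()/getFile() being inherited unchanged:

import java.io.File;
import com.cloudera.sqoop.io.NamedFifo;

public class NamedFifoExample {
  public static void main(String[] args) throws Exception {
    NamedFifo fifo = new NamedFifo("/tmp/sqoop-example.fifo"); // hypothetical path
    fifo.create(0600);               // create the pipe with mode 0600
    File pipe = fifo.getFile();
    System.out.println("FIFO created at " + pipe.getAbsolutePath());
    // The pipe is scheduled for deletion when the JVM exits.
  }
}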

View File: com/cloudera/sqoop/io/SplittableBufferedWriter.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,56 +18,27 @@
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.BufferedWriter;
import org.apache.sqoop.io.SplittingOutputStream;
import java.io.OutputStreamWriter;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/** /**
* A BufferedWriter implementation that wraps around a SplittingOutputStream * A BufferedWriter implementation that wraps around a SplittingOutputStream
* and allows splitting of the underlying stream. * and allows splitting of the underlying stream.
* Splits occur at allowSplit() calls, or newLine() calls. * Splits occur at allowSplit() calls, or newLine() calls.
*
* @deprecated use org.apache.sqoop.io.SplittableBufferedWriter instead.
* @see org.apache.sqoop.io.SplittableBufferedWriter
*/ */
public class SplittableBufferedWriter extends BufferedWriter {
public class SplittableBufferedWriter
extends org.apache.sqoop.io.SplittableBufferedWriter {
public static final Log LOG = LogFactory.getLog(
SplittableBufferedWriter.class.getName());
private SplittingOutputStream splitOutputStream;
private boolean alwaysFlush;
public SplittableBufferedWriter( public SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream) { final SplittingOutputStream splitOutputStream) {
super(new OutputStreamWriter(splitOutputStream));
super(splitOutputStream);
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = false;
} }
/** For testing. */
SplittableBufferedWriter(final SplittingOutputStream splitOutputStream, SplittableBufferedWriter(final SplittingOutputStream splitOutputStream,
final boolean alwaysFlush) { final boolean alwaysFlush) {
super(new OutputStreamWriter(splitOutputStream));
super(splitOutputStream, alwaysFlush);
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = alwaysFlush;
}
public void newLine() throws IOException {
super.newLine();
this.allowSplit();
}
public void allowSplit() throws IOException {
if (alwaysFlush) {
this.flush();
}
if (this.splitOutputStream.wouldSplit()) {
LOG.debug("Starting new split");
this.flush();
this.splitOutputStream.allowSplit();
}
} }
} }

View File: com/cloudera/sqoop/io/SplittingOutputStream.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,19 +15,12 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.OutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.Formatter;
import org.apache.commons.io.output.CountingOutputStream;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodec;
@ -37,127 +28,18 @@
* An output stream that writes to an underlying filesystem, opening * An output stream that writes to an underlying filesystem, opening
* a new file after a specified number of bytes have been written to the * a new file after a specified number of bytes have been written to the
* current one. * current one.
*
* @deprecated use org.apache.sqoop.io.SplittingOutputStream instead.
* @see org.apache.sqoop.io.SplittingOutputStream
*/ */
public class SplittingOutputStream extends OutputStream {
public static final Log LOG = LogFactory.getLog(
SplittingOutputStream.class.getName());
public class SplittingOutputStream
extends org.apache.sqoop.io.SplittingOutputStream {
public static final Log LOG = org.apache.sqoop.io.SplittingOutputStream.LOG;
private OutputStream writeStream;
private CountingOutputStream countingFilterStream;
private Configuration conf;
private Path destDir;
private String filePrefix;
private long cutoffBytes;
private CompressionCodec codec;
private int fileNum;
/**
* Create a new SplittingOutputStream.
* @param conf the Configuration to use to interface with HDFS
* @param destDir the directory where the files will go (should already
* exist).
* @param filePrefix the first part of the filename, which will be appended
* by a number. This file will be placed inside destDir.
* @param cutoff the approximate number of bytes to use per file
* @param doGzip if true, then output files will be gzipped and have a .gz
* suffix.
*/
public SplittingOutputStream(final Configuration conf, final Path destDir, public SplittingOutputStream(final Configuration conf, final Path destDir,
final String filePrefix, final long cutoff, final CompressionCodec codec) final String filePrefix, final long cutoff, final CompressionCodec codec)
throws IOException { throws IOException {
super(conf, destDir, filePrefix, cutoff, codec);
this.conf = conf;
this.destDir = destDir;
this.filePrefix = filePrefix;
this.cutoffBytes = cutoff;
if (this.cutoffBytes < 0) {
this.cutoffBytes = 0; // splitting disabled.
}
this.codec = codec;
this.fileNum = 0;
openNextFile();
}
/** Initialize the OutputStream to the next file to write to.
*/
private void openNextFile() throws IOException {
FileSystem fs = FileSystem.get(conf);
StringBuffer sb = new StringBuffer();
Formatter fmt = new Formatter(sb);
fmt.format("%05d", this.fileNum++);
String filename = filePrefix + fmt.toString();
if (codec != null) {
filename = filename + codec.getDefaultExtension();
}
Path destFile = new Path(destDir, filename);
LOG.debug("Opening next output file: " + destFile);
if (fs.exists(destFile)) {
Path canonicalDest = destFile.makeQualified(fs);
throw new IOException("Destination file " + canonicalDest
+ " already exists");
}
OutputStream fsOut = fs.create(destFile);
// Count how many actual bytes hit HDFS.
this.countingFilterStream = new CountingOutputStream(fsOut);
if (codec != null) {
// Wrap that in a compressing stream.
this.writeStream = codec.createOutputStream(this.countingFilterStream);
} else {
// Write to the counting stream directly.
this.writeStream = this.countingFilterStream;
}
}
/**
* @return true if allowSplit() would actually cause a split.
*/
public boolean wouldSplit() {
return this.cutoffBytes > 0
&& this.countingFilterStream.getByteCount() >= this.cutoffBytes;
}
/** If we've written more to the disk than the user's split size,
* open the next file.
*/
private void checkForNextFile() throws IOException {
if (wouldSplit()) {
LOG.debug("Starting new split");
this.writeStream.flush();
this.writeStream.close();
openNextFile();
}
}
/** Defines a point in the stream when it is acceptable to split to a new
file; e.g., the end of a record.
*/
public void allowSplit() throws IOException {
checkForNextFile();
}
public void close() throws IOException {
this.writeStream.close();
}
public void flush() throws IOException {
this.writeStream.flush();
}
public void write(byte [] b) throws IOException {
this.writeStream.write(b);
}
public void write(byte [] b, int off, int len) throws IOException {
this.writeStream.write(b, off, len);
}
public void write(int b) throws IOException {
this.writeStream.write(b);
} }
} }
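
Together with SplittableBufferedWriter above, the two classes still compose the same way; only the implementation has moved. A hedged sketch of writing split text output (constructor signature as in the diff; the output directory and file prefix are made up, a null codec is taken to mean uncompressed output, and newLine()/allowSplit() are assumed to be inherited unchanged):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.SplittableBufferedWriter;
import com.cloudera.sqoop.io.SplittingOutputStream;

public class SplitWriterExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    SplittingOutputStream out = new SplittingOutputStream(
        conf, new Path("/tmp/sqoop-out"), "part-", 1024L * 1024L, null);
    SplittableBufferedWriter writer = new SplittableBufferedWriter(out);
    writer.write("one record");
    writer.newLine();      // record boundary; a split may begin here
    writer.allowSplit();   // explicit split point
    writer.close();
  }
}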

View File: com/cloudera/sqoop/io/UnsupportedCodecException.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,15 +15,18 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.io; package com.cloudera.sqoop.io;
import java.io.IOException;
/** /**
* Thrown when a compression codec cannot be recognized. * Thrown when a compression codec cannot be recognized.
*
* @deprecated use org.apache.sqoop.io.UnsupportedCodecException instead.
* @see org.apache.sqoop.io.UnsupportedCodecException
*/ */
public class UnsupportedCodecException extends IOException {
public class UnsupportedCodecException
extends org.apache.sqoop.io.UnsupportedCodecException {
public UnsupportedCodecException() { public UnsupportedCodecException() {
super("UnsupportedCodecException"); super("UnsupportedCodecException");
} }

View File: com/cloudera/sqoop/lib/BigDecimalSerializer.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,7 +15,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.DataInput; import java.io.DataInput;
@ -26,8 +23,6 @@
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.BigInteger; import java.math.BigInteger;
import org.apache.hadoop.io.Text;
/** /**
* Serialize BigDecimal classes to/from DataInput and DataOutput objects. * Serialize BigDecimal classes to/from DataInput and DataOutput objects.
* *
@ -43,43 +38,24 @@
* [int: scale][boolean: b == true][string: BigInt-part.toString()] * [int: scale][boolean: b == true][string: BigInt-part.toString()]
* *
* TODO(aaron): Get this to work with Hadoop's Serializations framework. * TODO(aaron): Get this to work with Hadoop's Serializations framework.
*
* @deprecated use org.apache.sqoop.lib.BigDecimalSerializer instead.
* @see org.apache.sqoop.lib.BigDecimalSerializer
*/ */
public final class BigDecimalSerializer { public final class BigDecimalSerializer {
private BigDecimalSerializer() { } private BigDecimalSerializer() { }
static final BigInteger LONG_MAX_AS_BIGINT =
BigInteger.valueOf(Long.MAX_VALUE);
static final BigInteger LONG_MIN_AS_BIGINT =
BigInteger.valueOf(Long.MIN_VALUE);
static final BigInteger LONG_MAX_AS_BIGINT =
org.apache.sqoop.lib.BigDecimalSerializer.LONG_MAX_AS_BIGINT;
static final BigInteger LONG_MIN_AS_BIGINT =
org.apache.sqoop.lib.BigDecimalSerializer.LONG_MIN_AS_BIGINT;
public static void write(BigDecimal d, DataOutput out) throws IOException { public static void write(BigDecimal d, DataOutput out) throws IOException {
int scale = d.scale();
org.apache.sqoop.lib.BigDecimalSerializer.write(d, out);
BigInteger bigIntPart = d.unscaledValue();
boolean fastpath = bigIntPart.compareTo(LONG_MAX_AS_BIGINT) < 0
&& bigIntPart .compareTo(LONG_MIN_AS_BIGINT) > 0;
out.writeInt(scale);
out.writeBoolean(fastpath);
if (fastpath) {
out.writeLong(bigIntPart.longValue());
} else {
Text.writeString(out, bigIntPart.toString());
}
} }
public static BigDecimal readFields(DataInput in) throws IOException { public static BigDecimal readFields(DataInput in) throws IOException {
int scale = in.readInt();
return org.apache.sqoop.lib.BigDecimalSerializer.readFields(in);
boolean fastpath = in.readBoolean();
BigInteger unscaledIntPart;
if (fastpath) {
long unscaledValue = in.readLong();
unscaledIntPart = BigInteger.valueOf(unscaledValue);
} else {
String unscaledValueStr = Text.readString(in);
unscaledIntPart = new BigInteger(unscaledValueStr);
}
return new BigDecimal(unscaledIntPart, scale);
} }
} }
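
The static write()/readFields() pair keeps its signatures and simply forwards to the new package. A self-contained round-trip sketch (the value and stream plumbing are illustrative):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.math.BigDecimal;
import com.cloudera.sqoop.lib.BigDecimalSerializer;

public class BigDecimalRoundTrip {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    BigDecimalSerializer.write(new BigDecimal("12345.6789"),
        new DataOutputStream(buf));
    BigDecimal back = BigDecimalSerializer.readFields(
        new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
    System.out.println(back);   // 12345.6789
  }
}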

View File: com/cloudera/sqoop/lib/BlobRef.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,37 +15,27 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Matcher;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.io.LobFile;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/** /**
* BlobRef is a wrapper that holds a BLOB either directly, or a * BlobRef is a wrapper that holds a BLOB either directly, or a
* reference to a file that holds the BLOB data. * reference to a file that holds the BLOB data.
*
* @deprecated use org.apache.sqoop.lib.BlobRef instead.
* @see org.apache.sqoop.lib.BlobRef
*/ */
public class BlobRef extends LobRef<byte[], BytesWritable, InputStream> {
public static final Log LOG = LogFactory.getLog(BlobRef.class.getName());
public class BlobRef extends org.apache.sqoop.lib.BlobRef {
public static final Log LOG = org.apache.sqoop.lib.BlobRef.LOG;
public BlobRef() { public BlobRef() {
super(); super();
} }
public BlobRef(byte [] bytes) { public BlobRef(byte [] bytes) {
super(new BytesWritable(bytes));
super(bytes);
} }
/** /**
@ -60,45 +48,6 @@ public BlobRef(String file, long offset, long length) {
super(file, offset, length); super(file, offset, length);
} }
@Override
protected InputStream getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readBlobRecord();
}
@Override
protected InputStream getInternalSource(BytesWritable data) {
return new ByteArrayInputStream(data.getBytes(), 0, data.getLength());
}
@Override
protected byte [] getInternalData(BytesWritable data) {
return Arrays.copyOf(data.getBytes(), data.getLength());
}
@Override
protected BytesWritable deepCopyData(BytesWritable data) {
return new BytesWritable(Arrays.copyOf(data.getBytes(), data.getLength()));
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored BLOBs, the data is a BytesWritable
// containing the actual data.
BytesWritable data = getDataObj();
if (null == data) {
data = new BytesWritable();
}
data.readFields(in);
setDataObj(data);
}
@Override
public void writeInternal(DataOutput out) throws IOException {
getDataObj().write(out);
}
/** /**
* Create a BlobRef based on parsed data from a line of text. * Create a BlobRef based on parsed data from a line of text.
@ -110,24 +59,7 @@ public void writeInternal(DataOutput out) throws IOException {
* an empty BlobRef if the data to be parsed is actually inline. * an empty BlobRef if the data to be parsed is actually inline.
*/ */
public static BlobRef parse(String inputString) { public static BlobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
return org.apache.sqoop.lib.BlobRef.parse(inputString);
// an external BLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline BLOB, which we don't support parsing of.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new BlobRef(m.group(1), Long.valueOf(m.group(2)),
Long.valueOf(m.group(3)));
} else {
// This is inline BLOB string data.
LOG.warn(
"Reparsing inline BLOB data is not supported; use SequenceFiles.");
return new BlobRef();
}
} }
} }
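
parse() still recognizes the external-LOB syntax noted in the removed comment, and anything else is treated as inline data that cannot be re-read. A sketch; the file name, offset and length are made up:

import com.cloudera.sqoop.lib.BlobRef;

public class BlobRefParseExample {
  public static void main(String[] args) {
    // External form: 'externalLob(lf,<file>,<offset>,<length>)'.
    BlobRef external = BlobRef.parse("externalLob(lf,part-00000.lob,100,2048)");
    // Inline input: per the removed implementation this logs a warning and
    // yields an empty BlobRef, since inline BLOB data cannot be re-parsed.
    BlobRef inline = BlobRef.parse("0A0B0C");
    System.out.println(external + " / " + inline);
  }
}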

View File: com/cloudera/sqoop/lib/BooleanParser.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -23,6 +21,8 @@
/** /**
* Parse string representations of boolean values into boolean * Parse string representations of boolean values into boolean
* scalar types. * scalar types.
* @deprecated use org.apache.sqoop.lib.BooleanParser instead.
* @see org.apache.sqoop.lib.BooleanParser
*/ */
public final class BooleanParser { public final class BooleanParser {
private BooleanParser() { private BooleanParser() {
@ -37,9 +37,7 @@ private BooleanParser() {
* <p>All comparisons are case-insensitive.</p> * <p>All comparisons are case-insensitive.</p>
*/ */
public static boolean valueOf(final String s) { public static boolean valueOf(final String s) {
return s != null && ("true".equalsIgnoreCase(s) || "t".equalsIgnoreCase(s) return org.apache.sqoop.lib.BooleanParser.valueOf(s);
|| "1".equals(s) || "on".equalsIgnoreCase(s)
|| "yes".equalsIgnoreCase(s));
} }
} }
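
The accepted spellings are unchanged from the removed body: "true", "t", "1", "on" and "yes", compared case-insensitively, with null mapping to false. A tiny sketch:

import com.cloudera.sqoop.lib.BooleanParser;

public class BooleanParserExample {
  public static void main(String[] args) {
    System.out.println(BooleanParser.valueOf("YES"));  // true
    System.out.println(BooleanParser.valueOf("on"));   // true
    System.out.println(BooleanParser.valueOf("0"));    // false
    System.out.println(BooleanParser.valueOf(null));   // false
  }
}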

View File: com/cloudera/sqoop/lib/ClobRef.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,21 +18,14 @@
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Matcher;
import org.apache.hadoop.io.Text;
import com.cloudera.sqoop.io.LobFile;
/** /**
* ClobRef is a wrapper that holds a CLOB either directly, or a * ClobRef is a wrapper that holds a CLOB either directly, or a
* reference to a file that holds the CLOB data. * reference to a file that holds the CLOB data.
*
* @deprecated use org.apache.sqoop.lib.ClobRef instead.
* @see org.apache.sqoop.lib.ClobRef
*/ */
public class ClobRef extends LobRef<String, String, Reader> {
public class ClobRef extends org.apache.sqoop.lib.ClobRef {
public ClobRef() { public ClobRef() {
super(); super();
@ -54,60 +45,13 @@ public ClobRef(String file, long offset, long length) {
super(file, offset, length); super(file, offset, length);
} }
@Override
protected Reader getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readClobRecord();
}
@Override
protected Reader getInternalSource(String data) {
return new StringReader(data);
}
@Override
protected String deepCopyData(String data) {
return data;
}
@Override
protected String getInternalData(String data) {
return data;
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored clobs, the data is written as UTF8 Text.
setDataObj(Text.readString(in));
}
@Override
public void writeInternal(DataOutput out) throws IOException {
Text.writeString(out, getDataObj());
}
/** /**
* Create a ClobRef based on parsed data from a line of text. * Create a ClobRef based on parsed data from a line of text.
* @param inputString the text-based input data to parse. * @param inputString the text-based input data to parse.
* @return a ClobRef to the given data. * @return a ClobRef to the given data.
*/ */
public static ClobRef parse(String inputString) { public static ClobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
return org.apache.sqoop.lib.ClobRef.parse(inputString);
// an external CLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline CLOB, which we read as-is.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new ClobRef(m.group(1), Long.valueOf(m.group(2)),
Long.valueOf(m.group(3)));
} else {
// This is inline CLOB string data.
return new ClobRef(inputString);
}
} }
} }
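
Unlike BlobRef, inline character data survives parse(). A sketch with made-up values:

import com.cloudera.sqoop.lib.ClobRef;

public class ClobRefParseExample {
  public static void main(String[] args) {
    // External form: 'externalLob(lf,<file>,<offset>,<length>)'.
    ClobRef external = ClobRef.parse("externalLob(lf,part-00000.lob,0,512)");
    // Anything else is kept as inline CLOB text.
    ClobRef inline = ClobRef.parse("some inline text");
    System.out.println(external + " / " + inline);
  }
}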

View File: com/cloudera/sqoop/lib/DelimiterSet.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,33 +15,24 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
/** /**
* Encapsulates a set of delimiters used to encode a record. * Encapsulates a set of delimiters used to encode a record.
* @deprecated use org.apache.sqoop.lib.DelimiterSet instead.
* @see org.apache.sqoop.lib.DelimiterSet
*/ */
public class DelimiterSet implements Cloneable {
public static final char NULL_CHAR = '\000';
public class DelimiterSet extends org.apache.sqoop.lib.DelimiterSet {
public static final char NULL_CHAR =
org.apache.sqoop.lib.DelimiterSet.NULL_CHAR;
private char fieldDelim; // fields terminated by this.
private char recordDelim; // records terminated by this.
// If these next two fields are '\000', then they are ignored.
private char enclosedBy;
private char escapedBy;
// If true, then the enclosed-by character is applied to every
// field, not just ones containing embedded delimiters.
private boolean encloseRequired;
/** /**
* Create a delimiter set with the default delimiters * Create a delimiter set with the default delimiters
* (comma for fields, newline for records). * (comma for fields, newline for records).
*/ */
public DelimiterSet() { public DelimiterSet() {
this(',', '\n', NULL_CHAR, NULL_CHAR, false);
super();
} }
/** /**
@ -57,152 +46,7 @@ public DelimiterSet() {
*/ */
public DelimiterSet(char field, char record, char enclose, char escape, public DelimiterSet(char field, char record, char enclose, char escape,
boolean isEncloseRequired) { boolean isEncloseRequired) {
this.fieldDelim = field;
super(field, record, enclose, escape, isEncloseRequired);
this.recordDelim = record;
this.enclosedBy = enclose;
this.escapedBy = escape;
this.encloseRequired = isEncloseRequired;
}
/**
* Sets the fields-terminated-by character.
*/
public void setFieldsTerminatedBy(char f) {
this.fieldDelim = f;
}
/**
* @return the fields-terminated-by character.
*/
public char getFieldsTerminatedBy() {
return this.fieldDelim;
}
/**
* Sets the end-of-record lines-terminated-by character.
*/
public void setLinesTerminatedBy(char r) {
this.recordDelim = r;
}
/**
* @return the end-of-record (lines-terminated-by) character.
*/
public char getLinesTerminatedBy() {
return this.recordDelim;
}
/**
* Sets the enclosed-by character.
* @param e the enclosed-by character, or '\000' for no enclosing character.
*/
public void setEnclosedBy(char e) {
this.enclosedBy = e;
}
/**
* @return the enclosed-by character, or '\000' for none.
*/
public char getEnclosedBy() {
return this.enclosedBy;
}
/**
* Sets the escaped-by character.
* @param e the escaped-by character, or '\000' for no escape character.
*/
public void setEscapedBy(char e) {
this.escapedBy = e;
}
/**
* @return the escaped-by character, or '\000' for none.
*/
public char getEscapedBy() {
return this.escapedBy;
}
/**
* Set whether the enclosed-by character must be applied to all fields,
* or only fields with embedded delimiters.
*/
public void setEncloseRequired(boolean required) {
this.encloseRequired = required;
}
/**
* @return true if the enclosed-by character must be applied to all fields,
* or false if it's only used for fields with embedded delimiters.
*/
public boolean isEncloseRequired() {
return this.encloseRequired;
}
@Override
/**
* @return a string representation of the delimiters.
*/
public String toString() {
return "fields=" + this.fieldDelim
+ " records=" + this.recordDelim
+ " escape=" + this.escapedBy
+ " enclose=" + this.enclosedBy
+ " required=" + this.encloseRequired;
}
/**
* Format this set of delimiters as a call to the constructor for
* this object, that would generate identical delimiters.
* @return a String that can be embedded in generated code that
* provides this set of delimiters.
*/
public String formatConstructor() {
return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
+ "(char) " + (int) this.recordDelim + ", "
+ "(char) " + (int) this.enclosedBy + ", "
+ "(char) " + (int) this.escapedBy + ", "
+ this.encloseRequired + ")";
}
@Override
/**
* @return a hash code for this set of delimiters.
*/
public int hashCode() {
return (int) this.fieldDelim
+ (((int) this.recordDelim) << 4)
+ (((int) this.escapedBy) << 8)
+ (((int) this.enclosedBy) << 12)
+ (((int) this.recordDelim) << 16)
+ (this.encloseRequired ? 0xFEFE : 0x7070);
}
@Override
/**
* @return true if this delimiter set is the same as another set of
* delimiters.
*/
public boolean equals(Object other) {
if (null == other) {
return false;
} else if (!other.getClass().equals(getClass())) {
return false;
}
DelimiterSet set = (DelimiterSet) other;
return this.fieldDelim == set.fieldDelim
&& this.recordDelim == set.recordDelim
&& this.escapedBy == set.escapedBy
&& this.enclosedBy == set.enclosedBy
&& this.encloseRequired == set.encloseRequired;
}
@Override
/**
* @return a new copy of this same set of delimiters.
*/
public Object clone() throws CloneNotSupportedException {
return super.clone();
} }
/** /**
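
Every accessor and helper shown above now lives in org.apache.sqoop.lib.DelimiterSet and is inherited by this deprecated subclass, so generated code that builds delimiter sets keeps working. A small sketch, assuming toString() and formatConstructor() are inherited with the behaviour of the removed body:

import com.cloudera.sqoop.lib.DelimiterSet;

public class DelimiterSetExample {
  public static void main(String[] args) {
    // Fields split on ',', records on '\n', optional '"' enclosing,
    // '\\' escaping, and enclosing applied only when a field needs it.
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    System.out.println(delims);                      // readable summary
    System.out.println(delims.formatConstructor());  // code-generation form
  }
}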

View File: com/cloudera/sqoop/lib/FieldFormatter.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,11 +15,13 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
/** /**
* Static helper class that will help format data with quotes and escape chars. * Static helper class that will help format data with quotes and escape chars.
*
* @deprecated use org.apache.sqoop.lib.FieldFormatter instead.
* @see org.apache.sqoop.lib.FieldFormatter
*/ */
public final class FieldFormatter { public final class FieldFormatter {
@ -35,7 +35,8 @@ private FieldFormatter() { }
*/ */
public static String hiveStringDropDelims(String str, public static String hiveStringDropDelims(String str,
DelimiterSet delimiters) { DelimiterSet delimiters) {
return hiveStringReplaceDelims(str, "", delimiters);
return org.apache.sqoop.lib.FieldFormatter.hiveStringDropDelims(
str, delimiters);
} }
/** /**
@ -47,8 +48,8 @@ public static String hiveStringDropDelims(String str,
*/ */
public static String hiveStringReplaceDelims(String str, String replacement, public static String hiveStringReplaceDelims(String str, String replacement,
DelimiterSet delimiters) { DelimiterSet delimiters) {
String droppedDelims = str.replaceAll("\\n|\\r|\01", replacement);
return escapeAndEnclose(droppedDelims, delimiters);
return org.apache.sqoop.lib.FieldFormatter.hiveStringReplaceDelims(
str, replacement, delimiters);
} }
/** /**
@ -73,68 +74,7 @@ public static String hiveStringReplaceDelims(String str, String replacement,
* @return the escaped, enclosed version of 'str'. * @return the escaped, enclosed version of 'str'.
*/ */
public static String escapeAndEnclose(String str, DelimiterSet delimiters) { public static String escapeAndEnclose(String str, DelimiterSet delimiters) {
char escape = delimiters.getEscapedBy();
return org.apache.sqoop.lib.FieldFormatter.escapeAndEnclose(
str, delimiters);
char enclose = delimiters.getEnclosedBy();
boolean encloseRequired = delimiters.isEncloseRequired();
// true if we can use an escape character.
boolean escapingLegal = DelimiterSet.NULL_CHAR != escape;
String withEscapes;
if (null == str) {
return null;
}
if (escapingLegal) {
// escaping is legal. Escape any instances of the escape char itself.
withEscapes = str.replace("" + escape, "" + escape + escape);
} else {
// no need to double-escape
withEscapes = str;
}
if (DelimiterSet.NULL_CHAR == enclose) {
// The enclose-with character was left unset, so we can't enclose items.
if (escapingLegal) {
// If the user has used the fields-terminated-by or
// lines-terminated-by characters in the string, escape them if we
// have an escape character.
String fields = "" + delimiters.getFieldsTerminatedBy();
String lines = "" + delimiters.getLinesTerminatedBy();
withEscapes = withEscapes.replace(fields, "" + escape + fields);
withEscapes = withEscapes.replace(lines, "" + escape + lines);
}
// No enclosing possible, so now return this.
return withEscapes;
}
// if we have an enclosing character, and escaping is legal, then the
// encloser must always be escaped.
if (escapingLegal) {
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
}
boolean actuallyDoEnclose = encloseRequired;
if (!actuallyDoEnclose) {
// check if the string requires enclosing.
char [] mustEncloseFor = new char[2];
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
for (char reason : mustEncloseFor) {
if (str.indexOf(reason) != -1) {
actuallyDoEnclose = true;
break;
}
}
}
if (actuallyDoEnclose) {
return "" + enclose + withEscapes + enclose;
} else {
return withEscapes;
}
} }
} }
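
escapeAndEnclose() keeps the semantics spelled out in the removed body: the escape and enclose characters are escaped, and a field is enclosed only when it contains a field or record delimiter or when enclosing is required. A sketch:

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.FieldFormatter;

public class FieldFormatterExample {
  public static void main(String[] args) {
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    // Contains the field delimiter, so it comes back enclosed: "a,b"
    System.out.println(FieldFormatter.escapeAndEnclose("a,b", delims));
    // No embedded delimiter and enclosing not required: returned unchanged.
    System.out.println(FieldFormatter.escapeAndEnclose("plain", delims));
  }
}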

View File: com/cloudera/sqoop/lib/FieldMapProcessor.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,24 +15,15 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.IOException;
/** /**
* Interface implemented by classes that process FieldMappable objects. * Interface implemented by classes that process FieldMappable objects.
*
* @deprecated use org.apache.sqoop.lib.FieldMapProcessor instead.
* @see org.apache.sqoop.lib.FieldMapProcessor
*/ */
public interface FieldMapProcessor {
public interface FieldMapProcessor
extends org.apache.sqoop.lib.FieldMapProcessor {
/**
* Allow arbitrary processing of a FieldMappable object.
* @param record an object which can emit a map of its field names to values.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
void accept(FieldMappable record) throws IOException, ProcessingException;
} }

View File: com/cloudera/sqoop/lib/FieldMappable.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,22 +15,16 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.util.Map;
/** /**
* Interface describing a class capable of returning a map of the fields * Interface describing a class capable of returning a map of the fields
* of the object to their values. * of the object to their values.
*
* @deprecated use org.apache.sqoop.lib.FieldMappable instead.
* @see org.apache.sqoop.lib.FieldMappable
*/ */
public interface FieldMappable {
public interface FieldMappable extends org.apache.sqoop.lib.FieldMappable {
/**
* Returns a map containing all fields of this record.
* @return a map from column names to the object-based values for
* this record. The map may not be null, though it may be empty.
*/
Map<String, Object> getFieldMap();
} }
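
Both interfaces are now empty extensions of their org.apache.sqoop.lib counterparts, so the methods they used to declare are inherited and existing implementations keep compiling. As an illustration only (the helper class is invented), consuming the field map a record exposes:

import java.util.Map;
import com.cloudera.sqoop.lib.FieldMappable;

public class FieldMapDump {
  /** Print every column/value pair exposed by a FieldMappable record. */
  public static void dump(FieldMappable record) {
    for (Map.Entry<String, Object> e : record.getFieldMap().entrySet()) {
      System.out.println(e.getKey() + " = " + e.getValue());
    }
  }
}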

View File: com/cloudera/sqoop/lib/JdbcWritableBridge.java

@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,10 +15,8 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import org.apache.hadoop.io.BytesWritable;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.sql.Date; import java.sql.Date;
import java.sql.PreparedStatement; import java.sql.PreparedStatement;
@ -29,228 +25,163 @@
import java.sql.Time; import java.sql.Time;
import java.sql.Timestamp; import java.sql.Timestamp;
import org.apache.hadoop.io.BytesWritable;
/** /**
* Contains a set of methods which can read db columns from a ResultSet into * Contains a set of methods which can read db columns from a ResultSet into
* Java types, and do serialization of these types to/from DataInput/DataOutput * Java types, and do serialization of these types to/from DataInput/DataOutput
* for use with Hadoop's Writable implementation. This supports null values * for use with Hadoop's Writable implementation. This supports null values
* for all types. * for all types.
*
* @deprecated use org.apache.sqoop.lib.JdbcWritableBridge instead.
* @see org.apache.sqoop.lib.JdbcWritableBridge
*/ */
public final class JdbcWritableBridge { public final class JdbcWritableBridge {
// Currently, cap BLOB/CLOB objects at 16 MB until we can use external // Currently, cap BLOB/CLOB objects at 16 MB until we can use external
// storage. // storage.
public static final long MAX_BLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_CLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_BLOB_LENGTH =
org.apache.sqoop.lib.JdbcWritableBridge.MAX_BLOB_LENGTH;
public static final long MAX_CLOB_LENGTH =
org.apache.sqoop.lib.JdbcWritableBridge.MAX_CLOB_LENGTH;
private JdbcWritableBridge() { private JdbcWritableBridge() {
} }
public static Integer readInteger(int colNum, ResultSet r) public static Integer readInteger(int colNum, ResultSet r)
throws SQLException { throws SQLException {
int val;
return org.apache.sqoop.lib.JdbcWritableBridge.readInteger(colNum, r);
val = r.getInt(colNum);
if (r.wasNull()) {
return null;
} else {
return Integer.valueOf(val);
}
} }
public static Long readLong(int colNum, ResultSet r) throws SQLException { public static Long readLong(int colNum, ResultSet r) throws SQLException {
long val;
return org.apache.sqoop.lib.JdbcWritableBridge.readLong(colNum, r);
val = r.getLong(colNum);
if (r.wasNull()) {
return null;
} else {
return Long.valueOf(val);
}
} }
public static String readString(int colNum, ResultSet r) throws SQLException { public static String readString(int colNum, ResultSet r) throws SQLException {
return r.getString(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readString(colNum, r);
} }
public static Float readFloat(int colNum, ResultSet r) throws SQLException { public static Float readFloat(int colNum, ResultSet r) throws SQLException {
float val;
return org.apache.sqoop.lib.JdbcWritableBridge.readFloat(colNum, r);
val = r.getFloat(colNum);
if (r.wasNull()) {
return null;
} else {
return Float.valueOf(val);
}
} }
public static Double readDouble(int colNum, ResultSet r) throws SQLException { public static Double readDouble(int colNum, ResultSet r) throws SQLException {
double val;
return org.apache.sqoop.lib.JdbcWritableBridge.readDouble(colNum, r);
val = r.getDouble(colNum);
if (r.wasNull()) {
return null;
} else {
return Double.valueOf(val);
}
} }
public static Boolean readBoolean(int colNum, ResultSet r) public static Boolean readBoolean(int colNum, ResultSet r)
throws SQLException { throws SQLException {
boolean val;
return org.apache.sqoop.lib.JdbcWritableBridge.readBoolean(colNum, r);
val = r.getBoolean(colNum);
if (r.wasNull()) {
return null;
} else {
return Boolean.valueOf(val);
}
} }
public static Time readTime(int colNum, ResultSet r) throws SQLException { public static Time readTime(int colNum, ResultSet r) throws SQLException {
return r.getTime(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readTime(colNum, r);
} }
public static Timestamp readTimestamp(int colNum, ResultSet r) public static Timestamp readTimestamp(int colNum, ResultSet r)
throws SQLException { throws SQLException {
return r.getTimestamp(colNum);
return org.apache.sqoop.lib.JdbcWritableBridge.readTimestamp(colNum, r);
} }
public static Date readDate(int colNum, ResultSet r) throws SQLException { public static Date readDate(int colNum, ResultSet r) throws SQLException {
return r.getDate(colNum); return org.apache.sqoop.lib.JdbcWritableBridge.readDate(colNum, r);
} }
public static BytesWritable readBytesWritable(int colNum, ResultSet r) public static BytesWritable readBytesWritable(int colNum, ResultSet r)
throws SQLException { throws SQLException {
byte [] bytes = r.getBytes(colNum); return org.apache.sqoop.lib.JdbcWritableBridge.readBytesWritable(colNum, r);
return bytes == null ? null : new BytesWritable(bytes);
} }
public static BigDecimal readBigDecimal(int colNum, ResultSet r) public static BigDecimal readBigDecimal(int colNum, ResultSet r)
throws SQLException { throws SQLException {
return r.getBigDecimal(colNum); return org.apache.sqoop.lib.JdbcWritableBridge.readBigDecimal(colNum, r);
} }
public static BlobRef readBlobRef(int colNum, ResultSet r) public static BlobRef readBlobRef(int colNum, ResultSet r)
throws SQLException { throws SQLException {
// Loading of BLOBs is delayed; handled by LargeObjectLoader. return org.apache.sqoop.lib.JdbcWritableBridge.readBlobRef(colNum, r);
return null;
} }
public static ClobRef readClobRef(int colNum, ResultSet r) public static ClobRef readClobRef(int colNum, ResultSet r)
throws SQLException { throws SQLException {
// Loading of CLOBs is delayed; handled by LargeObjectLoader. return org.apache.sqoop.lib.JdbcWritableBridge.readClobRef(colNum, r);
return null;
} }
public static void writeInteger(Integer val, int paramIdx, int sqlType, public static void writeInteger(Integer val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeInteger(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setInt(paramIdx, val);
}
} }
public static void writeLong(Long val, int paramIdx, int sqlType, public static void writeLong(Long val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeLong(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setLong(paramIdx, val);
}
} }
public static void writeDouble(Double val, int paramIdx, int sqlType, public static void writeDouble(Double val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeDouble(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setDouble(paramIdx, val);
}
} }
public static void writeBoolean(Boolean val, int paramIdx, int sqlType, public static void writeBoolean(Boolean val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeBoolean(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setBoolean(paramIdx, val);
}
} }
public static void writeFloat(Float val, int paramIdx, int sqlType, public static void writeFloat(Float val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeFloat(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setFloat(paramIdx, val);
}
} }
public static void writeString(String val, int paramIdx, int sqlType, public static void writeString(String val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeString(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setString(paramIdx, val);
}
} }
public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType, public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeTimestamp(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setTimestamp(paramIdx, val);
}
} }
public static void writeTime(Time val, int paramIdx, int sqlType, public static void writeTime(Time val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeTime(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setTime(paramIdx, val);
}
} }
public static void writeDate(Date val, int paramIdx, int sqlType, public static void writeDate(Date val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException { PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeDate(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setDate(paramIdx, val);
}
} }
public static void writeBytesWritable(BytesWritable val, int paramIdx, public static void writeBytesWritable(BytesWritable val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException { int sqlType, PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeBytesWritable(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
// val.getBytes() is only valid in [0, len)
byte [] rawBytes = val.getBytes();
int len = val.getLength();
byte [] outBytes = new byte[len];
System.arraycopy(rawBytes, 0, outBytes, 0, len);
s.setBytes(paramIdx, outBytes);
}
} }
public static void writeBigDecimal(BigDecimal val, int paramIdx, public static void writeBigDecimal(BigDecimal val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException { int sqlType, PreparedStatement s) throws SQLException {
if (null == val) { org.apache.sqoop.lib.JdbcWritableBridge.writeBigDecimal(
s.setNull(paramIdx, sqlType); val, paramIdx, sqlType, s);
} else {
s.setBigDecimal(paramIdx, val);
}
} }
public static void writeBlobRef(BlobRef val, int paramIdx, public static void writeBlobRef(BlobRef val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException { int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this. org.apache.sqoop.lib.JdbcWritableBridge.writeBlobRef(
throw new RuntimeException("Unsupported: Cannot export BLOB data"); val, paramIdx, sqlType, s);
} }
public static void writeClobRef(ClobRef val, int paramIdx, public static void writeClobRef(ClobRef val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException { int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this. org.apache.sqoop.lib.JdbcWritableBridge.writeClobRef(
throw new RuntimeException("Unsupported: Cannot export CLOB data"); val, paramIdx, sqlType, s);
} }
} }
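To make the bridge's role concrete, here is a minimal, hedged sketch of a caller; the table and column names are hypothetical, and only the Integer read/write pair shown above is exercised:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;

import com.cloudera.sqoop.lib.JdbcWritableBridge;

public class JdbcWritableBridgeSketch {
  // Copies one nullable INTEGER column from a query result into an insert,
  // letting the bridge map SQL NULL to a Java null and back again.
  public static void copyFirstId(Connection src, Connection dst)
      throws SQLException {
    Statement st = src.createStatement();
    ResultSet rs = st.executeQuery("SELECT id FROM example_src");        // hypothetical table
    PreparedStatement ps =
        dst.prepareStatement("INSERT INTO example_dst (id) VALUES (?)"); // hypothetical table
    try {
      if (rs.next()) {
        Integer id = JdbcWritableBridge.readInteger(1, rs); // null when the column was SQL NULL
        JdbcWritableBridge.writeInteger(id, 1, Types.INTEGER, ps);
        ps.executeUpdate();
      }
    } finally {
      rs.close();
      st.close();
      ps.close();
    }
  }
}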


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,26 +15,12 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.Closeable;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.util.TaskId;
/** /**
* Contains a set of methods which can read db columns from a ResultSet into * Contains a set of methods which can read db columns from a ResultSet into
@ -47,26 +31,18 @@
* This is a singleton instance class; only one may exist at a time. * This is a singleton instance class; only one may exist at a time.
* However, its lifetime is limited to the current TaskInputOutputContext's * However, its lifetime is limited to the current TaskInputOutputContext's
* life. * life.
*
* @deprecated use org.apache.sqoop.lib.LargeObjectLoader instead.
* @see org.apache.sqoop.lib.LargeObjectLoader
*/ */
public class LargeObjectLoader implements Closeable { public class LargeObjectLoader extends org.apache.sqoop.lib.LargeObjectLoader {
// Spill to external storage for BLOB/CLOB objects > 16 MB. // Spill to external storage for BLOB/CLOB objects > 16 MB.
public static final long DEFAULT_MAX_LOB_LENGTH = 16 * 1024 * 1024; public static final long DEFAULT_MAX_LOB_LENGTH =
org.apache.sqoop.lib.LargeObjectLoader.DEFAULT_MAX_LOB_LENGTH;
public static final String MAX_INLINE_LOB_LEN_KEY = public static final String MAX_INLINE_LOB_LEN_KEY =
"sqoop.inline.lob.length.max"; org.apache.sqoop.lib.LargeObjectLoader.MAX_INLINE_LOB_LEN_KEY;
private Configuration conf;
private Path workPath;
private FileSystem fs;
// Handles to the open BLOB / CLOB file writers.
private LobFile.Writer curBlobWriter;
private LobFile.Writer curClobWriter;
// Counter that is used with the current task attempt id to
// generate unique LOB file names.
private long nextLobFileId = 0;
/** /**
* Create a new LargeObjectLoader. * Create a new LargeObjectLoader.
@ -75,246 +51,6 @@ public class LargeObjectLoader implements Closeable {
*/ */
public LargeObjectLoader(Configuration conf, Path workPath) public LargeObjectLoader(Configuration conf, Path workPath)
throws IOException { throws IOException {
this.conf = conf; super(conf, workPath);
this.workPath = workPath;
this.fs = FileSystem.get(conf);
this.curBlobWriter = null;
this.curClobWriter = null;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
@Override
public void close() throws IOException {
if (null != curBlobWriter) {
curBlobWriter.close();
curBlobWriter = null;
}
if (null != curClobWriter) {
curClobWriter.close();
curClobWriter = null;
}
}
/**
* @return a filename to use to put an external LOB in.
*/
private String getNextLobFileName() {
String file = "_lob/large_obj_" + TaskId.get(conf, "unknown_task_id")
+ nextLobFileId + ".lob";
nextLobFileId++;
return file;
}
/**
* Calculates a path to a new LobFile object, creating any
* missing directories.
* @return a Path to a LobFile to write
*/
private Path getNextLobFilePath() throws IOException {
Path p = new Path(workPath, getNextLobFileName());
Path parent = p.getParent();
if (!fs.exists(parent)) {
fs.mkdirs(parent);
}
return p;
}
/**
* @return the current LobFile writer for BLOBs, creating one if necessary.
*/
private LobFile.Writer getBlobWriter() throws IOException {
if (null == this.curBlobWriter) {
this.curBlobWriter = LobFile.create(getNextLobFilePath(), conf, false);
}
return this.curBlobWriter;
}
/**
* @return the current LobFile writer for CLOBs, creating one if necessary.
*/
private LobFile.Writer getClobWriter() throws IOException {
if (null == this.curClobWriter) {
this.curClobWriter = LobFile.create(getNextLobFilePath(), conf, true);
}
return this.curClobWriter;
}
/**
* Returns the path being written to by a given LobFile.Writer, relative
* to the working directory of this LargeObjectLoader.
* @param w the LobFile.Writer whose path should be examined.
* @return the path this is writing to, relative to the current working dir.
*/
private String getRelativePath(LobFile.Writer w) {
Path writerPath = w.getPath();
String writerPathStr = writerPath.toString();
String workPathStr = workPath.toString();
if (!workPathStr.endsWith(File.separator)) {
workPathStr = workPathStr + File.separator;
}
if (writerPathStr.startsWith(workPathStr)) {
return writerPathStr.substring(workPathStr.length());
}
// Outside the working dir; return the whole thing.
return writerPathStr;
}
/**
* Copies all character data from the provided Reader to the provided
* Writer. Does not close handles when it's done.
* @param reader data source
* @param writer data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(Reader reader, Writer writer) throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
char [] buf = new char[bufferSize];
while (true) {
int charsRead = reader.read(buf);
if (-1 == charsRead) {
break; // no more stream to read.
}
writer.write(buf, 0, charsRead);
}
}
/**
* Copies all byte data from the provided InputStream to the provided
* OutputStream. Does not close handles when it's done.
* @param input data source
* @param output data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(InputStream input, OutputStream output)
throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
byte [] buf = new byte[bufferSize];
while (true) {
int bytesRead = input.read(buf, 0, bufferSize);
if (-1 == bytesRead) {
break; // no more stream to read.
}
output.write(buf, 0, bytesRead);
}
}
/**
* Actually read a BlobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a BlobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public BlobRef readBlobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Blob b = r.getBlob(colNum);
if (null == b) {
return null;
} else if (b.length() > maxInlineLobLen) {
// Deserialize very large BLOBs into separate files.
long len = b.length();
LobFile.Writer lobWriter = getBlobWriter();
long recordOffset = lobWriter.tell();
InputStream is = null;
OutputStream os = lobWriter.writeBlobRecord(len);
try {
is = b.getBinaryStream();
copyAll(is, os);
} finally {
if (null != os) {
os.close();
}
if (null != is) {
is.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new BlobRef(getRelativePath(curBlobWriter), recordOffset, len);
} else {
// This is a 1-based array.
return new BlobRef(b.getBytes(1, (int) b.length()));
}
}
/**
* Actually read a ClobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a ClobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public ClobRef readClobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Clob c = r.getClob(colNum);
if (null == c) {
return null;
} else if (c.length() > maxInlineLobLen) {
// Deserialize large CLOB into separate file.
long len = c.length();
LobFile.Writer lobWriter = getClobWriter();
long recordOffset = lobWriter.tell();
Reader reader = null;
Writer w = lobWriter.writeClobRecord(len);
try {
reader = c.getCharacterStream();
copyAll(reader, w);
} finally {
if (null != w) {
w.close();
}
if (null != reader) {
reader.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new ClobRef(getRelativePath(lobWriter), recordOffset, len);
} else {
// This is a 1-based array.
return new ClobRef(c.getSubString(1, (int) c.length()));
}
} }
} }
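A rough usage sketch under stated assumptions (the work path and the 1 MB inline threshold are illustrative, not values taken from this patch): the loader reads a BLOB column from the current ResultSet row, keeping values up to the configured limit inline and spilling larger ones to a LobFile under the work path.

import java.sql.ResultSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.LargeObjectLoader;

public class LargeObjectLoaderSketch {
  public static BlobRef loadBlobColumn(Configuration conf, ResultSet rs, int col)
      throws Exception {
    // Keep LOBs up to 1 MB inline; anything larger spills to external storage.
    conf.setLong(LargeObjectLoader.MAX_INLINE_LOB_LEN_KEY, 1024 * 1024);
    LargeObjectLoader loader =
        new LargeObjectLoader(conf, new Path("/tmp/sqoop-lob-work"));  // hypothetical work dir
    try {
      return loader.readBlobRef(col, rs);  // inline BlobRef or a reference to a spill file
    } finally {
      loader.close();
    }
  }
}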


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,28 +15,11 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.io.LobReaderCache;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/** /**
* Abstract base class that holds a reference to a Blob or a Clob. * Abstract base class that holds a reference to a Blob or a Clob.
@ -46,284 +27,28 @@
* CONTAINERTYPE is the type used to hold this data (e.g., BytesWritable). * CONTAINERTYPE is the type used to hold this data (e.g., BytesWritable).
* ACCESSORTYPE is the type used to access this data in a streaming fashion * ACCESSORTYPE is the type used to access this data in a streaming fashion
* (either an InputStream or a Reader). * (either an InputStream or a Reader).
*
* @deprecated use org.apache.sqoop.lib.LobRef instead.
* @see org.apache.sqoop.lib.LobRef
*/ */
public abstract class LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> public abstract class LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>
implements Closeable, Writable { extends org.apache.sqoop.lib.LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> {
public static final Log LOG = LogFactory.getLog(LobRef.class.getName()); public static final Log LOG = org.apache.sqoop.lib.LobRef.LOG;
protected LobRef() { protected LobRef() {
this.fileName = null; super();
this.offset = 0;
this.length = 0;
this.realData = null;
} }
protected LobRef(CONTAINERTYPE container) { protected LobRef(CONTAINERTYPE container) {
this.fileName = null; super(container);
this.offset = 0;
this.length = 0;
this.realData = container;
} }
protected LobRef(String file, long offset, long length) { protected LobRef(String file, long offset, long length) {
this.fileName = file; super(file, offset, length);
this.offset = offset;
this.length = length;
this.realData = null;
} }
// If the data is 'small', it's held directly, here.
private CONTAINERTYPE realData;
/** Internal API to retrieve the data object. */
protected CONTAINERTYPE getDataObj() {
return realData;
}
/** Internal API to set the data object. */
protected void setDataObj(CONTAINERTYPE data) {
this.realData = data;
}
// If the data is too large to materialize fully, it's written into a file
// whose path (relative to the rest of the dataset) is recorded here. This
// takes precedence if the value of fileName is non-null. These records are
// currently written into LobFile-formatted files, which hold multiple
// records. The starting offset and length of the record are recorded here
// as well.
private String fileName;
private long offset;
private long length;
// If we've opened a LobFile object, track our reference to it here.
private LobFile.Reader lobReader;
@Override
@SuppressWarnings("unchecked")
/**
* Clone the current reference object. data is deep-copied; any open
* file handle remains with the original only.
*/
public Object clone() throws CloneNotSupportedException {
LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> r =
(LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>) super.clone();
r.lobReader = null; // Reference to opened reader is not duplicated.
if (null != realData) {
r.realData = deepCopyData(realData);
}
return r;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
// Discard any open LobReader.
if (null != this.lobReader) {
LobReaderCache.getCache().recycle(this.lobReader);
}
}
/**
* @return true if the LOB data is in an external file; false if
* it materialized inline.
*/
public boolean isExternal() {
return fileName != null;
}
/**
* Convenience method to access #getDataStream(Configuration, Path)
* from within a map task that read this LobRef from a file-based
* InputSplit.
* @param mapContext the Mapper.Context instance that encapsulates
* the current map task.
* @return an object that lazily streams the record to the client.
* @throws IllegalArgumentException if it cannot find the source
* path for this LOB based on the MapContext.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Mapper.Context mapContext)
throws IOException {
InputSplit split = mapContext.getInputSplit();
if (split instanceof FileSplit) {
Path basePath = ((FileSplit) split).getPath().getParent();
return getDataStream(mapContext.getConfiguration(),
basePath);
} else {
throw new IllegalArgumentException(
"Could not ascertain LOB base path from MapContext.");
}
}
/**
* Get access to the LOB data itself.
* This method returns a lazy reader of the LOB data, accessing the
* filesystem for external LOB storage as necessary.
* @param conf the Configuration used to access the filesystem
* @param basePath the base directory where the table records are
* stored.
* @return an object that lazily streams the record to the client.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Configuration conf, Path basePath)
throws IOException {
if (isExternal()) {
// Read from external storage.
Path pathToRead = LobReaderCache.qualify(
new Path(basePath, fileName), conf);
LOG.debug("Retreving data stream from external path: " + pathToRead);
if (lobReader != null) {
// We already have a reader open to a LobFile. Is it the correct file?
if (!pathToRead.equals(lobReader.getPath())) {
// No. Close this.lobReader and get the correct one.
LOG.debug("Releasing previous external reader for "
+ lobReader.getPath());
LobReaderCache.getCache().recycle(lobReader);
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
} else {
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
// We now have a LobFile.Reader associated with the correct file. Get to
// the correct offset and return an InputStream/Reader to the user.
if (lobReader.tell() != offset) {
LOG.debug("Seeking to record start offset " + offset);
lobReader.seek(offset);
}
if (!lobReader.next()) {
throw new IOException("Could not locate record at " + pathToRead
+ ":" + offset);
}
return getExternalSource(lobReader);
} else {
// This data is already materialized in memory; wrap it and return.
return getInternalSource(realData);
}
}
/**
* Using the LobFile reader, get an accessor InputStream or Reader to the
* underlying data.
*/
protected abstract ACCESSORTYPE getExternalSource(LobFile.Reader reader)
throws IOException;
/**
* Wrap the materialized data in an InputStream or Reader.
*/
protected abstract ACCESSORTYPE getInternalSource(CONTAINERTYPE data);
/**
* @return the materialized data itself.
*/
protected abstract DATATYPE getInternalData(CONTAINERTYPE data);
/**
* Make a copy of the materialized data.
*/
protected abstract CONTAINERTYPE deepCopyData(CONTAINERTYPE data);
public DATATYPE getData() {
if (isExternal()) {
throw new RuntimeException(
"External LOBs must be read via getDataStream()");
}
return getInternalData(realData);
}
@Override
public String toString() {
if (isExternal()) {
return "externalLob(lf," + fileName + "," + Long.toString(offset)
+ "," + Long.toString(length) + ")";
} else {
return realData.toString();
}
}
@Override
public void readFields(DataInput in) throws IOException {
// The serialization format for this object is:
// boolean isExternal
// if true, then:
// a string identifying the external storage type
// and external-storage-specific data.
// if false, then we use readFieldsInternal() to allow BlobRef/ClobRef
// to serialize as it sees fit.
//
// Currently the only external storage supported is LobFile, identified
// by the string "lf". This serializes with the filename (as a string),
// followed by a long-valued offset and a long-valued length.
boolean isExternal = in.readBoolean();
if (isExternal) {
this.realData = null;
String storageType = Text.readString(in);
if (!storageType.equals("lf")) {
throw new IOException("Unsupported external LOB storage code: "
+ storageType);
}
// Storage type "lf" is LobFile: filename, offset, length.
this.fileName = Text.readString(in);
this.offset = in.readLong();
this.length = in.readLong();
} else {
readFieldsInternal(in);
this.fileName = null;
this.offset = 0;
this.length = 0;
}
}
/**
* Perform the readFields() operation on a fully-materializable record.
* @param in the DataInput to deserialize from.
*/
protected abstract void readFieldsInternal(DataInput in) throws IOException;
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(isExternal());
if (isExternal()) {
Text.writeString(out, "lf"); // storage type "lf" for LobFile.
Text.writeString(out, fileName);
out.writeLong(offset);
out.writeLong(length);
} else {
writeInternal(out);
}
}
/**
* Perform the write() operation on a fully-materializable record.
* @param out the DataOutput to deserialize to.
*/
protected abstract void writeInternal(DataOutput out) throws IOException;
protected static final ThreadLocal<Matcher> EXTERNAL_MATCHER = protected static final ThreadLocal<Matcher> EXTERNAL_MATCHER =
new ThreadLocal<Matcher>() { org.apache.sqoop.lib.LobRef.EXTERNAL_MATCHER;
@Override protected Matcher initialValue() {
Pattern externalPattern = Pattern.compile(
"externalLob\\(lf,(.*),([0-9]+),([0-9]+)\\)");
return externalPattern.matcher("");
}
};
} }
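As a hedged illustration of the accessor API described above (BLOB data streams as an InputStream; the dataset base path here is a hypothetical placeholder), a consumer can stay oblivious to whether the value was stored inline or in an external LobFile:

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import com.cloudera.sqoop.lib.BlobRef;

public class LobRefSketch {
  // Counts the bytes of a BLOB field regardless of whether it was
  // materialized inline or spilled to an external LobFile.
  public static long countBytes(BlobRef blob, Configuration conf)
      throws IOException {
    Path basePath = new Path("/tmp/sqoop-records");  // hypothetical dataset directory
    long total = 0;
    InputStream in = blob.getDataStream(conf, basePath);
    try {
      byte[] buf = new byte[4096];
      int n;
      while ((n = in.read(buf)) != -1) {
        total += n;
      }
    } finally {
      in.close();
    }
    return total;
  }
}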


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,7 +15,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.DataInput; import java.io.DataInput;
@ -26,6 +23,9 @@
/** /**
* Serialize LOB classes to/from DataInput and DataOutput objects. * Serialize LOB classes to/from DataInput and DataOutput objects.
*
* @deprecated use org.apache.sqoop.lib.LobSerializer instead.
* @see org.apache.sqoop.lib.LobSerializer
*/ */
public final class LobSerializer { public final class LobSerializer {
@ -33,23 +33,19 @@ private LobSerializer() { }
public static void writeClob(ClobRef clob, DataOutput out) public static void writeClob(ClobRef clob, DataOutput out)
throws IOException { throws IOException {
clob.write(out); org.apache.sqoop.lib.LobSerializer.writeClob(clob, out);
} }
public static void writeBlob(BlobRef blob, DataOutput out) public static void writeBlob(BlobRef blob, DataOutput out)
throws IOException { throws IOException {
blob.write(out); org.apache.sqoop.lib.LobSerializer.writeBlob(blob, out);
} }
public static ClobRef readClobFields(DataInput in) throws IOException { public static ClobRef readClobFields(DataInput in) throws IOException {
ClobRef clob = new ClobRef(); return org.apache.sqoop.lib.LobSerializer.readClobFields(in);
clob.readFields(in);
return clob;
} }
public static BlobRef readBlobFields(DataInput in) throws IOException { public static BlobRef readBlobFields(DataInput in) throws IOException {
BlobRef blob = new BlobRef(); return org.apache.sqoop.lib.LobSerializer.readBlobFields(in);
blob.readFields(in);
return blob;
} }
} }
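A minimal sketch of the write/read pairing, round-tripping an inline ClobRef through an in-memory byte stream:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import com.cloudera.sqoop.lib.ClobRef;
import com.cloudera.sqoop.lib.LobSerializer;

public class LobSerializerSketch {
  // Serializes a CLOB reference and reads it straight back.
  public static ClobRef roundTrip(ClobRef clob) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    LobSerializer.writeClob(clob, new DataOutputStream(bytes));
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    return LobSerializer.readClobFields(in);
  }
}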


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,14 +15,17 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
/** /**
* General error during processing of a SqoopRecord. * General error during processing of a SqoopRecord.
*
* @deprecated use org.apache.sqoop.lib.ProcessingException instead.
* @see org.apache.sqoop.lib.ProcessingException
*/ */
@SuppressWarnings("serial") @SuppressWarnings("serial")
public class ProcessingException extends Exception { public class ProcessingException
extends org.apache.sqoop.lib.ProcessingException {
public ProcessingException() { public ProcessingException() {
super("ProcessingException"); super("ProcessingException");
@ -41,10 +42,4 @@ public ProcessingException(final Throwable cause) {
public ProcessingException(final String message, final Throwable cause) { public ProcessingException(final String message, final Throwable cause) {
super(message, cause); super(message, cause);
} }
@Override
public String toString() {
String msg = getMessage();
return (null == msg) ? "ProcessingException" : msg;
}
} }


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,18 +15,9 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
/** /**
* Parses a record containing one or more fields. Fields are separated * Parses a record containing one or more fields. Fields are separated
@ -53,26 +42,25 @@
* The fields parsed by RecordParser are backed by an internal buffer * The fields parsed by RecordParser are backed by an internal buffer
* which is cleared when the next call to parseRecord() is made. If * which is cleared when the next call to parseRecord() is made. If
* the buffer is required to be preserved, you must copy it yourself. * the buffer is required to be preserved, you must copy it yourself.
*
* @deprecated use org.apache.sqoop.lib.RecordParser instead.
* @see org.apache.sqoop.lib.RecordParser
*/ */
public final class RecordParser { public final class RecordParser extends org.apache.sqoop.lib.RecordParser {
public static final Log LOG = LogFactory.getLog(RecordParser.class.getName()); public static final Log LOG = org.apache.sqoop.lib.RecordParser.LOG;
private enum ParseState {
FIELD_START,
ENCLOSED_FIELD,
UNENCLOSED_FIELD,
ENCLOSED_ESCAPE,
ENCLOSED_EXPECT_DELIMITER,
UNENCLOSED_ESCAPE
}
/** /**
* An error thrown when parsing fails. * An error thrown when parsing fails.
*
* @deprecated use org.apache.sqoop.lib.RecordParser.ParseError instead.
* @see org.apache.sqoop.lib.RecordParser.ParseError
*/ */
public static class ParseError extends Exception { public static class ParseError
extends org.apache.sqoop.lib.RecordParser.ParseError {
public ParseError() { public ParseError() {
super("ParseError"); super();
} }
public ParseError(final String msg) { public ParseError(final String msg) {
@ -88,273 +76,7 @@ public ParseError(final Throwable cause) {
} }
} }
private DelimiterSet delimiters;
private ArrayList<String> outputs;
public RecordParser(final DelimiterSet delimitersIn) { public RecordParser(final DelimiterSet delimitersIn) {
this.delimiters = delimitersIn.copy(); super(delimitersIn);
this.outputs = new ArrayList<String>();
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharSequence input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(Text input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
// TODO(aaron): The parser should be able to handle UTF-8 strings
// as well, to avoid this transcode operation.
return parseRecord(input.toString());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(byte [] input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(ByteBuffer.wrap(input).asCharBuffer());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(char [] input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
public List<String> parseRecord(ByteBuffer input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
return parseRecord(input.asCharBuffer());
}
// TODO(aaron): Refactor this method to be much shorter.
// CHECKSTYLE:OFF
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharBuffer input) throws ParseError {
if (null == input) {
throw new ParseError("null input string");
}
/*
This method implements the following state machine to perform
parsing.
Note that there are no restrictions on whether particular characters
(e.g., field-sep, record-sep, etc) are distinct or the same. The
state transitions are processed in the order seen in this comment.
Starting state is FIELD_START
encloser -> ENCLOSED_FIELD
escape char -> UNENCLOSED_ESCAPE
field delim -> FIELD_START (for a new field)
record delim -> stops processing
all other characters get added to the current field, -> UNENCLOSED_FIELD
ENCLOSED_FIELD state:
escape char goes to ENCLOSED_ESCAPE
encloser goes to ENCLOSED_EXPECT_DELIMITER
field sep or record sep gets added to the current string
normal letters get added to the current string
ENCLOSED_ESCAPE state:
any character seen here is added literally, back to ENCLOSED_FIELD
ENCLOSED_EXPECT_DELIMITER state:
field sep goes to FIELD_START
record sep halts processing.
all other characters are errors.
UNENCLOSED_FIELD state:
ESCAPE char goes to UNENCLOSED_ESCAPE
FIELD_SEP char goes to FIELD_START
RECORD_SEP char halts processing
normal chars or the enclosing char get added to the current string
UNENCLOSED_ESCAPE:
add character literal to current string, return to UNENCLOSED_FIELD
*/
char curChar = DelimiterSet.NULL_CHAR;
ParseState state = ParseState.FIELD_START;
int len = input.length();
StringBuilder sb = null;
outputs.clear();
char enclosingChar = delimiters.getEnclosedBy();
char fieldDelim = delimiters.getFieldsTerminatedBy();
char recordDelim = delimiters.getLinesTerminatedBy();
char escapeChar = delimiters.getEscapedBy();
boolean enclosingRequired = delimiters.isEncloseRequired();
for (int pos = 0; pos < len; pos++) {
curChar = input.get();
switch (state) {
case FIELD_START:
// ready to start processing a new field.
if (null != sb) {
// We finished processing a previous field. Add to the list.
outputs.add(sb.toString());
}
sb = new StringBuilder();
if (enclosingChar == curChar) {
// got an opening encloser.
state = ParseState.ENCLOSED_FIELD;
} else if (escapeChar == curChar) {
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we have a zero-length field. This is a no-op.
continue;
} else if (recordDelim == curChar) {
// we have a zero-length field, that ends processing.
pos = len;
} else {
// current char is part of the field.
state = ParseState.UNENCLOSED_FIELD;
sb.append(curChar);
if (enclosingRequired) {
throw new ParseError(
"Opening field-encloser expected at position " + pos);
}
}
break;
case ENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.ENCLOSED_ESCAPE;
} else if (enclosingChar == curChar) {
// we're at the end of the enclosing field. Expect an EOF or EOR char.
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
} else {
// this is a regular char, or an EOF / EOR inside an encloser. Add to
// the current field string, and remain in this state.
sb.append(curChar);
}
break;
case UNENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we're at the end of this field; may be the start of another one.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
pos = len; // terminate processing immediately.
} else {
// this is a regular char. Add to the current field string,
// and remain in this state.
sb.append(curChar);
}
break;
case ENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// enclosed field processing.
sb.append(curChar);
state = ParseState.ENCLOSED_FIELD;
break;
case ENCLOSED_EXPECT_DELIMITER:
// We were in an enclosed field, but got the final encloser. Now we
// expect either an end-of-field or an end-of-record.
if (fieldDelim == curChar) {
// end of one field is the beginning of the next.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
// stop processing.
pos = len;
} else {
// Don't know what to do with this character.
throw new ParseError("Expected delimiter at position " + pos);
}
break;
case UNENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// non-enclosed field processing.
sb.append(curChar);
state = ParseState.UNENCLOSED_FIELD;
break;
default:
throw new ParseError("Unexpected parser state: " + state);
}
}
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
// we hit an EOF/EOR as the last legal character and we need to mark
// that string as recorded. This if block is outside the for-loop since
// we don't have a physical 'epsilon' token in our string.
if (null != sb) {
outputs.add(sb.toString());
sb = new StringBuilder();
}
}
if (null != sb) {
// There was a field that terminated by running out of chars or an EOR
// character. Add to the list.
outputs.add(sb.toString());
}
return outputs;
}
// CHECKSTYLE:ON
public boolean isEnclosingRequired() {
return delimiters.isEncloseRequired();
}
@Override
public String toString() {
return "RecordParser[" + delimiters.toString() + "]";
}
@Override
public int hashCode() {
return this.delimiters.hashCode();
} }
} }
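A hedged sketch of the parser in use. The DelimiterSet constructor argument order (fields-terminated-by, lines-terminated-by, enclosed-by, escaped-by, enclose-required) is an assumption not shown in this patch:

import java.util.List;

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.RecordParser;

public class RecordParserSketch {
  public static void main(String[] args) throws RecordParser.ParseError {
    // Assumed argument order: field delim, record delim, encloser, escape, enclose-required.
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    RecordParser parser = new RecordParser(delims);
    List<String> fields = parser.parseRecord("1,\"Doe, Jane\",engineer\n");
    for (String field : fields) {
      System.out.println(field);  // prints: 1 / Doe, Jane / engineer (one per line)
    }
  }
}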


@ -1,6 +1,4 @@
/** /**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,136 +15,18 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package com.cloudera.sqoop.lib; package com.cloudera.sqoop.lib;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
/** /**
* Interface implemented by the classes generated by sqoop's orm.ClassWriter. * Interface implemented by the classes generated by sqoop's orm.ClassWriter.
*
* @deprecated use org.apache.sqoop.lib.SqoopRecord instead.
* @see org.apache.sqoop.lib.SqoopRecord
*/ */
public abstract class SqoopRecord implements Cloneable, DBWritable, public abstract class SqoopRecord extends org.apache.sqoop.lib.SqoopRecord {
FieldMappable, Writable {
public SqoopRecord() { public SqoopRecord() {
} }
public abstract void parse(CharSequence s) throws RecordParser.ParseError;
public abstract void parse(Text s) throws RecordParser.ParseError;
public abstract void parse(byte [] s) throws RecordParser.ParseError;
public abstract void parse(char [] s) throws RecordParser.ParseError;
public abstract void parse(ByteBuffer s) throws RecordParser.ParseError;
public abstract void parse(CharBuffer s) throws RecordParser.ParseError;
public abstract void loadLargeObjects(LargeObjectLoader objLoader)
throws SQLException, IOException, InterruptedException;
/**
* Inserts the data in this object into the PreparedStatement, starting
* at parameter 'offset'.
* @return the number of fields written to the statement.
*/
public abstract int write(PreparedStatement stmt, int offset)
throws SQLException;
/**
* Format output data according to the specified delimiters.
*/
public abstract String toString(DelimiterSet delimiters);
/**
* Use the default delimiters, but only append an end-of-record delimiter
* if useRecordDelim is true.
*/
public String toString(boolean useRecordDelim) {
// Method body should be overridden by generated classes in 1.3.0+
if (useRecordDelim) {
// This is the existing functionality.
return toString();
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(useRecordDelim=false) requires a newer SqoopRecord. "
+ "Please regenerate your record class to use this function.");
}
}
/**
* Format the record according to the specified delimiters. An end-of-record
* delimiter is optional, and only used if useRecordDelim is true. For
* use with TextOutputFormat, calling this with useRecordDelim=false may
* make more sense.
*/
public String toString(DelimiterSet delimiters, boolean useRecordDelim) {
if (useRecordDelim) {
return toString(delimiters);
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(delimiters, useRecordDelim=false) requires a newer "
+ "SqoopRecord. Please regenerate your record class to use this "
+ "function.");
}
}
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
/**
* Returns an integer specifying which API format version the
* generated class conforms to. Used by internal APIs for backwards
* compatibility.
* @return the API version this class was generated against.
*/
public abstract int getClassFormatVersion();
/**
* Use the delegate pattern to allow arbitrary processing of the
* fields of this record.
* @param processor A delegate that operates on this object.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
public void delegate(FieldMapProcessor processor)
throws IOException, ProcessingException {
processor.accept(this);
}
@Override
/**
* {@inheritDoc}
* @throws RuntimeException if used with a record that was generated
* before this capability was added (1.1.0).
*/
public Map<String, Object> getFieldMap() {
// Default implementation does not support field iteration.
// ClassWriter should provide an overriding version.
throw new RuntimeException(
"Got null field map from record. Regenerate your record class.");
}
/**
* Allows an arbitrary field to be set programmatically to the
* specified value object. The value object must match the
* type expected for the particular field or a RuntimeException
* will result.
* @throws RuntimeException if the specified field name does not exist.
*/
public void setField(String fieldName, Object fieldVal) {
throw new RuntimeException("This SqoopRecord does not support setField(). "
+ "Regenerate your record class.");
}
} }
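By way of a hedged sketch, a record instance produced by a sqoop-generated class (none is bundled here; 'id' is a hypothetical column) is typically parsed from delimited text, adjusted, and re-emitted:

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.RecordParser;
import com.cloudera.sqoop.lib.SqoopRecord;

public class SqoopRecordSketch {
  // 'record' must be an instance of a class generated by sqoop's
  // orm.ClassWriter; the abstract base class alone cannot be instantiated.
  public static String reformat(SqoopRecord record, String line,
      DelimiterSet outDelims) throws RecordParser.ParseError {
    record.parse(line);                 // populate fields from delimited text
    record.setField("id", 42);          // hypothetical column; value type must match
    return record.toString(outDelims);  // re-emit using different delimiters
  }
}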


@ -0,0 +1,170 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Provides a mapping from codec names to concrete implementation class names.
*/
public final class CodecMap {
// Supported codec map values
// Note: do not add more values here, since codecs are discovered using the
// standard Hadoop mechanism (io.compression.codecs). See
// CompressionCodecFactory.
public static final String NONE = "none";
public static final String DEFLATE = "deflate";
public static final String LZO = "lzo";
public static final String LZOP = "lzop";
private static Map<String, String> codecNames;
static {
codecNames = new TreeMap<String, String>();
// Register the names of codecs we know about.
codecNames.put(NONE, null);
codecNames.put(DEFLATE, "org.apache.hadoop.io.compress.DefaultCodec");
codecNames.put(LZO, "com.hadoop.compression.lzo.LzoCodec");
codecNames.put(LZOP, "com.hadoop.compression.lzo.LzopCodec");
// add more from Hadoop CompressionCodecFactory
for (Class<? extends CompressionCodec> cls
: CompressionCodecFactory.getCodecClasses(new Configuration())) {
String simpleName = cls.getSimpleName();
String codecName = simpleName;
if (simpleName.endsWith("Codec")) {
codecName = simpleName.substring(0, simpleName.length()
- "Codec".length());
}
codecNames.put(codecName.toLowerCase(), cls.getCanonicalName());
}
}
private CodecMap() {
}
/**
* Given a codec name, return the name of the concrete class
* that implements it (or 'null' in the case of the "none" codec).
* @throws com.cloudera.sqoop.io.UnsupportedCodecException if a codec cannot
* be found with the supplied name.
*/
public static String getCodecClassName(String codecName)
throws com.cloudera.sqoop.io.UnsupportedCodecException {
if (!codecNames.containsKey(codecName)) {
throw new com.cloudera.sqoop.io.UnsupportedCodecException(codecName);
}
return codecNames.get(codecName);
}
/**
* Given a codec name, instantiate the concrete implementation
* class that implements it.
* @throws com.cloudera.sqoop.io.UnsupportedCodecException if a codec cannot
* be found with the supplied name.
*/
public static CompressionCodec getCodec(String codecName,
Configuration conf) throws com.cloudera.sqoop.io.UnsupportedCodecException {
// Try standard Hadoop mechanism first
CompressionCodec codec = getCodecByName(codecName, conf);
if (codec != null) {
return codec;
}
// Fall back to Sqoop mechanism
String codecClassName = null;
try {
codecClassName = getCodecClassName(codecName);
if (null == codecClassName) {
return null;
}
Class<? extends CompressionCodec> codecClass =
(Class<? extends CompressionCodec>)
conf.getClassByName(codecClassName);
return (CompressionCodec) ReflectionUtils.newInstance(
codecClass, conf);
} catch (ClassNotFoundException cnfe) {
throw new com.cloudera.sqoop.io.UnsupportedCodecException(
"Cannot find codec class "
+ codecClassName + " for codec " + codecName);
}
}
/**
* Return the set of available codec names.
*/
public static Set<String> getCodecNames() {
return codecNames.keySet();
}
/**
* Find the relevant compression codec for the codec's canonical class name
* or by codec alias.
* <p>
* Codec aliases are case insensitive.
* <p>
* The codec alias is the short class name (without the package name).
* If the short class name ends with 'Codec', then there are two aliases for
* the codec, the complete short class name and the short class name without
* the 'Codec' ending. For example for the 'GzipCodec' codec class name the
* aliases are 'gzip' and 'gzipcodec'.
* <p>
* Note: When HADOOP-7323 is available this method can be replaced with a call
* to CompressionCodecFactory.
* @param classname the canonical class name of the codec or the codec alias
* @return the codec object or null if none matching the name were found
*/
private static CompressionCodec getCodecByName(String codecName,
Configuration conf) {
List<Class<? extends CompressionCodec>> codecs =
CompressionCodecFactory.getCodecClasses(conf);
for (Class<? extends CompressionCodec> cls : codecs) {
if (codecMatches(cls, codecName)) {
return ReflectionUtils.newInstance(cls, conf);
}
}
return null;
}
private static boolean codecMatches(Class<? extends CompressionCodec> cls,
String codecName) {
String simpleName = cls.getSimpleName();
if (cls.getName().equals(codecName)
|| simpleName.equalsIgnoreCase(codecName)) {
return true;
}
if (simpleName.endsWith("Codec")) {
String prefix = simpleName.substring(0, simpleName.length()
- "Codec".length());
if (prefix.equalsIgnoreCase(codecName)) {
return true;
}
}
return false;
}
}
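For example, the alias rules above let a short, case-insensitive name stand in for the full codec class; a minimal check might look like this (output depends on which codecs the local Hadoop configuration registers):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;

import org.apache.sqoop.io.CodecMap;

public class CodecMapSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // "gzip" and "gzipcodec" both resolve to GzipCodec; "none" yields null.
    CompressionCodec codec = CodecMap.getCodec("gzip", conf);
    System.out.println(codec.getClass().getName());
    System.out.println(CodecMap.getCodecNames());  // all known aliases
  }
}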


@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.commons.io.input.CountingInputStream;
import org.apache.commons.io.input.ProxyInputStream;
/**
* Provides an InputStream that can consume a fixed maximum number of bytes
* from an underlying stream. Closing the FixedLengthInputStream does not
* close the underlying stream. After reading the maximum number of available
* bytes this acts as though EOF has been reached.
*/
public class FixedLengthInputStream extends ProxyInputStream {
private CountingInputStream countingIn;
private long maxBytes;
public FixedLengthInputStream(InputStream stream, long maxLen) {
super(new CountingInputStream(new CloseShieldInputStream(stream)));
// Save a correctly-typed reference to the underlying stream.
this.countingIn = (CountingInputStream) this.in;
this.maxBytes = maxLen;
}
/** @return the number of bytes already consumed by the client. */
private long consumed() {
return countingIn.getByteCount();
}
/**
* @return number of bytes remaining to be read before the limit
* is reached.
*/
private long toLimit() {
return maxBytes - consumed();
}
@Override
public int available() throws IOException {
return (int) Math.min(toLimit(), countingIn.available());
}
@Override
public int read() throws IOException {
if (toLimit() > 0) {
return super.read();
} else {
return -1; // EOF.
}
}
@Override
public int read(byte [] buf) throws IOException {
return read(buf, 0, buf.length);
}
@Override
public int read(byte [] buf, int start, int count) throws IOException {
long limit = toLimit();
if (limit == 0) {
return -1; // EOF.
} else {
return super.read(buf, start, (int) Math.min(count, limit));
}
}
}
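A small sketch of the contract described above: only the first maxLen bytes are visible through the wrapper, and closing it leaves the underlying stream untouched.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.sqoop.io.FixedLengthInputStream;

public class FixedLengthInputStreamSketch {
  public static void main(String[] args) throws IOException {
    InputStream raw = new ByteArrayInputStream("hello world".getBytes("UTF-8"));
    // Expose only the first five bytes; afterwards the wrapper reports EOF.
    FixedLengthInputStream limited = new FixedLengthInputStream(raw, 5);
    int b;
    while ((b = limited.read()) != -1) {
      System.out.print((char) b);          // prints "hello"
    }
    limited.close();                       // underlying stream is shielded from close
    System.out.println();
    System.out.println((char) raw.read()); // ' ' -- raw is still readable
  }
}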

File diff suppressed because it is too large.


@ -0,0 +1,134 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
/**
* A cache of open LobFile.Reader objects.
* This maps from filenames to the open Reader, if any. This uses the
* Singleton pattern. While nothing prevents multiple LobReaderCache
* instances, it is most useful to have a single global cache. This cache is
* internally synchronized; only one thread can insert or retrieve a reader
* from the cache at a time.
*/
public class LobReaderCache {
public static final Log LOG =
LogFactory.getLog(LobReaderCache.class.getName());
private Map<Path, LobFile.Reader> readerMap;
/**
* Open a LobFile for read access, returning a cached reader if one is
* available, or a new reader otherwise.
* @param path the path to the LobFile to open
* @param conf the configuration to use to access the FS.
* @throws IOException if there's an error opening the file.
*/
public LobFile.Reader get(Path path, Configuration conf)
throws IOException {
LobFile.Reader reader = null;
Path canonicalPath = qualify(path, conf);
// Look up an entry in the cache.
synchronized(this) {
reader = readerMap.remove(canonicalPath);
}
if (null != reader && !reader.isClosed()) {
// Cache hit. return it.
LOG.debug("Using cached reader for " + canonicalPath);
return reader;
}
// Cache miss; open the file.
LOG.debug("No cached reader available for " + canonicalPath);
return LobFile.open(path, conf);
}
/**
* Return a reader back to the cache. If there's already a reader for
* this path, then the current reader is closed.
* @param reader the opened reader. Any record-specific subreaders should be
* closed.
* @throws IOException if there's an error accessing the path's filesystem.
*/
public void recycle(LobFile.Reader reader) throws IOException {
Path canonicalPath = reader.getPath();
// Check if the cache has a reader for this path already. If not, add this.
boolean cached = false;
synchronized(this) {
if (readerMap.get(canonicalPath) == null) {
LOG.debug("Caching reader for path: " + canonicalPath);
readerMap.put(canonicalPath, reader);
cached = true;
}
}
if (!cached) {
LOG.debug("Reader already present for path: " + canonicalPath
+ "; closing.");
reader.close();
}
}
@Override
protected synchronized void finalize() throws Throwable {
for (LobFile.Reader r : readerMap.values()) {
r.close();
}
super.finalize();
}
protected LobReaderCache() {
this.readerMap = new TreeMap<Path, LobFile.Reader>();
}
/**
* Created a fully-qualified path object.
* @param path the path to fully-qualify with its fs URI.
* @param conf the current Hadoop FS configuration.
* @return a new path representing the same location as the input 'path',
* but with a fully-qualified URI.
*/
public static Path qualify(Path path, Configuration conf)
throws IOException {
if (null == path) {
return null;
}
FileSystem fs = path.getFileSystem(conf);
if (null == fs) {
fs = FileSystem.get(conf);
}
return path.makeQualified(fs);
}
}
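A minimal usage sketch, assuming the singleton accessor getCache() that the legacy com.cloudera.sqoop.io.LobReaderCache exposes (it is referenced elsewhere in this change) and an illustrative LobFile path:

Configuration conf = new Configuration();
Path lobPath = new Path("/user/example/_lob/large_obj_0.lob");   // assumed path
com.cloudera.sqoop.io.LobReaderCache cache =
    com.cloudera.sqoop.io.LobReaderCache.getCache();
LobFile.Reader reader = cache.get(lobPath, conf);                // cached or freshly opened
try {
  // ... read records via reader.next() / reader.readBlobRecord() ...
} finally {
  cache.recycle(reader);   // hand the open reader back so later calls can reuse it
}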

View File

@ -0,0 +1,94 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.util.Shell;
import org.apache.log4j.Logger;
/**
* A named FIFO channel.
*/
public class NamedFifo {
private static final Logger LOG = Logger.getLogger(NamedFifo.class);
private File fifoFile;
/** Create a named FIFO object at the local fs path given by 'pathname'. */
public NamedFifo(String pathname) {
this.fifoFile = new File(pathname);
}
/** Create a named FIFO object at the local fs path given by the 'fifo' File
* object. */
public NamedFifo(File fifo) {
this.fifoFile = fifo;
}
/**
* Return the File object representing the FIFO.
*/
public File getFile() {
return this.fifoFile;
}
/**
* Create a named FIFO object.
* The pipe will be created with permissions 0600.
* @throws IOException on failure.
*/
public void create() throws IOException {
create(0600);
}
/**
* Create a named FIFO object with the specified fs permissions.
   * This depends on the 'mknod' (Linux coreutils) or 'mkfifo' (Mac OS X)
   * system utility being available. The FIFO file is deleted when the
   * process exits.
* @throws IOException on failure.
*/
public void create(int permissions) throws IOException {
String filename = fifoFile.toString();
// Format permissions as a mode string in base 8.
String modeStr = Integer.toString(permissions, 8);
// Create the FIFO itself.
try {
String output = Shell.execCommand("mknod", "--mode=0" + modeStr,
filename, "p");
LOG.info("mknod output:\n"+output);
} catch (IOException ex) {
LOG.info("IO error running mknod: " + ex.getMessage());
LOG.debug("IO error running mknod", ex);
}
if (!this.fifoFile.exists()) {
LOG.info("mknod failed, falling back to mkfifo");
String output = Shell.execCommand("mkfifo", "-m", "0" + modeStr,
filename);
LOG.info("mkfifo output:\n"+output);
}
// Schedule the FIFO to be cleaned up when we exit.
this.fifoFile.deleteOnExit();
}
}
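A short sketch of the intended use on Linux or Mac OS X; the pipe path is illustrative:

NamedFifo fifo = new NamedFifo("/tmp/sqoop-example.fifo");   // assumed path
fifo.create();               // tries mknod, falls back to mkfifo; mode 0600
File pipe = fifo.getFile();
// Another process can now open 'pipe' for writing while this JVM opens it
// for reading (for example with new FileInputStream(pipe)); the file is
// removed automatically when the JVM exits.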

View File

@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* A BufferedWriter implementation that wraps around a SplittingOutputStream
* and allows splitting of the underlying stream.
* Splits occur at allowSplit() calls, or newLine() calls.
*/
public class SplittableBufferedWriter extends BufferedWriter {
public static final Log LOG = LogFactory.getLog(
SplittableBufferedWriter.class.getName());
private SplittingOutputStream splitOutputStream;
private boolean alwaysFlush;
public SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = false;
}
/** For testing. */
protected SplittableBufferedWriter(
final SplittingOutputStream splitOutputStream, final boolean alwaysFlush) {
super(new OutputStreamWriter(splitOutputStream));
this.splitOutputStream = splitOutputStream;
this.alwaysFlush = alwaysFlush;
}
public void newLine() throws IOException {
super.newLine();
this.allowSplit();
}
public void allowSplit() throws IOException {
if (alwaysFlush) {
this.flush();
}
if (this.splitOutputStream.wouldSplit()) {
LOG.debug("Starting new split");
this.flush();
this.splitOutputStream.allowSplit();
}
}
}
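A sketch of the writer's contract: records are written one per line, and newLine() doubles as the split point, so the underlying stream only rolls over on record boundaries. The 'out' stream is an assumption (see SplittingOutputStream, below):

java.util.List<String> records = java.util.Arrays.asList("a,1", "b,2", "c,3");
SplittableBufferedWriter writer = new SplittableBufferedWriter(out); // 'out' assumed
for (String record : records) {
  writer.write(record);
  writer.newLine();   // record boundary: also checks whether to start a new split
}
writer.close();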

View File

@ -0,0 +1,159 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Formatter;
import org.apache.commons.io.output.CountingOutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
/**
* An output stream that writes to an underlying filesystem, opening
* a new file after a specified number of bytes have been written to the
* current one.
*/
public class SplittingOutputStream extends OutputStream {
public static final Log LOG = LogFactory.getLog(
SplittingOutputStream.class.getName());
private OutputStream writeStream;
private CountingOutputStream countingFilterStream;
private Configuration conf;
private Path destDir;
private String filePrefix;
private long cutoffBytes;
private CompressionCodec codec;
private int fileNum;
/**
* Create a new SplittingOutputStream.
* @param conf the Configuration to use to interface with HDFS
* @param destDir the directory where the files will go (should already
* exist).
* @param filePrefix the first part of the filename, which will be appended
* by a number. This file will be placed inside destDir.
* @param cutoff the approximate number of bytes to use per file
   * @param codec the compression codec with which to compress output files,
   * or null to leave them uncompressed. Compressed files get the codec's
   * default extension (e.g., .gz).
*/
public SplittingOutputStream(final Configuration conf, final Path destDir,
final String filePrefix, final long cutoff, final CompressionCodec codec)
throws IOException {
this.conf = conf;
this.destDir = destDir;
this.filePrefix = filePrefix;
this.cutoffBytes = cutoff;
if (this.cutoffBytes < 0) {
this.cutoffBytes = 0; // splitting disabled.
}
this.codec = codec;
this.fileNum = 0;
openNextFile();
}
/** Initialize the OutputStream to the next file to write to.
*/
private void openNextFile() throws IOException {
FileSystem fs = FileSystem.get(conf);
StringBuffer sb = new StringBuffer();
Formatter fmt = new Formatter(sb);
fmt.format("%05d", this.fileNum++);
String filename = filePrefix + fmt.toString();
if (codec != null) {
filename = filename + codec.getDefaultExtension();
}
Path destFile = new Path(destDir, filename);
LOG.debug("Opening next output file: " + destFile);
if (fs.exists(destFile)) {
Path canonicalDest = destFile.makeQualified(fs);
throw new IOException("Destination file " + canonicalDest
+ " already exists");
}
OutputStream fsOut = fs.create(destFile);
// Count how many actual bytes hit HDFS.
this.countingFilterStream = new CountingOutputStream(fsOut);
if (codec != null) {
// Wrap that in a compressing stream.
this.writeStream = codec.createOutputStream(this.countingFilterStream);
} else {
// Write to the counting stream directly.
this.writeStream = this.countingFilterStream;
}
}
/**
* @return true if allowSplit() would actually cause a split.
*/
public boolean wouldSplit() {
return this.cutoffBytes > 0
&& this.countingFilterStream.getByteCount() >= this.cutoffBytes;
}
/** If we've written more to the disk than the user's split size,
* open the next file.
*/
private void checkForNextFile() throws IOException {
if (wouldSplit()) {
LOG.debug("Starting new split");
this.writeStream.flush();
this.writeStream.close();
openNextFile();
}
}
/** Defines a point in the stream when it is acceptable to split to a new
file; e.g., the end of a record.
*/
public void allowSplit() throws IOException {
checkForNextFile();
}
public void close() throws IOException {
this.writeStream.close();
}
public void flush() throws IOException {
this.writeStream.flush();
}
public void write(byte [] b) throws IOException {
this.writeStream.write(b);
}
public void write(byte [] b, int off, int len) throws IOException {
this.writeStream.write(b, off, len);
}
public void write(int b) throws IOException {
this.writeStream.write(b);
}
}
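A sketch of direct use, with an illustrative destination directory, prefix and 10 KB cutoff; with a null codec the part files are uncompressed:

Configuration conf = new Configuration();
SplittingOutputStream out = new SplittingOutputStream(
    conf, new Path("/tmp/splitting-demo"), "part-", 10L * 1024, null);
byte[] record = "id,value\n".getBytes(java.nio.charset.StandardCharsets.UTF_8);
for (int i = 0; i < 10000; i++) {
  out.write(record);
  out.allowSplit();   // once past the cutoff, closes part-00000 and opens part-00001
}
out.close();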

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.io;
import java.io.IOException;
/**
* Thrown when a compression codec cannot be recognized.
*/
public class UnsupportedCodecException extends IOException {
public UnsupportedCodecException() {
super("UnsupportedCodecException");
}
public UnsupportedCodecException(String msg) {
super(msg);
}
public UnsupportedCodecException(Throwable cause) {
super(cause);
}
}

View File

@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import org.apache.hadoop.io.Text;
/**
* Serialize BigDecimal classes to/from DataInput and DataOutput objects.
*
 * BigDecimal is composed of a BigInteger with an integer 'scale' field.
* The BigDecimal/BigInteger can also return itself as a 'long' value.
*
* We serialize in one of two formats:
*
* First, check whether the BigInt can fit in a long:
* boolean b = BigIntegerPart &gt; LONG_MAX || BigIntegerPart &lt; LONG_MIN
*
* [int: scale][boolean: b == false][long: BigInt-part]
* [int: scale][boolean: b == true][string: BigInt-part.toString()]
*
* TODO(aaron): Get this to work with Hadoop's Serializations framework.
*/
public final class BigDecimalSerializer {
private BigDecimalSerializer() { }
public static final BigInteger LONG_MAX_AS_BIGINT =
BigInteger.valueOf(Long.MAX_VALUE);
public static final BigInteger LONG_MIN_AS_BIGINT =
BigInteger.valueOf(Long.MIN_VALUE);
public static void write(BigDecimal d, DataOutput out) throws IOException {
int scale = d.scale();
BigInteger bigIntPart = d.unscaledValue();
boolean fastpath = bigIntPart.compareTo(LONG_MAX_AS_BIGINT) < 0
      && bigIntPart.compareTo(LONG_MIN_AS_BIGINT) > 0;
out.writeInt(scale);
out.writeBoolean(fastpath);
if (fastpath) {
out.writeLong(bigIntPart.longValue());
} else {
Text.writeString(out, bigIntPart.toString());
}
}
public static BigDecimal readFields(DataInput in) throws IOException {
int scale = in.readInt();
boolean fastpath = in.readBoolean();
BigInteger unscaledIntPart;
if (fastpath) {
long unscaledValue = in.readLong();
unscaledIntPart = BigInteger.valueOf(unscaledValue);
} else {
String unscaledValueStr = Text.readString(in);
unscaledIntPart = new BigInteger(unscaledValueStr);
}
return new BigDecimal(unscaledIntPart, scale);
}
}
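A round-trip sketch using in-memory streams; small unscaled values take the long fast path, larger ones fall back to the string form:

BigDecimal original = new BigDecimal("12345.6789");
java.io.ByteArrayOutputStream buf = new java.io.ByteArrayOutputStream();
BigDecimalSerializer.write(original, new java.io.DataOutputStream(buf));
BigDecimal copy = BigDecimalSerializer.readFields(new java.io.DataInputStream(
    new java.io.ByteArrayInputStream(buf.toByteArray())));
// copy.equals(original) holds: both the scale and the unscaled value survive.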

View File

@ -0,0 +1,130 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Matcher;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.io.LobFile;
/**
* BlobRef is a wrapper that holds a BLOB either directly, or a
* reference to a file that holds the BLOB data.
*/
public class BlobRef extends
com.cloudera.sqoop.lib.LobRef<byte[], BytesWritable, InputStream> {
public static final Log LOG = LogFactory.getLog(BlobRef.class.getName());
public BlobRef() {
super();
}
public BlobRef(byte [] bytes) {
super(new BytesWritable(bytes));
}
/**
* Initialize a BlobRef to an external BLOB.
* @param file the filename to the BLOB. May be relative to the job dir.
* @param offset the offset (in bytes) into the LobFile for this record.
* @param length the length of the record in bytes.
*/
public BlobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected InputStream getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readBlobRecord();
}
@Override
protected InputStream getInternalSource(BytesWritable data) {
return new ByteArrayInputStream(data.getBytes(), 0, data.getLength());
}
@Override
protected byte [] getInternalData(BytesWritable data) {
return Arrays.copyOf(data.getBytes(), data.getLength());
}
@Override
protected BytesWritable deepCopyData(BytesWritable data) {
return new BytesWritable(Arrays.copyOf(data.getBytes(), data.getLength()));
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored BLOBs, the data is a BytesWritable
// containing the actual data.
BytesWritable data = getDataObj();
if (null == data) {
data = new BytesWritable();
}
data.readFields(in);
setDataObj(data);
}
@Override
public void writeInternal(DataOutput out) throws IOException {
getDataObj().write(out);
}
/**
* Create a BlobRef based on parsed data from a line of text.
   * This only operates correctly on external BLOBs; inline BLOB data cannot
   * be reparsed and yields an empty BlobRef. Store BLOB data in SequenceFile
   * format if reparsing is necessary.
* @param inputString the text-based input data to parse.
* @return a new BlobRef containing a reference to an external BLOB, or
* an empty BlobRef if the data to be parsed is actually inline.
*/
public static com.cloudera.sqoop.lib.BlobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external BLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline BLOB, which we don't support parsing of.
Matcher m = org.apache.sqoop.lib.LobRef.EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new com.cloudera.sqoop.lib.BlobRef(m.group(1),
Long.valueOf(m.group(2)), Long.valueOf(m.group(3)));
} else {
// This is inline BLOB string data.
LOG.warn(
"Reparsing inline BLOB data is not supported; use SequenceFiles.");
return new com.cloudera.sqoop.lib.BlobRef();
}
}
}
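A parsing sketch; the LobFile name, offset and length below are illustrative values in the externalLob(lf,...) form described above:

com.cloudera.sqoop.lib.BlobRef external = org.apache.sqoop.lib.BlobRef.parse(
    "externalLob(lf,_lob/large_obj_0.lob,100,5242880)");
// external.isExternal() == true; fetch the bytes lazily with
// external.getDataStream(conf, basePath).
com.cloudera.sqoop.lib.BlobRef inline = org.apache.sqoop.lib.BlobRef.parse("cafebabe");
// Inline data cannot be reparsed: 'inline' is an empty BlobRef and a warning is logged.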

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Parse string representations of boolean values into boolean
* scalar types.
*/
public final class BooleanParser {
/**
* Return a boolean based on the value contained in the string.
*
* <p>The following values are considered true:
* "true", "t", "yes", "on", "1".</p>
* <p>All other values, including 'null', are false.</p>
* <p>All comparisons are case-insensitive.</p>
*/
public static boolean valueOf(final String s) {
return s != null && ("true".equalsIgnoreCase(s) || "t".equalsIgnoreCase(s)
|| "1".equals(s) || "on".equalsIgnoreCase(s)
|| "yes".equalsIgnoreCase(s));
}
private BooleanParser() { }
}
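A few illustrative calls:

BooleanParser.valueOf("TRUE");   // true  (comparison is case-insensitive)
BooleanParser.valueOf("yes");    // true
BooleanParser.valueOf("0");      // false (only "1" counts among digits)
BooleanParser.valueOf(null);     // false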

View File

@ -0,0 +1,113 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Matcher;
import org.apache.hadoop.io.Text;
import com.cloudera.sqoop.io.LobFile;
/**
* ClobRef is a wrapper that holds a CLOB either directly, or a
* reference to a file that holds the CLOB data.
*/
public class ClobRef
extends com.cloudera.sqoop.lib.LobRef<String, String, Reader> {
public ClobRef() {
super();
}
public ClobRef(String chars) {
super(chars);
}
/**
* Initialize a clobref to an external CLOB.
* @param file the filename to the CLOB. May be relative to the job dir.
* @param offset the offset (in bytes) into the LobFile for this record.
* @param length the length of the record in characters.
*/
public ClobRef(String file, long offset, long length) {
super(file, offset, length);
}
@Override
protected Reader getExternalSource(LobFile.Reader reader)
throws IOException {
return reader.readClobRecord();
}
@Override
protected Reader getInternalSource(String data) {
return new StringReader(data);
}
@Override
protected String deepCopyData(String data) {
return data;
}
@Override
protected String getInternalData(String data) {
return data;
}
@Override
public void readFieldsInternal(DataInput in) throws IOException {
// For internally-stored clobs, the data is written as UTF8 Text.
setDataObj(Text.readString(in));
}
@Override
public void writeInternal(DataOutput out) throws IOException {
Text.writeString(out, getDataObj());
}
/**
* Create a ClobRef based on parsed data from a line of text.
* @param inputString the text-based input data to parse.
* @return a ClobRef to the given data.
*/
public static com.cloudera.sqoop.lib.ClobRef parse(String inputString) {
// If inputString is of the form 'externalLob(lf,%s,%d,%d)', then this is
// an external CLOB stored at the LobFile indicated by '%s' with the next
// two arguments representing its offset and length in the file.
// Otherwise, it is an inline CLOB, which we read as-is.
Matcher m = EXTERNAL_MATCHER.get();
m.reset(inputString);
if (m.matches()) {
// This is a LobFile. Extract the filename, offset and len from the
// matcher.
return new com.cloudera.sqoop.lib.ClobRef(m.group(1),
Long.valueOf(m.group(2)), Long.valueOf(m.group(3)));
} else {
// This is inline CLOB string data.
return new com.cloudera.sqoop.lib.ClobRef(inputString);
}
}
}
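Unlike BlobRef, inline character data survives a round trip through parse(); a brief sketch with an illustrative external reference:

com.cloudera.sqoop.lib.ClobRef inline = org.apache.sqoop.lib.ClobRef.parse("hello world");
// inline.isExternal() == false and inline.getData() returns "hello world".
com.cloudera.sqoop.lib.ClobRef external = org.apache.sqoop.lib.ClobRef.parse(
    "externalLob(lf,_lob/large_obj_1.lob,0,2048)");   // illustrative reference
// external.isExternal() == true; stream it with getDataStream(conf, basePath).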

View File

@ -0,0 +1,205 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Encapsulates a set of delimiters used to encode a record.
*/
public class DelimiterSet implements Cloneable {
public static final char NULL_CHAR = '\000';
private char fieldDelim; // fields terminated by this.
private char recordDelim; // records terminated by this.
// If these next two fields are '\000', then they are ignored.
private char enclosedBy;
private char escapedBy;
// If true, then the enclosed-by character is applied to every
// field, not just ones containing embedded delimiters.
private boolean encloseRequired;
/**
* Create a delimiter set with the default delimiters
* (comma for fields, newline for records).
*/
public DelimiterSet() {
this(',', '\n', NULL_CHAR, NULL_CHAR, false);
}
/**
* Create a delimiter set with the specified delimiters.
* @param field the fields-terminated-by delimiter
* @param record the lines-terminated-by delimiter
* @param enclose the enclosed-by character
* @param escape the escaped-by character
* @param isEncloseRequired If true, enclosed-by is applied to all
* fields. If false, only applied to fields that embed delimiters.
*/
public DelimiterSet(char field, char record, char enclose, char escape,
boolean isEncloseRequired) {
this.fieldDelim = field;
this.recordDelim = record;
this.enclosedBy = enclose;
this.escapedBy = escape;
this.encloseRequired = isEncloseRequired;
}
/**
* Sets the fields-terminated-by character.
*/
public void setFieldsTerminatedBy(char f) {
this.fieldDelim = f;
}
/**
* @return the fields-terminated-by character.
*/
public char getFieldsTerminatedBy() {
return this.fieldDelim;
}
/**
* Sets the end-of-record lines-terminated-by character.
*/
public void setLinesTerminatedBy(char r) {
this.recordDelim = r;
}
/**
* @return the end-of-record (lines-terminated-by) character.
*/
public char getLinesTerminatedBy() {
return this.recordDelim;
}
/**
* Sets the enclosed-by character.
* @param e the enclosed-by character, or '\000' for no enclosing character.
*/
public void setEnclosedBy(char e) {
this.enclosedBy = e;
}
/**
* @return the enclosed-by character, or '\000' for none.
*/
public char getEnclosedBy() {
return this.enclosedBy;
}
/**
* Sets the escaped-by character.
* @param e the escaped-by character, or '\000' for no escape character.
*/
public void setEscapedBy(char e) {
this.escapedBy = e;
}
/**
* @return the escaped-by character, or '\000' for none.
*/
public char getEscapedBy() {
return this.escapedBy;
}
/**
* Set whether the enclosed-by character must be applied to all fields,
* or only fields with embedded delimiters.
*/
public void setEncloseRequired(boolean required) {
this.encloseRequired = required;
}
/**
* @return true if the enclosed-by character must be applied to all fields,
* or false if it's only used for fields with embedded delimiters.
*/
public boolean isEncloseRequired() {
return this.encloseRequired;
}
@Override
/**
* @return a string representation of the delimiters.
*/
public String toString() {
return "fields=" + this.fieldDelim
+ " records=" + this.recordDelim
+ " escape=" + this.escapedBy
+ " enclose=" + this.enclosedBy
+ " required=" + this.encloseRequired;
}
/**
   * Format this set of delimiters as a constructor call that would
   * produce an identical DelimiterSet.
* @return a String that can be embedded in generated code that
* provides this set of delimiters.
*/
public String formatConstructor() {
return "new DelimiterSet((char) " + (int) this.fieldDelim + ", "
+ "(char) " + (int) this.recordDelim + ", "
+ "(char) " + (int) this.enclosedBy + ", "
+ "(char) " + (int) this.escapedBy + ", "
+ this.encloseRequired + ")";
}
@Override
/**
* @return a hash code for this set of delimiters.
*/
public int hashCode() {
return (int) this.fieldDelim
+ (((int) this.recordDelim) << 4)
+ (((int) this.escapedBy) << 8)
+ (((int) this.enclosedBy) << 12)
+ (((int) this.recordDelim) << 16)
+ (this.encloseRequired ? 0xFEFE : 0x7070);
}
@Override
/**
* @return true if this delimiter set is the same as another set of
* delimiters.
*/
public boolean equals(Object other) {
if (null == other) {
return false;
} else if (!other.getClass().equals(getClass())) {
return false;
}
DelimiterSet set = (DelimiterSet) other;
return this.fieldDelim == set.fieldDelim
&& this.recordDelim == set.recordDelim
&& this.escapedBy == set.escapedBy
&& this.enclosedBy == set.enclosedBy
&& this.encloseRequired == set.encloseRequired;
}
@Override
/**
* @return a new copy of this same set of delimiters.
*/
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
}
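A small sketch of a CSV-style set and the code-generation helper:

DelimiterSet csv = new DelimiterSet(',', '\n', '"', '\\', false);
csv.getFieldsTerminatedBy();   // ','
csv.formatConstructor();
// -> "new DelimiterSet((char) 44, (char) 10, (char) 34, (char) 92, false)"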

View File

@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* Static helper class that will help format data with quotes and escape chars.
*/
public final class FieldFormatter {
/**
   * Only pass string fields through this when the --hive-drop-delims option
   * is on: it strips Hive's record delimiters from the field body.
   * @param str the field value to clean and escape
   * @param delimiters the output DelimiterSet to escape/enclose against
   * @return the cleaned, escaped and (if needed) enclosed field value
*/
public static String hiveStringDropDelims(String str,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
return hiveStringReplaceDelims(str, "", delimiters);
}
/**
   * Replace Hive delimiters (\n, \r, \001) with the user-defined string
   * passed to the --hive-delims-replacement option.
   * @param str the field value to clean and escape
   * @param replacement the string substituted for each Hive delimiter
   * @param delimiters the output DelimiterSet to escape/enclose against
   * @return the cleaned, escaped and (if needed) enclosed field value
*/
public static String hiveStringReplaceDelims(String str, String replacement,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
String droppedDelims = str.replaceAll("\\n|\\r|\01", replacement);
return escapeAndEnclose(droppedDelims, delimiters);
}
/**
* Takes an input string representing the value of a field, encloses it in
* enclosing chars, and escapes any occurrences of such characters in the
* middle. The escape character itself is also escaped if it appears in the
* text of the field. If there is no enclosing character, then any
* delimiters present in the field body are escaped instead.
*
* The field is enclosed only if:
* enclose != '\000', and:
* encloseRequired is true, or
* one of the fields-terminated-by or lines-terminated-by characters is
* present in the string.
*
* Escaping is not performed if the escape char is '\000'.
*
* @param str - The user's string to escape and enclose
* @param delimiters - The DelimiterSet to use identifying the escape and
* enclose semantics. If the specified escape or enclose characters are
* '\000', those operations are not performed.
* @return the escaped, enclosed version of 'str'.
*/
public static String escapeAndEnclose(String str,
com.cloudera.sqoop.lib.DelimiterSet delimiters) {
char escape = delimiters.getEscapedBy();
char enclose = delimiters.getEnclosedBy();
boolean encloseRequired = delimiters.isEncloseRequired();
// true if we can use an escape character.
boolean escapingLegal =
com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR != escape;
String withEscapes;
if (null == str) {
return null;
}
if (escapingLegal) {
// escaping is legal. Escape any instances of the escape char itself.
withEscapes = str.replace("" + escape, "" + escape + escape);
} else {
// no need to double-escape
withEscapes = str;
}
if (com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR == enclose) {
// The enclose-with character was left unset, so we can't enclose items.
if (escapingLegal) {
// If the user has used the fields-terminated-by or
// lines-terminated-by characters in the string, escape them if we
// have an escape character.
String fields = "" + delimiters.getFieldsTerminatedBy();
String lines = "" + delimiters.getLinesTerminatedBy();
withEscapes = withEscapes.replace(fields, "" + escape + fields);
withEscapes = withEscapes.replace(lines, "" + escape + lines);
}
// No enclosing possible, so now return this.
return withEscapes;
}
// if we have an enclosing character, and escaping is legal, then the
// encloser must always be escaped.
if (escapingLegal) {
withEscapes = withEscapes.replace("" + enclose, "" + escape + enclose);
}
boolean actuallyDoEnclose = encloseRequired;
if (!actuallyDoEnclose) {
// check if the string requires enclosing.
char [] mustEncloseFor = new char[2];
mustEncloseFor[0] = delimiters.getFieldsTerminatedBy();
mustEncloseFor[1] = delimiters.getLinesTerminatedBy();
for (char reason : mustEncloseFor) {
if (str.indexOf(reason) != -1) {
actuallyDoEnclose = true;
break;
}
}
}
if (actuallyDoEnclose) {
return "" + enclose + withEscapes + enclose;
} else {
return withEscapes;
}
}
private FieldFormatter() { }
}
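A sketch of escapeAndEnclose() with a CSV-style DelimiterSet (enclose only when needed), assuming the legacy com.cloudera.sqoop.lib.DelimiterSet keeps the same five-argument constructor:

com.cloudera.sqoop.lib.DelimiterSet csv =
    new com.cloudera.sqoop.lib.DelimiterSet(',', '\n', '"', '\\', false);
FieldFormatter.escapeAndEnclose("plain", csv);       // -> plain        (untouched)
FieldFormatter.escapeAndEnclose("a,b", csv);         // -> "a,b"        (embedded field delimiter)
FieldFormatter.escapeAndEnclose("say \"hi\"", csv);  // -> say \"hi\"   (escaped, not enclosed)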

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.IOException;
import com.cloudera.sqoop.lib.FieldMappable;
import com.cloudera.sqoop.lib.ProcessingException;
/**
* Interface implemented by classes that process FieldMappable objects.
*/
public interface FieldMapProcessor {
/**
* Allow arbitrary processing of a FieldMappable object.
* @param record an object which can emit a map of its field names to values.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws ProcessingException if the FieldMapProcessor encounters
* a general processing error when operating on this object.
*/
void accept(FieldMappable record) throws IOException, ProcessingException;
}

View File

@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.util.Map;
/**
* Interface describing a class capable of returning a map of the fields
* of the object to their values.
*/
public interface FieldMappable {
/**
* Returns a map containing all fields of this record.
* @return a map from column names to the object-based values for
* this record. The map may not be null, though it may be empty.
*/
Map<String, Object> getFieldMap();
}

View File

@ -0,0 +1,256 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.math.BigDecimal;
import java.sql.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Time;
import java.sql.Timestamp;
import org.apache.hadoop.io.BytesWritable;
import com.cloudera.sqoop.lib.BlobRef;
import com.cloudera.sqoop.lib.ClobRef;
/**
* Contains a set of methods which can read db columns from a ResultSet into
* Java types, and do serialization of these types to/from DataInput/DataOutput
* for use with Hadoop's Writable implementation. This supports null values
* for all types.
*/
public final class JdbcWritableBridge {
// Currently, cap BLOB/CLOB objects at 16 MB until we can use external
// storage.
public static final long MAX_BLOB_LENGTH = 16 * 1024 * 1024;
public static final long MAX_CLOB_LENGTH = 16 * 1024 * 1024;
private JdbcWritableBridge() {
}
public static Integer readInteger(int colNum, ResultSet r)
throws SQLException {
int val;
val = r.getInt(colNum);
if (r.wasNull()) {
return null;
} else {
return Integer.valueOf(val);
}
}
public static Long readLong(int colNum, ResultSet r) throws SQLException {
long val;
val = r.getLong(colNum);
if (r.wasNull()) {
return null;
} else {
return Long.valueOf(val);
}
}
public static String readString(int colNum, ResultSet r) throws SQLException {
return r.getString(colNum);
}
public static Float readFloat(int colNum, ResultSet r) throws SQLException {
float val;
val = r.getFloat(colNum);
if (r.wasNull()) {
return null;
} else {
return Float.valueOf(val);
}
}
public static Double readDouble(int colNum, ResultSet r) throws SQLException {
double val;
val = r.getDouble(colNum);
if (r.wasNull()) {
return null;
} else {
return Double.valueOf(val);
}
}
public static Boolean readBoolean(int colNum, ResultSet r)
throws SQLException {
boolean val;
val = r.getBoolean(colNum);
if (r.wasNull()) {
return null;
} else {
return Boolean.valueOf(val);
}
}
public static Time readTime(int colNum, ResultSet r) throws SQLException {
return r.getTime(colNum);
}
public static Timestamp readTimestamp(int colNum, ResultSet r)
throws SQLException {
return r.getTimestamp(colNum);
}
public static Date readDate(int colNum, ResultSet r) throws SQLException {
return r.getDate(colNum);
}
public static BytesWritable readBytesWritable(int colNum, ResultSet r)
throws SQLException {
byte [] bytes = r.getBytes(colNum);
return bytes == null ? null : new BytesWritable(bytes);
}
public static BigDecimal readBigDecimal(int colNum, ResultSet r)
throws SQLException {
return r.getBigDecimal(colNum);
}
public static BlobRef readBlobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of BLOBs is delayed; handled by LargeObjectLoader.
return null;
}
public static ClobRef readClobRef(int colNum, ResultSet r)
throws SQLException {
// Loading of CLOBs is delayed; handled by LargeObjectLoader.
return null;
}
public static void writeInteger(Integer val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setInt(paramIdx, val);
}
}
public static void writeLong(Long val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setLong(paramIdx, val);
}
}
public static void writeDouble(Double val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDouble(paramIdx, val);
}
}
public static void writeBoolean(Boolean val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBoolean(paramIdx, val);
}
}
public static void writeFloat(Float val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setFloat(paramIdx, val);
}
}
public static void writeString(String val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setString(paramIdx, val);
}
}
public static void writeTimestamp(Timestamp val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTimestamp(paramIdx, val);
}
}
public static void writeTime(Time val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setTime(paramIdx, val);
}
}
public static void writeDate(Date val, int paramIdx, int sqlType,
PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setDate(paramIdx, val);
}
}
public static void writeBytesWritable(BytesWritable val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
// val.getBytes() is only valid in [0, len)
byte [] rawBytes = val.getBytes();
int len = val.getLength();
byte [] outBytes = new byte[len];
System.arraycopy(rawBytes, 0, outBytes, 0, len);
s.setBytes(paramIdx, outBytes);
}
}
public static void writeBigDecimal(BigDecimal val, int paramIdx,
int sqlType, PreparedStatement s) throws SQLException {
if (null == val) {
s.setNull(paramIdx, sqlType);
} else {
s.setBigDecimal(paramIdx, val);
}
}
public static void writeBlobRef(com.cloudera.sqoop.lib.BlobRef val,
int paramIdx, int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export BLOB data");
}
public static void writeClobRef(com.cloudera.sqoop.lib.ClobRef val,
int paramIdx, int sqlType, PreparedStatement s) throws SQLException {
// TODO: support this.
throw new RuntimeException("Unsupported: Cannot export CLOB data");
}
}
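A sketch of the call pattern that generated record classes follow: read nullable columns, then bind them back into an insert. The connection URL and table names are assumptions:

java.sql.Connection conn =
    java.sql.DriverManager.getConnection("jdbc:hsqldb:mem:demo");   // assumed URL
java.sql.ResultSet rs =
    conn.createStatement().executeQuery("SELECT id, name FROM example");
java.sql.PreparedStatement ps =
    conn.prepareStatement("INSERT INTO example_copy (id, name) VALUES (?, ?)");
while (rs.next()) {
  Integer id  = JdbcWritableBridge.readInteger(1, rs);   // null if the column was SQL NULL
  String name = JdbcWritableBridge.readString(2, rs);
  JdbcWritableBridge.writeInteger(id, 1, java.sql.Types.INTEGER, ps);
  JdbcWritableBridge.writeString(name, 2, java.sql.Types.VARCHAR, ps);
  ps.executeUpdate();
}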

View File

@ -0,0 +1,322 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.util.TaskId;
/**
* Contains a set of methods which can read db columns from a ResultSet into
* Java types, and do serialization of these types to/from DataInput/DataOutput
* for use with Hadoop's Writable implementation. This supports null values
* for all types.
*
* This is a singleton instance class; only one may exist at a time.
* However, its lifetime is limited to the current TaskInputOutputContext's
* life.
*/
public class LargeObjectLoader implements Closeable {
// Spill to external storage for BLOB/CLOB objects > 16 MB.
public static final long DEFAULT_MAX_LOB_LENGTH = 16 * 1024 * 1024;
public static final String MAX_INLINE_LOB_LEN_KEY =
"sqoop.inline.lob.length.max";
private Configuration conf;
private Path workPath;
private FileSystem fs;
// Handles to the open BLOB / CLOB file writers.
private LobFile.Writer curBlobWriter;
private LobFile.Writer curClobWriter;
// Counter that is used with the current task attempt id to
// generate unique LOB file names.
private long nextLobFileId = 0;
/**
* Create a new LargeObjectLoader.
* @param conf the Configuration to use
* @param workPath the HDFS working directory for this task.
*/
public LargeObjectLoader(Configuration conf, Path workPath)
throws IOException {
this.conf = conf;
this.workPath = workPath;
this.fs = FileSystem.get(conf);
this.curBlobWriter = null;
this.curClobWriter = null;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
@Override
public void close() throws IOException {
if (null != curBlobWriter) {
curBlobWriter.close();
curBlobWriter = null;
}
if (null != curClobWriter) {
curClobWriter.close();
curClobWriter = null;
}
}
/**
* @return a filename to use to put an external LOB in.
*/
private String getNextLobFileName() {
String file = "_lob/large_obj_" + TaskId.get(conf, "unknown_task_id")
+ nextLobFileId + ".lob";
nextLobFileId++;
return file;
}
/**
* Calculates a path to a new LobFile object, creating any
* missing directories.
* @return a Path to a LobFile to write
*/
private Path getNextLobFilePath() throws IOException {
Path p = new Path(workPath, getNextLobFileName());
Path parent = p.getParent();
if (!fs.exists(parent)) {
fs.mkdirs(parent);
}
return p;
}
/**
* @return the current LobFile writer for BLOBs, creating one if necessary.
*/
private LobFile.Writer getBlobWriter() throws IOException {
if (null == this.curBlobWriter) {
this.curBlobWriter = LobFile.create(getNextLobFilePath(), conf, false);
}
return this.curBlobWriter;
}
/**
* @return the current LobFile writer for CLOBs, creating one if necessary.
*/
private LobFile.Writer getClobWriter() throws IOException {
if (null == this.curClobWriter) {
this.curClobWriter = LobFile.create(getNextLobFilePath(), conf, true);
}
return this.curClobWriter;
}
/**
* Returns the path being written to by a given LobFile.Writer, relative
* to the working directory of this LargeObjectLoader.
* @param w the LobFile.Writer whose path should be examined.
* @return the path this is writing to, relative to the current working dir.
*/
private String getRelativePath(LobFile.Writer w) {
Path writerPath = w.getPath();
String writerPathStr = writerPath.toString();
String workPathStr = workPath.toString();
if (!workPathStr.endsWith(File.separator)) {
workPathStr = workPathStr + File.separator;
}
if (writerPathStr.startsWith(workPathStr)) {
return writerPathStr.substring(workPathStr.length());
}
// Outside the working dir; return the whole thing.
return writerPathStr;
}
/**
* Copies all character data from the provided Reader to the provided
* Writer. Does not close handles when it's done.
* @param reader data source
* @param writer data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(Reader reader, Writer writer) throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
char [] buf = new char[bufferSize];
while (true) {
int charsRead = reader.read(buf);
if (-1 == charsRead) {
break; // no more stream to read.
}
writer.write(buf, 0, charsRead);
}
}
/**
* Copies all byte data from the provided InputStream to the provided
* OutputStream. Does not close handles when it's done.
* @param input data source
* @param output data sink
* @throws IOException if an I/O error occurs either reading or writing.
*/
private void copyAll(InputStream input, OutputStream output)
throws IOException {
int bufferSize = conf.getInt("io.file.buffer.size",
4096);
byte [] buf = new byte[bufferSize];
while (true) {
int bytesRead = input.read(buf, 0, bufferSize);
if (-1 == bytesRead) {
break; // no more stream to read.
}
output.write(buf, 0, bytesRead);
}
}
/**
* Actually read a BlobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a BlobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public com.cloudera.sqoop.lib.BlobRef readBlobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Blob b = r.getBlob(colNum);
if (null == b) {
return null;
} else if (b.length() > maxInlineLobLen) {
// Deserialize very large BLOBs into separate files.
long len = b.length();
LobFile.Writer lobWriter = getBlobWriter();
long recordOffset = lobWriter.tell();
InputStream is = null;
OutputStream os = lobWriter.writeBlobRecord(len);
try {
is = b.getBinaryStream();
copyAll(is, os);
} finally {
if (null != os) {
os.close();
}
if (null != is) {
is.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new com.cloudera.sqoop.lib.BlobRef(
getRelativePath(curBlobWriter), recordOffset, len);
} else {
      // Blob.getBytes() uses 1-based positions.
return new com.cloudera.sqoop.lib.BlobRef(
b.getBytes(1, (int) b.length()));
}
}
/**
* Actually read a ClobRef instance from the ResultSet and materialize
* the data either inline or to a file.
*
* @param colNum the column of the ResultSet's current row to read.
* @param r the ResultSet to read from.
* @return a ClobRef encapsulating the data in this field.
* @throws IOException if an error occurs writing to the FileSystem.
* @throws SQLException if an error occurs reading from the database.
*/
public com.cloudera.sqoop.lib.ClobRef readClobRef(int colNum, ResultSet r)
throws IOException, InterruptedException, SQLException {
long maxInlineLobLen = conf.getLong(
MAX_INLINE_LOB_LEN_KEY,
DEFAULT_MAX_LOB_LENGTH);
Clob c = r.getClob(colNum);
if (null == c) {
return null;
} else if (c.length() > maxInlineLobLen) {
// Deserialize large CLOB into separate file.
long len = c.length();
LobFile.Writer lobWriter = getClobWriter();
long recordOffset = lobWriter.tell();
Reader reader = null;
Writer w = lobWriter.writeClobRecord(len);
try {
reader = c.getCharacterStream();
copyAll(reader, w);
} finally {
if (null != w) {
w.close();
}
if (null != reader) {
reader.close();
}
// Mark the record as finished.
lobWriter.finishRecord();
}
return new com.cloudera.sqoop.lib.ClobRef(
getRelativePath(lobWriter), recordOffset, len);
} else {
      // Clob.getSubString() uses 1-based positions.
return new com.cloudera.sqoop.lib.ClobRef(
c.getSubString(1, (int) c.length()));
}
}
}
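A sketch of materializing one BLOB column; the work path, column index and ResultSet are assumptions supplied by the surrounding map task:

Configuration conf = new Configuration();
LargeObjectLoader loader =
    new LargeObjectLoader(conf, new Path("/tmp/sqoop-task-work"));        // assumed path
try {
  com.cloudera.sqoop.lib.BlobRef blob = loader.readBlobRef(3, resultSet); // 'resultSet' assumed
  // blob is inline unless the column exceeded sqoop.inline.lob.length.max
  // (16 MB by default), in which case it points into a LobFile under the work path.
} finally {
  loader.close();
}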

View File

@ -0,0 +1,329 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.Closeable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import com.cloudera.sqoop.io.LobFile;
import com.cloudera.sqoop.io.LobReaderCache;
/**
* Abstract base class that holds a reference to a Blob or a Clob.
* DATATYPE is the type being held (e.g., a byte array).
* CONTAINERTYPE is the type used to hold this data (e.g., BytesWritable).
* ACCESSORTYPE is the type used to access this data in a streaming fashion
* (either an InputStream or a Reader).
*/
public abstract class LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>
implements Closeable, Writable {
public static final Log LOG = LogFactory.getLog(LobRef.class.getName());
protected LobRef() {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = null;
}
protected LobRef(CONTAINERTYPE container) {
this.fileName = null;
this.offset = 0;
this.length = 0;
this.realData = container;
}
protected LobRef(String file, long offset, long length) {
this.fileName = file;
this.offset = offset;
this.length = length;
this.realData = null;
}
// If the data is 'small', it's held directly, here.
private CONTAINERTYPE realData;
/** Internal API to retrieve the data object. */
protected CONTAINERTYPE getDataObj() {
return realData;
}
/** Internal API to set the data object. */
protected void setDataObj(CONTAINERTYPE data) {
this.realData = data;
}
  // If the data is too large to materialize fully, it's written into a file
  // whose path (relative to the rest of the dataset) is recorded here. This
  // takes precedence if the value of fileName is non-null. These records are
// currently written into LobFile-formatted files, which hold multiple
// records. The starting offset and length of the record are recorded here
// as well.
private String fileName;
private long offset;
private long length;
// If we've opened a LobFile object, track our reference to it here.
private LobFile.Reader lobReader;
@Override
@SuppressWarnings("unchecked")
/**
* Clone the current reference object. data is deep-copied; any open
* file handle remains with the original only.
*/
public Object clone() throws CloneNotSupportedException {
LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE> r =
(LobRef<DATATYPE, CONTAINERTYPE, ACCESSORTYPE>) super.clone();
r.lobReader = null; // Reference to opened reader is not duplicated.
if (null != realData) {
r.realData = deepCopyData(realData);
}
return r;
}
@Override
protected synchronized void finalize() throws Throwable {
close();
super.finalize();
}
public void close() throws IOException {
// Discard any open LobReader.
if (null != this.lobReader) {
LobReaderCache.getCache().recycle(this.lobReader);
}
}
/**
* @return true if the LOB data is in an external file; false if
* it materialized inline.
*/
public boolean isExternal() {
return fileName != null;
}
/**
* Convenience method to access #getDataStream(Configuration, Path)
* from within a map task that read this LobRef from a file-based
* InputSplit.
* @param mapContext the Mapper.Context instance that encapsulates
* the current map task.
* @return an object that lazily streams the record to the client.
* @throws IllegalArgumentException if it cannot find the source
* path for this LOB based on the MapContext.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Mapper.Context mapContext)
throws IOException {
InputSplit split = mapContext.getInputSplit();
if (split instanceof FileSplit) {
Path basePath = ((FileSplit) split).getPath().getParent();
return getDataStream(mapContext.getConfiguration(),
basePath);
} else {
throw new IllegalArgumentException(
"Could not ascertain LOB base path from MapContext.");
}
}
/**
* Get access to the LOB data itself.
* This method returns a lazy reader of the LOB data, accessing the
* filesystem for external LOB storage as necessary.
* @param conf the Configuration used to access the filesystem
* @param basePath the base directory where the table records are
* stored.
* @return an object that lazily streams the record to the client.
* @throws IOException if it could not read the LOB from external storage.
*/
public ACCESSORTYPE getDataStream(Configuration conf, Path basePath)
throws IOException {
if (isExternal()) {
// Read from external storage.
Path pathToRead = LobReaderCache.qualify(
new Path(basePath, fileName), conf);
LOG.debug("Retreving data stream from external path: " + pathToRead);
if (lobReader != null) {
// We already have a reader open to a LobFile. Is it the correct file?
if (!pathToRead.equals(lobReader.getPath())) {
// No. Close this.lobReader and get the correct one.
LOG.debug("Releasing previous external reader for "
+ lobReader.getPath());
LobReaderCache.getCache().recycle(lobReader);
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
} else {
lobReader = LobReaderCache.getCache().get(pathToRead, conf);
}
// We now have a LobFile.Reader associated with the correct file. Get to
// the correct offset and return an InputStream/Reader to the user.
if (lobReader.tell() != offset) {
LOG.debug("Seeking to record start offset " + offset);
lobReader.seek(offset);
}
if (!lobReader.next()) {
throw new IOException("Could not locate record at " + pathToRead
+ ":" + offset);
}
return getExternalSource(lobReader);
} else {
// This data is already materialized in memory; wrap it and return.
return getInternalSource(realData);
}
}
/**
* Using the LobFile reader, get an accessor InputStream or Reader to the
* underlying data.
*/
protected abstract ACCESSORTYPE getExternalSource(LobFile.Reader reader)
throws IOException;
/**
* Wrap the materialized data in an InputStream or Reader.
*/
protected abstract ACCESSORTYPE getInternalSource(CONTAINERTYPE data);
/**
* @return the materialized data itself.
*/
protected abstract DATATYPE getInternalData(CONTAINERTYPE data);
/**
* Make a copy of the materialized data.
*/
protected abstract CONTAINERTYPE deepCopyData(CONTAINERTYPE data);
public DATATYPE getData() {
if (isExternal()) {
throw new RuntimeException(
"External LOBs must be read via getDataStream()");
}
return getInternalData(realData);
}
@Override
public String toString() {
if (isExternal()) {
return "externalLob(lf," + fileName + "," + Long.toString(offset)
+ "," + Long.toString(length) + ")";
} else {
return realData.toString();
}
}
@Override
public void readFields(DataInput in) throws IOException {
// The serialization format for this object is:
// boolean isExternal
// if true, then:
// a string identifying the external storage type
// and external-storage-specific data.
// if false, then we use readFieldsInternal() to allow BlobRef/ClobRef
// to serialize as it sees fit.
//
// Currently the only external storage supported is LobFile, identified
// by the string "lf". This serializes with the filename (as a string),
// followed by a long-valued offset and a long-valued length.
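// For example (illustrative values only): an external CLOB stored in the
// LobFile "_lobs/large_obj_0.lob" at offset 100 with length 4096 would be
// written as: true, "lf", "_lobs/large_obj_0.lob", 100L, 4096L.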
boolean isExternal = in.readBoolean();
if (isExternal) {
this.realData = null;
String storageType = Text.readString(in);
if (!storageType.equals("lf")) {
throw new IOException("Unsupported external LOB storage code: "
+ storageType);
}
// Storage type "lf" is LobFile: filename, offset, length.
this.fileName = Text.readString(in);
this.offset = in.readLong();
this.length = in.readLong();
} else {
readFieldsInternal(in);
this.fileName = null;
this.offset = 0;
this.length = 0;
}
}
/**
* Perform the readFields() operation on a fully-materializable record.
* @param in the DataInput to deserialize from.
*/
protected abstract void readFieldsInternal(DataInput in) throws IOException;
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(isExternal());
if (isExternal()) {
Text.writeString(out, "lf"); // storage type "lf" for LobFile.
Text.writeString(out, fileName);
out.writeLong(offset);
out.writeLong(length);
} else {
writeInternal(out);
}
}
/**
* Perform the write() operation on a fully-materializable record.
* @param out the DataOutput to serialize to.
*/
protected abstract void writeInternal(DataOutput out) throws IOException;
protected static final ThreadLocal<Matcher> EXTERNAL_MATCHER =
new ThreadLocal<Matcher>() {
@Override protected Matcher initialValue() {
Pattern externalPattern = Pattern.compile(
"externalLob\\(lf,(.*),([0-9]+),([0-9]+)\\)");
return externalPattern.matcher("");
}
};
}
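A minimal sketch of consuming a BLOB column from inside a map task. It assumes BlobRef (the byte-oriented LobRef subclass); the helper class and its byte-counting behavior are hypothetical, not part of this change.
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.mapreduce.Mapper;

import com.cloudera.sqoop.lib.BlobRef;

public final class BlobStreamHelper {

  /**
   * Counts the bytes of a BLOB column from within a map task, whether the
   * value was materialized inline or spilled to an external LobFile
   * alongside the input split.
   */
  public static long countBytes(BlobRef blob, Mapper.Context context)
      throws IOException {
    if (!blob.isExternal()) {
      // Small values are held inline in the record itself.
      return blob.getData().length;
    }
    // Large values live in a LobFile next to the split; stream them lazily.
    InputStream is = blob.getDataStream(context);
    try {
      long total = 0;
      byte[] buf = new byte[4096];
      int n;
      while ((n = is.read(buf)) != -1) {
        total += n;
      }
      return total;
    } finally {
      is.close();
    }
  }

  private BlobStreamHelper() { }
}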

View File

@ -0,0 +1,54 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* Serialize LOB classes to/from DataInput and DataOutput objects.
*/
public final class LobSerializer {
private LobSerializer() { }
public static void writeClob(
com.cloudera.sqoop.lib.ClobRef clob, DataOutput out) throws IOException {
clob.write(out);
}
public static void writeBlob(
com.cloudera.sqoop.lib.BlobRef blob, DataOutput out) throws IOException {
blob.write(out);
}
public static com.cloudera.sqoop.lib.ClobRef readClobFields(
DataInput in) throws IOException {
com.cloudera.sqoop.lib.ClobRef clob = new com.cloudera.sqoop.lib.ClobRef();
clob.readFields(in);
return clob;
}
public static com.cloudera.sqoop.lib.BlobRef readBlobFields(
DataInput in) throws IOException {
com.cloudera.sqoop.lib.BlobRef blob = new com.cloudera.sqoop.lib.BlobRef();
blob.readFields(in);
return blob;
}
}
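A small round-trip sketch of this serializer, assuming Hadoop's in-memory DataOutputBuffer/DataInputBuffer helpers and the inline ClobRef(String) constructor; the example class and literal value are illustrative.
import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

import com.cloudera.sqoop.lib.ClobRef;
import org.apache.sqoop.lib.LobSerializer;

public final class LobSerializerExample {
  public static void main(String[] args) throws IOException {
    ClobRef original = new ClobRef("hello, world");   // inline CLOB value

    // Serialize the CLOB reference into an in-memory buffer.
    DataOutputBuffer out = new DataOutputBuffer();
    LobSerializer.writeClob(original, out);

    // Deserialize it back from the same bytes.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    ClobRef copy = LobSerializer.readClobFields(in);

    System.out.println(copy.getData());               // prints: hello, world
  }
}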

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
/**
* General error during processing of a SqoopRecord.
*/
@SuppressWarnings("serial")
public class ProcessingException extends Exception {
public ProcessingException() {
super("ProcessingException");
}
public ProcessingException(final String message) {
super(message);
}
public ProcessingException(final Throwable cause) {
super(cause);
}
public ProcessingException(final String message, final Throwable cause) {
super(message, cause);
}
@Override
public String toString() {
String msg = getMessage();
return (null == msg) ? "ProcessingException" : msg;
}
}

View File

@ -0,0 +1,371 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
/**
* Parses a record containing one or more fields. Fields are separated
* by some FIELD_DELIMITER character, e.g. a comma or a ^A character.
* Records are terminated by a RECORD_DELIMITER character, e.g., a newline.
*
* Fields may be (optionally or mandatorily) enclosed by a quoting char
* e.g., '\"'
*
* Fields may contain escaped characters. An escape character may be, e.g.,
* the '\\' character. Any character following an escape character
* is treated literally. e.g., '\n' is recorded as an 'n' character, not a
* newline.
*
* Unexpected results may occur if the enclosing character escapes itself.
* e.g., this cannot parse SQL SELECT statements where the single character
* ['] escapes to [''].
*
* This class is not synchronized. Multiple threads must use separate
* instances of RecordParser.
*
* The fields parsed by RecordParser are backed by an internal buffer
* which is cleared when the next call to parseRecord() is made. If
* the buffer is required to be preserved, you must copy it yourself.
*/
public class RecordParser {
public static final Log LOG = LogFactory.getLog(RecordParser.class.getName());
private enum ParseState {
FIELD_START,
ENCLOSED_FIELD,
UNENCLOSED_FIELD,
ENCLOSED_ESCAPE,
ENCLOSED_EXPECT_DELIMITER,
UNENCLOSED_ESCAPE
}
/**
* An error thrown when parsing fails.
*/
public static class ParseError extends Exception {
public ParseError() {
super("ParseError");
}
public ParseError(final String msg) {
super(msg);
}
public ParseError(final String msg, final Throwable cause) {
super(msg, cause);
}
public ParseError(final Throwable cause) {
super(cause);
}
}
private com.cloudera.sqoop.lib.DelimiterSet delimiters;
private ArrayList<String> outputs;
public RecordParser(final com.cloudera.sqoop.lib.DelimiterSet delimitersIn) {
this.delimiters = delimitersIn.copy();
this.outputs = new ArrayList<String>();
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharSequence input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(Text input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
// TODO(aaron): The parser should be able to handle UTF-8 strings
// as well, to avoid this transcode operation.
return parseRecord(input.toString());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(byte [] input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(ByteBuffer.wrap(input).asCharBuffer());
}
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(char [] input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(CharBuffer.wrap(input));
}
public List<String> parseRecord(ByteBuffer input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
return parseRecord(input.asCharBuffer());
}
// TODO(aaron): Refactor this method to be much shorter.
// CHECKSTYLE:OFF
/**
* Return a list of strings representing the fields of the input line.
* This list is backed by an internal buffer which is cleared by the
* next call to parseRecord().
*/
public List<String> parseRecord(CharBuffer input)
throws com.cloudera.sqoop.lib.RecordParser.ParseError {
if (null == input) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"null input string");
}
/*
This method implements the following state machine to perform
parsing.
Note that there are no restrictions on whether particular characters
(e.g., field-sep, record-sep, etc) are distinct or the same. The
state transitions are processed in the order seen in this comment.
Starting state is FIELD_START
encloser -> ENCLOSED_FIELD
escape char -> UNENCLOSED_ESCAPE
field delim -> FIELD_START (for a new field)
record delim -> stops processing
all other characters get added to the current field -> UNENCLOSED_FIELD
ENCLOSED_FIELD state:
escape char goes to ENCLOSED_ESCAPE
encloser goes to ENCLOSED_EXPECT_DELIMITER
field sep or record sep gets added to the current string
normal letters get added to the current string
ENCLOSED_ESCAPE state:
any character seen here is added literally, back to ENCLOSED_FIELD
ENCLOSED_EXPECT_DELIMITER state:
field sep goes to FIELD_START
record sep halts processing.
all other characters are errors.
UNENCLOSED_FIELD state:
ESCAPE char goes to UNENCLOSED_ESCAPE
FIELD_SEP char goes to FIELD_START
RECORD_SEP char halts processing
normal chars or the enclosing char get added to the current string
UNENCLOSED_ESCAPE:
add character literal to current string, return to UNENCLOSED_FIELD
*/
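/*
  Illustrative example (delimiter choices are hypothetical): with fields
  terminated by ',', records by '\n', enclosed by '"' and escaped by '\\',
  the input
      "hello, world",foo\,bar,baz
  parses into the three fields [hello, world], [foo,bar] and [baz].
*/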
char curChar = com.cloudera.sqoop.lib.DelimiterSet.NULL_CHAR;
ParseState state = ParseState.FIELD_START;
int len = input.length();
StringBuilder sb = null;
outputs.clear();
char enclosingChar = delimiters.getEnclosedBy();
char fieldDelim = delimiters.getFieldsTerminatedBy();
char recordDelim = delimiters.getLinesTerminatedBy();
char escapeChar = delimiters.getEscapedBy();
boolean enclosingRequired = delimiters.isEncloseRequired();
for (int pos = 0; pos < len; pos++) {
curChar = input.get();
switch (state) {
case FIELD_START:
// ready to start processing a new field.
if (null != sb) {
// We finished processing a previous field. Add to the list.
outputs.add(sb.toString());
}
sb = new StringBuilder();
if (enclosingChar == curChar) {
// got an opening encloser.
state = ParseState.ENCLOSED_FIELD;
} else if (escapeChar == curChar) {
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we have a zero-length field. This is a no-op.
continue;
} else if (recordDelim == curChar) {
// we have a zero-length field, that ends processing.
pos = len;
} else {
// current char is part of the field.
state = ParseState.UNENCLOSED_FIELD;
sb.append(curChar);
if (enclosingRequired) {
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Opening field-encloser expected at position " + pos);
}
}
break;
case ENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.ENCLOSED_ESCAPE;
} else if (enclosingChar == curChar) {
// we're at the end of the enclosing field. Expect an EOF or EOR char.
state = ParseState.ENCLOSED_EXPECT_DELIMITER;
} else {
// this is a regular char, or an EOF / EOR inside an encloser. Add to
// the current field string, and remain in this state.
sb.append(curChar);
}
break;
case UNENCLOSED_FIELD:
if (escapeChar == curChar) {
// the next character is escaped. Treat it literally.
state = ParseState.UNENCLOSED_ESCAPE;
} else if (fieldDelim == curChar) {
// we're at the end of this field; may be the start of another one.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
pos = len; // terminate processing immediately.
} else {
// this is a regular char. Add to the current field string,
// and remain in this state.
sb.append(curChar);
}
break;
case ENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// enclosed field processing.
sb.append(curChar);
state = ParseState.ENCLOSED_FIELD;
break;
case ENCLOSED_EXPECT_DELIMITER:
// We were in an enclosed field, but got the final encloser. Now we
// expect either an end-of-field or an end-of-record.
if (fieldDelim == curChar) {
// end of one field is the beginning of the next.
state = ParseState.FIELD_START;
} else if (recordDelim == curChar) {
// stop processing.
pos = len;
} else {
// Don't know what to do with this character.
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Expected delimiter at position " + pos);
}
break;
case UNENCLOSED_ESCAPE:
// Treat this character literally, whatever it is, and return to
// non-enclosed field processing.
sb.append(curChar);
state = ParseState.UNENCLOSED_FIELD;
break;
default:
throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
"Unexpected parser state: " + state);
}
}
if (state == ParseState.FIELD_START && curChar == fieldDelim) {
// we hit an EOF/EOR as the last legal character and we need to mark
// that string as recorded. This if block is outside the for-loop since
// we don't have a physical 'epsilon' token in our string.
if (null != sb) {
outputs.add(sb.toString());
sb = new StringBuilder();
}
}
if (null != sb) {
// There was a field that terminated by running out of chars or an EOR
// character. Add to the list.
outputs.add(sb.toString());
}
return outputs;
}
// CHECKSTYLE:ON
public boolean isEnclosingRequired() {
return delimiters.isEncloseRequired();
}
@Override
public String toString() {
return "RecordParser[" + delimiters.toString() + "]";
}
@Override
public int hashCode() {
return this.delimiters.hashCode();
}
}
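A minimal sketch of driving the parser directly, assuming the DelimiterSet(fields, lines, enclosedBy, escapedBy, encloseRequired) constructor; the example class is illustrative.
import java.util.List;

import com.cloudera.sqoop.lib.DelimiterSet;
import com.cloudera.sqoop.lib.RecordParser;

public final class RecordParserExample {
  public static void main(String[] args) throws RecordParser.ParseError {
    // Comma-separated fields, newline-terminated records, optional
    // double-quote enclosers, backslash escapes.
    DelimiterSet delims = new DelimiterSet(',', '\n', '"', '\\', false);
    RecordParser parser = new RecordParser(delims);

    List<String> fields =
        parser.parseRecord("\"hello, world\",foo\\,bar,baz\n");
    for (String field : fields) {
      // Prints "hello, world", "foo,bar" and "baz" on separate lines.
      System.out.println(field);
    }
  }
}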

View File

@ -0,0 +1,159 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sqoop.lib;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
/**
* Interface implemented by the classes generated by sqoop's orm.ClassWriter.
*/
public abstract class SqoopRecord implements Cloneable, DBWritable,
com.cloudera.sqoop.lib.FieldMappable, Writable {
public SqoopRecord() {
}
public abstract void parse(CharSequence s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(Text s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(byte [] s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(char [] s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(ByteBuffer s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void parse(CharBuffer s)
throws com.cloudera.sqoop.lib.RecordParser.ParseError;
public abstract void loadLargeObjects(
com.cloudera.sqoop.lib.LargeObjectLoader objLoader)
throws SQLException, IOException, InterruptedException;
/**
* Inserts the data in this object into the PreparedStatement, starting
* at parameter 'offset'.
* @return the number of fields written to the statement.
*/
public abstract int write(PreparedStatement stmt, int offset)
throws SQLException;
/**
* Format output data according to the specified delimiters.
*/
public abstract String toString(
com.cloudera.sqoop.lib.DelimiterSet delimiters);
/**
* Use the default delimiters, but only append an end-of-record delimiter
* if useRecordDelim is true.
*/
public String toString(boolean useRecordDelim) {
// Method body should be overridden by generated classes in 1.3.0+
if (useRecordDelim) {
// This is the existing functionality.
return toString();
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(useRecordDelim=false) requires a newer SqoopRecord. "
+ "Please regenerate your record class to use this function.");
}
}
/**
* Format the record according to the specified delimiters. An end-of-record
* delimiter is optional, and only used if useRecordDelim is true. For
* use with TextOutputFormat, calling this with useRecordDelim=false may
* make more sense.
*/
public String toString(
com.cloudera.sqoop.lib.DelimiterSet delimiters, boolean useRecordDelim) {
if (useRecordDelim) {
return toString(delimiters);
} else {
// Setting this to false requires behavior in the generated class.
throw new RuntimeException(
"toString(delimiters, useRecordDelim=false) requires a newer "
+ "SqoopRecord. Please regenerate your record class to use this "
+ "function.");
}
}
@Override
public Object clone() throws CloneNotSupportedException {
return super.clone();
}
/**
* Returns an integer specifying which API format version the
* generated class conforms to. Used by internal APIs for backwards
* compatibility.
* @return the API version this class was generated against.
*/
public abstract int getClassFormatVersion();
/**
* Use the delegate pattern to allow arbitrary processing of the
* fields of this record.
* @param processor A delegate that operates on this object.
* @throws IOException if the processor encounters an IO error when
* operating on this object.
* @throws com.cloudera.sqoop.lib.ProcessingException if the FieldMapProcessor
* encounters a general processing error when operating on this object.
*/
public void delegate(com.cloudera.sqoop.lib.FieldMapProcessor processor)
throws IOException, com.cloudera.sqoop.lib.ProcessingException {
processor.accept(this);
}
@Override
/**
* {@inheritDoc}
* @throws RuntimeException if used with a record that was generated
* before this capability was added (1.1.0).
*/
public Map<String, Object> getFieldMap() {
// Default implementation does not support field iteration.
// ClassWriter should provide an overriding version.
throw new RuntimeException(
"Got null field map from record. Regenerate your record class.");
}
/**
* Allows an arbitrary field to be set programmatically to the
* specified value object. The value object must match the
* type expected for the particular field or a RuntimeException
* will result.
* @throws RuntimeException if the specified field name does not exist.
*/
public void setField(String fieldName, Object fieldVal) {
throw new RuntimeException("This SqoopRecord does not support setField(). "
+ "Regenerate your record class.");
}
}
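A sketch of the field-map and delegate hooks shown above. The processor and helper class are hypothetical, as is the "id" column name; in practice the record argument would be an instance of a class produced by sqoop's code generator.
import java.io.IOException;
import java.util.Map;

import com.cloudera.sqoop.lib.FieldMapProcessor;
import com.cloudera.sqoop.lib.FieldMappable;
import com.cloudera.sqoop.lib.ProcessingException;
import com.cloudera.sqoop.lib.SqoopRecord;

public final class FieldMapExample {

  // A trivial processor that prints every column of a record.
  static class PrintingProcessor implements FieldMapProcessor {
    @Override
    public void accept(FieldMappable record)
        throws IOException, ProcessingException {
      for (Map.Entry<String, Object> entry : record.getFieldMap().entrySet()) {
        System.out.println(entry.getKey() + " = " + entry.getValue());
      }
    }
  }

  // Would typically be handed an instance of a generated record class.
  public static void describe(SqoopRecord record)
      throws IOException, ProcessingException {
    record.setField("id", 42);   // hypothetical column name
    record.delegate(new PrintingProcessor());
  }

  private FieldMapExample() { }
}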