summaryrefslogtreecommitdiffstats
path: root/lurker/libesort/File.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lurker/libesort/File.cpp')
-rw-r--r--lurker/libesort/File.cpp296
1 files changed, 296 insertions, 0 deletions
diff --git a/lurker/libesort/File.cpp b/lurker/libesort/File.cpp
new file mode 100644
index 0000000..2c9ff92
--- /dev/null
+++ b/lurker/libesort/File.cpp
@@ -0,0 +1,296 @@
+/* $Id: File.cpp 1649 2009-10-19 14:35:01Z terpstra $
+ *
+ * File.cpp - Disk segment for commit'd inserts
+ *
+ * Copyright (C) 2002 - Wesley W. Terpstra
+ *
+ * License: GPL
+ *
+ * Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define _FILE_OFFSET_BITS 64
+
+#include "io.h"
+
+#include "File.h"
+#include "Source.h"
+#include "esort.h"
+
+/* #define DEBUG */
+
+#include <cassert>
+#include <cstring>
+#include <iostream>
+
+namespace ESort
+{
+
+class FileSource : public Source
+{
+ protected:
+ File* f;
+ long block;
+ unsigned char* buf;
+ unsigned char* off;
+ bool forward;
+
+ unsigned char* inverseBuffer();
+
+ public:
+ FileSource(File* f_, long block_, bool forward_);
+ ~FileSource();
+
+ int loadBuf();
+ int advance();
+};
+
+FileSource::FileSource(File* f_, long block_, bool forward_)
+ : f(f_), block(block_),
+ buf(new unsigned char[f->p->blockSize()]), off(0), forward(forward_)
+{
+}
+
+FileSource::~FileSource()
+{
+ delete [] buf;
+}
+
+int FileSource::loadBuf()
+{
+ assert (block <= f->blocks && block >= -1);
+
+ if (forward)
+ {
+ if (block == f->blocks)
+ { // hit eof
+ errno = 0;
+ return -1;
+ }
+ }
+ else
+ {
+ if (block == -1)
+ { // hit eof
+ errno = 0;
+ return -1;
+ }
+ }
+
+ if (f->where != block)
+ {
+ off_t go = block;
+ go *= f->p->blockSize();
+
+ if (lseek(f->fd, go, SEEK_SET) != go)
+ {
+#ifdef DEBUG
+ perror(__FILE__ ":" #__LINE__ ": lseek");
+#endif
+ // keep errno
+ return -1;
+ }
+ }
+
+ if (read(f->fd, buf, f->p->blockSize()) != (ssize_t)f->p->blockSize())
+ {
+#ifdef DEBUG
+ perror(__FILE__ ":" #__LINE__ ": read");
+#endif
+ if (errno == 0) errno = EAGAIN; // something obviously wrong
+ return -1;
+ }
+
+ if (forward)
+ {
+ f->where = ++block;
+ off = buf;
+ }
+ else
+ {
+ f->where = block+1;
+ --block;
+
+ off = inverseBuffer(); // flip the order of the records
+ }
+
+ return 0;
+}
+
+unsigned char* FileSource::inverseBuffer()
+{
+ unsigned char* target = new unsigned char[f->p->blockSize()];
+ unsigned char* scratch = new unsigned char[f->p->keySize()];
+
+ unsigned char* r = buf;
+ unsigned char* w = target + f->p->blockSize();
+
+ long thisLength, nextLength; // total key length
+ long thisDup, nextDup; // amount in common
+ long thisDelta, nextDelta; // amount unique
+
+ //----------------
+
+ // teminate the scratch (0, 1) == eof
+ --w;
+ *w = 1;
+ w -= f->p->keyWidth();
+ memset(w, 0, f->p->keyWidth());
+
+ thisLength = 0;
+ for (unsigned int i = 0; i < f->p->keyWidth(); ++i)
+ {
+ thisLength <<= 8;
+ thisLength += *r;
+ ++r;
+ }
+ thisDup = *r;
+ ++r;
+ thisDelta = thisLength - thisDup;
+ if (thisDelta < 0) return w; // corrupt! must have one record at least
+
+ while (thisDelta != -1)
+ {
+ // r points at the unique tail
+ // w point at the last completed record
+
+ // Read the next record's values
+ nextLength = 0;
+ unsigned char* nextr = r + thisDelta;
+ for (unsigned int i = 0; i < f->p->keyWidth(); ++i)
+ {
+ nextLength <<= 8;
+ nextLength += *nextr;
+ ++nextr;
+ }
+
+ nextDup = *nextr;
+ ++nextr;
+
+ nextDelta = nextLength - nextDup;
+ if (nextDelta < -1) return w; // corrupt! (-1 == eof)
+ if (nextDelta == -1) nextDup = 0; // first record has 0 dup'd
+
+ // Skip to next cell
+ long wDelta = thisLength - nextDup;
+ unsigned char* tailw = w - wDelta;
+ unsigned char* nextw = tailw - 1 - f->p->keyWidth();
+
+ // Write out the lengh
+ for (int i = f->p->keyWidth()-1; i >= 0; --i)
+ {
+ nextw[i] = thisLength;
+ thisLength >>= 8;
+ }
+
+ // We use nextDup for our dup
+ *(tailw-1) = nextDup;
+
+ // Now, write out the tail
+ memcpy(scratch+thisDup, r, thisDelta);
+ memcpy(tailw, scratch+nextDup, wDelta);
+
+// std::cout << " SCRATCH: ";
+// std::cout.write((const char*)scratch, thisDup + thisDelta);
+// std::cout << std::endl;
+
+ // move along
+ thisLength = nextLength;
+ thisDup = nextDup;
+ thisDelta = nextDelta;
+ r = nextr;
+ w = nextw;
+ }
+
+ delete [] scratch;
+ delete [] buf;
+ buf = target;
+
+ return w;
+}
+
+int FileSource::advance()
+{
+// printf("off: %d, buf: %d, wid: %d\n", off, buf, f->p->keyWidth());
+ assert ((off - buf) + f->p->keyWidth() + 1 <= f->p->blockSize());
+
+ length = 0;
+ while (1)
+ {
+ for (unsigned int i = 0; i < f->p->keyWidth(); ++i)
+ {
+ length <<= 8;
+ length += *off;
+ ++off;
+ }
+
+ dup = *off;
+ ++off;
+
+ if (length == 0)
+ {
+ if (dup == 0) break;
+ // (0, 1) == eof
+ assert (dup == 1);
+ }
+ else break;
+
+ if (loadBuf() != 0) return -1;
+ }
+
+ tail = off;
+ off += length - dup;
+
+// std::cout << " GET: " << length << ", " << dup << ": ";
+// std::cout.write((const char*)tail, length-dup);
+// std::cout << std::endl;
+
+ return 0;
+}
+
+File::File(const string& id_, int fd_, const Parameters* p_)
+ : fd(fd_), p(p_), where(0), id(id_)
+{
+ off_t len = lseek(fd, 0, SEEK_END);
+ assert (len % p->blockSize() == 0);
+
+ where = blocks = len / p->blockSize();
+ category = categorize(blocks);
+}
+
+File::File(const File& f)
+ : fd(dup(f.fd)), p(f.p), where(f.where), id(f.id), blocks(f.blocks), category(f.category)
+{
+}
+
+File::~File()
+{
+ close(fd);
+}
+
+auto_ptr<Source> File::openBlock(long block, bool forward)
+{
+ assert (block <= blocks);
+
+ auto_ptr<FileSource> out(new FileSource(this, block, forward));
+
+ if (out->loadBuf() != 0)
+ return auto_ptr<Source>(0);
+
+ return auto_ptr<Source>(out);
+}
+
+}