diff options
Diffstat (limited to 'lurker/index/search.cpp')
-rw-r--r-- | lurker/index/search.cpp | 295 |
1 files changed, 295 insertions, 0 deletions
diff --git a/lurker/index/search.cpp b/lurker/index/search.cpp new file mode 100644 index 0000000..22bfb0d --- /dev/null +++ b/lurker/index/search.cpp @@ -0,0 +1,295 @@ +/* $Id: search.cpp 1649 2009-10-19 14:35:01Z terpstra $ + * + * search.cpp - Search for messages in lurker database (optionally delete) + * + * Copyright (C) 2004 - Wesley W. Terpstra + * + * License: GPL + * + * Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define _FILE_OFFSET_BITS 64 + +#include <Search.h> +#include <Keys.h> + +#include <iostream> +#include <sys/stat.h> +#include <sys/types.h> +#include <assert.h> +#include <stdio.h> +#include <unistd.h> + +using namespace std; + +void help(const char* name) +{ + cerr << "Lurker-search (v" << VERSION << ") searches for messages in a lurker database.\n"; + cerr << "\n"; + cerr << "Usage: " << name << " [-c <config-file>] [-k <keyword>] [ -d -f -v -i -q ] <terms>*\n"; + cerr << "\n"; + cerr << "\t-c <config-file> Use this config file for lurker settings\n"; + cerr << "\t-k <keyword> Add the specified keyword tag to hits\n"; + cerr << "\t-d Delete matching messages\n"; + cerr << "\t-f Don't prompt before deleting\n"; + cerr << "\t-v Output message summaries\n"; + cerr << "\t-i Take lurker message ids instead of keywords\n"; + cerr << "\t-q Don't output message ids or status\n"; + cerr << "\n"; + cerr << "Execute a keyword search to find messages.\n"; + cerr << "The following search terms are supported (all must match):\n"; + cerr << " -xxx - select messages which do NOT match this term\n"; + cerr << " id:xxx - select a message by the Message-ID field\n"; + cerr << " rt:xxx - select messages which reply to the chosen Message-ID\n"; + cerr << " th:xxx - select messages included in this thread\n"; + cerr << " ml:xxx - select messages in this mailing list\n"; + cerr << " gr:xxx - select messages in this mailing list group\n"; + cerr << " au:xxx - select messages with this term in the author fields\n"; + cerr << " sb:xxx - select messages with this word in the subject\n"; + cerr << " lang:xx - select messages in this language\n"; + cerr << "\n"; +} + +int main(int argc, char** argv) +{ + int c; + + const char* config = DEFAULT_CONFIG_FILE; + bool erase = false; + bool force = false; + bool verbose = false; + bool quiet = false; + bool ids = false; + string keyword; + + while ((c = getopt(argc, (char*const*)argv, "c:k:dvfiq?")) != -1) + { + switch ((char)c) + { + case 'c': + config = optarg; + break; + case 'k': + keyword = optarg; + break; + case 'd': + erase = true; + break; + case 'f': + force = true; + break; + case 'v': + verbose = true; + break; + case 'q': + quiet = true; + break; + case 'i': + ids = true; + break; + default: + help(argv[0]); + return 1; + } + } + + if (optind >= argc) + { + help(argv[0]); + return 1; + } + + Config cfg; + if (cfg.load(config) != 0) + { + cerr << cfg.getError() << flush; + return 1; + } + + ESort::Writer* db; + auto_ptr<ESort::Reader> dbr; + + if (erase || keyword != "") + { + if (verbose) cerr << "opening " << cfg.dbdir << "/db read-write" << endl; + // Work around g++ 2.95 bug + if (cfg.db_umask != -1) umask(cfg.db_umask); + auto_ptr<ESort::Writer> w + (ESort::Writer::opendb(cfg.dbdir + "/db")); + db = w.get(); + dbr = w; + } + else + { + if (verbose) cerr << "opening " << cfg.dbdir << "/db read-only" << endl; + auto_ptr<ESort::Reader> r + (ESort::Reader::opendb(cfg.dbdir + "/db")); + dbr = r; + } + + if (!dbr.get()) + { + perror("opening database"); + return 1; + } + + vector<Summary> result; + + if (ids) + { + for (; optind < argc; ++optind) + { + if (!argv[optind][0]) continue; // skip empty + MessageId id(argv[optind]); + if (id.serialize() != argv[optind]) + { + cerr << "'" << argv[optind] << "' is not a message-id\n"; + return 1; + } + result.push_back(Summary(id)); + } + } + else + { + Search s(cfg, dbr.get(), ESort::Forward); + + for (; optind < argc; ++optind) + { + if (!argv[optind][0]) continue; // skip empty + s.keyword(argv[optind]); + } + + vector<Summary>::size_type sz = 0; + while (s.pull(1, result) && result.size() == sz+1) + { + sz = result.size(); + } + } + + vector<Summary>::iterator i, e = result.end(); + for (i = result.begin(); i != e; ++i) + { + if (!quiet) + cout << "id: " << i->id().serialize() << "\n"; + if (verbose) + { + string ok; + if ((ok = i->load(dbr.get(), cfg)) != "") + { + cerr << "Failed to load: " << ok << "\n"; + return 1; + } + cout << "sb: " << i->subject() << "\n"; + cout << "au: \"" << i->author_name() << "\" <" + << i->author_email() << ">\n"; + } + } + + if (result.empty()) return 0; + + if (erase && !force) + { + cout << flush; + cerr << "Are you certain you want to delete these messages? (yes/no) [no] " << flush; + string ok; + std::getline(cin, ok); + if (ok != "yes") + { + cerr << "aborted!\n"; + return 1; + } + } + + if (keyword != "") + { + if (!quiet) cerr << "Tagging messages with keyword" << endl; + for (i = result.begin(); i != e; ++i) + { + if (db->insert( + LU_KEYWORD + + keyword + + '\0' + + i->id().raw()) != 0) + { + perror("insert"); + cerr << "Tagging with keyword failed; operation aborted.\n"; + return 1; + } + } + } + + if (erase) + { + if (!quiet) cerr << "Marking messages as deleted" << endl; + + // The idea is that lurker-prune already kills cache which + // refers to a newly imported message (regardless of time). + // Therefore, report the deleted message as new. + MessageId importStamp(time(0)); + + for (i = result.begin(); i != e; ++i) + { + if (db->insert( + LU_KEYWORD + + string(LU_KEYWORD_DELETED) + + '\0' + + i->id().raw()) != 0) + { + perror("insert"); + cerr << "Delete keyword failed; operation aborted.\n"; + return 1; + } + + if (db->insert( + LU_SUMMARY + + i->id().raw() + + LU_MESSAGE_DELETED) != 0) + { + perror("insert"); + cerr << "Delete summary failed; operation aborted.\n"; + return 1; + } + if (db->insert( + LU_CACHE + + importStamp.raw().substr(0, 4) + + i->id().raw()) != 0) + { + perror("insert"); + cerr << "Delete cache eviction failed; operation aborted.\n"; + return 1; + } + } + } + + if (erase || keyword != "") + { + if (!quiet) cerr << "Committing changes to disk" << endl; + if (db->commit() != 0) + { + perror("commit"); + cerr << "Commit failed; operation aborted.\n"; + return 1; + } + + if (!quiet) + { + cerr << "Cache will be automatically corrected when the cronjob next runs.\n"; + } + } + + return 0; +} |