summaryrefslogtreecommitdiffstats
path: root/lurker/common/Keys.h
blob: 07effe5e6c2e1ea0787953a4300cc5abe530ff20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*  $Id: Keys.h 1649 2009-10-19 14:35:01Z terpstra $
 *  
 *  Keys.h - The prefixes used by the various database keys
 *  
 *  Copyright (C) 2002 - Wesley W. Terpstra
 *  
 *  License: GPL
 *  
 *  Authors: 'Wesley W. Terpstra' <wesley@terpstra.ca>
 *  
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; version 2.1.
 *    
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *    
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef KEYS_H
#define KEYS_H

#include <string>
#include <vector>

using std::string;
using std::vector;

/* Record-type prefixes: the first component of every database key. The key
 * layout comment at the end of this header lists one record type per prefix
 * (threading, keyword, summary, cache cleaning, new topics).
 */
// These must have length 1
#define LU_THREADING	"t"
#define LU_KEYWORD	"k"
#define LU_SUMMARY	"s"
#define LU_CACHE	"c"
#define LU_NEW_TOPICS	"n"

/* Single-character field tags. The key layout comment at the end of this
 * header lists them as the component that follows the message_id in a summary
 * record (DELETED/AUTHOR_EMAIL/AUTHOR_NAME/SUBJECT/MBOX).
 *
 * Unlike the prefixes above these are character literals, not string literals.
 * Their values ascend ('d' < 'e' < 'n' < 's' < 'x') in the same order the
 * layout comment lists the fields.
 * NOTE(review): presumably the stored key order relies on this -- confirm
 * before changing any value.
 */
#define LU_MESSAGE_DELETED	'd'
#define LU_MESSAGE_AUTHOR_EMAIL	'e'
#define LU_MESSAGE_AUTHOR_NAME	'n'
#define LU_MESSAGE_SUBJECT	's'
#define LU_MESSAGE_MBOX		'x'

/* special keywords */
#define LU_KEYWORD_DELETED	"id:deleted"	/* a deleted message */

/* fields indexed early */
#define LU_KEYWORD_EVERYTHING	"id:any"	/* empty search */
#define LU_KEYWORD_MESSAGE_ID	"id:"		/* Message id */
#define LU_KEYWORD_REPLY_TO	"rt:"		/* Reply-to */
#define LU_KEYWORD_THREAD	"th:"		/* THread */

/* lu_import_message */
#define	LU_KEYWORD_LIST		"ml:"	/* Mailing List */
#define LU_KEYWORD_AUTHOR	"au:"	/* AUthor */
#define LU_KEYWORD_SUBJECT	"sb:"	/* SuBject */

/* magic automatic keywords */
#define LU_KEYWORD_GROUP	"gr:"	/* Mailing group */
#define LU_KEYWORD_LANGUAGE	"lang:"	/* Language */

/* import processing */
#define LU_KEYWORD_WORD		""	/* body keywords */

/* The longest length a keyword can be */
#define LU_KEYWORD_LEN		80

/* Analyze the string for keywords.
 *
 * Only the declaration is visible in this header. The notes below are read
 * off the parameter names and types and should be confirmed against the
 * definition.
 *
 *   buf, len - the text to analyze and its length (presumably in bytes).
 *   prefix   - presumably one of the LU_KEYWORD_* prefix strings -- TODO confirm.
 *   writefn  - callback taking a keyword as a C string plus a void* argument
 *              and returning int; the meaning of its return value is not
 *              visible here.
 *   arg      - opaque pointer, presumably handed to writefn unchanged.
 *   do_div   - int flag; its meaning is not visible here -- TODO confirm.
 *
 * Returns an int whose meaning is not visible here -- TODO confirm.
 */
extern int my_keyword_digest_string(
	const char* buf,
	int         len,
	const char* prefix,
	int       (*writefn)(const char* keyword, void* arg),
	void*       arg,
	int         do_div);

/* Subject and message-id helpers. Only the declarations are visible in this
 * header, so the behavioural notes are hedged and should be confirmed against
 * the definitions.
 *
 * The standard types are spelled with their std:: qualification so that these
 * declarations do not depend on any using-declaration being in scope.
 */

/* Takes a subject line and returns a const char*.
 * NOTE(review): presumably a pointer into `subject` past some leading part
 * (the name suggests a reply marker or similar) -- confirm.
 */
const char* skipSubjectStart(const char* subject);

/* Returns a string computed from a subject line. "subject_hash" is named as a
 * component of the threading key in the key layout comment at the end of this
 * header.
 */
std::string subject_hash(const char* subject);

/* Returns a list of strings extracted from `str`.
 * NOTE(review): presumably the message ids found in a header value -- confirm
 * the accepted input format.
 */
std::vector<std::string> extract_message_ids(const char* str);

/* The database has these types:
 *
 * Threading:
 *   LU_THREADING 
 *   subject_hash 
 *   message_id
 *   (reply_to_hash*)
 * 
 * Summary:
 *  LU_SUMMARY
 *  message_id
 *  DELETED/AUTHOR_EMAIL/AUTHOR_NAME/SUBJECT/MBOX
 *  ... if a message has any of these then it must have all of them, in that order.
 *  For all except MBOX and DELETED, the value is just a string.
 *  For Mbox:
 *   <mailbox-as-string> '\0' <64bit offset> <32bit length>
 *
 * Keyword:
 *  LU_KEYWORD
 *  keyword
 *  '\0'
 *  mid
 *
 * Cache cleaning:
 *  LU_CACHE
 *  import timestamp as 4 bytes in big-endian order
 *  mid
 * 
 * New topics fields:
 *  LU_NEW_TOPICS
 *  listid '\0'
 *  timestamp
 *  threadhash
 */

#endif