00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_BRASS_INVERTER_H
00022 #define XAPIAN_INCLUDED_BRASS_INVERTER_H
00023
00024 #include "xapian/types.h"
00025
00026 #include <map>
00027 #include <string>
00028
00029 #include "omassert.h"
00030 #include "str.h"
00031 #include "xapian/error.h"
00032
00033 class BrassPostListTable;
00034
00036 const Xapian::termcount DELETED_POSTING = Xapian::termcount(-1);
00037
00039 class Inverter {
00040 friend class BrassPostListTable;
00041
00043 class PostingChanges {
00044 friend class BrassPostListTable;
00045
00047 Xapian::termcount_diff tf_delta;
00048
00050 Xapian::termcount_diff cf_delta;
00051
00053 std::map<Xapian::docid, Xapian::termcount> pl_changes;
00054
00055 public:
00057 PostingChanges(Xapian::docid did, Xapian::termcount wdf)
00058 : tf_delta(1), cf_delta(Xapian::termcount_diff(wdf))
00059 {
00060 pl_changes.insert(std::make_pair(did, wdf));
00061 }
00062
00064 PostingChanges(Xapian::docid did, Xapian::termcount wdf, bool)
00065 : tf_delta(-1), cf_delta(-Xapian::termcount_diff(wdf))
00066 {
00067 pl_changes.insert(std::make_pair(did, DELETED_POSTING));
00068 }
00069
00071 PostingChanges(Xapian::docid did, Xapian::termcount old_wdf,
00072 Xapian::termcount new_wdf)
00073 : tf_delta(0), cf_delta(Xapian::termcount_diff(new_wdf - old_wdf))
00074 {
00075 pl_changes.insert(std::make_pair(did, new_wdf));
00076 }
00077
00079 void add_posting(Xapian::docid did, Xapian::termcount wdf) {
00080 ++tf_delta;
00081 cf_delta += wdf;
00082
00083 pl_changes[did] = wdf;
00084 }
00085
00087 void remove_posting(Xapian::docid did, Xapian::termcount wdf) {
00088 --tf_delta;
00089 cf_delta -= wdf;
00090
00091 pl_changes[did] = DELETED_POSTING;
00092 }
00093
00095 void update_posting(Xapian::docid did, Xapian::termcount old_wdf,
00096 Xapian::termcount new_wdf) {
00097 cf_delta += new_wdf - old_wdf;
00098 pl_changes[did] = new_wdf;
00099 }
00100
00102 Xapian::termcount_diff get_tfdelta() const { return tf_delta; }
00103
00105 Xapian::termcount_diff get_cfdelta() const { return cf_delta; }
00106 };
00107
00109 std::map<std::string, PostingChanges> postlist_changes;
00110
00111 public:
00113 std::map<Xapian::docid, Xapian::termcount> doclen_changes;
00114
00115 public:
00116 void add_posting(Xapian::docid did, const std::string & term,
00117 Xapian::doccount wdf) {
00118 std::map<std::string, PostingChanges>::iterator i;
00119 i = postlist_changes.find(term);
00120 if (i == postlist_changes.end()) {
00121 postlist_changes.insert(
00122 std::make_pair(term, PostingChanges(did, wdf)));
00123 } else {
00124 i->second.add_posting(did, wdf);
00125 }
00126 }
00127
00128 void remove_posting(Xapian::docid did, const std::string & term,
00129 Xapian::doccount wdf) {
00130 std::map<std::string, PostingChanges>::iterator i;
00131 i = postlist_changes.find(term);
00132 if (i == postlist_changes.end()) {
00133 postlist_changes.insert(
00134 std::make_pair(term, PostingChanges(did, wdf, false)));
00135 } else {
00136 i->second.remove_posting(did, wdf);
00137 }
00138 }
00139
00140 void update_posting(Xapian::docid did, const std::string & term,
00141 Xapian::termcount old_wdf,
00142 Xapian::termcount new_wdf) {
00143 std::map<std::string, PostingChanges>::iterator i;
00144 i = postlist_changes.find(term);
00145 if (i == postlist_changes.end()) {
00146 postlist_changes.insert(
00147 std::make_pair(term, PostingChanges(did, old_wdf, new_wdf)));
00148 } else {
00149 i->second.update_posting(did, old_wdf, new_wdf);
00150 }
00151 }
00152
00153 void clear() {
00154 doclen_changes.clear();
00155 postlist_changes.clear();
00156 }
00157
00158 void set_doclength(Xapian::docid did, Xapian::termcount doclen, bool add) {
00159 if (add) {
00160 Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] == DELETED_POSTING);
00161 }
00162 doclen_changes[did] = doclen;
00163 }
00164
00165 void delete_doclength(Xapian::docid did) {
00166 Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] != DELETED_POSTING);
00167 doclen_changes[did] = DELETED_POSTING;
00168 }
00169
00170 bool get_doclength(Xapian::docid did, Xapian::termcount & doclen) const {
00171 std::map<Xapian::docid, Xapian::termcount>::const_iterator i;
00172 i = doclen_changes.find(did);
00173 if (i == doclen_changes.end())
00174 return false;
00175 if (rare(i->second == DELETED_POSTING))
00176 throw Xapian::DocNotFoundError("Document not found: " + str(did));
00177 doclen = i->second;
00178 return true;
00179 }
00180
00182 void flush_doclengths(BrassPostListTable & table);
00183
00185 void flush_post_list(BrassPostListTable & table, const std::string & term);
00186
00188 void flush_all_post_lists(BrassPostListTable & table);
00189
00191 void flush_post_lists(BrassPostListTable & table, const std::string & pfx);
00192
00194 void flush(BrassPostListTable & table);
00195
00196 Xapian::termcount_diff get_tfdelta(const std::string & term) const {
00197 std::map<std::string, PostingChanges>::const_iterator i;
00198 i = postlist_changes.find(term);
00199 if (i == postlist_changes.end())
00200 return 0;
00201 return i->second.get_tfdelta();
00202 }
00203
00204 Xapian::termcount_diff get_cfdelta(const std::string & term) const {
00205 std::map<std::string, PostingChanges>::const_iterator i;
00206 i = postlist_changes.find(term);
00207 if (i == postlist_changes.end())
00208 return 0;
00209 return i->second.get_cfdelta();
00210 }
00211 };
00212
00213 #endif // XAPIAN_INCLUDED_BRASS_INVERTER_H