39 #include <string_view>
59 vector<InMemoryPosting>::iterator
p;
60 p = lower_bound(docs.begin(), docs.end(),
69 docs.insert(
p, std::move(posting));
70 }
else if (!
p->valid) {
75 p->positions.push_back(position);
79 }
else if (use_position) {
81 p->add_position(position);
92 termentry.
tname = tname;
95 vector<InMemoryTermEntry>::iterator
p;
96 p = lower_bound(terms.begin(), terms.end(),
104 terms.insert(
p, std::move(termentry));
105 }
else if (use_position) {
106 p->add_position(position);
116 std::string_view term_)
118 pos(imterm.docs.begin()),
119 end(imterm.docs.end()),
128 auto first_wdf = (*pos).wdf;
170 while (!
at_end() && (*pos).did < did) {
190 last = (
end - 1)->did;
220 if (
pos->positions.empty())
return nullptr;
246 :
pos(doc.terms.begin()), end(doc.terms.end()), terms(doc.terms.size()),
247 started(
false), db(db_), did(did_), document_length(len)
249 LOGLINE(DB,
"InMemoryTermList::InMemoryTermList(): " <<
250 terms <<
" terms starting from " <<
pos->tname);
347 collfreq = termfreq = db->totdocs;
355 Assert(did <= db->termlists.size());
385 }
while (did <= db->termlists.size() && !
db->
termlists[
did - 1].is_valid);
396 while (did <= db->termlists.size() && !
db->
termlists[
did - 1].is_valid) {
419 return "InMemoryAllDocsPostList " +
str(
did);
428 :
Xapian::Database::Internal(TRANSACTION_UNIMPLEMENTED),
429 totdocs(0), totlen(0), positions_present(
false), closed(
false)
470 bool need_read_pos)
const
487 if (i ==
postlists.end() || i->second.term_freq == 0) {
509 *termfreq_ptr = i->second.term_freq;
511 *collfreq_ptr = i->second.collection_freq;
526 return i->second.freq;
535 return i->second.lower_bound;
544 return i->second.upper_bound;
599 string(
" not found"));
610 string(
" not found"));
624 string(
" not found"));
626 for (
auto&& i :
termlists[did - 1].terms) {
627 if (i.wdf > max_wdf) max_wdf = i.wdf;
640 string(
" not found"));
660 string(
" not found"));
686 std::string_view value)
689 if (!value.empty()) {
690 #ifdef __cpp_lib_associative_heterogeneous_insertion
691 metadata.insert_or_assign(key, value);
693 metadata.insert_or_assign(
string(key), value);
696 #ifdef __cpp_lib_associative_heterogeneous_erasure
706 string_view tname)
const
716 auto t = lower_bound(doc.
terms.begin(), doc.
terms.end(),
718 if (t != doc.
terms.end() && t->tname == tname) {
719 return t->positions.size();
726 string_view tname)
const
734 auto t = lower_bound(doc.
terms.begin(), doc.
terms.end(),
736 if (t != doc.
terms.end() && t->tname == tname) {
745 const map<Xapian::valueno, string> &values_)
754 for (
auto&& j : values_) {
758 if ((i.first->second.freq)++ == 0) {
761 i.first->second.lower_bound = j.second;
762 i.first->second.upper_bound = j.second;
765 if (j.second < i.first->second.lower_bound) {
766 i.first->second.lower_bound = j.second;
768 if (j.second > i.first->second.upper_bound) {
769 i.first->second.upper_bound = j.second;
793 string(
" not found"));
799 if (--(i->second.freq) == 0) {
800 i->second.lower_bound.resize(0);
801 i->second.upper_bound.resize(0);
813 for (
auto&& i :
termlists[did - 1].terms) {
816 t->second.collection_freq -= i.wdf;
817 --t->second.term_freq;
824 auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
826 if (
p != t->second.docs.end() &&
p->did == did) {
837 LOGCALL_VOID(DB,
"InMemoryDatabase::replace_document", did | document);
844 if (--(i->second.freq) == 0) {
845 i->second.lower_bound.resize(0);
846 i->second.upper_bound.resize(0);
862 for (
auto&& i :
termlists[did - 1].terms) {
865 t->second.collection_freq -= i.wdf;
866 --t->second.term_freq;
873 auto p = lower_bound(t->second.docs.begin(), t->second.docs.end(),
875 if (
p != t->second.docs.end() &&
p->did == did) {
903 map<Xapian::valueno, string> values;
907 LOGLINE(DB,
"InMemoryDatabase::finish_add_doc(): adding value " <<
918 LOGLINE(DB,
"InMemoryDatabase::finish_add_doc(): adding term " << *i);
964 const string & tname,
976 postlists[tname].add_posting(did, wdf, position, use_position);
977 doc->
add_posting(tname, wdf, position, use_position);
989 return (i->second.term_freq != 0);
1015 if (last == 0 || last ==
totdocs) {
1019 while (!
termlists[first - 1].is_valid) ++first;
1020 while (!
termlists[last - 1].is_valid) --last;
1042 #ifdef DISABLE_GPL_LIBXAPIAN
1043 # error GPL source we cannot relicense included in libxapian
A PostList iterating all docids when they form a contiguous range.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Xapian::docid get_docid() const
Return the current docid.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
bool at_end() const
Return true if the current position is past the last entry in this list.
InMemoryAllDocsPostList(const InMemoryDatabase *db)
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
std::string get_description() const
Return a string description of this object.
Xapian::termcount get_wdf_upper_bound() const
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
Class for all-terms lists over several databases.
A database held entirely in memory.
Xapian::termcount get_wdfdocmax(Xapian::docid did) const
Get the max wdf in document.
std::string get_value_upper_bound(Xapian::valueno slot) const
Get an upper bound on the values stored in the given value slot.
static void throw_database_closed()
TermList * open_term_list_direct(Xapian::docid did) const
Like open_term_list() but without MultiTermList wrapper.
Xapian::termcount get_doclength(Xapian::docid did) const
TermList * open_term_list(Xapian::docid did) const
Xapian::termcount get_wdf_upper_bound(std::string_view term) const
Get an upper bound on the wdf of term term.
std::vector< InMemoryDoc > termlists
Xapian::termcount get_doclength_lower_bound() const
Get a lower bound on the length of a document in this DB.
TermList * open_metadata_keylist(std::string_view prefix) const
Open a termlist returning each metadata key.
void commit()
Implementation of virtual methods: see Database for details.
std::vector< Xapian::termcount > doclengths
void cancel()
Cancel pending modifications to the database.
std::string get_value_lower_bound(Xapian::valueno slot) const
Get a lower bound on the values stored in the given value slot.
void replace_document(Xapian::docid did, const Xapian::Document &document)
Xapian::doccount get_value_freq(Xapian::valueno slot) const
Return the frequency of a given value slot.
void get_used_docid_range(Xapian::docid &first, Xapian::docid &last) const
Find lowest and highest docids actually in use.
std::string get_description() const
Return a string describing this object.
void add_values(Xapian::docid did, const std::map< Xapian::valueno, std::string > &values_)
std::map< Xapian::valueno, ValueStats > valuestats
PostList * open_post_list(std::string_view tname) const
Return a PostList suitable for use in a PostingIterator.
Xapian::docid add_document(const Xapian::Document &document)
void make_term(const std::string &tname)
Xapian::docid get_lastdocid() const
Return the last used document id of this (sub) database.
void delete_document(Xapian::docid did)
LeafPostList * open_leaf_post_list(std::string_view term, bool need_read_pos) const
Create a LeafPostList for use during a match.
PositionList * open_position_list(Xapian::docid did, std::string_view tname) const
Xapian::docid make_doc(const std::string &docdata)
Xapian::termcount positionlist_count(Xapian::docid did, std::string_view tname) const
bool reopen()
Reopen the database to the latest available revision.
Xapian::Database::Internal * update_lock(int flags)
Lock a read-only database for writing or unlock a writable database.
void make_posting(InMemoryDoc *doc, const std::string &tname, Xapian::docid did, Xapian::termpos position, Xapian::termcount wdf, bool use_position=true)
Xapian::termcount get_unique_terms(Xapian::docid did) const
Get the number of unique terms in document.
bool doc_exists(Xapian::docid did) const
std::vector< std::string > doclists
Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const
Open a handle on a document.
Xapian::doccount get_doccount() const
bool term_exists(std::string_view term) const
void close()
Close the database.
TermList * open_allterms(std::string_view prefix) const
friend class InMemoryDocument
std::map< std::string, std::string, std::less<> > metadata
Xapian::totallength totlen
std::map< std::string, InMemoryTerm, std::less<> > postlists
std::vector< std::map< Xapian::valueno, std::string > > valuelists
friend class InMemoryAllDocsPostList
void finish_add_doc(Xapian::docid did, const Xapian::Document &document)
void set_metadata(std::string_view key, std::string_view value)
Set the metadata associated with a given key.
Xapian::termcount get_doclength_upper_bound() const
Get an upper bound on the length of a document in this DB.
void get_freqs(std::string_view term, Xapian::doccount *termfreq_ptr, Xapian::termcount *collfreq_ptr) const
Returns frequencies for a term.
std::string get_metadata(std::string_view key) const
Get the metadata associated with a given key.
Xapian::totallength get_total_length() const
Return the total length of all documents in this database.
bool has_positions() const
Check whether this database contains any positional information.
InMemoryDatabase()
Create and open an in-memory database.
Class representing a document and the terms indexing it.
void add_posting(const std::string &tname, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
std::vector< InMemoryTermEntry > terms
PositionList from an InMemory DB or a Document object.
void assign(Xapian::VecCOW< Xapian::termpos > &&positions_)
Move assign positional data.
A PostList in an in-memory database.
std::string get_description() const
Return a string description of this object.
Xapian::termcount wdf_upper_bound
Xapian::docid get_docid() const
Return the current docid.
InMemoryPositionList mypositions
List of positions of the current term.
InMemoryPostList(const InMemoryDatabase *db, const InMemoryTerm &imterm, std::string_view term_)
void get_docid_range(Xapian::docid &first, Xapian::docid &last) const
Get the bounds on the range of docids this PostList can return.
PostList * skip_to(Xapian::docid did, double w_min)
Skip forward to the specified docid.
Xapian::termcount get_wdf() const
Return the wdf for the document at the current position.
Xapian::termcount get_wdfdocmax() const
std::vector< InMemoryPosting >::const_iterator pos
bool at_end() const
Return true if the current position is past the last entry in this list.
PositionList * open_position_list() const
Read the position list for the term in the current document and return a pointer to it (not owned by ...
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
PositionList * read_position_list()
Read the position list for the term in the current document and return a pointer to it (owned by the ...
Xapian::termcount get_wdf_upper_bound() const
std::vector< InMemoryPosting >::const_iterator end
Xapian::VecCOW< Xapian::termpos > positions
Xapian::VecCOW< Xapian::termpos > positions
InMemoryTermList(Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db, Xapian::docid did, const InMemoryDoc &doc, Xapian::termcount len)
Xapian::termcount get_approx_size() const
Return approximate size of this termlist.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
void accumulate_stats(Xapian::Internal::ExpandStats &stats) const
Collate weighting information for the current term.
Xapian::doccount get_termfreq() const
Return the term frequency for the term at the current position.
Xapian::Internal::intrusive_ptr< const InMemoryDatabase > db
TermList * next()
Advance the current position to the next term in the termlist.
std::vector< InMemoryTermEntry >::const_iterator pos
TermList * skip_to(std::string_view term)
Skip forward to the specified term.
Xapian::termcount document_length
std::vector< InMemoryTermEntry >::const_iterator end
Xapian::termcount positionlist_count() const
Return the length of the position list for the current position.
PositionList * positionlist_begin() const
Return PositionList for the current position.
Xapian::termcount collection_freq
void add_posting(Xapian::docid did, Xapian::termcount wdf, Xapian::termpos position, bool use_position)
Xapian::termcount term_freq
Abstract base class for leaf postlists.
Xapian::termcount collfreq
The collection frequency of the term.
std::string term
The term name for this postlist (empty for an alldocs postlist).
Indicates an attempt to access a closed database.
DatabaseError indicates some sort of database related error.
Virtual base class for Database internals.
void dtor_called()
Helper to process uncommitted changes when a writable db is destroyed.
Indicates an attempt to access a document not present in the database.
Abstract base class for a document.
Class representing a document.
std::string get_data() const
Get the document data.
ValueIterator values_begin() const
Start iterating the values in this document.
TermIterator termlist_end() const noexcept
End iterator corresponding to termlist_begin().
TermIterator termlist_begin() const
Start iterating the terms in this document.
ValueIterator values_end() const noexcept
End iterator corresponding to values_begin().
Collates statistics while calculating term weight in an ESet.
void accumulate(size_t shard_index, Xapian::termcount wdf, Xapian::termcount doclen, Xapian::doccount subtf, Xapian::doccount subdbsize)
Abstract base class for postlists.
PostList * next()
Advance the current position to the next document in the postlist.
Xapian::doccount termfreq
Estimate of the number of documents this PostList will return.
A smart pointer that uses intrusive reference counting.
Abstract base class for iterating term positions in a document.
Class for iterating over term positions.
Abstract base class for termlists.
std::string current_term
The current term.
size_t shard_index
Which shard of a multidatabase this is from.
Class for iterating over a list of terms.
PositionIterator positionlist_end() const noexcept
Return an end PositionIterator for the current term.
Xapian::termcount get_wdf() const
Return the wdf for the term at the current position.
PositionIterator positionlist_begin() const
Return a PositionIterator for the current term.
UnimplementedError indicates an attempt to use an unimplemented feature.
Class for iterating over document values.
Xapian::valueno get_valueno() const
Return the value slot number for the current position.
Iterate all document ids when they form a contiguous range.
#define LOGCALL(CATEGORY, TYPE, FUNC, PARAMS)
#define LOGCALL_VOID(CATEGORY, FUNC, PARAMS)
Hierarchy of classes which Xapian can throw as exceptions.
Collate statistics and calculate the term weights for the ESet.
Iterate all terms in an in-memory database.
C++ class definition for inmemory database access.
A document read from an InMemoryDatabase.
string str(int value)
Convert int to std::string.
The Xapian namespace contains public interfaces for the Xapian library.
unsigned XAPIAN_TERMCOUNT_BASE_TYPE termcount
A count of terms.
unsigned valueno
The number for a value slot in a document.
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
unsigned XAPIAN_TERMPOS_BASE_TYPE termpos
A term position within a document or query.
XAPIAN_TOTALLENGTH_TYPE totallength
The total length of all documents in a database.
#define AssertEqParanoid(A, B)
Convert types to std::string.
Class to hold statistics for a given slot.
Class for iterating over document values.