backends/quartz/quartz_database.cc

Go to the documentation of this file.
00001 /* quartz_database.cc: quartz database
00002  *
00003  * Copyright 1999,2000,2001 BrightStation PLC
00004  * Copyright 2001 Hein Ragas
00005  * Copyright 2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007,2008 Olly Betts
00007  * Copyright 2006 Richard Boulton
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00022  * USA
00023  */
00024 
00025 #include <config.h>
00026 
00027 #include "safeerrno.h"
00028 
00029 #include "quartz_database.h"
00030 #include "utils.h"
00031 #include "omdebug.h"
00032 #include "autoptr.h"
00033 #include <xapian/error.h>
00034 #include <xapian/valueiterator.h>
00035 
00036 #include "quartz_postlist.h"
00037 #include "quartz_alldocspostlist.h"
00038 #include "quartz_termlist.h"
00039 #include "quartz_positionlist.h"
00040 #include "quartz_utils.h"
00041 #include "quartz_record.h"
00042 #include "quartz_values.h"
00043 #include "quartz_document.h"
00044 #include "quartz_alltermslist.h"
00045 
00046 #include <sys/types.h>
00047 #include "safesysstat.h"
00048 #include "safefcntl.h"
00049 #include "safeunistd.h"
00050 #ifdef HAVE_SYS_UTSNAME_H
00051 # include <sys/utsname.h>
00052 #endif
00053 
00054 #ifdef __CYGWIN__
00055 # include "safewindows.h"
00056 # include <sys/cygwin.h>
00057 #endif
00058 
00059 #include <list>
00060 #include <string>
00061 
00062 using namespace std;
00063 using namespace Xapian;
00064 
00065 /* This finds the tables, opens them at consistent revisions, manages
00066  * determining the current and next revision numbers, and stores handles
00067  * to the tables.
00068  */
00069 QuartzDatabase::QuartzDatabase(const string &quartz_dir, int action,
00070                                unsigned int block_size)
00071         : db_dir(quartz_dir),
00072           readonly(action == OM_DB_READONLY),
00073           metafile(db_dir + "/meta"),
00074           postlist_table(db_dir, readonly),
00075           positionlist_table(db_dir, readonly),
00076           termlist_table(db_dir, readonly),
00077           value_table(db_dir, readonly),
00078           record_table(db_dir, readonly),
00079           log(db_dir + "/log")
00080 {
00081     DEBUGCALL(DB, void, "QuartzDatabase", quartz_dir << ", " << action <<
00082               ", " << block_size);
00083     static const char *acts[] = {
00084         "Open readonly", "Create or open", "Create", "Create or overwrite",
00085         "Open" // , "Overwrite"
00086     };
00087     log.make_entry(string(acts[action]) + " database at `" + db_dir + "'");
00088 
00089     // set cache size parameters, etc, here.
00090 
00091     // open environment here
00092 
00093     bool dbexists = database_exists();
00094     // open tables
00095     if (action == OM_DB_READONLY) {
00096         if (!dbexists) {
00097             // Catch pre-0.6 Xapian databases and give a better error
00098             if (file_exists(db_dir + "/attribute_DB"))
00099                 throw Xapian::DatabaseVersionError("Cannot open database at `" + db_dir + "' - it was created by a pre-0.6 version of Xapian");
00100             throw Xapian::DatabaseOpeningError("Cannot open database at `" + db_dir + "' - it does not exist");
00101         }
00102         // Can still allow searches even if recovery is needed
00103         open_tables_consistent();
00104     } else {
00105         if (!dbexists) {
00106             // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00107             if (action == Xapian::DB_OPEN) {
00108                 // Catch pre-0.6 Xapian databases and give a better error
00109                 if (file_exists(db_dir + "/attribute_DB"))
00110                     throw Xapian::DatabaseVersionError("Cannot open database at `" + db_dir + "' - it was created by a pre-0.6 version of Xapian");
00111                 throw Xapian::DatabaseOpeningError("Cannot open database at `" + db_dir + "' - it does not exist");
00112             }
00113 
00114             // Create the directory for the database, if it doesn't exist
00115             // already.
00116             bool fail = false;
00117             struct stat statbuf;
00118             if (stat(db_dir, &statbuf) == 0) {
00119                 if (!S_ISDIR(statbuf.st_mode)) fail = true;
00120             } else if (errno != ENOENT || mkdir(db_dir, 0755) == -1) {
00121                 fail = true;
00122             }
00123             if (fail) {
00124                 throw Xapian::DatabaseCreateError("Cannot create directory `"
00125                                                    + db_dir + "'", errno);
00126             }
00127             get_database_write_lock();
00128 
00129             create_and_open_tables(block_size);
00130             return;
00131         }
00132 
00133         log.make_entry("Old database exists");
00134         if (action == Xapian::DB_CREATE) {
00135             throw Xapian::DatabaseCreateError("Can't create new database at `" +
00136                     db_dir + "': a database already exists and I was told "
00137                     "not to overwrite it");
00138         }
00139 
00140         get_database_write_lock();
00141         // if we're overwriting, pretend the db doesn't exists
00142         // FIXME: if we allow Xapian::DB_OVERWRITE, check it here
00143         if (action == Xapian::DB_CREATE_OR_OVERWRITE) {
00144             create_and_open_tables(block_size);
00145             return;
00146         }
00147 
00148         // Get latest consistent version
00149         open_tables_consistent();
00150 
00151         // Check that there are no more recent versions of tables.  If there
00152         // are, perform recovery by writing a new revision number to all
00153         // tables.
00154         if (record_table.get_open_revision_number() !=
00155             postlist_table.get_latest_revision_number()) {
00156             quartz_revision_number_t new_revision = get_next_revision_number();
00157 
00158             log.make_entry("Detected partially applied changes, updating "
00159                             "all revision numbers to consistent state (" +
00160                             om_tostring(new_revision) + ") to proceed - "
00161                             "this will remove partial changes");
00162             postlist_table.commit(new_revision);
00163             positionlist_table.commit(new_revision);
00164             termlist_table.commit(new_revision);
00165             value_table.commit(new_revision);
00166             record_table.commit(new_revision);
00167         }
00168         if (record_table.get_doccount() == 0) {
00169             record_table.set_total_length_and_lastdocid(0, record_table.get_lastdocid());
00170         }
00171     }
00172 }
00173 
00174 QuartzDatabase::~QuartzDatabase()
00175 {
00176     DEBUGCALL(DB, void, "~QuartzDatabase", "");
00177     // Only needed for a writable database: dtor_called();
00178     log.make_entry("Closing database");
00179     if (!readonly) release_database_write_lock();
00180 }
00181 
00182 bool
00183 QuartzDatabase::database_exists() {
00184     DEBUGCALL(DB, bool, "QuartzDatabase::database_exists", "");
00185     return record_table.exists() &&
00186            postlist_table.exists() &&
00187            positionlist_table.exists() &&
00188            termlist_table.exists() &&
00189            value_table.exists();
00190 }
00191 
00192 void
00193 QuartzDatabase::create_and_open_tables(unsigned int block_size)
00194 {
00195     DEBUGCALL(DB, void, "QuartzDatabase::create_and_open_tables", "");
00196     //FIXME - check that database directory exists.
00197 
00198     log.make_entry("Creating new database");
00199     // Create postlist_table first, and record_table last.  Existence of
00200     // record_table is considered to imply existence of the database.
00201     metafile.create();
00202     postlist_table.create(block_size);
00203     positionlist_table.create(block_size);
00204     termlist_table.create(block_size);
00205     value_table.create(block_size);
00206     record_table.create(block_size);
00207 
00208     Assert(database_exists());
00209 
00210     log.make_entry("Opening new database");
00211     metafile.open();
00212     record_table.open();
00213     value_table.open();
00214     termlist_table.open();
00215     positionlist_table.open();
00216     postlist_table.open();
00217 
00218     // Check consistency
00219     quartz_revision_number_t revision = record_table.get_open_revision_number();
00220     if (revision != value_table.get_open_revision_number() ||
00221         revision != termlist_table.get_open_revision_number() ||
00222         revision != positionlist_table.get_open_revision_number() ||
00223         revision != postlist_table.get_open_revision_number()) {
00224         log.make_entry("Revisions are not consistent: have " +
00225                         om_tostring(revision) + ", " +
00226                         om_tostring(value_table.get_open_revision_number()) + ", " +
00227                         om_tostring(termlist_table.get_open_revision_number()) + ", " +
00228                         om_tostring(positionlist_table.get_open_revision_number()) + " and " +
00229                         om_tostring(postlist_table.get_open_revision_number()));
00230         throw Xapian::DatabaseCreateError("Newly created tables are not in consistent state");
00231     }
00232     log.make_entry("Opened tables at revision " + om_tostring(revision));
00233     record_table.set_total_length_and_lastdocid(0, 0);
00234 }
00235 
00236 void
00237 QuartzDatabase::open_tables_consistent()
00238 {
00239     DEBUGCALL(DB, void, "QuartzDatabase::open_tables_consistent", "");
00240     // Open record_table first, since it's the last to be written to,
00241     // and hence if a revision is available in it, it should be available
00242     // in all the other tables (unless they've moved on already).
00243     //
00244     // If we find that a table can't open the desired revision, we
00245     // go back and open record_table again, until record_table has
00246     // the same revision as the last time we opened it.
00247 
00248     log.make_entry("Opening tables at latest consistent revision");
00249     metafile.open();
00250     record_table.open();
00251     quartz_revision_number_t revision = record_table.get_open_revision_number();
00252 
00253     bool fully_opened = false;
00254     int tries = 100;
00255     int tries_left = tries;
00256     while (!fully_opened && (tries_left--) > 0) {
00257         log.make_entry("Trying revision " + om_tostring(revision));
00258 
00259         bool opened;
00260         opened = value_table.open(revision);
00261         if (opened) opened = termlist_table.open(revision);
00262         if (opened) opened = positionlist_table.open(revision);
00263         if (opened) opened = postlist_table.open(revision);
00264         if (opened) {
00265             fully_opened = true;
00266         } else {
00267             // Couldn't open consistent revision: two cases possible:
00268             // i)   An update has completed and a second one has begun since
00269             //      record was opened.  This leaves a consistent revision
00270             //      available, but not the one we were trying to open.
00271             // ii)  Tables have become corrupt / have no consistent revision
00272             //      available.  In this case, updates must have ceased.
00273             //
00274             // So, we reopen the record table, and check its revision number,
00275             // if it's changed we try the opening again, otherwise we give up.
00276             //
00277             record_table.open();
00278             quartz_revision_number_t newrevision =
00279                     record_table.get_open_revision_number();
00280             if (revision == newrevision) {
00281                 // Revision number hasn't changed - therefore a second index
00282                 // sweep hasn't begun and the system must have failed.  Database
00283                 // is inconsistent.
00284                 log.make_entry("Cannot open all tables at revision in record table: " + om_tostring(revision));
00285                 throw Xapian::DatabaseCorruptError("Cannot open tables at consistent revisions");
00286             }
00287             revision = newrevision;
00288         }
00289     }
00290 
00291     if (!fully_opened) {
00292         log.make_entry("Cannot open all tables in a consistent state - keep changing too fast, giving up after " + om_tostring(tries) + " attempts");
00293         throw Xapian::DatabaseModifiedError("Cannot open tables at stable revision - changing too fast");
00294     }
00295 
00296     log.make_entry("Opened tables at revision " + om_tostring(revision));
00297 }
00298 
00299 void
00300 QuartzDatabase::open_tables(quartz_revision_number_t revision)
00301 {
00302     DEBUGCALL(DB, void, "QuartzDatabase::open_tables", revision);
00303     log.make_entry("Opening tables at revision " + om_tostring(revision));
00304     metafile.open();
00305     record_table.open(revision);
00306     value_table.open(revision);
00307     termlist_table.open(revision);
00308     positionlist_table.open(revision);
00309     postlist_table.open(revision);
00310     log.make_entry("Opened tables at revision " + om_tostring(revision));
00311 }
00312 
00313 quartz_revision_number_t
00314 QuartzDatabase::get_revision_number() const
00315 {
00316     DEBUGCALL(DB, quartz_revision_number_t, "QuartzDatabase::get_revision_number", "");
00317     // We could use any table here, theoretically.
00318     RETURN(postlist_table.get_open_revision_number());
00319 }
00320 
00321 quartz_revision_number_t
00322 QuartzDatabase::get_next_revision_number() const
00323 {
00324     DEBUGCALL(DB, quartz_revision_number_t, "QuartzDatabase::get_next_revision_number", "");
00325     /* We _must_ use postlist_table here, since it is always the first
00326      * to be written, and hence will have the greatest available revision
00327      * number.
00328      */
00329     quartz_revision_number_t new_revision =
00330             postlist_table.get_latest_revision_number();
00331     new_revision += 1;
00332     RETURN(new_revision);
00333 }
00334 
00335 void
00336 QuartzDatabase::set_revision_number(quartz_revision_number_t new_revision)
00337 {
00338     DEBUGCALL(DB, void, "QuartzDatabase::set_revision_number", new_revision);
00339     postlist_table.commit(new_revision);
00340     positionlist_table.commit(new_revision);
00341     termlist_table.commit(new_revision);
00342     value_table.commit(new_revision);
00343     record_table.commit(new_revision);
00344 }
00345 
00346 void
00347 QuartzDatabase::reopen()
00348 {
00349     DEBUGCALL(DB, void, "QuartzDatabase::reopen", "");
00350     if (readonly) {
00351         open_tables_consistent();
00352     }
00353 }
00354 
00355 void
00356 QuartzDatabase::get_database_write_lock()
00357 {
00358     DEBUGCALL(DB, void, "QuartzDatabase::get_database_write_lock", "");
00359     // FIXME:: have a backoff strategy to avoid stalling on a stale lockfile
00360 #ifdef HAVE_SYS_UTSNAME_H
00361     const char *hostname;
00362     struct utsname host;
00363     if (!uname(&host)) {
00364         host.nodename[0] = '\0';
00365     }
00366     hostname = host.nodename;
00367 #elif defined(HAVE_GETHOSTNAME)
00368     char hostname[256];
00369     if (gethostname(hostname, sizeof hostname) == -1) {
00370         *hostname = '\0';
00371     }
00372 #else
00373     const char *hostname = "";
00374 #endif
00375     string tempname = db_dir + "/db_lock.tmp." + om_tostring(getpid()) + "." +
00376             hostname + "." +
00377             om_tostring(reinterpret_cast<long>(this)); /* should work within
00378                                                           one process too! */
00379     DEBUGLINE(DB, "Temporary file " << tempname << " created");
00380     int num_tries = 5;
00381     while (true) {
00382         num_tries--;
00383         if (num_tries < 0) {
00384             throw Xapian::DatabaseLockError("Unable to acquire database write lock "
00385                                       + db_dir + "/db_lock");
00386         }
00387 
00388         int tempfd = open(tempname.c_str(), O_CREAT | O_EXCL, 0600);
00389         if (tempfd < 0) {
00390             throw Xapian::DatabaseLockError("Unable to create " + tempname +
00391                                       ": " + strerror(errno),
00392                                       errno);
00393         }
00394 
00395 #if defined __CYGWIN__
00396         close(tempfd);
00397         // Cygwin carefully tries to recreate Unix semantics for rename(), so
00398         // we can't use rename for locking.  And link() works on NTFS but not
00399         // FAT.  So we use the underlying API call and translate the paths.
00400         char fr[MAX_PATH], to[MAX_PATH];
00401         cygwin_conv_to_win32_path(tempname.c_str(), fr);
00402         cygwin_conv_to_win32_path((db_dir + "/db_lock").c_str(), to);
00403         if (MoveFile(fr, to)) {
00404             return;
00405         }
00406 #elif defined __WIN32__
00407         // MS Windows can't rename an open file, so make sure we close it
00408         // first.
00409         close(tempfd);
00410         // MS Windows doesn't support link(), but rename() won't overwrite an
00411         // existing file, which is exactly the semantics we want.
00412         if (rename(tempname.c_str(), (db_dir + "/db_lock").c_str()) == 0) {
00413             return;
00414         }
00415 #else
00416         /* Now link(2) the temporary file to the lockfile name.
00417          * If either link() returns 0, or the temporary file has
00418          * link count 2 afterwards, then the lock succeeded.
00419          * Otherwise, it failed.  (Reference: Linux open() manpage)
00420          */
00421         /* FIXME: sort out all these unlinks */
00422         int result = link(tempname, db_dir + "/db_lock");
00423         if (result == 0) {
00424             close(tempfd);
00425             unlink(tempname);
00426             return;
00427         }
00428 #ifdef XAPIAN_DEBUG_VERBOSE
00429         int link_errno = errno;
00430 #endif
00431         struct stat statbuf;
00432         int statresult = fstat(tempfd, &statbuf);
00433         int fstat_errno = errno;
00434         close(tempfd);
00435         unlink(tempname);
00436         if (statresult != 0) {
00437             throw Xapian::DatabaseLockError("Unable to fstat() temporary file " +
00438                                       tempname + " while locking: " +
00439                                       strerror(fstat_errno));
00440         }
00441         if (statbuf.st_nlink == 2) {
00442             /* success */
00443             return;
00444         }
00445         DEBUGLINE(DB, "link() returned " << result << "(" <<
00446                   strerror(link_errno) << ")");
00447         DEBUGLINE(DB, "Links in statbuf: " << statbuf.st_nlink);
00448         /* also failed */
00449 #endif
00450     }
00451 }
00452 
00453 void
00454 QuartzDatabase::release_database_write_lock()
00455 {
00456     DEBUGCALL(DB, void, "QuartzDatabase::release_database_write_lock", "");
00457     unlink(db_dir + "/db_lock");
00458 }
00459 
00460 void
00461 QuartzDatabase::apply()
00462 {
00463     DEBUGCALL(DB, void, "QuartzDatabase::apply", "");
00464     if (!postlist_table.is_modified() &&
00465         !positionlist_table.is_modified() &&
00466         !termlist_table.is_modified() &&
00467         !value_table.is_modified() &&
00468         !record_table.is_modified()) {
00469         log.make_entry("No modifications to apply");
00470         return;
00471     }
00472 
00473     quartz_revision_number_t old_revision = get_revision_number();
00474     quartz_revision_number_t new_revision = get_next_revision_number();
00475 
00476     log.make_entry("Applying modifications.  New revision number is " + om_tostring(new_revision));
00477 
00478     try {
00479         postlist_table.commit(new_revision);
00480         positionlist_table.commit(new_revision);
00481         termlist_table.commit(new_revision);
00482         value_table.commit(new_revision);
00483         record_table.commit(new_revision);
00484 
00485         log.make_entry("Modifications succeeded");
00486     } catch (...) {
00487         // Modifications failed.  Wipe all the modifications from memory.
00488         log.make_entry("Attempted modifications failed.  Wiping partial modifications");
00489 
00490         // Reopen tables with old revision number.
00491         log.make_entry("Reopening tables without modifications: old revision is " + om_tostring(old_revision));
00492         open_tables(old_revision);
00493 
00494         // Increase revision numbers to new revision number plus one,
00495         // writing increased numbers to all tables.
00496         new_revision += 1;
00497         log.make_entry("Increasing revision number in all tables to " + om_tostring(new_revision));
00498 
00499         try {
00500             set_revision_number(new_revision);
00501 
00502             // This cancel() causes any buffered changes to be thrown away,
00503             // and the buffer to be reinitialised with the old entry count.
00504             cancel();
00505         } catch (const Xapian::Error & e) {
00506             string msg("Setting revision number failed: ");
00507             msg += e.get_description();
00508             log.make_entry(msg);
00509             throw Xapian::DatabaseError("Modifications failed, and cannot set revision numbers in database to a consistent state");
00510         }
00511         throw;
00512     }
00513 }
00514 
00515 void
00516 QuartzDatabase::cancel()
00517 {
00518     DEBUGCALL(DB, void, "QuartzDatabase::cancel", "");
00519     postlist_table.cancel();
00520     positionlist_table.cancel();
00521     termlist_table.cancel();
00522     value_table.cancel();
00523     record_table.cancel();
00524 }
00525 
00526 Xapian::doccount
00527 QuartzDatabase::get_doccount() const
00528 {
00529     DEBUGCALL(DB, Xapian::doccount, "QuartzDatabase::get_doccount", "");
00530     RETURN(record_table.get_doccount());
00531 }
00532 
00533 Xapian::docid
00534 QuartzDatabase::get_lastdocid() const
00535 {
00536     DEBUGCALL(DB, Xapian::docid, "QuartzDatabase::get_lastdocid", "");
00537     RETURN(record_table.get_lastdocid());
00538 }
00539 
00540 Xapian::doclength
00541 QuartzDatabase::get_avlength() const
00542 {
00543     DEBUGCALL(DB, Xapian::doclength, "QuartzDatabase::get_avlength", "");
00544     Xapian::doccount docs = record_table.get_doccount();
00545     if (docs == 0) RETURN(0);
00546     RETURN(double(record_table.get_total_length()) / docs);
00547 }
00548 
00549 Xapian::doclength
00550 QuartzDatabase::get_doclength(Xapian::docid did) const
00551 {
00552     DEBUGCALL(DB, Xapian::doclength, "QuartzDatabase::get_doclength", did);
00553     Assert(did != 0);
00554 
00555     QuartzTermList termlist(0, &termlist_table, did, 0);
00556     RETURN(termlist.get_doclength());
00557 }
00558 
00559 Xapian::doccount
00560 QuartzDatabase::get_termfreq(const string & tname) const
00561 {
00562     DEBUGCALL(DB, Xapian::doccount, "QuartzDatabase::get_termfreq", tname);
00563     Assert(!tname.empty());
00564 
00565     RETURN(postlist_table.get_termfreq(tname));
00566     RETURN(postlist_table.get_collection_freq(tname));
00567 }
00568 
00569 Xapian::termcount
00570 QuartzDatabase::get_collection_freq(const string & tname) const
00571 {
00572     DEBUGCALL(DB, Xapian::termcount, "QuartzDatabase::get_collection_freq", tname);
00573     Assert(!tname.empty());
00574 
00575     RETURN(postlist_table.get_collection_freq(tname));
00576 }
00577 
00578 bool
00579 QuartzDatabase::term_exists(const string & tname) const
00580 {
00581     DEBUGCALL(DB, bool, "QuartzDatabase::term_exists", tname);
00582     Assert(!tname.empty());
00583     AutoPtr<Bcursor> cursor(postlist_table.cursor_get());
00584     // FIXME: nasty C&P from backends/quartz/quartz_postlist.cc
00585     string key = pack_string_preserving_sort(tname);
00586     return cursor->find_entry(key);
00587 }
00588 
00589 bool
00590 QuartzDatabase::has_positions() const
00591 {
00592     return positionlist_table.get_entry_count() > 0;
00593 }
00594 
00595 
00596 LeafPostList *
00597 QuartzDatabase::open_post_list(const string& tname) const
00598 {
00599     DEBUGCALL(DB, LeafPostList *, "QuartzDatabase::open_post_list", tname);
00600     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00601 
00602     if (tname.empty()) {
00603         RETURN(new QuartzAllDocsPostList(ptrtothis,
00604                                          &termlist_table,
00605                                          get_doccount()));
00606     }
00607 
00608     RETURN(new QuartzPostList(ptrtothis,
00609                               &postlist_table,
00610                               &positionlist_table,
00611                               tname));
00612 }
00613 
00614 TermList *
00615 QuartzDatabase::open_term_list(Xapian::docid did) const
00616 {
00617     DEBUGCALL(DB, TermList *, "QuartzDatabase::open_term_list", did);
00618     Assert(did != 0);
00619 
00620     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00621     RETURN(new QuartzTermList(ptrtothis, &termlist_table, did, get_doccount()));
00622 }
00623 
00624 Xapian::Document::Internal *
00625 QuartzDatabase::open_document(Xapian::docid did, bool lazy) const
00626 {
00627     DEBUGCALL(DB, Xapian::Document::Internal *, "QuartzDatabase::open_document",
00628               did << ", " << lazy);
00629     Assert(did != 0);
00630 
00631     Xapian::Internal::RefCntPtr<const QuartzDatabase> ptrtothis(this);
00632     RETURN(new QuartzDocument(ptrtothis,
00633                               &value_table,
00634                               &record_table,
00635                               did, lazy));
00636 }
00637 
00638 PositionList *
00639 QuartzDatabase::open_position_list(Xapian::docid did,
00640                                    const string & tname) const
00641 {
00642     Assert(did != 0);
00643 
00644     AutoPtr<QuartzPositionList> poslist(new QuartzPositionList());
00645     poslist->read_data(&positionlist_table, did, tname);
00646     if (poslist->get_size() == 0) {
00647         // Check that term / document combination exists.
00648         // If the doc doesn't exist, this will throw Xapian::DocNotFoundError:
00649         AutoPtr<TermList> tl(open_term_list(did));
00650         tl->skip_to(tname);
00651         if (tl->at_end() || tl->get_termname() != tname)
00652             throw Xapian::RangeError("Can't open position list: requested term is not present in document.");
00653     }
00654 
00655     return poslist.release();
00656 }
00657 
00658 TermList *
00659 QuartzDatabase::open_allterms(const string & prefix) const
00660 {
00661     DEBUGCALL(DB, TermList *, "QuartzDatabase::open_allterms", "");
00662     AutoPtr<Bcursor> pl_cursor(postlist_table.cursor_get());
00663     RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzDatabase>(this),
00664                                   pl_cursor, postlist_table.get_entry_count(), prefix));
00665 }
00666 
00667 size_t QuartzWritableDatabase::flush_threshold = 0;
00668 
00669 QuartzWritableDatabase::QuartzWritableDatabase(const string &dir, int action,
00670                                                int block_size)
00671         : freq_deltas(),
00672           doclens(),
00673           mod_plists(),
00674           database_ro(dir, action, block_size),
00675           total_length(database_ro.record_table.get_total_length()),
00676           lastdocid(database_ro.get_lastdocid()),
00677           changes_made(0)
00678 {
00679     DEBUGCALL(DB, void, "QuartzWritableDatabase", dir << ", " << action << ", "
00680               << block_size);
00681     if (flush_threshold == 0) {
00682         const char *p = getenv("XAPIAN_FLUSH_THRESHOLD");
00683         if (p) flush_threshold = atoi(p);
00684     }
00685     if (flush_threshold == 0) flush_threshold = 10000;
00686 }
00687 
00688 QuartzWritableDatabase::~QuartzWritableDatabase()
00689 {
00690     DEBUGCALL(DB, void, "~QuartzWritableDatabase", "");
00691     dtor_called();
00692 }
00693 
00694 void
00695 QuartzWritableDatabase::flush()
00696 {
00697     if (transaction_active())
00698         throw Xapian::InvalidOperationError("Can't flush during a transaction");
00699     if (changes_made) do_flush_const();
00700 }
00701 
00702 void
00703 QuartzWritableDatabase::do_flush_const() const
00704 {
00705     DEBUGCALL(DB, void, "QuartzWritableDatabase::do_flush_const", "");
00706 
00707     database_ro.postlist_table.merge_changes(mod_plists, doclens, freq_deltas);
00708 
00709     // Update the total document length and last used docid.
00710     database_ro.record_table.set_total_length_and_lastdocid(total_length,
00711                                                             lastdocid);
00712     database_ro.apply();
00713     freq_deltas.clear();
00714     doclens.clear();
00715     mod_plists.clear();
00716     changes_made = 0;
00717 }
00718 
00719 Xapian::docid
00720 QuartzWritableDatabase::add_document(const Xapian::Document & document)
00721 {
00722     DEBUGCALL(DB, Xapian::docid,
00723               "QuartzWritableDatabase::add_document", document);
00724     // Make sure the docid counter doesn't overflow.
00725     if (lastdocid == Xapian::docid(-1))
00726         throw Xapian::DatabaseError("Run out of docids - you'll have to use copydatabase to eliminate any gaps before you can add more documents");
00727     // Use the next unused document ID.
00728     RETURN(add_document_(++lastdocid, document));
00729 }
00730 
/** Store @a document under the caller-chosen ID @a did and buffer the
 *  matching postlist and statistics changes.
 *
 *  The record, values, position lists and termlist are written through
 *  database_ro's tables.  Term frequency deltas (freq_deltas), postlist
 *  changes (mod_plists), the document length (doclens) and total_length are
 *  updated in this object.
 *
 *  @param did       Document ID to use; asserted to be non-zero, and
 *                   asserted not to have an entry in doclens already.
 *  @param document  The document to store.
 *  @return @a did.
 *
 *  Any exception raised while storing is rethrown after calling cancel().
 *  After a successful add, changes_made is incremented and, once it reaches
 *  flush_threshold with no transaction active, do_flush_const() is called.
 */
00731 Xapian::docid
00732 QuartzWritableDatabase::add_document_(Xapian::docid did,
00733                                       const Xapian::Document & document)
00734 {
00735     Assert(did != 0);
00736     try {
00737         // Add the record using that document ID.
00738         database_ro.record_table.replace_record(document.get_data(), did);
00739 
00740         // Set the values.
00741         {
00742             Xapian::ValueIterator value = document.values_begin();
00743             Xapian::ValueIterator value_end = document.values_end();
00744             string s;
00745             database_ro.value_table.encode_values(s, value, value_end);
00746             database_ro.value_table.set_encoded_values(did, s);
00747         }
00748 
              // The document length is accumulated as the sum of the wdf of
              // every term in the document.
00749         quartz_doclen_t new_doclen = 0;
00750         {
00751             Xapian::TermIterator term = document.termlist_begin();
00752             Xapian::TermIterator term_end = document.termlist_end();
00753             for ( ; term != term_end; ++term) {
00754                 termcount wdf = term.get_wdf();
00755                 // Calculate the new document length
00756                 new_doclen += wdf;
00757 
00758                 string tname = *term;
                      // freq_deltas holds a pair of deltas per term: .first is
                      // what get_termfreq() adds to the stored term frequency,
                      // .second is what get_collection_freq() adds to the
                      // stored collection frequency.
00759                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00760                 i = freq_deltas.find(tname);
00761                 if (i == freq_deltas.end()) {
00762                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00763                 } else {
00764                     ++i->second.first;
00765                     i->second.second += wdf;
00766                 }
00767 
00768                 // Add did to tname's postlist
00769                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00770                 j = mod_plists.find(tname);
00771                 if (j == mod_plists.end()) {
00772                     map<docid, pair<char, termcount> > m;
00773                     j = mod_plists.insert(make_pair(tname, m)).first;
00774                 }
                      // 'A' tags the buffered posting as an addition; the wdf
                      // is stored alongside it.
00775                 j->second[did] = make_pair('A', wdf);
00776 
                      // A position list is only stored when the term has at
                      // least one position.
00777                 if (term.positionlist_begin() != term.positionlist_end()) {
00778                     database_ro.positionlist_table.set_positionlist(
00779                         did, tname,
00780                         term.positionlist_begin(), term.positionlist_end());
00781                 }
00782             }
00783         }
00784 
00785         // Set the termlist
00786         database_ro.termlist_table.set_entries(did,
00787                 document.termlist_begin(), document.termlist_end(),
00788                 new_doclen, false);
00789 
00790         // Set the new document length
00791         Assert(doclens.find(did) == doclens.end());
00792         doclens[did] = new_doclen;
00793         total_length += new_doclen;
00794     } catch (...) {
00795         // If an error occurs while adding a document, or doing any other
00796         // transaction, the modifications so far must be cleared before
00797         // returning control to the user - otherwise partial modifications will
00798         // persist in memory, and eventually get written to disk.
00799         cancel();
00800         throw;
00801     }
00802 
00803     // FIXME: this should be done by checking memory usage, not the number of
00804     // changes.
00805     // We could also look at:
00806     // * mod_plists.size()
00807     // * doclens.size()
00808     // * freq_deltas.size()
00809     //
00810     // cout << "+++ mod_plists.size() " << mod_plists.size() <<
00811     //     ", doclens.size() " << doclens.size() <<
00812     //     ", freq_deltas.size() " << freq_deltas.size() << endl;
00813     if (++changes_made >= flush_threshold && !transaction_active())
00814         do_flush_const();
00815 
00816     return did;
00817 }
00818 
/** Delete the document with ID @a did.
 *
 *  The record is deleted first, outside the try block, so a failure there
 *  propagates without cancel() being called.  After that the values,
 *  position lists and termlist are removed through database_ro's tables, and
 *  for every term read from the stored termlist a negative delta is applied
 *  to freq_deltas and a 'D' entry is recorded in mod_plists.  The stored
 *  document length is subtracted from total_length and any doclens entry for
 *  @a did is erased.
 *
 *  @param did  Document ID to delete; asserted to be non-zero.
 *
 *  Exceptions raised after the record has been deleted are rethrown after
 *  calling cancel().  On success changes_made is incremented and, once it
 *  reaches flush_threshold with no transaction active, do_flush_const() is
 *  called.
 */
00819 void
00820 QuartzWritableDatabase::delete_document(Xapian::docid did)
00821 {
00822     DEBUGCALL(DB, void, "QuartzWritableDatabase::delete_document", did);
00823     Assert(did != 0);
00824 
00825     // Remove the record.  If this fails, just propagate the exception since
00826     // the state should still be consistent (most likely it's
00827     // DocNotFoundError).
00828     database_ro.record_table.delete_record(did);
00829 
00830     try {
00831         // Remove the values
00832         database_ro.value_table.delete_all_values(did);
00833 
00834         // OK, now add entries to remove the postings in the underlying record.
00835         Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
00836         QuartzTermList termlist(ptrtothis,
00837                                 &database_ro.termlist_table,
00838                                 did, get_doccount());
00839 
00840         total_length -= termlist.get_doclength();
00841 
              // Walk every term of the stored termlist, undoing its
              // contribution to the statistics and postlists.
00842         termlist.next();
00843         while (!termlist.at_end()) {
00844             string tname = termlist.get_termname();
00845             database_ro.positionlist_table.delete_positionlist(did, tname);
00846             termcount wdf = termlist.get_wdf();
00847 
                  // Mirror image of the update made when adding: one fewer
                  // document for the term, and wdf fewer occurrences.
00848             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00849             i = freq_deltas.find(tname);
00850             if (i == freq_deltas.end()) {
00851                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00852             } else {
00853                 --i->second.first;
00854                 i->second.second -= wdf;
00855             }
00856 
00857             // Remove did from tname's postlist
00858             map<string, map<docid, pair<char, termcount> > >::iterator j;
00859             j = mod_plists.find(tname);
00860             if (j == mod_plists.end()) {
00861                 map<docid, pair<char, termcount> > m;
00862                 j = mod_plists.insert(make_pair(tname, m)).first;
00863             }
00864 
                  // 'D' tags the buffered posting as a deletion; the wdf slot
                  // is set to 0 in that case.
00865             map<docid, pair<char, termcount> >::iterator k;
00866             k = j->second.find(did);
00867             if (k == j->second.end()) {
00868                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00869             } else {
00870                 // Deleting a document we added/modified since the last flush.
00871                 k->second = make_pair('D', 0u);
00872             }
00873 
00874             termlist.next();
00875         }
00876 
00877         // Remove the termlist.
00878         database_ro.termlist_table.delete_termlist(did);
00879 
00880         // Remove the new doclength.
00881         doclens.erase(did);
00882     } catch (...) {
00883         // If an error occurs while deleting a document, or doing any other
00884         // transaction, the modifications so far must be cleared before
00885         // returning control to the user - otherwise partial modifications will
00886         // persist in memory, and eventually get written to disk.
00887         cancel();
00888         throw;
00889     }
00890 
00891     if (++changes_made >= flush_threshold && !transaction_active())
00892         do_flush_const();
00893 }
00894 
/** Replace the document stored under @a did with @a document.
 *
 *  If @a did is greater than lastdocid, lastdocid is raised to @a did and the
 *  document is simply added with add_document_().  Otherwise the postings of
 *  the stored termlist are backed out (negative freq_deltas, 'D' entries in
 *  mod_plists, position lists dropped for terms absent from the new
 *  document), and then the record, values, postings, position lists,
 *  termlist and document length are rewritten from @a document.
 *
 *  @param did       Document ID to replace; asserted to be non-zero.
 *  @param document  The replacement document.
 *
 *  A Xapian::DocNotFoundError raised inside the try block is handled by
 *  adding the document with add_document_() instead.  Any other exception is
 *  rethrown after calling cancel().  On the normal replace path changes_made
 *  is incremented and, once it reaches flush_threshold with no transaction
 *  active, do_flush_const() is called.
 */
00895 void
00896 QuartzWritableDatabase::replace_document(Xapian::docid did,
00897                                          const Xapian::Document & document)
00898 {
00899     DEBUGCALL(DB, void, "QuartzWritableDatabase::replace_document", did << ", " << document);
00900     Assert(did != 0);
00901 
00902     try {
00903         if (did > lastdocid) {
00904             lastdocid = did;
00905             // If this docid is above the highwatermark, then we can't be
00906             // replacing an existing document.
                  // add_document_() does its own changes_made accounting, so
                  // return without reaching the check at the end of this
                  // function.
00907             (void)add_document_(did, document);
00908             return;
00909         }
00910 
00911         // OK, now add entries to remove the postings in the underlying record.
00912         Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
00913         QuartzTermList termlist(ptrtothis,
00914                                 &database_ro.termlist_table,
00915                                 did, get_doccount());
              // Iterator over the NEW document's terms; it is advanced with
              // skip_to() below to test whether each old term is still
              // present.
00916         Xapian::TermIterator term = document.termlist_begin();
00917 
              // First pass: back out every term of the stored termlist.
00918         termlist.next();
00919         while (!termlist.at_end()) {
00920             string tname = termlist.get_termname();
00921             termcount wdf = termlist.get_wdf();
00922 
00923             map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00924             i = freq_deltas.find(tname);
00925             if (i == freq_deltas.end()) {
00926                 freq_deltas.insert(make_pair(tname, make_pair(-1, -termcount_diff(wdf))));
00927             } else {
00928                 --i->second.first;
00929                 i->second.second -= wdf;
00930             }
00931 
00932             // Remove did from tname's postlist
00933             map<string, map<docid, pair<char, termcount> > >::iterator j;
00934             j = mod_plists.find(tname);
00935             if (j == mod_plists.end()) {
00936                 map<docid, pair<char, termcount> > m;
00937                 j = mod_plists.insert(make_pair(tname, m)).first;
00938             }
00939 
00940             map<docid, pair<char, termcount> >::iterator k;
00941             k = j->second.find(did);
00942             if (k == j->second.end()) {
00943                 j->second.insert(make_pair(did, make_pair('D', 0u)));
00944             } else {
00945                 // Modifying a document we added/modified since the last flush.
00946                 k->second = make_pair('D', 0u);
00947             }
00948 
                  // Only drop the stored position list here when the new
                  // document does not contain this term; terms that remain
                  // are handled in the second pass below.
00949             term.skip_to(tname);
00950             if (term == document.termlist_end() || *term != tname) {
00951                 database_ro.positionlist_table.delete_positionlist(did, tname);
00952             }
00953 
00954             termlist.next();
00955         }
00956 
              // Subtract the length read from the stored termlist.
00957         total_length -= termlist.get_doclength();
00958 
00959         // Replace the record
00960         database_ro.record_table.replace_record(document.get_data(), did);
00961 
00962         // FIXME: we read the values delete them and then replace in case
00963         // they come from where they're going!  Better to ask Document
00964         // nicely and shortcut in this case!
00965         {
00966             Xapian::ValueIterator value = document.values_begin();
00967             Xapian::ValueIterator value_end = document.values_end();
00968             string s;
00969             database_ro.value_table.encode_values(s, value, value_end);
00970 
00971             // Replace the values.
00972             database_ro.value_table.delete_all_values(did);
00973             database_ro.value_table.set_encoded_values(did, s);
00974         }
00975 
              // Second pass: apply every term of the new document.
00976         quartz_doclen_t new_doclen = 0;
00977         {
00978             term = document.termlist_begin();
00979             Xapian::TermIterator term_end = document.termlist_end();
00980             for ( ; term != term_end; ++term) {
00981                 // Calculate the new document length
00982                 termcount wdf = term.get_wdf();
00983                 new_doclen += wdf;
00984 
00985                 string tname = *term;
00986                 map<string, pair<termcount_diff, termcount_diff> >::iterator i;
00987                 i = freq_deltas.find(tname);
00988                 if (i == freq_deltas.end()) {
00989                     freq_deltas.insert(make_pair(tname, make_pair(1, termcount_diff(wdf))));
00990                 } else {
00991                     ++i->second.first;
00992                     i->second.second += wdf;
00993                 }
00994 
00995                 // Add did to tname's postlist
00996                 map<string, map<docid, pair<char, termcount> > >::iterator j;
00997                 j = mod_plists.find(tname);
00998                 if (j == mod_plists.end()) {
00999                     map<docid, pair<char, termcount> > m;
01000                     j = mod_plists.insert(make_pair(tname, m)).first;
01001                 }
                      // If an entry already exists for this docid it is
                      // asserted to be a 'D' entry; it is re-tagged 'M' with
                      // the new wdf.  Otherwise the posting is recorded as
                      // an 'A' (addition).
01002                 map<docid, pair<char, termcount> >::iterator k;
01003                 k = j->second.find(did);
01004                 if (k != j->second.end()) {
01005                     Assert(k->second.first == 'D');
01006                     k->second.first = 'M';
01007                     k->second.second = wdf;
01008                 } else {
01009                     j->second.insert(make_pair(did, make_pair('A', wdf)));
01010                 }
01011 
                      // Store the new positions, or delete any position list
                      // for this term if the new document has none for it.
01012                 PositionIterator it = term.positionlist_begin();
01013                 PositionIterator it_end = term.positionlist_end();
01014                 if (it != it_end) {
01015                     database_ro.positionlist_table.set_positionlist(
01016                         did, tname, it, it_end);
01017                 } else {
01018                     database_ro.positionlist_table.delete_positionlist(did, tname);
01019                 }
01020             }
01021         }
01022 
01023         // Set the termlist
01024         database_ro.termlist_table.set_entries(did,
01025                 document.termlist_begin(), document.termlist_end(),
01026                 new_doclen, false);
01027 
01028         // Set the new document length
01029         doclens[did] = new_doclen;
01030         total_length += new_doclen;
01031     } catch (const Xapian::DocNotFoundError &) {
              // NOTE(review): presumably raised when no document is stored
              // under did - confirm which call throws.  The document is added
              // instead, and add_document_() does its own changes_made
              // accounting.
01032         (void)add_document_(did, document);
01033         return;
01034     } catch (...) {
01035         // If an error occurs while replacing a document, or doing any other
01036         // transaction, the modifications so far must be cleared before
01037         // returning control to the user - otherwise partial modifications will
01038         // persist in memory, and eventually get written to disk.
01039         cancel();
01040         throw;
01041     }
01042 
01043     if (++changes_made >= flush_threshold && !transaction_active())
01044         do_flush_const();
01045 }
01046 
/** Return the document count reported by database_ro.get_doccount().
 *
 *  @return The value of database_ro.get_doccount(), unmodified.
 */
01047 Xapian::doccount
01048 QuartzWritableDatabase::get_doccount() const
01049 {
01050     DEBUGCALL(DB, Xapian::doccount, "QuartzWritableDatabase::get_doccount", "");
01051     RETURN(database_ro.get_doccount());
01052 }
01053 
/** Return this object's lastdocid member.
 *
 *  This is the counter that add_document() increments and that
 *  replace_document() raises when given a larger ID; cancel() reloads it
 *  from database_ro.
 *
 *  @return The current value of lastdocid.
 */
01054 Xapian::docid
01055 QuartzWritableDatabase::get_lastdocid() const
01056 {
01057     DEBUGCALL(DB, Xapian::docid, "QuartzWritableDatabase::get_lastdocid", "");
01058     RETURN(lastdocid);
01059 }
01060 
/** Return the average document length.
 *
 *  @return total_length divided by database_ro.get_doccount(), computed in
 *          double precision, or 0 when the document count is 0 (which also
 *          avoids dividing by zero).
 */
01061 Xapian::doclength
01062 QuartzWritableDatabase::get_avlength() const
01063 {
01064     DEBUGCALL(DB, Xapian::doclength, "QuartzWritableDatabase::get_avlength", "");
01065     Xapian::doccount docs = database_ro.get_doccount();
01066     if (docs == 0) RETURN(0);
01067     RETURN(double(total_length) / docs);
01068 }
01069 
/** Return the length of document @a did.
 *
 *  @param did  Document ID to look up.
 *  @return The length held in doclens for @a did if there is one, otherwise
 *          the result of database_ro.get_doclength(did).
 */
01070 Xapian::doclength
01071 QuartzWritableDatabase::get_doclength(Xapian::docid did) const
01072 {
01073     DEBUGCALL(DB, Xapian::doclength, "QuartzWritableDatabase::get_doclength", did);
          // Lengths set by add/replace are held in doclens, so check it
          // before falling back to database_ro.
01074     map<docid, termcount>::const_iterator i = doclens.find(did);
01075     if (i != doclens.end()) RETURN(i->second);
01076 
01077     RETURN(database_ro.get_doclength(did));
01078 }
01079 
/** Return the term frequency of @a tname, including buffered changes.
 *
 *  @param tname  The term to look up.
 *  @return database_ro.get_termfreq(tname) plus the first element of the
 *          freq_deltas entry for @a tname, if such an entry exists.
 */
01080 Xapian::doccount
01081 QuartzWritableDatabase::get_termfreq(const string & tname) const
01082 {
01083     DEBUGCALL(DB, Xapian::doccount, "QuartzWritableDatabase::get_termfreq", tname);
01084     Xapian::doccount termfreq = database_ro.get_termfreq(tname);
01085     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01086     i = freq_deltas.find(tname);
          // .first of the pair is the term frequency delta.
01087     if (i != freq_deltas.end()) termfreq += i->second.first;
01088     RETURN(termfreq);
01089 }
01090 
/** Return the collection frequency of @a tname, including buffered changes.
 *
 *  @param tname  The term to look up.
 *  @return database_ro.get_collection_freq(tname) plus the second element of
 *          the freq_deltas entry for @a tname, if such an entry exists.
 */
01091 Xapian::termcount
01092 QuartzWritableDatabase::get_collection_freq(const string & tname) const
01093 {
01094     DEBUGCALL(DB, Xapian::termcount, "QuartzWritableDatabase::get_collection_freq", tname);
01095     Xapian::termcount collfreq = database_ro.get_collection_freq(tname);
01096 
01097     map<string, pair<termcount_diff, termcount_diff> >::const_iterator i;
01098     i = freq_deltas.find(tname);
          // .second of the pair is the summed-wdf (collection frequency) delta.
01099     if (i != freq_deltas.end()) collfreq += i->second.second;
01100 
01101     RETURN(collfreq);
01102 }
01103 
/** Report whether @a tname is present in the database.
 *
 *  @param tname  The term to test.
 *  @return true when get_termfreq(tname) is non-zero, so buffered
 *          freq_deltas are taken into account.
 */
01104 bool
01105 QuartzWritableDatabase::term_exists(const string & tname) const
01106 {
01107     DEBUGCALL(DB, bool, "QuartzWritableDatabase::term_exists", tname);
01108     RETURN(get_termfreq(tname) != 0);
01109 }
01110 
/** Return the result of database_ro.has_positions().
 *
 *  @return Whatever database_ro.has_positions() reports, unmodified.
 */
01111 bool
01112 QuartzWritableDatabase::has_positions() const
01113 {
01114     return database_ro.has_positions();
01115 }
01116 
01117 
/** Open a posting list for @a tname.
 *
 *  @param tname  The term whose postings are wanted.  An empty string
 *                selects a QuartzAllDocsPostList built over the termlist
 *                table instead of a per-term list.
 *  @return A newly allocated QuartzAllDocsPostList (empty @a tname) or
 *          QuartzPostList (otherwise).
 *  @throws Xapian::UnimplementedError if mod_plists holds buffered changes
 *          for @a tname while a transaction is active.
 *
 *  When mod_plists holds buffered changes for @a tname and no transaction is
 *  active, do_flush_const() is called before the list is opened.
 */
01118 LeafPostList *
01119 QuartzWritableDatabase::open_post_list(const string& tname) const
01120 {
01121     DEBUGCALL(DB, LeafPostList *, "QuartzWritableDatabase::open_post_list", tname);
01122     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01123 
          // The empty term selects the all-documents list; this path does
          // not consult mod_plists and never flushes.
01124     if (tname.empty()) {
01125         RETURN(new QuartzAllDocsPostList(ptrtothis,
01126                                          &database_ro.termlist_table,
01127                                          get_doccount()));
01128     }
01129 
01130     // Need to flush iff we've got buffered changes to this term's postlist.
01131     map<string, map<docid, pair<char, termcount> > >::const_iterator j;
01132     j = mod_plists.find(tname);
01133     if (j != mod_plists.end()) {
01134         if (transaction_active())
01135             throw Xapian::UnimplementedError("Can't open modified postlist during a transaction");
01136         do_flush_const();
01137     }
01138 
01139     RETURN(new QuartzPostList(ptrtothis,
01140                               &database_ro.postlist_table,
01141                               &database_ro.positionlist_table,
01142                               tname));
01143 }
01144 
/** Open the term list of document @a did.
 *
 *  @param did  Document ID; asserted to be non-zero.
 *  @return A newly allocated QuartzTermList reading from
 *          database_ro.termlist_table, constructed with the current
 *          get_doccount().
 */
01145 TermList *
01146 QuartzWritableDatabase::open_term_list(Xapian::docid did) const
01147 {
01148     DEBUGCALL(DB, TermList *, "QuartzWritableDatabase::open_term_list",
01149               did);
01150     Assert(did != 0);
01151 
01152     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01153     RETURN(new QuartzTermList(ptrtothis, &database_ro.termlist_table, did,
01154                               get_doccount()));
01155 }
01156 
/** Open document @a did.
 *
 *  @param did   Document ID; asserted to be non-zero.
 *  @param lazy  Passed through unchanged to the QuartzDocument constructor.
 *  @return A newly allocated QuartzDocument backed by database_ro's value
 *          and record tables.
 */
01157 Xapian::Document::Internal *
01158 QuartzWritableDatabase::open_document(Xapian::docid did, bool lazy) const
01159 {
01160     DEBUGCALL(DB, Xapian::Document::Internal *, "QuartzWritableDatabase::open_document",
01161               did << ", " << lazy);
01162     Assert(did != 0);
01163 
01164     Xapian::Internal::RefCntPtr<const QuartzWritableDatabase> ptrtothis(this);
01165     RETURN(new QuartzDocument(ptrtothis,
01166                               &database_ro.value_table,
01167                               &database_ro.record_table,
01168                               did, lazy));
01169 }
01170 
/** Open the position list of term @a tname in document @a did.
 *
 *  @param did    Document ID; asserted to be non-zero.
 *  @param tname  The term whose positions are wanted.
 *  @return A newly allocated QuartzPositionList loaded from
 *          database_ro.positionlist_table; ownership is released to the
 *          caller.
 *  @throws Xapian::RangeError if the loaded list is empty and @a tname is not
 *          found in the document's term list.
 *
 *  An empty list is only returned after the document's term list has been
 *  opened and found to contain @a tname.
 */
01171 PositionList *
01172 QuartzWritableDatabase::open_position_list(Xapian::docid did,
01173                                    const string & tname) const
01174 {
01175     Assert(did != 0);
01176 
          // Hold the list in an AutoPtr until it is released on return.
01177     AutoPtr<QuartzPositionList> poslist(new QuartzPositionList());
01178     poslist->read_data(&database_ro.positionlist_table, did, tname);
01179     if (poslist->get_size() == 0) {
01180         // Check that term / document combination exists.
01181         // If the doc doesn't exist, this will throw Xapian::DocNotFoundError:
01182         AutoPtr<TermList> tl(open_term_list(did));
01183         tl->skip_to(tname);
01184         if (tl->at_end() || tl->get_termname() != tname)
01185             throw Xapian::RangeError("Can't open position list: requested term is not present in document.");
01186     }
01187 
01188     return poslist.release();
01189 }
01190 
/** Open an iterator over the terms in the database.
 *
 *  @param prefix  Passed through unchanged to the QuartzAllTermsList
 *                 constructor.
 *  @return A newly allocated QuartzAllTermsList built from a cursor on
 *          database_ro.postlist_table and that table's entry count.
 *  @throws Xapian::UnimplementedError if a transaction is active.
 *
 *  If changes_made is non-zero, do_flush_const() is called first.
 */
01191 TermList *
01192 QuartzWritableDatabase::open_allterms(const string & prefix) const
01193 {
          // NOTE(review): the debug log receives "" rather than prefix, so
          // the argument is not recorded - confirm whether that is intended.
01194     DEBUGCALL(DB, TermList *, "QuartzWritableDatabase::open_allterms", "");
01195     if (transaction_active())
01196         throw Xapian::UnimplementedError("Can't open allterms iterator during a transaction");
01197     // Terms may have been added or removed, so we need to flush.
01198     if (changes_made) do_flush_const();
01199     QuartzPostListTable *t = &database_ro.postlist_table;
01200     AutoPtr<Bcursor> pl_cursor(t->cursor_get());
01201     RETURN(new QuartzAllTermsList(Xapian::Internal::RefCntPtr<const QuartzWritableDatabase>(this),
01202                                   pl_cursor, t->get_entry_count(), prefix));
01203 }
01204 
/** Discard all changes buffered in this object.
 *
 *  Calls database_ro.cancel(), reloads total_length and lastdocid from
 *  database_ro, clears freq_deltas, doclens and mod_plists, and resets
 *  changes_made to 0.  The add, delete and replace methods call this from
 *  their catch-all handlers before rethrowing.
 */
01205 void
01206 QuartzWritableDatabase::cancel()
01207 {
01208     database_ro.cancel();
          // Re-read the two counters from database_ro after its own
          // cancel() has run.
01209     total_length = database_ro.record_table.get_total_length();
01210     lastdocid = database_ro.get_lastdocid();
01211     freq_deltas.clear();
01212     doclens.clear();
01213     mod_plists.clear();
01214     changes_made = 0;
01215 }

Documentation for Xapian (version 1.0.20).
Generated on 28 Apr 2010 by Doxygen 1.5.2.