xapian-core  1.4.25
api_compactold.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2009,2010,2011,2012,2013,2015,2019 Olly Betts
5  * Copyright (C) 2010 Richard Boulton
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "api_compactold.h"
26 
27 #define XAPIAN_DEPRECATED(X) X
28 #include <xapian.h>
29 
30 #include "apitest.h"
31 #include "dbcheck.h"
32 #include "filetests.h"
33 #include "str.h"
34 #include "testsuite.h"
35 #include "testutils.h"
36 
37 #include <cstdlib>
38 #include <fstream>
39 
40 #include "unixcmds.h"
41 
42 using namespace std;
43 
44 static void
46 {
47  // Need non-const pointer for strtoul(), but data isn't modified.
48  char * p = const_cast<char *>(s.c_str());
49 
50  while (*p) {
51  bool del = (*p == '!');
52  if (del) ++p;
53  Xapian::docid first = strtoul(p, &p, 10);
54  Xapian::docid last = first;
55  if (*p == '-') {
56  last = strtoul(p + 1, &p, 10);
57  }
58  if (*p && *p != ' ') {
59  tout << p - s.c_str() << endl;
60  FAIL_TEST("Bad sparse db spec (expected space): " << s);
61  }
62  if (first > last) {
63  FAIL_TEST("Bad sparse db spec (first > last): " << s);
64  }
65 
66  do {
67  if (del) {
68  db.delete_document(first);
69  } else {
70  Xapian::Document doc;
71  string id = str(first);
72  doc.set_data(id);
73  doc.add_term("Q" + str(first));
74  doc.add_term(string(first % 7 + 1, char((first % 26) + 'a')));
75  db.replace_document(first, doc);
76  }
77  } while (first++ < last);
78 
79  if (*p == '\0') break;
80  ++p;
81  }
82 
83  db.commit();
84 }
85 
86 static void
87 check_sparse_uid_terms(const string & path)
88 {
89  Xapian::Database db(path);
91  for (t = db.allterms_begin("Q"); t != db.allterms_end("Q"); ++t) {
92  Xapian::docid did = atoi((*t).c_str() + 1);
94  TEST_EQUAL(*p, did);
95  }
96 }
97 
98 // With multi the docids in the shards change the behaviour.
99 DEFINE_TESTCASE(compactoldnorenumber1, compact && !multi) {
100  string a = get_database_path("compactnorenumber1a", make_sparse_db,
101  "5-7 24 76 987 1023-1027 9999 !9999");
102  string a_uuid;
103  {
104  Xapian::Database db(a);
105  a_uuid = db.get_uuid();
106  }
107  string b = get_database_path("compactnorenumber1b", make_sparse_db,
108  "1027-1030");
109  string c = get_database_path("compactnorenumber1c", make_sparse_db,
110  "1028-1040");
111  string d = get_database_path("compactnorenumber1d", make_sparse_db,
112  "3000 999999 !999999");
113 
114  string out = get_compaction_output_path("compactnorenumber1out");
115 
116  rm_rf(out);
117  {
118  Xapian::Compactor compact;
119  compact.set_renumber(false);
120  compact.set_destdir(out);
121  compact.add_source(a);
122  compact.compact();
123  }
124 
126 
127  {
128  TEST(!dir_exists(out + "/donor"));
129  Xapian::Database db(out);
130  // xapian-compact should change the UUID of the database, but didn't
131  // prior to 1.0.18/1.1.4.
132  string out_uuid = db.get_uuid();
133  TEST_NOT_EQUAL(a_uuid, out_uuid);
134  TEST_EQUAL(out_uuid.size(), 36);
135  TEST_NOT_EQUAL(out_uuid, "00000000-0000-0000-0000-000000000000");
136 
137  // White box test - ensure that the donor database is removed.
138  TEST(!dir_exists(out + "/donor"));
139  }
140 
141  rm_rf(out);
142  {
143  Xapian::Compactor compact;
144  compact.set_renumber(false);
145  compact.set_destdir(out);
146  compact.add_source(a);
147  compact.add_source(c);
148  compact.compact();
149  }
151  {
152  // Check that xapian-compact is producing a consistent database. Also,
153  // regression test - xapian 1.1.4 set lastdocid to 0 in the output
154  // database.
155  Xapian::Database outdb(out);
156  dbcheck(outdb, 24, 9999);
157  }
158 
159  rm_rf(out);
160  {
161  Xapian::Compactor compact;
162  compact.set_renumber(false);
163  compact.set_destdir(out);
164  compact.add_source(d);
165  compact.add_source(a);
166  compact.add_source(c);
167  compact.compact();
168  }
170 
171  rm_rf(out);
172  {
173  Xapian::Compactor compact;
174  compact.set_renumber(false);
175  compact.set_destdir(out);
176  compact.add_source(c);
177  compact.add_source(a);
178  compact.add_source(d);
179  compact.compact();
180  }
182 
183  // Should fail.
184  rm_rf(out);
185  {
186  Xapian::Compactor compact;
187  compact.set_renumber(false);
188  compact.set_destdir(out);
189  compact.add_source(a);
190  compact.add_source(b);
192  }
193 
194  // Should fail.
195  rm_rf(out);
196  {
197  Xapian::Compactor compact;
198  compact.set_renumber(false);
199  compact.set_destdir(out);
200  compact.add_source(b);
201  compact.add_source(a);
203  }
204 
205  // Should fail.
206  rm_rf(out);
207  {
208  Xapian::Compactor compact;
209  compact.set_renumber(false);
210  compact.set_destdir(out);
211  compact.add_source(a);
212  compact.add_source(b);
213  compact.add_source(d);
215  }
216 
217  // Should fail.
218  rm_rf(out);
219  {
220  Xapian::Compactor compact;
221  compact.set_renumber(false);
222  compact.set_destdir(out);
223  compact.add_source(d);
224  compact.add_source(b);
225  compact.add_source(a);
227  }
228 
229  // Should fail.
230  rm_rf(out);
231  {
232  Xapian::Compactor compact;
233  compact.set_renumber(false);
234  compact.set_destdir(out);
235  compact.add_source(b);
236  compact.add_source(a);
237  compact.add_source(d);
239  }
240 }
241 
242 // Test use of compact to merge two databases.
243 DEFINE_TESTCASE(compactoldmerge1, compact) {
244  string indbpath = get_database_path("apitest_simpledata");
245  string outdbpath = get_compaction_output_path("compactmerge1out");
246  rm_rf(outdbpath);
247 
248  Xapian::Compactor compact;
249  compact.set_destdir(outdbpath);
250  compact.add_source(indbpath);
251  compact.add_source(indbpath);
252  compact.compact();
253 
254  Xapian::Database indb(get_database("apitest_simpledata"));
255  Xapian::Database outdb(outdbpath);
256 
257  TEST_EQUAL(indb.get_doccount() * 2, outdb.get_doccount());
258  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
259 }
260 
261 static void
263 {
264  int count = 10000;
265 
266  Xapian::Document doc;
267  doc.add_term("a");
268  while (count) {
269  db.add_document(doc);
270  --count;
271  }
272 
273  db.commit();
274 }
275 
276 // Test use of compact on a database which has multiple chunks for a term.
277 // This is a regression test for ticket #427
278 DEFINE_TESTCASE(compactoldmultichunks1, compact) {
279  string indbpath = get_database_path("compactmultichunks1in",
280  make_multichunk_db, "");
281  string outdbpath = get_compaction_output_path("compactmultichunks1out");
282  rm_rf(outdbpath);
283 
284  Xapian::Compactor compact;
285  compact.set_destdir(outdbpath);
286  compact.add_source(indbpath);
287  compact.compact();
288 
289  Xapian::Database indb(indbpath);
290  Xapian::Database outdb(outdbpath);
291 
292  TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
293  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
294 }
295 
296 // Test compacting from a stub database directory.
297 DEFINE_TESTCASE(compactoldstub1, compact) {
298  const char * stubpath = ".stub/compactstub1";
299  const char * stubpathfile = ".stub/compactstub1/XAPIANDB";
300  mkdir(".stub", 0755);
301  mkdir(stubpath, 0755);
302  ofstream stub(stubpathfile);
303  TEST(stub.is_open());
304  stub << "auto ../../" << get_database_path("apitest_simpledata") << endl;
305  stub << "auto ../../" << get_database_path("apitest_simpledata2") << endl;
306  stub.close();
307 
308  string outdbpath = get_compaction_output_path("compactstub1out");
309  rm_rf(outdbpath);
310 
311  Xapian::Compactor compact;
312  compact.set_destdir(outdbpath);
313  compact.add_source(stubpath);
314  compact.compact();
315 
316  Xapian::Database indb(stubpath);
317  Xapian::Database outdb(outdbpath);
318 
319  TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
320  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
321 }
322 
323 // Test compacting from a stub database file.
324 DEFINE_TESTCASE(compactoldstub2, compact) {
325  const char * stubpath = ".stub/compactstub2";
326  mkdir(".stub", 0755);
327  ofstream stub(stubpath);
328  TEST(stub.is_open());
329  stub << "auto ../" << get_database_path("apitest_simpledata") << endl;
330  stub << "auto ../" << get_database_path("apitest_simpledata2") << endl;
331  stub.close();
332 
333  string outdbpath = get_compaction_output_path("compactstub2out");
334  rm_rf(outdbpath);
335 
336  Xapian::Compactor compact;
337  compact.set_destdir(outdbpath);
338  compact.add_source(stubpath);
339  compact.compact();
340 
341  Xapian::Database indb(stubpath);
342  Xapian::Database outdb(outdbpath);
343 
344  TEST_EQUAL(indb.get_doccount(), outdb.get_doccount());
345  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
346 }
347 
348 // Test compacting a stub database file to itself.
349 DEFINE_TESTCASE(compactoldstub3, compact) {
350  const char * stubpath = ".stub/compactstub3";
351  mkdir(".stub", 0755);
352  ofstream stub(stubpath);
353  TEST(stub.is_open());
354  stub << "auto ../" << get_database_path("apitest_simpledata") << endl;
355  stub << "auto ../" << get_database_path("apitest_simpledata2") << endl;
356  stub.close();
357 
358  Xapian::doccount in_docs;
359  {
360  Xapian::Database indb(stubpath);
361  in_docs = indb.get_doccount();
362  }
363 
364  Xapian::Compactor compact;
365  compact.set_destdir(stubpath);
366  compact.add_source(stubpath);
367  compact.compact();
368 
369  Xapian::Database outdb(stubpath);
370 
371  TEST_EQUAL(in_docs, outdb.get_doccount());
372  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
373 }
374 
375 // Test compacting a stub database directory to itself.
376 DEFINE_TESTCASE(compactoldstub4, compact) {
377  const char * stubpath = ".stub/compactstub4";
378  const char * stubpathfile = ".stub/compactstub4/XAPIANDB";
379  mkdir(".stub", 0755);
380  mkdir(stubpath, 0755);
381  ofstream stub(stubpathfile);
382  TEST(stub.is_open());
383  stub << "auto ../../" << get_database_path("apitest_simpledata") << endl;
384  stub << "auto ../../" << get_database_path("apitest_simpledata2") << endl;
385  stub.close();
386 
387  Xapian::doccount in_docs;
388  {
389  Xapian::Database indb(stubpath);
390  in_docs = indb.get_doccount();
391  }
392 
393  Xapian::Compactor compact;
394  compact.set_destdir(stubpath);
395  compact.add_source(stubpath);
396  compact.compact();
397 
398  Xapian::Database outdb(stubpath);
399 
400  TEST_EQUAL(in_docs, outdb.get_doccount());
401  dbcheck(outdb, outdb.get_doccount(), outdb.get_doccount());
402 }
403 
404 static void
406 {
407  Xapian::Document doc;
408  doc.add_term("foo");
409  db.add_document(doc);
410  db.add_spelling("foo");
411  db.add_synonym("bar", "pub");
412  db.add_synonym("foobar", "foo");
413 
414  db.commit();
415 }
416 
417 static void
419 {
420  Xapian::Document doc;
421  doc.add_term("foo");
422  db.add_document(doc);
423 
424  db.commit();
425 }
426 
427 DEFINE_TESTCASE(compactoldmissingtables1, compact) {
428  string a = get_database_path("compactmissingtables1a",
430  string b = get_database_path("compactmissingtables1b",
432 
433  string out = get_compaction_output_path("compactmissingtables1out");
434  rm_rf(out);
435 
436  Xapian::Compactor compact;
437  compact.set_destdir(out);
438  compact.add_source(a);
439  compact.add_source(b);
440  compact.compact();
441 
442  {
443  Xapian::Database db(out);
446  // FIXME: arrange for input b to not have a termlist table.
447 // TEST_EXCEPTION(Xapian::FeatureUnavailableError, db.termlist_begin(1));
448  }
449 }
450 
451 static void
453 {
454  Xapian::Document doc;
455  doc.add_term("bar");
456  db.add_document(doc);
457  db.add_spelling("bar");
458  db.add_synonym("bar", "baa");
459  db.add_synonym("barfoo", "barbar");
460  db.add_synonym("foofoo", "barfoo");
461 
462  db.commit();
463 }
464 
466 DEFINE_TESTCASE(compactoldmergesynonym1, compact) {
467  string a = get_database_path("compactmergesynonym1a",
469  string b = get_database_path("compactmergesynonym1b",
471 
472  string out = get_compaction_output_path("compactmergesynonym1out");
473  rm_rf(out);
474 
475  Xapian::Compactor compact;
476  compact.set_destdir(out);
477  compact.add_source(a);
478  compact.add_source(b);
479  compact.compact();
480 
481  {
482  Xapian::Database db(out);
483 
485  TEST_NOT_EQUAL(i, db.spellings_end());
486  TEST_EQUAL(*i, "bar");
487  ++i;
488  TEST_NOT_EQUAL(i, db.spellings_end());
489  TEST_EQUAL(*i, "foo");
490  ++i;
491  TEST_EQUAL(i, db.spellings_end());
492 
493  i = db.synonym_keys_begin();
495  TEST_EQUAL(*i, "bar");
496  ++i;
498  TEST_EQUAL(*i, "barfoo");
499  ++i;
501  TEST_EQUAL(*i, "foobar");
502  ++i;
504  TEST_EQUAL(*i, "foofoo");
505  ++i;
506  TEST_EQUAL(i, db.synonym_keys_end());
507  }
508 }
509 
510 DEFINE_TESTCASE(compactoldempty1, compact) {
511  string empty_dbpath = get_database_path(string());
512  string outdbpath = get_compaction_output_path("compactempty1out");
513  rm_rf(outdbpath);
514 
515  {
516  // Compacting an empty database tried to divide by zero in 1.3.0.
517  Xapian::Compactor compact;
518  compact.set_destdir(outdbpath);
519  compact.add_source(empty_dbpath);
520  compact.compact();
521 
522  Xapian::Database outdb(outdbpath);
523  TEST_EQUAL(outdb.get_doccount(), 0);
524  dbcheck(outdb, 0, 0);
525  }
526 
527  {
528  // Check compacting two empty databases together.
529  Xapian::Compactor compact;
530  compact.set_destdir(outdbpath);
531  compact.add_source(empty_dbpath);
532  compact.add_source(empty_dbpath);
533  compact.compact();
534 
535  Xapian::Database outdb(outdbpath);
536  TEST_EQUAL(outdb.get_doccount(), 0);
537  dbcheck(outdb, 0, 0);
538  }
539 }
540 
541 DEFINE_TESTCASE(compactoldmultipass1, compact) {
542  string outdbpath = get_compaction_output_path("compactmultipass1");
543  rm_rf(outdbpath);
544 
545  string a = get_database_path("compactnorenumber1a", make_sparse_db,
546  "5-7 24 76 987 1023-1027 9999 !9999");
547  string b = get_database_path("compactnorenumber1b", make_sparse_db,
548  "1027-1030");
549  string c = get_database_path("compactnorenumber1c", make_sparse_db,
550  "1028-1040");
551  string d = get_database_path("compactnorenumber1d", make_sparse_db,
552  "3000 999999 !999999");
553 
554  Xapian::Compactor compact;
555  compact.set_destdir(outdbpath);
556  compact.add_source(a);
557  compact.add_source(b);
558  compact.add_source(c);
559  compact.add_source(d);
560  compact.set_multipass(true);
561  compact.compact();
562 
563  Xapian::Database outdb(outdbpath);
564  dbcheck(outdb, 29, 1041);
565 }
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
static void make_sparse_db(Xapian::WritableDatabase &db, const string &s)
void dbcheck(const Xapian::Database &db, Xapian::doccount expected_doccount, Xapian::docid expected_lastdocid)
Check consistency of database and statistics.
Definition: dbcheck.cc:126
#define TEST(a)
Test a condition, without an additional explanation for failure.
Definition: testsuite.h:275
This class is used to access a database, or a group of databases.
Definition: database.h:68
test database contents and consistency.
InvalidOperationError indicates the API was used in an invalid way.
Definition: error.h:283
void set_multipass(bool multipass)
Set whether to merge postlists in multiple passes.
Definition: compactor.h:102
TermIterator allterms_end(const std::string &=std::string()) const
Corresponding end iterator to allterms_begin(prefix).
Definition: database.h:269
DEFINE_TESTCASE(compactoldnorenumber1, compact &&!multi)
a generic test suite engine
void set_destdir(const std::string &destdir)
Set where to write the output.
Definition: compactor.cc:112
C++ function versions of useful Unix commands.
STL namespace.
Convert types to std::string.
Utility functions for testing files.
static void make_missing_tables(Xapian::WritableDatabase &db, const string &)
void replace_document(Xapian::docid did, const Xapian::Document &document)
Replace a given document in the database.
Definition: omdatabase.cc:952
static void make_all_tables2(Xapian::WritableDatabase &db, const string &)
Xapian::doccount get_doccount() const
Get the number of documents in the database.
Definition: omdatabase.cc:267
void compact()
Perform the actual compaction/merging operation.
Definition: compactor.cc:124
static void make_all_tables(Xapian::WritableDatabase &db, const string &)
test functionality of the Xapian API
void rm_rf(const string &filename)
Remove a directory and contents, just like the Unix "rm -rf" command.
Definition: unixcmds.cc:111
Class for iterating over a list of terms.
Definition: termiterator.h:41
Class for iterating over a list of terms.
#define TEST_NOT_EQUAL(a, b)
Test for non-equality of two things.
Definition: testsuite.h:305
string get_database_path(const string &dbname)
Definition: apitest.cc:72
static void check_sparse_uid_terms(const string &path)
This class provides read/write access to a database.
Definition: database.h:789
Xapian::TermIterator spellings_begin() const
An iterator which returns all the spelling correction targets.
Definition: omdatabase.cc:704
std::ostringstream tout
The debug printing stream.
Definition: testsuite.cc:103
Public interfaces for the Xapian library.
Xapian::TermIterator synonym_keys_begin(const std::string &prefix=std::string()) const
An iterator which returns all terms which have synonyms.
Definition: omdatabase.cc:740
Compact a database, or merge and compact several.
Definition: compactor.h:42
void delete_document(Xapian::docid did)
Delete a document from the database.
Definition: omdatabase.cc:925
#define TEST_EXCEPTION(TYPE, CODE)
Check that CODE throws exactly Xapian exception TYPE.
Definition: testutils.h:109
Xapian::TermIterator synonym_keys_end(const std::string &=std::string()) const
Corresponding end iterator to synonym_keys_begin(prefix).
Definition: database.h:459
string str(int value)
Convert int to std::string.
Definition: str.cc:90
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
TermIterator allterms_begin(const std::string &prefix=std::string()) const
An iterator which runs across all terms with a given prefix.
Definition: omdatabase.cc:223
bool dir_exists(const char *path)
Test if a directory exists.
Definition: filetests.h:136
std::string get_compaction_output_path(const std::string &name)
Definition: apitest.cc:105
#define FAIL_TEST(MSG)
Fail the current testcase with message MSG.
Definition: testsuite.h:68
Xapian::Database get_database(const string &dbname)
Definition: apitest.cc:48
unsigned XAPIAN_DOCID_BASE_TYPE doccount
A count of documents.
Definition: types.h:38
Xapian-specific test helper functions and macros.
void set_renumber(bool renumber)
Set whether to preserve existing document id values.
Definition: compactor.h:90
Xapian::TermIterator spellings_end() const
Corresponding end iterator to spellings_begin().
Definition: database.h:436
void add_synonym(const std::string &term, const std::string &synonym) const
Add a synonym for a term.
Definition: omdatabase.cc:1028
unsigned XAPIAN_DOCID_BASE_TYPE docid
A unique identifier for a document.
Definition: types.h:52
#define TEST_EQUAL(a, b)
Test for equality of two things.
Definition: testsuite.h:278
static void make_multichunk_db(Xapian::WritableDatabase &db, const string &)
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
void add_source(const std::string &srcdir)
Add a source database.
Definition: compactor.cc:118
A handle representing a document in a Xapian database.
Definition: document.h:61
std::string get_uuid() const
Get a UUID for the database.
Definition: omdatabase.cc:776
void add_spelling(const std::string &word, Xapian::termcount freqinc=1) const
Add a word to the spelling dictionary.
Definition: omdatabase.cc:1004
PostingIterator postlist_begin(const std::string &tname) const
An iterator pointing to the start of the postlist for a given term.
Definition: omdatabase.cc:162
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140