xapian-core  1.4.27
perftest_randomidx.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2009,2015 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
26 
27 #include <cstdlib>
28 #include <string>
29 #include <xapian.h>
30 
31 #include "backendmanager.h"
32 #include "perftest.h"
33 #include "str.h"
34 #include "testrunner.h"
35 #include "testsuite.h"
36 #include "testutils.h"
37 
38 using namespace std;
39 
/** Generate a random integer from 0 to "range" - 1.
 *
 *  Consumes exactly one value from rand(), so the result is determined by
 *  the preceding srand() call.
 *
 *  @param range  Number of distinct values which can be returned; 0 yields 0.
 */
static unsigned int
rand_int(unsigned int range)
{
    // Dividing by RAND_MAX + 1.0 keeps the fraction strictly below 1.0, so
    // the scaled value never reaches "range" itself.
    const double fraction = rand() / (RAND_MAX + 1.0);
    return unsigned(range * fraction);
}
47 
/** Generate a random integer in the inclusive range "min" to "max".
 *
 *  The width of the range is computed in floating point.  Evaluating
 *  max + 1 - min in unsigned arithmetic wraps round to 0 when the range
 *  covers every unsigned int value, which made such a call always return
 *  "min".  For every other range with min <= max the result is the same
 *  as the unsigned computation gives.
 *
 *  Consumes exactly one value from rand().
 *
 *  @param min  Smallest value which can be returned.
 *  @param max  Largest value which can be returned; must be >= min.
 */
static unsigned int
rand_int(unsigned int min, unsigned int max)
{
    // max - min can't overflow when min <= max; adding 1.0 as a double
    // avoids the wrap that "max + 1" suffers when max is UINT_MAX.
    const double width = double(max - min) + 1.0;
    return min + unsigned(width * (rand() / (RAND_MAX + 1.0)));
}
55 
/** Generate a random double in range 0.0 <= v < 1.0.
 *
 *  Consumes exactly one value from rand().  The divisor is RAND_MAX + 1.0
 *  (evaluated as a double) so that the largest rand() result still maps to
 *  a value below 1.0.
 */
static double
rand_01()
{
    return rand() / (RAND_MAX + 1.0);
}
63 
69 static string
70 gen_word(unsigned int length, unsigned int char_range)
71 {
72  string result;
73  result.reserve(length);
74  for (unsigned int i = 0; i != length; ++i) {
75  char ch = char('a' + rand_int(char_range));
76  result.append(1, ch);
77  }
78  return result;
79 }
80 
81 // Test the performance using randomly generated data.
82 DEFINE_TESTCASE(randomidx1, writable && !inmemory) {
83  logger.testcase_begin("randomidx1");
84 
85  std::string dbname("randomidx1");
87 
88  unsigned int runsize = 100000;
89  unsigned int seed = 42;
90 
91  // Some parameters used to control generation of documents.
92  unsigned int slots_used = 10;
93  double slot_probability = 0.7;
94  unsigned int slotval_minlen = 1;
95  unsigned int slotval_maxlen = 6;
96 
97  unsigned int minterms = 100;
98  unsigned int maxterms = 1000;
99  unsigned int mintermlen = 1;
100  unsigned int maxtermlen = 10;
101  unsigned int termcharrange = 10;
102 
103  srand(seed);
104 
105  std::map<std::string, std::string> params;
106  params["runsize"] = str(runsize);
107  params["seed"] = str(seed);
108  params["slots_used"] = str(slots_used);
109  params["slot_probability"] = str(slot_probability);
110  params["slotval_minlen"] = str(slotval_minlen);
111  params["slotval_maxlen"] = str(slotval_maxlen);
112  params["minterms"] = str(minterms);
113  params["maxterms"] = str(maxterms);
114  params["mintermlen"] = str(mintermlen);
115  params["maxtermlen"] = str(maxtermlen);
116  params["termcharrange"] = str(termcharrange);
117  logger.indexing_begin(dbname, params);
118 
119  unsigned int i;
120  for (i = 0; i < runsize; ++i) {
121  Xapian::Document doc;
122  doc.set_data("random document " + str(i));
123 
124  unsigned int terms = rand_int(minterms, maxterms);
125  for (unsigned int j = 0; j < terms; ++j) {
126  unsigned int termlen = rand_int(mintermlen, maxtermlen);
127  doc.add_term(gen_word(termlen, termcharrange));
128  }
129 
130  // Add values to slots - all values are between 1 and 6 characters, but
131  // later slots have a greater range of characters, so more unique
132  // values.
133  for (unsigned int slot = 0; slot < slots_used; ++slot) {
134  if (rand_01() < slot_probability) {
135  unsigned int len = rand_int(slotval_minlen, slotval_maxlen);
136  doc.add_value(slot, gen_word(len, slot + 2));
137  }
138  }
139 
140  dbw.add_document(doc);
142  }
143  dbw.commit();
145 
147 }
static unsigned int rand_int(unsigned int range)
Generate a random integer from 0 to "range" - 1.
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
Run multiple tests for different backends.
void add_value(Xapian::valueno slot, const std::string &value)
Add a new value.
Definition: omdocument.cc:107
performance tests for Xapian.
void indexing_add()
Log the addition of a document in an indexing run.
Definition: perftest.cc:338
a generic test suite engine
PerfTestLogger logger
Definition: perftest.cc:52
STL namespace.
Convert types to std::string.
virtual Xapian::WritableDatabase get_writable_database(const std::string &name, const std::string &file)
Get a writable database instance.
void testcase_begin(const std::string &testcase)
Start a testcase.
Definition: perftest.cc:409
Base class for backend handling in test harness.
This class provides read/write access to a database.
Definition: database.h:789
Public interfaces for the Xapian library.
BackendManager * backendmanager
backendmanager is global so that it can be accessed by individual tests.
Definition: testrunner.cc:42
string str(int value)
Convert int to std::string.
Definition: str.cc:90
static int seed
Definition: stemtest.cc:45
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
void indexing_end()
Log the end of an indexing run.
Definition: perftest.cc:354
static string gen_word(unsigned int length, unsigned int char_range)
Generate a "word", of the specified length.
static double rand_01()
Generate a random double in range 0.0 <= v < 1.0.
DEFINE_TESTCASE(randomidx1, writable &&!inmemory)
Xapian-specific test helper functions and macros.
void testcase_end()
End a testcase.
Definition: perftest.cc:419
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
A handle representing a document in a Xapian database.
Definition: document.h:61
void indexing_begin(const std::string &dbname, const std::map< std::string, std::string > &params)
Log the start of an indexing run.
Definition: perftest.cc:293
void add_term(const std::string &tname, Xapian::termcount wdfinc=1)
Add a term to the document, without positional information.
Definition: omdocument.cc:140