xapian-core  2.0.0
perftest_randomidx.cc
Go to the documentation of this file.
1 
4 /* Copyright 2008 Lemur Consulting Ltd
5  * Copyright 2009,2015 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see
19  * <https://www.gnu.org/licenses/>.
20  */
21 
22 #include <config.h>
23 
25 
26 #include <cstdlib>
27 #include <string>
28 #include <xapian.h>
29 
30 #include "backendmanager.h"
31 #include "perftest.h"
32 #include "str.h"
33 #include "testrunner.h"
34 #include "testsuite.h"
35 #include "testutils.h"
36 
37 using namespace std;
38 
/// Generate a random integer from 0 to "range" - 1.
///
/// The value is obtained by scaling a rand()-derived fraction in [0, 1) by
/// `range` and truncating towards zero.
static unsigned int
rand_int(unsigned int range)
{
    // Dividing by RAND_MAX + 1.0 keeps the fraction strictly below 1.
    const double fraction = rand() / (RAND_MAX + 1.0);
    return unsigned(range * fraction);
}
46 
/// Generate a random integer in the inclusive range "min" to "max".
///
/// A rand()-derived fraction in [0, 1) is scaled by the number of values in
/// the range (max + 1 - min), truncated, and offset by `min`.
static unsigned int
rand_int(unsigned int min, unsigned int max)
{
    // Number of distinct values that can be returned (unsigned arithmetic).
    const unsigned int span = max + 1 - min;
    // Dividing by RAND_MAX + 1.0 keeps the fraction strictly below 1.
    const double fraction = rand() / (RAND_MAX + 1.0);
    return min + unsigned(span * fraction);
}
54 
/// Generate a random double in range 0.0 <= v < 1.0.
static double
rand_01()
{
    // Dividing by RAND_MAX + 1.0 (rather than RAND_MAX) keeps the result
    // strictly below 1.0 even when rand() returns RAND_MAX.
    return rand() / (RAND_MAX + 1.0);
}
62 
68 static string
69 gen_word(unsigned int length, unsigned int char_range)
70 {
71  string result;
72  result.reserve(length);
73  for (unsigned int i = 0; i != length; ++i) {
74  char ch = char('a' + rand_int(char_range));
75  result.append(1, ch);
76  }
77  return result;
78 }
79 
80 // Test the performance using randomly generated data.
81 DEFINE_TESTCASE(randomidx1, writable && !inmemory) {
82  logger.testcase_begin("randomidx1");
83 
84  std::string dbname("randomidx1");
86 
87  unsigned int runsize = 100000;
88  unsigned int seed = 42;
89 
90  // Some parameters used to control generation of documents.
91  unsigned int slots_used = 10;
92  double slot_probability = 0.7;
93  unsigned int slotval_minlen = 1;
94  unsigned int slotval_maxlen = 6;
95 
96  unsigned int minterms = 100;
97  unsigned int maxterms = 1000;
98  unsigned int mintermlen = 1;
99  unsigned int maxtermlen = 10;
100  unsigned int termcharrange = 10;
101 
102  srand(seed);
103 
104  std::map<std::string, std::string> params;
105  params["runsize"] = str(runsize);
106  params["seed"] = str(seed);
107  params["slots_used"] = str(slots_used);
108  params["slot_probability"] = str(slot_probability);
109  params["slotval_minlen"] = str(slotval_minlen);
110  params["slotval_maxlen"] = str(slotval_maxlen);
111  params["minterms"] = str(minterms);
112  params["maxterms"] = str(maxterms);
113  params["mintermlen"] = str(mintermlen);
114  params["maxtermlen"] = str(maxtermlen);
115  params["termcharrange"] = str(termcharrange);
116  logger.indexing_begin(dbname, params);
117 
118  unsigned int i;
119  for (i = 0; i < runsize; ++i) {
120  Xapian::Document doc;
121  doc.set_data("random document " + str(i));
122 
123  unsigned int terms = rand_int(minterms, maxterms);
124  for (unsigned int j = 0; j < terms; ++j) {
125  unsigned int termlen = rand_int(mintermlen, maxtermlen);
126  doc.add_term(gen_word(termlen, termcharrange));
127  }
128 
129  // Add values to slots - all values are between 1 and 6 characters, but
130  // later slots have a greater range of characters, so more unique
131  // values.
132  for (unsigned int slot = 0; slot < slots_used; ++slot) {
133  if (rand_01() < slot_probability) {
134  unsigned int len = rand_int(slotval_minlen, slotval_maxlen);
135  doc.add_value(slot, gen_word(len, slot + 2));
136  }
137  }
138 
139  dbw.add_document(doc);
141  }
142  dbw.commit();
144 
146 }
Base class for backend handling in test harness.
virtual Xapian::WritableDatabase get_writable_database(const std::string &name, const std::string &file)
Get a writable database instance.
void indexing_begin(const std::string &dbname, const std::map< std::string, std::string > &params)
Log the start of an indexing run.
Definition: perftest.cc:293
void testcase_end()
End a testcase.
Definition: perftest.cc:463
void indexing_end()
Log the end of an indexing run.
Definition: perftest.cc:354
void testcase_begin(const std::string &testcase)
Start a testcase.
Definition: perftest.cc:453
void indexing_add()
Log the addition of a document in an indexing run.
Definition: perftest.cc:338
Class representing a document.
Definition: document.h:64
void set_data(std::string_view data)
Set the document data.
Definition: document.cc:81
void add_term(std::string_view term, Xapian::termcount wdf_inc=1)
Add a term to this document.
Definition: document.cc:87
void add_value(Xapian::valueno slot, std::string_view value)
Add a value to a slot in this document.
Definition: document.cc:191
This class provides read/write access to a database.
Definition: database.h:964
void commit()
Commit pending modifications.
Definition: database.cc:543
Xapian::docid add_document(const Xapian::Document &doc)
Add a document to the database.
Definition: database.cc:561
string str(int value)
Convert int to std::string.
Definition: str.cc:91
PerfTestLogger logger
Definition: perftest.cc:51
performance tests for Xapian.
static unsigned int rand_int(unsigned int range)
Generate a random integer from 0 to "range" - 1.
DEFINE_TESTCASE(randomidx1, writable && !inmemory)
static double rand_01()
Generate a random double in range 0.0 <= v < 1.0.
static string gen_word(unsigned int length, unsigned int char_range)
Generate a "word", of the specified length.
static int seed
Definition: stemtest.cc:46
Convert types to std::string.
BackendManager * backendmanager
backendmanager is global so that it can be accessed by individual tests.
Definition: testrunner.cc:41
Run multiple tests for different backends.
a generic test suite engine
Xapian-specific test helper functions and macros.
Public interfaces for the Xapian library.