xapian-core  1.4.21
simpleindex.cc
Go to the documentation of this file.
1 
4 /* Copyright (C) 2007-2022 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24 
25 #include <xapian.h>
26 
27 #include <iostream>
28 #include <string>
29 
30 #include <cstdlib> // For exit().
31 #include <cstring>
32 
33 using namespace std;
34 
35 int
36 main(int argc, char **argv)
37 try {
38  if (argc != 2 || argv[1][0] == '-') {
39  int rc = 1;
40  if (argv[1]) {
41  if (strcmp(argv[1], "--version") == 0) {
42  cout << "simpleindex\n";
43  exit(0);
44  }
45  if (strcmp(argv[1], "--help") == 0) {
46  rc = 0;
47  }
48  }
49  cout << "Usage: " << argv[0] << " PATH_TO_DATABASE\n"
50  "Index each paragraph of a text file as a Xapian document.\n";
51  exit(rc);
52  }
53 
54  // Open the database for update, creating a new database if necessary.
56 
57  Xapian::TermGenerator indexer;
58  Xapian::Stem stemmer("english");
59  indexer.set_stemmer(stemmer);
61 
62  string para;
63  while (true) {
64  string line;
65  if (cin.eof()) {
66  if (para.empty()) break;
67  } else {
68  getline(cin, line);
69  }
70 
71  if (line.empty()) {
72  if (!para.empty()) {
73  // We've reached the end of a paragraph, so index it.
74  Xapian::Document doc;
75  doc.set_data(para);
76 
77  indexer.set_document(doc);
78  indexer.index_text(para);
79 
80  // Add the document to the database.
81  db.add_document(doc);
82 
83  para.resize(0);
84  }
85  } else {
86  if (!para.empty()) para += ' ';
87  para += line;
88  }
89  }
90 
91  // Explicitly commit so that we get to see any errors. WritableDatabase's
92  // destructor will commit implicitly (unless we're in a transaction) but
93  // will swallow any exceptions produced.
94  db.commit();
95 } catch (const Xapian::Error &e) {
96  cout << e.get_description() << '\n';
97  exit(1);
98 }
Xapian::docid add_document(const Xapian::Document &document)
Add a new document to the database.
Definition: omdatabase.cc:902
Class representing a stemming algorithm.
Definition: stem.h:62
void set_document(const Xapian::Document &doc)
Set the current document.
Parses a piece of text and generates terms.
Definition: termgenerator.h:48
void set_stemming_strategy(stem_strategy strategy)
Set the stemming strategy.
STL namespace.
const int DB_CREATE_OR_OPEN
Create database if it doesn't already exist.
Definition: constants.h:35
static Xapian::Stem stemmer
Definition: stemtest.cc:41
void index_text(const Xapian::Utf8Iterator &itor, Xapian::termcount wdf_inc=1, const std::string &prefix=std::string())
Index some text.
This class provides read/write access to a database.
Definition: database.h:785
Public interfaces for the Xapian library.
void set_stemmer(const Xapian::Stem &stemmer)
Set the Xapian::Stem object to be used for generating stemmed terms.
void commit()
Commit any pending modifications made to the database.
Definition: omdatabase.cc:857
std::string get_description() const
Return a string describing this object.
Definition: error.cc:93
All exceptions thrown by Xapian are subclasses of Xapian::Error.
Definition: error.h:43
int main(int argc, char **argv)
Definition: simpleindex.cc:36
void set_data(const std::string &data)
Set data stored in the document.
Definition: omdocument.cc:78
A handle representing a document in a Xapian database.
Definition: document.h:61