Skip to content

Commit 770fd5d

Browse files
committed
Remove Xapian dependency from forum search (use FTS5 only)
1 parent 6896384 commit 770fd5d

3 files changed

Lines changed: 74 additions & 51 deletions

File tree

src/deep_search/commonutils.cpp

Lines changed: 58 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,63 @@
2626
#include "util/rsthreads.h"
2727
#include "util/rsdebuglevel0.h"
2828

29+
namespace DeepSearch
{

/**
 * @brief Extract the textual content of the body of a simple HTML document.
 *
 * The input is returned unchanged when it is empty, when it does not both
 * start with '<' and end with '>' (it is then treated as plain text), or when
 * no complete "<body ...>" opening tag can be found.
 * Otherwise everything up to and including the opening body tag is dropped,
 * the first "<style ...>...</style>" block (if properly closed) is removed,
 * and every remaining "<...>" tag is stripped.
 *
 * @param rsHtmlDoc document to process, either HTML or plain text
 * @return the extracted text, or a copy of @p rsHtmlDoc if it is not
 *         recognised as an HTML document
 */
std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc)
{
    if(rsHtmlDoc.empty()) return rsHtmlDoc;

    // Anything not enclosed in markup at both ends is considered plain text
    const bool isPlainMsg =
            rsHtmlDoc.front() != '<' || rsHtmlDoc.back() != '>';
    if(isPlainMsg) return rsHtmlDoc;

    const auto bodyTagBegin = rsHtmlDoc.find("<body");
    if(bodyTagBegin == std::string::npos) return rsHtmlDoc;

    const auto bodyTagEnd = rsHtmlDoc.find('>', bodyTagBegin);
    if(bodyTagEnd == std::string::npos) return rsHtmlDoc;

    std::string body(rsHtmlDoc.substr(bodyTagEnd + 1));

    // strip also CSS inside <style></style>
    static constexpr char styleClose[] = "</style>";
    const auto styleTagBegin = body.find("<style");
    if(styleTagBegin != std::string::npos)
    {
        const auto styleEnd = body.find(styleClose, styleTagBegin);
        if(styleEnd != std::string::npos)
            body.erase( styleTagBegin,
                        styleEnd + (sizeof(styleClose) - 1) - styleTagBegin );
    }

    /* Strip the tags in one forward pass. The closing '>' is searched
     * starting from its own '<', so a literal '>' inside the text can no
     * longer be mistaken for the end of a later tag (which used to truncate
     * the output). The pass always moves forward and is therefore bounded by
     * the input length: no iteration cap is needed even with crafty input. */
    std::string retVal;
    retVal.reserve(body.size());
    std::string::size_type pos = 0;
    while(pos < body.size())
    {
        const auto oPos = body.find('<', pos);
        const auto cPos = (oPos == std::string::npos) ?
                    std::string::npos : body.find('>', oPos);
        if(cPos == std::string::npos)
        {
            // No further complete tag: keep the remaining text untouched
            retVal.append(body, pos, std::string::npos);
            break;
        }

        retVal.append(body, pos, oPos - pos);
        pos = cPos + 1;
    }

    return retVal;
}

}
82+
83+
// Xapian-specific code (only for channels/files indexing)
84+
#if defined(RS_DEEP_CHANNEL_INDEX) || defined(RS_DEEP_FILES_INDEX)
85+
2986
#ifndef XAPIAN_AT_LEAST
3087
/// Added in Xapian 1.4.2.
3188
#define XAPIAN_AT_LEAST(A,B,C) \
@@ -168,53 +225,6 @@ std::error_condition StubbornWriteOpQueue::flush(
168225
return std::error_condition();
169226
}
170227

171-
/**
 * @brief Extract the textual content of the body of a simple HTML document.
 *
 * The input is returned unchanged when it is empty, when it does not both
 * start with '<' and end with '>' (it is then treated as plain text), or when
 * no complete "<body ...>" opening tag can be found.
 * Otherwise everything up to and including the opening body tag is dropped,
 * the first "<style ...>...</style>" block (if properly closed) is removed,
 * and every remaining "<...>" tag is stripped.
 *
 * @param rsHtmlDoc document to process, either HTML or plain text
 * @return the extracted text, or a copy of @p rsHtmlDoc if it is not
 *         recognised as an HTML document
 */
std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc)
{
    if(rsHtmlDoc.empty()) return rsHtmlDoc;

    // Anything not enclosed in markup at both ends is considered plain text
    const bool isPlainMsg =
            rsHtmlDoc.front() != '<' || rsHtmlDoc.back() != '>';
    if(isPlainMsg) return rsHtmlDoc;

    const auto bodyTagBegin = rsHtmlDoc.find("<body");
    if(bodyTagBegin == std::string::npos) return rsHtmlDoc;

    const auto bodyTagEnd = rsHtmlDoc.find('>', bodyTagBegin);
    if(bodyTagEnd == std::string::npos) return rsHtmlDoc;

    std::string body(rsHtmlDoc.substr(bodyTagEnd + 1));

    // strip also CSS inside <style></style>
    static constexpr char styleClose[] = "</style>";
    const auto styleTagBegin = body.find("<style");
    if(styleTagBegin != std::string::npos)
    {
        const auto styleEnd = body.find(styleClose, styleTagBegin);
        if(styleEnd != std::string::npos)
            body.erase( styleTagBegin,
                        styleEnd + (sizeof(styleClose) - 1) - styleTagBegin );
    }

    /* Strip the tags in one forward pass. The closing '>' is searched
     * starting from its own '<', so a literal '>' inside the text can no
     * longer be mistaken for the end of a later tag (which used to truncate
     * the output). The pass always moves forward and is therefore bounded by
     * the input length: no iteration cap is needed even with crafty input. */
    std::string retVal;
    retVal.reserve(body.size());
    std::string::size_type pos = 0;
    while(pos < body.size())
    {
        const auto oPos = body.find('<', pos);
        const auto cPos = (oPos == std::string::npos) ?
                    std::string::npos : body.find('>', oPos);
        if(cPos == std::string::npos)
        {
            // No further complete tag: keep the remaining text untouched
            retVal.append(body, pos, std::string::npos);
            break;
        }

        retVal.append(body, pos, oPos - pos);
        pos = cPos + 1;
    }

    return retVal;
}
219229

220-
}
230+
#endif // RS_DEEP_CHANNEL_INDEX || RS_DEEP_FILES_INDEX

src/deep_search/commonutils.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,25 @@
1919
*******************************************************************************/
2020
#pragma once
2121

22+
#include <string>
23+
24+
#include "util/rstime.h"
25+
26+
namespace DeepSearch
27+
{
28+
// Common utilities (always available, used by FTS5)
29+
/// Return the text that follows the opening <body ...> tag of @p rsHtmlDoc
/// with the first <style>...</style> block and the markup tags removed.
/// Input that is empty, that does not both start with '<' and end with '>',
/// or that has no complete <body ...> opening tag is returned unchanged.
std::string simpleTextHtmlExtract(const std::string& rsHtmlDoc);
30+
}
31+
32+
// Xapian-specific code (only for channels/files indexing)
33+
#if defined(RS_DEEP_CHANNEL_INDEX) || defined(RS_DEEP_FILES_INDEX)
34+
2235
#include <xapian.h>
2336
#include <memory>
2437
#include <functional>
2538
#include <queue>
2639
#include <mutex>
2740

28-
#include "util/rstime.h"
29-
3041
#ifndef XAPIAN_AT_LEAST
3142
#define XAPIAN_AT_LEAST(A,B,C) (XAPIAN_MAJOR_VERSION > (A) || \
3243
(XAPIAN_MAJOR_VERSION == (A) && \
@@ -67,3 +78,5 @@ struct StubbornWriteOpQueue
6778
};
6879

6980
}
81+
82+
#endif // RS_DEEP_CHANNEL_INDEX || RS_DEEP_FILES_INDEX

src/use_libretroshare.pri

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ linux-* {
108108
mLibs += dl
109109
}
110110

111-
rs_deep_channels_index | rs_deep_files_index | rs_deep_forums_index {
111+
rs_deep_channels_index | rs_deep_files_index {
112112
mLibs += xapian
113113
win32-g++|win32-clang-g++:mLibs += rpcrt4
114114
}

0 commit comments

Comments
 (0)