diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0b84439 --- /dev/null +++ b/.gitignore @@ -0,0 +1,100 @@ +## General + +# Compiled Object files +*.slo +*.lo +*.o +*.cuo +*.obj + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Compiled protocol buffers +*.pb.h +*.pb.cc +*_pb2.py + +# Compiled python +*.pyc +*.pyd + +# Compiled MATLAB +*.mex* + +# IPython notebook checkpoints +.ipynb_checkpoints + +# Editor temporaries +*.swn +*.swo +*.swp +*~ + +# Sublime Text settings +*.sublime-workspace +*.sublime-project + +# Eclipse Project settings +*.*project +.settings + +# QtCreator files +*.user + +# PyCharm files +.idea + +# Visual Studio Code files +.vscode + +# OSX dir files +.DS_Store + +## Caffe2 + +# build, distribute, and bins (+ python proto bindings) +build +build_host_protoc +build_android +build_ios +build_* +.build_debug/* +.build_release/* +distribute/* +*.testbin +*.bin +cmake_build +.cmake_build +gen +cmake-build-debug +.cmake-build-debug + +# Bram +plsdontbreak + +# Generated documentation +docs/_site +docs/gathered +_site +doxygen +docs/dev + +# LevelDB files +*.sst +*.ldb +LOCK +LOG* +CURRENT +MANIFEST-* + +# ctags +tags diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1b51a41 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,67 @@ +cmake_minimum_required(VERSION 3.5) + +PROJECT(hardseed) + +SET(SRC_LIST ./src/main.cpp + ./src/lib/self/TopicsListWebpage.cpp + ./src/lib/self/AichengTopicsListWebpage.cpp + ./src/lib/self/CaoliuTopicsListWebpage.cpp + ./src/lib/self/TopicWebpage.cpp + ./src/lib/self/AichengTopicWebpage.cpp + ./src/lib/self/CaoliuTopicWebpage.cpp + ./src/lib/self/SeedWebpage.cpp + ./src/lib/self/JandownSeedWebpage.cpp + ./src/lib/self/RmdownSeedWebpage.cpp + ./src/lib/self/Aicheng.cpp + ./src/lib/self/Caoliu.cpp + ./src/lib/helper/Webpage.cpp + ./src/lib/helper/Time.cpp + ./src/lib/helper/CmdlineOption.cpp + ./src/lib/helper/Misc.cpp + ./src/lib/3rd/json11/json11.cpp) + +# linux +#>>>>>>>>>>>>>>>>>>>>>> + +## debug +#SET(CMAKE_CXX_COMPILER "clang++") +#SET(CMAKE_CXX_FLAGS "-std=c++11 -Werror -Weverything -Wno-documentation -Wno-disabled-macro-expansion -Wno-float-equal -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-global-constructors -Wno-exit-time-destructors -Wno-missing-prototypes -Wno-padded -Wno-old-style-cast -Wno-weak-vtables") +#SET(CMAKE_BUILD_TYPE debug) +#ADD_EXECUTABLE(main ${SRC_LIST}) +#TARGET_LINK_LIBRARIES(main curl pthread) + +# release +# SET(CMAKE_CXX_COMPILER "g++") +# SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") +# SET(CMAKE_BUILD_TYPE release) +# ADD_EXECUTABLE(hardseed ${SRC_LIST}) +# TARGET_LINK_LIBRARIES(hardseed curl pthread) +# INSTALL(PROGRAMS hardseed DESTINATION /usr/local/bin) + +#<<<<<<<<<<<<<<<<<<<<<< + + +## cygwin +##>>>>>>>>>>>>>>>>>>>>>> + +#SET(CMAKE_CXX_COMPILER "g++") +#SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -s -DCYGWIN") +#SET(CMAKE_BUILD_TYPE release) +#ADD_EXECUTABLE(hardseed ${SRC_LIST}) +#target_link_libraries(hardseed /bin/cygcurl-4.dll) +#target_link_libraries(hardseed /lib/libiconv.a) + +##<<<<<<<<<<<<<<<<<<<<<< + + +## osX +##>>>>>>>>>>>>>>>>>>>>>> + +SET(CMAKE_CXX_COMPILER "g++") +SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") +SET(CMAKE_BUILD_TYPE release) +ADD_EXECUTABLE(hardseed ${SRC_LIST}) +TARGET_LINK_LIBRARIES(hardseed curl pthread iconv) +INSTALL(PROGRAMS build/hardseed DESTINATION /usr/local/bin) + +##<<<<<<<<<<<<<<<<<<<<<< diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt deleted file mode 100644 index 02eb536..0000000 --- a/build/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -PROJECT(main) - - -SET(SRC_LIST ../src/main.cpp - ../src/lib/self/TopicsListWebpage.cpp ../src/lib/self/AichengTopicsListWebpage.cpp ../src/lib/self/CaoliuTopicsListWebpage.cpp - ../src/lib/self/TopicWebpage.cpp ../src/lib/self/AichengTopicWebpage.cpp ../src/lib/self/CaoliuTopicWebpage.cpp - ../src/lib/self/SeedWebpage.cpp ../src/lib/self/JandownSeedWebpage.cpp ../src/lib/self/RmdownSeedWebpage.cpp - ../src/lib/self/Aicheng.cpp ../src/lib/self/Caoliu.cpp - ../src/lib/helper/Webpage.cpp - ../src/lib/helper/Time.cpp ../src/lib/helper/CmdlineOption.cpp ../src/lib/helper/Misc.cpp - ../src/lib/3rd/json11/json11.cpp) - -# linux -#>>>>>>>>>>>>>>>>>>>>>> - -## debug -#SET(CMAKE_CXX_COMPILER "clang++") -#SET(CMAKE_CXX_FLAGS "-std=c++11 -Werror -Weverything -Wno-documentation -Wno-disabled-macro-expansion -Wno-float-equal -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-global-constructors -Wno-exit-time-destructors -Wno-missing-prototypes -Wno-padded -Wno-old-style-cast -Wno-weak-vtables") -#SET(CMAKE_BUILD_TYPE debug) -#ADD_EXECUTABLE(main ${SRC_LIST}) -#TARGET_LINK_LIBRARIES(main curl pthread) - -# release -SET(CMAKE_CXX_COMPILER "g++") -SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") -SET(CMAKE_BUILD_TYPE release) -ADD_EXECUTABLE(hardseed ${SRC_LIST}) -TARGET_LINK_LIBRARIES(hardseed curl pthread) -INSTALL(PROGRAMS hardseed DESTINATION /usr/local/bin) - -#<<<<<<<<<<<<<<<<<<<<<< - - -## cygwin -##>>>>>>>>>>>>>>>>>>>>>> - -#SET(CMAKE_CXX_COMPILER "g++") -#SET(CMAKE_CXX_FLAGS "-std=c++11 -O3 -s -DCYGWIN") -#SET(CMAKE_BUILD_TYPE release) -#ADD_EXECUTABLE(hardseed ${SRC_LIST}) -#target_link_libraries(hardseed /bin/cygcurl-4.dll) -#target_link_libraries(hardseed /lib/libiconv.a) - -##<<<<<<<<<<<<<<<<<<<<<< - - -## osX -##>>>>>>>>>>>>>>>>>>>>>> - -#SET(CMAKE_CXX_COMPILER "g++") -#SET(CMAKE_CXX_FLAGS "-std=c++11 -O3") -#SET(CMAKE_BUILD_TYPE release) -#ADD_EXECUTABLE(hardseed ${SRC_LIST}) -#TARGET_LINK_LIBRARIES(hardseed curl pthread iconv) -#INSTALL(PROGRAMS hardseed DESTINATION /usr/local/bin) - -##<<<<<<<<<<<<<<<<<<<<<< diff --git a/config/portals_list.json b/config/portals_list.json index f1ca8d9..ea56777 100644 --- a/config/portals_list.json +++ b/config/portals_list.json @@ -1,4 +1,4 @@ { - "caoliu":"http://cl.bearhk.info/", - "aicheng":"http://www.ac168.info/bt/" -} + "caoliu": "http://www.t66y.com/", + "aicheng": "http://www.ac168.info/bt/" +} diff --git a/src/lib/3rd/json11/json11.cpp b/src/lib/3rd/json11/json11.cpp index 292789e..9647846 100644 --- a/src/lib/3rd/json11/json11.cpp +++ b/src/lib/3rd/json11/json11.cpp @@ -21,6 +21,7 @@ #include "json11.hpp" #include +#include #include #include #include @@ -36,18 +37,31 @@ using std::make_shared; using std::initializer_list; using std::move; +/* Helper for representing null - just a do-nothing struct, plus comparison + * operators so the helpers in JsonValue work. We can't use nullptr_t because + * it may not be orderable. + */ +struct NullStruct { + bool operator==(NullStruct) const { return true; } + bool operator<(NullStruct) const { return false; } +}; + /* * * * * * * * * * * * * * * * * * * * * Serialization */ -static void dump(std::nullptr_t, string &out) { +static void dump(NullStruct, string &out) { out += "null"; } static void dump(double value, string &out) { - char buf[32]; - snprintf(buf, sizeof buf, "%.17g", value); - out += buf; + if (std::isfinite(value)) { + char buf[32]; + snprintf(buf, sizeof buf, "%.17g", value); + out += buf; + } else { + out += "null"; + } } static void dump(int value, string &out) { @@ -203,9 +217,9 @@ class JsonObject final : public Value { explicit JsonObject(Json::object &&value) : Value(move(value)) {} }; -class JsonNull final : public Value { +class JsonNull final : public Value { public: - JsonNull() : Value(nullptr) {} + JsonNull() : Value({}) {} }; /* * * * * * * * * * * * * * * * * * * * @@ -221,12 +235,12 @@ struct Statics { Statics() {} }; -const Statics & statics() { +static const Statics & statics() { static const Statics s {}; return s; } -const Json & static_null() { +static const Json & static_null() { // This has to be separate, not in Statics, because Json() accesses statics().null. static const Json json_null; return json_null; @@ -286,6 +300,8 @@ const Json & JsonArray::operator[] (size_t i) const { */ bool Json::operator== (const Json &other) const { + if (m_ptr == other.m_ptr) + return true; if (m_ptr->type() != other.m_ptr->type()) return false; @@ -293,6 +309,8 @@ bool Json::operator== (const Json &other) const { } bool Json::operator< (const Json &other) const { + if (m_ptr == other.m_ptr) + return false; if (m_ptr->type() != other.m_ptr->type()) return m_ptr->type() < other.m_ptr->type(); @@ -321,11 +339,12 @@ static inline bool in_range(long x, long lower, long upper) { return (x >= lower && x <= upper); } +namespace { /* JsonParser * * Object that tracks all state of an in-progress parse. */ -struct JsonParser { +struct JsonParser final { /* State */ @@ -333,6 +352,7 @@ struct JsonParser { size_t i; string &err; bool failed; + const JsonParse strategy; /* fail(msg, err_ret = Json()) * @@ -359,15 +379,71 @@ struct JsonParser { i++; } + /* consume_comment() + * + * Advance comments (c-style inline and multiline). + */ + bool consume_comment() { + bool comment_found = false; + if (str[i] == '/') { + i++; + if (i == str.size()) + return fail("unexpected end of input after start of comment", false); + if (str[i] == '/') { // inline comment + i++; + // advance until next line, or end of input + while (i < str.size() && str[i] != '\n') { + i++; + } + comment_found = true; + } + else if (str[i] == '*') { // multiline comment + i++; + if (i > str.size()-2) + return fail("unexpected end of input inside multi-line comment", false); + // advance until closing tokens + while (!(str[i] == '*' && str[i+1] == '/')) { + i++; + if (i > str.size()-2) + return fail( + "unexpected end of input inside multi-line comment", false); + } + i += 2; + comment_found = true; + } + else + return fail("malformed comment", false); + } + return comment_found; + } + + /* consume_garbage() + * + * Advance until the current character is non-whitespace and non-comment. + */ + void consume_garbage() { + consume_whitespace(); + if(strategy == JsonParse::COMMENTS) { + bool comment_found = false; + do { + comment_found = consume_comment(); + if (failed) return; + consume_whitespace(); + } + while(comment_found); + } + } + /* get_next_token() * * Return the next non-whitespace character. If the end of the input is reached, * flag an error and return 0. */ char get_next_token() { - consume_whitespace(); + consume_garbage(); + if (failed) return (char)0; if (i == str.size()) - return (char)(fail("unexpected end of input", 0)); + return fail("unexpected end of input", (char)0); return str[i++]; } @@ -441,9 +517,9 @@ struct JsonParser { if (esc.length() < 4) { return fail("bad \\u escape: " + esc, ""); } - for (int j = 0; j < 4; j++) { - if (!in_range(esc[(unsigned int)j], 'a', 'f') && !in_range(esc[(unsigned int)j], 'A', 'F') - && !in_range(esc[(unsigned int)j], '0', '9')) + for (size_t j = 0; j < 4; j++) { + if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F') + && !in_range(esc[j], '0', '9')) return fail("bad \\u escape: " + esc, ""); } @@ -651,13 +727,16 @@ struct JsonParser { return fail("expected value, got " + esc(ch)); } }; +}//namespace { -Json Json::parse(const string &in, string &err) { - JsonParser parser { in, 0, err, false }; +Json Json::parse(const string &in, string &err, JsonParse strategy) { + JsonParser parser { in, 0, err, false, strategy }; Json result = parser.parse_json(0); // Check for any trailing garbage - parser.consume_whitespace(); + parser.consume_garbage(); + if (parser.failed) + return Json(); if (parser.i != in.size()) return parser.fail("unexpected trailing " + esc(in[parser.i])); @@ -665,14 +744,23 @@ Json Json::parse(const string &in, string &err) { } // Documented in json11.hpp -vector Json::parse_multi(const string &in, string &err) { - JsonParser parser { in, 0, err, false }; - +vector Json::parse_multi(const string &in, + std::string::size_type &parser_stop_pos, + string &err, + JsonParse strategy) { + JsonParser parser { in, 0, err, false, strategy }; + parser_stop_pos = 0; vector json_vec; while (parser.i != in.size() && !parser.failed) { json_vec.push_back(parser.parse_json(0)); + if (parser.failed) + break; + // Check for another object - parser.consume_whitespace(); + parser.consume_garbage(); + if (parser.failed) + break; + parser_stop_pos = parser.i; } return json_vec; } diff --git a/src/lib/3rd/json11/json11.hpp b/src/lib/3rd/json11/json11.hpp index fe9bba4..0c47d05 100644 --- a/src/lib/3rd/json11/json11.hpp +++ b/src/lib/3rd/json11/json11.hpp @@ -56,8 +56,24 @@ #include #include +#ifdef _MSC_VER + #if _MSC_VER <= 1800 // VS 2013 + #ifndef noexcept + #define noexcept throw() + #endif + + #ifndef snprintf + #define snprintf _snprintf_s + #endif + #endif +#endif + namespace json11 { +enum JsonParse { + STANDARD, COMMENTS +}; + class JsonValue; class Json final { @@ -91,14 +107,14 @@ class Json final { // Implicit constructor: map-like objects (std::map, std::unordered_map, etc) template ::value - && std::is_constructible::value, + std::is_constructible().begin()->first)>::value + && std::is_constructible().begin()->second)>::value, int>::type = 0> Json(const M & m) : Json(object(m.begin(), m.end())) {} // Implicit constructor: vector-like objects (std::list, std::vector, std::set, etc) template ::value, + std::is_constructible().begin())>::value, int>::type = 0> Json(const V & v) : Json(array(v.begin(), v.end())) {} @@ -145,17 +161,33 @@ class Json final { } // Parse. If parse fails, return Json() and assign an error message to err. - static Json parse(const std::string & in, std::string & err); - static Json parse(const char * in, std::string & err) { + static Json parse(const std::string & in, + std::string & err, + JsonParse strategy = JsonParse::STANDARD); + static Json parse(const char * in, + std::string & err, + JsonParse strategy = JsonParse::STANDARD) { if (in) { - return parse(std::string(in), err); + return parse(std::string(in), err, strategy); } else { err = "null input"; return nullptr; } } // Parse multiple objects, concatenated or separated by whitespace - static std::vector parse_multi(const std::string & in, std::string & err); + static std::vector parse_multi( + const std::string & in, + std::string::size_type & parser_stop_pos, + std::string & err, + JsonParse strategy = JsonParse::STANDARD); + + static inline std::vector parse_multi( + const std::string & in, + std::string & err, + JsonParse strategy = JsonParse::STANDARD) { + std::string::size_type parser_stop_pos; + return parse_multi(in, parser_stop_pos, err, strategy); + } bool operator== (const Json &rhs) const; bool operator< (const Json &rhs) const; diff --git a/src/lib/self/Caoliu.cpp b/src/lib/self/Caoliu.cpp index 9353440..87e87e6 100644 --- a/src/lib/self/Caoliu.cpp +++ b/src/lib/self/Caoliu.cpp @@ -41,7 +41,7 @@ getTopicsListWebpagePartUrl (Caoliu::AvClass av_class) static const string asia_non_mosaicked_original_part_url("thread0806.php?fid=2"); // selfie - static const string selfie_part_url("thread0806.php?fid=16"); + static const string selfie_part_url("thread0806.php?fid=16&search=1"); switch (av_class) { case Caoliu::west_reposted: @@ -175,7 +175,10 @@ downloadTopicPicsAndSeed ( const string& topic_url, // 0) delete the web logo info; // 1) clear the "/" in topictitle string, if the "/" present in filename, // linux will treat it as directory, again, clear the "\" for windows; - static const vector keyword_logos_list = {" 草榴社區 - powered by phpwind.net"}; + static const vector keyword_logos_list = { + " 草榴社區 - powered by phpwind.net", + " - 達蓋爾的旗幟 | 草榴社區 - t66y.com" + }; const string& topic_webpage_title = caoliu_topics_webpage.getTitle(); auto keyword_logo_pos = string::npos; for (const auto& f : keyword_logos_list) { diff --git a/src/lib/self/CaoliuTopicWebpage.cpp b/src/lib/self/CaoliuTopicWebpage.cpp index 2e13a8d..947db6c 100644 --- a/src/lib/self/CaoliuTopicWebpage.cpp +++ b/src/lib/self/CaoliuTopicWebpage.cpp @@ -68,7 +68,7 @@ parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list // just parse the toptip static const string keyword_toptip_begin("本頁主題:"); - static const string keyword_toptip_end("[樓主]"); + static const string keyword_toptip_end("回樓主"); const pair& pair_tmp = fetchStringBetweenKeywords( webpage_txt, keyword_toptip_begin, keyword_toptip_end ); @@ -79,10 +79,9 @@ parsePicturesUrls (const string& webpage_txt, vector& pictures_urls_list } // the list may be on the webpage at the same time - static const vector> begin_and_end_keywords_list = { make_pair("> begin_and_end_keywords_list + = { make_pair("") }; + bool b_ok = false; for (const auto& e : begin_and_end_keywords_list) { if (parsePicturesUrlsHelper(toptip, pictures_urls_list, e.first, e.second)) { diff --git a/src/main.cpp b/src/main.cpp index 5fb640c..036daf4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -215,7 +215,7 @@ static void getPortalUrls (string& caoliu_portal_url, string& aicheng_portal_url) { //#ifdef CYGWIN - caoliu_portal_url = "http://t66y.com/"; + caoliu_portal_url = "http://www.t66y.com/"; aicheng_portal_url = "http://www.ac168.info/bt/"; //#else //static const string portals_file_url("https://raw.githubusercontent.com/yangyangwithgnu/hardseed/master/config/portals_list.json"); @@ -288,14 +288,14 @@ main (int argc, char* argv[]) // --av-class // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> cout << "Your command arguments: " << endl; - string av_class_name("aicheng_asia_mosaicked"); + string av_class_name("caoliu_selfie"); cmdline_arguments_list = cmdline_options.getArgumentsList("--av-class"); if (!cmdline_arguments_list.empty()) { av_class_name = cmdline_arguments_list[0]; } - bool b_aicheng = true; - Caoliu::AvClass caoliu_av_class = Caoliu::asia_mosaicked_original; + bool b_aicheng = false; + Caoliu::AvClass caoliu_av_class = Caoliu::selfie; Aicheng::AvClass aicheng_av_class = Aicheng::asia_mosaicked; if ("caoliu_west_original" == av_class_name) { caoliu_av_class = Caoliu::west_original; @@ -471,7 +471,7 @@ main (int argc, char* argv[]) cout << RichTxt::bold_off << "\b\b\"; " << endl; // --proxy. prompt user to use proxy, because the caoliu bbs maybe block IP - vector proxy_addrs_list = {"http://127.0.0.1:8087"}; // the default proxy is GoAgent + vector proxy_addrs_list = {""}; // the default proxy is NOTHING cmdline_arguments_list = cmdline_options.getArgumentsList("--proxy"); if (!cmdline_arguments_list.empty()) { proxy_addrs_list = cmdline_arguments_list;