diff --git a/.gitignore b/.gitignore index 72da70cf..ae34a6ea 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ Makefile .libs .dirstamp *.la +.cache +compile_commands.json # make dist generated files /README @@ -57,6 +59,9 @@ Makefile /manual/aspell.html/ /manual/aspell.info /manual/texinfo.tex +/manual/mdate-sh +/manual/stamp-vti +/manual/version.texi /missing /myspell/Makefile.in /po/Makefile.in diff --git a/Makefile.am b/Makefile.am index 5196a526..8d4d65b2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -181,7 +181,8 @@ optfiles = \ modules/filter/html-filter.info\ modules/filter/context-filter.info\ modules/filter/nroff-filter.info\ - modules/filter/texinfo-filter.info + modules/filter/texinfo-filter.info\ + modules/filter/po-filter.info ### Add all your aspell mode files ### fltfiles = \ @@ -196,7 +197,8 @@ fltfiles = \ modules/filter/modes/url.amf \ modules/filter/modes/comment.amf \ modules/filter/modes/nroff.amf\ - modules/filter/modes/texinfo.amf + modules/filter/modes/texinfo.amf \ + modules/filter/modes/po.amf if COMPILE_IN_FILTERS @@ -212,7 +214,8 @@ libaspell_la_SOURCES +=\ modules/filter/markdown.cpp\ modules/filter/context.cpp\ modules/filter/nroff.cpp\ - modules/filter/texinfo.cpp + modules/filter/texinfo.cpp\ + modules/filter/po.cpp else # not COMPILE_IN_FILTERS @@ -223,7 +226,7 @@ filter_ldflags = -module -avoid-version ### must look like lib-filter.la see development manual filter_LTLIBRARIES = email-filter.la tex-filter.la\ sgml-filter.la markdown-filter.la context-filter.la\ - nroff-filter.la texinfo-filter.la + nroff-filter.la texinfo-filter.la po-filter.la email_filter_la_SOURCES = modules/filter/email.cpp email_filter_la_LIBADD = libaspell.la @@ -253,6 +256,10 @@ texinfo_filter_la_SOURCES = modules/filter/texinfo.cpp texinfo_filter_la_LIBADD = libaspell.la texinfo_filter_la_LDFLAGS = ${filter_ldflags} +po_filter_la_SOURCES = modules/filter/po.cpp +po_filter_la_LIBADD = libaspell.la +po_filter_la_LDFLAGS = ${filter_ldflags} + ### 
Before this line add the corresponding _SOURCES and ### _LIBADD lines. The later at least has to look ### like _LIBADD = ${top_builddir}/lib/libaspell.la diff --git a/manual/aspell.texi b/manual/aspell.texi index c8d5260c..dd7498c7 100644 --- a/manual/aspell.texi +++ b/manual/aspell.texi @@ -1368,6 +1368,24 @@ The Texinfo filter will also skip over the @samp{\input texinfo} line. @end table +@subsubsection PO gettext Filter + +The @option{po} filter allows you to spell check GNU @code{gettext} +@code{po} files. It will skip the header section, @code{msgid}, +@code{msgid_plural}, @code{msgctxt}, translator comments, extracted comments, +references, flags and previous untranslated strings. + +The filter is tailored to translators. We offer an option to review the +@code{msgid} and @code{msgid_plural} strings, but not the other strings. + +@table @b +@item maintainer-mode +@i{(boolean)} +When enabled, spell checks only @code{msgid} and @code{msgid_plural}, filtering +out any other content from the file. + +@end table + @subsubsection Nroff Filter The @option{nroff} filter mode allows you to check the spelling of diff --git a/modules/filter/modes/po.amf b/modules/filter/modes/po.amf new file mode 100644 index 00000000..8fc2fc56 --- /dev/null +++ b/modules/filter/modes/po.amf @@ -0,0 +1,10 @@ +MODE po + +ASPELL >=0.60.1 + +MAGIC //po + +DESCRIPTION mode for checking gettext .po files + +FILTER url +FILTER po diff --git a/modules/filter/po-filter.info b/modules/filter/po-filter.info new file mode 100644 index 00000000..fba5c32a --- /dev/null +++ b/modules/filter/po-filter.info @@ -0,0 +1,15 @@ +# gettext po filter option file + +#This Filter is usable with the following version(s) of Aspell +ASPELL >=0.51 + +#This line will be printed when typing `aspell help po' +DESCRIPTION filter to deal with gettext .po files + +STATIC filter + +OPTION maintainer-mode +TYPE bool +DESCRIPTION review only msgids +DEFAULT false +ENDOPTION diff --git a/modules/filter/po.cpp b/modules/filter/po.cpp new file
mode 100644 index 00000000..29339781 --- /dev/null +++ b/modules/filter/po.cpp @@ -0,0 +1,282 @@ +// This file is part of The New Aspell +// Copyright (C) 2019 by Kevin Atkinson and +// Copyright (C) 2025 by Igor Támara +// under the GNU LGPL license version 2.0 or 2.1. You should +// have received a copy of the LGPL license along with this +// library if you did not you can find it at http://www.gnu.org/. + +#include "settings.h" + +#include "asc_ctype.hpp" +#include "config.hpp" +#include "filter_char.hpp" +#include "indiv_filter.hpp" +#include "iostream.hpp" + +// #define DEBUG_FILTER +/* + * Include the path of the directory that holds the compiled filter + * In an invocation like : + * inst/bin/aspell --add-filter-path=inst/lib/aspell-0.60/ + * --data-dir=/usr/lib/aspell -c po/es.po To reuse the dictionaries, we need to + * pass the 32 bit compatibility option in Debian + * ./configure --enable-maintainer-mode --disable-shared + * --disable-pspell-compatibility --enable-w-all-error --prefix="`pwd`/inst" + * CFLAGS='-g -O' CXXFLAGS='-g -O' --enable-32-bit-hash-fun && bear -- make + * seergdb is an option for debugger + */ +using namespace acommon; + +namespace { + +enum actionState { source = 0, translation = 1, other = 2 }; + +class PoFilter : public IndividualFilter { + int hide_to_char(FilterChar *, char, FilterChar *); + int find_char(FilterChar *, char, FilterChar *); + int hide_all(FilterChar *, FilterChar *); + int initial_whitespace(FilterChar *, FilterChar *); + int sanitize_portion(FilterChar *, FilterChar *); + void maintainer(FilterChar *&, FilterChar *&); + void translator(FilterChar *&, FilterChar *&); + +public: + virtual PosibErr setup(Config *); + virtual void reset(void); + void process(FilterChar *&, FilterChar *&); +}; + +bool in_header = false; +bool header_processed = false; +bool in_translation = false; +bool maintainer_mode = false; +actionState current_action = other; + +PosibErr PoFilter::setup(Config *config) { + name_ = "po-filter"; 
+ order_num_ = 0.80; + + maintainer_mode = config->retrieve_bool("f-po-maintainer-mode"); + return true; +} + +void PoFilter::maintainer(FilterChar *&str, FilterChar *&end) { + + for (FilterChar *cur = str; cur < end;) { + if (*cur == 'm' && *(cur + 1) == 's' && *(cur + 2) == 'g' && + *(cur + 3) == 'i' && *(cur + 4) == 'd') { + current_action = source; + cur += hide_to_char(cur, '"', end); + if (cur >= end) + break; + sanitize_portion(cur, end - 2); + cur += find_char(cur, '\n', end); + if (*(cur - 2) == '"') + *(cur - 2) = ' '; + } else { + if (current_action == source) { + cur += initial_whitespace(cur, end); + if (*cur == '"') { + // This is a multiline msgid... + *cur = ' '; + sanitize_portion(cur + 1, end - 1); + if (*(end - 1) == '"') + *(end - 1) = ' '; + cur = end; + break; + } else { + current_action = other; + } + } + cur += hide_to_char(cur, '\n', end); + } + } +} + +void PoFilter::translator(FilterChar *&str, FilterChar *&end) { + for (FilterChar *cur = str; cur < end;) { + if (*cur == 'm' && *(cur + 1) == 's' && *(cur + 2) == 'g' && + cur + 6 < end) { + cur += hide_all(cur, cur + 3); + current_action = other; + if (*cur == 'i' && *(cur + 1) == 'd' && header_processed == false) { + current_action = source; + in_translation = false; + cur += hide_to_char(cur, '"', end); + if (cur >= end) + break; + in_header = *cur == '"'; +#ifdef DEBUG_FILTER + if (in_header) { + CERR.printf(" H"); + } else { + CERR.printf(" h"); + } +#endif + cur += hide_to_char(cur, '\n', end); + if (cur >= end) + break; + } else if (*cur == 's' && *(cur + 1) == 't' && *(cur + 2) == 'r') { + current_action = translation; + if (in_header) { +#ifdef DEBUG_FILTER + CERR.printf(" H"); +#endif + in_translation = false; + cur += hide_all(cur, end); + break; + } else { + in_translation = true; + } + cur += hide_to_char(cur, '"', end); + if (cur >= end) + break; +#ifdef DEBUG_FILTER + if (end > cur + 1) { + CERR.printf(" I -----------------------"); + } +#endif + // the chunk between cur and 
end is the translation + // and can be processed to avoid escape characters + // and ignore formatters or the like for a given language + sanitize_portion(cur, end - 2); + cur += find_char(cur, '\n', end); + if (*(cur - 2) == '"') + *(cur - 2) = ' '; + } else { // A malformed file is not reviewed + in_translation = false; + cur += hide_all(cur, end); + } + } else { + if (in_translation) { + cur += initial_whitespace(cur, end); + if (*cur == '"') { + // This is a multiline translation +#ifdef DEBUG_FILTER + CERR.printf(" M -----------------------"); +#endif + *cur = ' '; + sanitize_portion(cur + 1, end - 1); + if (*(end - 1) == '"') + *(end - 1) = ' '; + cur = end; + break; + } +#ifdef DEBUG_FILTER + CERR.printf(" h"); +#endif + in_translation = false; + // The line does not start with a quote, then hiding + cur += hide_all(cur, end); + } else { + if (current_action == source) { + in_header = false; + } + in_translation = false; + cur += hide_all(cur, end); +#ifdef DEBUG_FILTER + CERR.printf(" h"); +#endif + } + } + } // end of for +} + +void PoFilter::process(FilterChar *&str, FilterChar *&end) { +#ifdef DEBUG_FILTER + CERR.printf("\np %lu:", end - str); + FilterChar *tmp = str; + int limit = 20, i = 0; + while (tmp < end - 1 && i < limit) { + CERR.printf("%c", (char)*tmp); + tmp++; + i++; + } +#endif + if (maintainer_mode) { + maintainer(str, end); + } else { + translator(str, end); + } +} + +int PoFilter::hide_all(FilterChar *begin, FilterChar *end) { + // We will hide everything, nothing to work in here + FilterChar *current = begin; + + while (current < end) { + *current = ' '; + current++; + } + return current - begin; +} + +int PoFilter::hide_to_char(FilterChar *begin, char limiter, FilterChar *end) { + // This line needs no spell checking, we go out if the endline is + // reached, and do no clear the endline, otherwise we clear + FilterChar *current = begin; + while (current < end) { + if (*current == '\n') { + current++; + break; + } else if (*current == 
limiter) { + *current = ' '; + current++; + break; + } + *current = ' '; + current++; + } + return current - begin; +} + +int PoFilter::find_char(FilterChar *begin, char limiter, FilterChar *end) { + // We find the limiter and return the position of the next char + // we are limited also by the line + FilterChar *current = begin; + while (current < end) { + if (*current == limiter || *current == '\n') { + current++; + break; + } + current++; + } + return current - begin; +} + +int PoFilter::initial_whitespace(FilterChar *begin, FilterChar *end) { + // We find the limiter and return the position of the next char + FilterChar *current = begin; + while (current < end && asc_isspace(*current)) + current++; + return current - begin; +} + +int PoFilter::sanitize_portion(FilterChar *begin, FilterChar *end) { + // We remove the escape characters like \n and friends + FilterChar *current = begin; + while (current < end) { + if (*current == '\\') { + *current = ' '; + current++; + if (current < end) { + *current = ' '; + current++; + } else { + break; + } + } + current++; + } + return current - begin; +} + +void PoFilter::reset(void) { + in_header = false; + header_processed = false; + maintainer_mode = false; +} +} // namespace + +C_EXPORT +IndividualFilter *new_aspell_po_filter() { return new PoFilter; }