diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..7b2511e --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +pyproject.toml \ No newline at end of file diff --git a/.gitattributes b/.gitattributes index c9d44ad..e4f9f71 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ *.rtf eol=crlf +* linguist-vendored +*.py linguist-vendored=false diff --git a/.gitignore b/.gitignore index b661fef..40cbba9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ *.py[co] *.egg-info tests/currentoutput/ +.devcontainer +pyproject.toml +poetry.lock diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ac786d1 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM fkrull/multi-python + +WORKDIR /app + +RUN apt update && apt install pdftohtml -y + +COPY tox.ini . + +RUN tox -v; exit 0 + +COPY . . diff --git a/README.md b/README.md index e8b17f1..ff25a1b 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,8 @@ existing reference output files in `tests/rtf-as-html` and `tests/rtf-as-html`. The empty or missing output files indicate where functionality is missing, which nicely indicates possible places to jump in if you want to help. +To run tests quietly with docker and tox `docker run --rm $(docker build -q .) tox`. Tests run against python 2.7 and python 3.6 at the moment. + Dependencies ============ diff --git a/pyth/plugins/latex/writer.py b/pyth/plugins/latex/writer.py index 1369350..591b125 100644 --- a/pyth/plugins/latex/writer.py +++ b/pyth/plugins/latex/writer.py @@ -6,7 +6,7 @@ """ from __future__ import absolute_import -from six import StringIO +import six import docutils.core from pyth import document @@ -15,7 +15,6 @@ class LatexWriter(PythWriter): - @classmethod def write(klass, document, target=None, stylesheet=""): """ @@ -37,7 +36,7 @@ def __init__(self, doc, target=None, stylesheet=""): """ self.document = doc self.stylesheet = stylesheet - self.target = target if target is not None else StringIO() + self.target = target if target is not None else six.BytesIO() @property def full_stylesheet(self): @@ -57,19 +56,20 @@ def full_stylesheet(self): } """ % (self.document.properties.get("title"), self.document.properties.get("author"), - self.document.properties.get("subject")) + self.document.properties.get("subject"), + ) return latex_fragment + self.stylesheet def go(self): rst = RSTWriter.write(self.document).getvalue() - settings = dict(input_encoding="UTF-8", - output_encoding="UTF-8", - stylesheet="stylesheet.tex") - latex = docutils.core.publish_string(rst, - writer_name="latex", - settings_overrides=settings) + settings = dict( + input_encoding="UTF-8", output_encoding="UTF-8", stylesheet="stylesheet.tex" + ) + latex = docutils.core.publish_string( + rst, writer_name="latex", settings_overrides=settings + ) # We don't want to keep an \input command in the latex file - latex = latex.replace(r"\input{stylesheet.tex}", - self.full_stylesheet) + # assert False, '{}, {}'.format(type(rb"\input{stylesheet.tex}"), type(six.ensure_binary(self.full_stylesheet))) + latex = latex.replace(six.ensure_binary(r"\input{stylesheet.tex}"), six.ensure_binary(self.full_stylesheet)) self.target.write(latex) return self.target diff --git a/pyth/plugins/pdf/writer.py b/pyth/plugins/pdf/writer.py index be45290..97c6704 100644 --- a/pyth/plugins/pdf/writer.py +++ b/pyth/plugins/pdf/writer.py @@ -3,7 +3,7 @@ """ from __future__ import absolute_import -from six import StringIO +import six import cgi # For escape() from pyth import document @@ -34,7 +34,7 @@ def write(klass, document, target=None, paragraphStyle=None): story = writer.go() if target is None: - target = StringIO() + target = six.BytesIO() doc = SimpleDocTemplate(target) doc.build(story) diff --git a/pyth/plugins/rst/writer.py b/pyth/plugins/rst/writer.py index de42c44..cfb311d 100644 --- a/pyth/plugins/rst/writer.py +++ b/pyth/plugins/rst/writer.py @@ -1,9 +1,9 @@ """ Render documents as reStructuredText. """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import six -from six import StringIO +from six import BytesIO from pyth import document from pyth.format import PythWriter @@ -15,7 +15,7 @@ class RSTWriter(PythWriter): @classmethod def write(klass, document, target=None): if target is None: - target = StringIO() + target = BytesIO() writer = RSTWriter(document, target) return writer.go() @@ -28,10 +28,10 @@ def __init__(self, doc, target): document.Paragraph: self.paragraph} def go(self): - for (i, paragraph) in enumerate(self.document.content): + for _, paragraph in enumerate(self.document.content): handler = self.paragraphDispatch[paragraph.__class__] handler(paragraph) - self.target.write("\n") + self.target.write(b"\n") # Heh heh, remove final paragraph spacing self.target.seek(-2, 1) @@ -43,35 +43,35 @@ def text(self, text): """ process a pyth text and return the formatted string """ - ret = u"".join(text.content) + ret = "".join(text.content) if 'url' in text.properties: - return u"`%s`_" % ret + return "`%s`_" % ret if 'bold' in text.properties: - return u"**%s**" % ret + return "**%s**" % ret if 'italic' in text.properties: - return u"*%s*" % ret + return "*%s*" % ret if 'sub' in text.properties: - return six.u(r"\ :sub:`%s`\ " % ret) + return r"\ :sub:`%s`\ " % ret if 'super' in text.properties: - return six.u(r"\ :sup:`%s`\ " % ret) + return r"\ :sup:`%s`\ " % ret return ret - def paragraph(self, paragraph, prefix=""): + def paragraph(self, paragraph, prefix=b""): """ process a pyth paragraph into the target """ content = [] for text in paragraph.content: content.append(self.text(text)) - content = u"".join(content).encode("utf-8") + content = "".join(content).encode("utf-8") - for line in content.split("\n"): - self.target.write(" " * self.indent) + for line in content.split(b"\n"): + self.target.write(b" " * self.indent) self.target.write(prefix) self.target.write(line) - self.target.write("\n") + self.target.write(b"\n") if prefix: - prefix = " " + prefix = b" " # handle the links if any('url' in text.properties for text in paragraph.content): diff --git a/pyth/plugins/xhtml/reader.py b/pyth/plugins/xhtml/reader.py index fc27f86..245aec6 100644 --- a/pyth/plugins/xhtml/reader.py +++ b/pyth/plugins/xhtml/reader.py @@ -3,7 +3,7 @@ """ from __future__ import absolute_import -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, NavigableString import six from pyth import document @@ -26,9 +26,8 @@ def __init__(self, source, css_source=None, encoding="utf-8", link_callback=None def go(self): soup = BeautifulSoup(self.source, - convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES, - fromEncoding=self.encoding, - smartQuotesTo=None) + features="xml", + from_encoding=self.encoding) # Make sure the document content doesn't use multi-lines soup = self.format(soup) doc = document.Document() @@ -58,12 +57,12 @@ def format(self, soup): text = six.text_type(node) lines = [x.strip() for x in text.splitlines()] text = ' '.join(lines) - node.replaceWith(BeautifulSoup.BeautifulSoup(text)) - soup = BeautifulSoup.BeautifulSoup(six.text_type(soup)) + node.replaceWith(BeautifulSoup(text, features="xml")) + soup = BeautifulSoup(six.text_type(soup), features="xml") # replace all
tag by newline character for node in soup.findAll('br'): node.replaceWith("\n") - soup = BeautifulSoup.BeautifulSoup(six.text_type(soup)) + soup = BeautifulSoup(six.text_type(soup), features="xml") return soup def is_bold(self, node): @@ -143,7 +142,7 @@ def process_into(self, node, obj): Process a BeautifulSoup node and fill its elements into a pyth base object. """ - if isinstance(node, BeautifulSoup.NavigableString): + if isinstance(node, NavigableString): text = self.process_text(node) if text: obj.append(text) diff --git a/pyth/plugins/xhtml/writer.py b/pyth/plugins/xhtml/writer.py index eeb5edb..772b09d 100644 --- a/pyth/plugins/xhtml/writer.py +++ b/pyth/plugins/xhtml/writer.py @@ -1,6 +1,8 @@ """ Render documents as XHTML fragments """ +import os + from pyth import document from pyth.format import PythWriter @@ -17,11 +19,11 @@ class XHTMLWriter(PythWriter): @classmethod - def write(klass, document, target=None, cssClasses=True, pretty=False): + def write(klass, document, target=None, cssClasses=True, pretty=False, newline=os.linesep): if target is None: target = six.BytesIO() - writer = XHTMLWriter(document, target, cssClasses, pretty) + writer = XHTMLWriter(document, target, cssClasses, pretty, newline) final = writer.go() final.seek(0) @@ -37,12 +39,12 @@ def write(klass, document, target=None, cssClasses=True, pretty=False): return final - - def __init__(self, doc, target, cssClasses=True, pretty=False): + def __init__(self, doc, target, cssClasses=True, pretty=False, newline=os.linesep): self.document = doc self.target = target self.cssClasses = cssClasses self.pretty = pretty + self.newline = newline self.paragraphDispatch = { document.List: self._list, document.Paragraph: self._paragraph @@ -154,7 +156,7 @@ def render(self, target): if self.tag is not None: target.write(('' % self.tag).encode("utf-8")) - + def attrString(self): return " ".join( diff --git a/setup.py b/setup.py index fd92b06..b69aee7 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup(name="pyth3", - version="0.7", + version="0.7.1", packages = find_packages(), zip_safe = False, diff --git a/tests/test_readrtf15.py b/tests/test_readrtf15.py index 0acecf5..a45c8f8 100644 --- a/tests/test_readrtf15.py +++ b/tests/test_readrtf15.py @@ -6,6 +6,8 @@ """ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals + import glob import os import os.path @@ -15,6 +17,10 @@ from pyth.plugins.xhtml.writer import XHTMLWriter, write_html_file from pyth.plugins.plaintext.writer import PlaintextWriter + +TEST_LINE_SEP = '\r\n' # Reference Outputs use CRLF + + class TestRtfHTML(unittest.TestCase): pass # will be filled dynamically now: @@ -45,7 +51,7 @@ def testmethod(self): # the test method to be added write_html_file(outputfilename, the_testoutput, print_msg=False) elif writer == 'txt': with open(outputfilename, "wt") as f: - PlaintextWriter.write(document, f) + PlaintextWriter.write(document, f, newline=TEST_LINE_SEP) #--- compute test output: with open(outputfilename, "rb") as input: diff --git a/tests/test_readxhtml.py b/tests/test_readxhtml.py index 5038834..6e0019b 100644 --- a/tests/test_readxhtml.py +++ b/tests/test_readxhtml.py @@ -18,8 +18,8 @@ def test_basic(self): """ xhtml = "
" doc = XHTMLReader.read(xhtml) - self.assert_(isinstance(doc, pyth.document.Document)) - self.assert_(not doc.content) + self.assertTrue(isinstance(doc, pyth.document.Document)) + self.assertTrue(not doc.content) def test_paragraphs(self): """ @@ -27,14 +27,14 @@ def test_paragraphs(self): """ xhtml = "

p0

p1

p2

" doc = XHTMLReader.read(xhtml) - self.assert_(len(doc.content) == 3) + self.assertTrue(len(doc.content) == 3) for i, p in enumerate(doc.content): - self.assert_(isinstance(p, pyth.document.Paragraph)) - self.assert_(len(p.content) == 1) - self.assert_(isinstance(p.content[0], pyth.document.Text)) + self.assertTrue(isinstance(p, pyth.document.Paragraph)) + self.assertTrue(len(p.content) == 1) + self.assertTrue(isinstance(p.content[0], pyth.document.Text)) text = p.content[0] - self.assert_(len(text.content) == 1) - self.assert_(text.content[0] == 'p%d' % i) + self.assertTrue(len(text.content) == 1) + self.assertTrue(text.content[0] == 'p%d' % i) def test_bold(self): """ diff --git a/tests/test_writelatex.py b/tests/test_writelatex.py index 73c7aac..99a3b4a 100644 --- a/tests/test_writelatex.py +++ b/tests/test_writelatex.py @@ -1,12 +1,13 @@ """ unit tests of the latex writer """ -from __future__ import absolute_import +from __future__ import absolute_import, unicode_literals import unittest +import six from pyth.plugins.latex.writer import LatexWriter -from pyth.plugins.python.reader import * +from pyth.plugins.python.reader import PythonReader, P, T, BOLD, ITALIC class TestWriteLatex(unittest.TestCase): @@ -22,19 +23,19 @@ def test_paragraph(self): """ Try a single paragraph document """ - doc = PythonReader.read(P[u"the text"]) + doc = PythonReader.read(P["the text"]) latex = LatexWriter.write(doc).getvalue() - assert "the text" in latex + assert six.ensure_binary("the text") in latex def test_bold(self): - doc = PythonReader.read([P[T(BOLD)[u"bold text"]]]) + doc = PythonReader.read([P[T(BOLD)["bold text"]]]) latex = LatexWriter.write(doc).getvalue() - assert r"\textbf{bold text}" in latex, latex + assert six.ensure_binary(r"\textbf{bold text}") in latex, latex def test_italic(self): - doc = PythonReader.read([P[T(ITALIC)[u"italic text"]]]) + doc = PythonReader.read([P[T(ITALIC)["italic text"]]]) latex = LatexWriter.write(doc).getvalue() - assert r"\emph{italic text}" in latex, latex + assert six.ensure_binary(r"\emph{italic text}") in latex, latex def test_metadata(self): """ @@ -46,6 +47,6 @@ def test_metadata(self): doc["title"] = "The Title" latex = LatexWriter.write(doc).getvalue() - assert "pdfauthor={The Author}" in latex, latex - assert "pdfsubject={The Subject}" in latex, latex - assert "pdftitle={The Title}" in latex, latex + assert six.ensure_binary("pdfauthor={The Author}") in latex, latex + assert six.ensure_binary("pdfsubject={The Subject}") in latex, latex + assert six.ensure_binary("pdftitle={The Title}") in latex, latex diff --git a/tests/test_writepdf.py b/tests/test_writepdf.py index 6f89560..31ca237 100644 --- a/tests/test_writepdf.py +++ b/tests/test_writepdf.py @@ -3,11 +3,13 @@ """ from __future__ import absolute_import from __future__ import print_function +from __future__ import unicode_literals import unittest import subprocess import tempfile import os +import six from bs4 import BeautifulSoup @@ -57,41 +59,41 @@ def test_paragraph(self): """ Try a simple document with one paragraph """ - doc = PythonReader.read(P[u"the text"]) + doc = PythonReader.read(P["the text"]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - assert "the text" in html + assert six.ensure_binary("the text") in html def test_bold(self): - doc = PythonReader.read([P[T(BOLD)[u"bold text"]]]) + doc = PythonReader.read([P[T(BOLD)["bold text"]]]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, features='xml') node = soup.find("b") assert node assert node.string == "bold text" def test_italic(self): - doc = PythonReader.read([P[T(ITALIC)[u"italic text"]]]) + doc = PythonReader.read([P[T(ITALIC)["italic text"]]]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, features='xml') node = soup.find("i") assert node assert node.string == "italic text" def test_latex(self): - doc = PythonReader.read(P[u"the-text"]) + doc = PythonReader.read(P["the-text"]) pdf = PDFWriter.write(doc).getvalue() html = self.pdf_to_html(pdf) - assert "the-text" in html, html + assert six.ensure_binary("the-text") in html, html def test_rst(self): - doc = PythonReader.read(P[u"the-text"]) + doc = PythonReader.read(P["the-text"]) pdf = PDFWriter.write(doc).getvalue() print(pdf) html = self.pdf_to_html(pdf) - assert "the-text" in html, html + assert six.ensure_binary("the-text") in html, html if __name__ == '__main__': diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..cecc51a --- /dev/null +++ b/tox.ini @@ -0,0 +1,16 @@ +[tox] +envlist = py27,py36 + +[testenv] +deps = + pytest + six + beautifulsoup4 + lxml + docutils + reportlab +commands = + pytest -v tests/test_readrtf15.py + pytest -v tests/test_readxhtml.py + pytest -v tests/test_writelatex.py + pytest -v tests/test_writepdf.py \ No newline at end of file