diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..7b2511e
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+pyproject.toml
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
index c9d44ad..e4f9f71 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,3 @@
*.rtf eol=crlf
+* linguist-vendored
+*.py linguist-vendored=false
diff --git a/.gitignore b/.gitignore
index b661fef..40cbba9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@
*.py[co]
*.egg-info
tests/currentoutput/
+.devcontainer
+pyproject.toml
+poetry.lock
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ac786d1
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,11 @@
+FROM fkrull/multi-python
+
+WORKDIR /app
+
+RUN apt update && apt install pdftohtml -y
+
+COPY tox.ini .
+
+RUN tox -v; exit 0
+
+COPY . .
diff --git a/README.md b/README.md
index e8b17f1..ff25a1b 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,8 @@ existing reference output files in `tests/rtf-as-html` and `tests/rtf-as-html`.
The empty or missing output files indicate where functionality is missing,
which nicely indicates possible places to jump in if you want to help.
+To run the tests quietly with Docker and tox, use `docker run --rm $(docker build -q .) tox`. Tests currently run against Python 2.7 and Python 3.6.
+
Dependencies
============
diff --git a/pyth/plugins/latex/writer.py b/pyth/plugins/latex/writer.py
index 1369350..591b125 100644
--- a/pyth/plugins/latex/writer.py
+++ b/pyth/plugins/latex/writer.py
@@ -6,7 +6,7 @@
"""
from __future__ import absolute_import
-from six import StringIO
+import six
import docutils.core
from pyth import document
@@ -15,7 +15,6 @@
class LatexWriter(PythWriter):
-
@classmethod
def write(klass, document, target=None, stylesheet=""):
"""
@@ -37,7 +36,7 @@ def __init__(self, doc, target=None, stylesheet=""):
"""
self.document = doc
self.stylesheet = stylesheet
- self.target = target if target is not None else StringIO()
+ self.target = target if target is not None else six.BytesIO()
@property
def full_stylesheet(self):
@@ -57,19 +56,20 @@ def full_stylesheet(self):
}
""" % (self.document.properties.get("title"),
self.document.properties.get("author"),
- self.document.properties.get("subject"))
+ self.document.properties.get("subject"),
+ )
return latex_fragment + self.stylesheet
def go(self):
rst = RSTWriter.write(self.document).getvalue()
- settings = dict(input_encoding="UTF-8",
- output_encoding="UTF-8",
- stylesheet="stylesheet.tex")
- latex = docutils.core.publish_string(rst,
- writer_name="latex",
- settings_overrides=settings)
+ settings = dict(
+ input_encoding="UTF-8", output_encoding="UTF-8", stylesheet="stylesheet.tex"
+ )
+ latex = docutils.core.publish_string(
+ rst, writer_name="latex", settings_overrides=settings
+ )
# We don't want to keep an \input command in the latex file
- latex = latex.replace(r"\input{stylesheet.tex}",
- self.full_stylesheet)
+ # assert False, '{}, {}'.format(type(rb"\input{stylesheet.tex}"), type(six.ensure_binary(self.full_stylesheet)))
+ latex = latex.replace(six.ensure_binary(r"\input{stylesheet.tex}"), six.ensure_binary(self.full_stylesheet))
self.target.write(latex)
return self.target
diff --git a/pyth/plugins/pdf/writer.py b/pyth/plugins/pdf/writer.py
index be45290..97c6704 100644
--- a/pyth/plugins/pdf/writer.py
+++ b/pyth/plugins/pdf/writer.py
@@ -3,7 +3,7 @@
"""
from __future__ import absolute_import
-from six import StringIO
+import six
import cgi # For escape()
from pyth import document
@@ -34,7 +34,7 @@ def write(klass, document, target=None, paragraphStyle=None):
story = writer.go()
if target is None:
- target = StringIO()
+ target = six.BytesIO()
doc = SimpleDocTemplate(target)
doc.build(story)
diff --git a/pyth/plugins/rst/writer.py b/pyth/plugins/rst/writer.py
index de42c44..cfb311d 100644
--- a/pyth/plugins/rst/writer.py
+++ b/pyth/plugins/rst/writer.py
@@ -1,9 +1,9 @@
"""
Render documents as reStructuredText.
"""
-from __future__ import absolute_import
+from __future__ import absolute_import, unicode_literals
import six
-from six import StringIO
+from six import BytesIO
from pyth import document
from pyth.format import PythWriter
@@ -15,7 +15,7 @@ class RSTWriter(PythWriter):
@classmethod
def write(klass, document, target=None):
if target is None:
- target = StringIO()
+ target = BytesIO()
writer = RSTWriter(document, target)
return writer.go()
@@ -28,10 +28,10 @@ def __init__(self, doc, target):
document.Paragraph: self.paragraph}
def go(self):
- for (i, paragraph) in enumerate(self.document.content):
+ for _, paragraph in enumerate(self.document.content):
handler = self.paragraphDispatch[paragraph.__class__]
handler(paragraph)
- self.target.write("\n")
+ self.target.write(b"\n")
# Heh heh, remove final paragraph spacing
self.target.seek(-2, 1)
@@ -43,35 +43,35 @@ def text(self, text):
"""
process a pyth text and return the formatted string
"""
- ret = u"".join(text.content)
+ ret = "".join(text.content)
if 'url' in text.properties:
- return u"`%s`_" % ret
+ return "`%s`_" % ret
if 'bold' in text.properties:
- return u"**%s**" % ret
+ return "**%s**" % ret
if 'italic' in text.properties:
- return u"*%s*" % ret
+ return "*%s*" % ret
if 'sub' in text.properties:
- return six.u(r"\ :sub:`%s`\ " % ret)
+ return r"\ :sub:`%s`\ " % ret
if 'super' in text.properties:
- return six.u(r"\ :sup:`%s`\ " % ret)
+ return r"\ :sup:`%s`\ " % ret
return ret
- def paragraph(self, paragraph, prefix=""):
+ def paragraph(self, paragraph, prefix=b""):
"""
process a pyth paragraph into the target
"""
content = []
for text in paragraph.content:
content.append(self.text(text))
- content = u"".join(content).encode("utf-8")
+ content = "".join(content).encode("utf-8")
- for line in content.split("\n"):
- self.target.write(" " * self.indent)
+ for line in content.split(b"\n"):
+ self.target.write(b" " * self.indent)
self.target.write(prefix)
self.target.write(line)
- self.target.write("\n")
+ self.target.write(b"\n")
if prefix:
- prefix = " "
+ prefix = b" "
# handle the links
if any('url' in text.properties for text in paragraph.content):
diff --git a/pyth/plugins/xhtml/reader.py b/pyth/plugins/xhtml/reader.py
index fc27f86..245aec6 100644
--- a/pyth/plugins/xhtml/reader.py
+++ b/pyth/plugins/xhtml/reader.py
@@ -3,7 +3,7 @@
"""
from __future__ import absolute_import
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, NavigableString
import six
from pyth import document
@@ -26,9 +26,8 @@ def __init__(self, source, css_source=None, encoding="utf-8", link_callback=None
def go(self):
soup = BeautifulSoup(self.source,
- convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES,
- fromEncoding=self.encoding,
- smartQuotesTo=None)
+ features="xml",
+ from_encoding=self.encoding)
# Make sure the document content doesn't use multi-lines
soup = self.format(soup)
doc = document.Document()
@@ -58,12 +57,12 @@ def format(self, soup):
text = six.text_type(node)
lines = [x.strip() for x in text.splitlines()]
text = ' '.join(lines)
- node.replaceWith(BeautifulSoup.BeautifulSoup(text))
- soup = BeautifulSoup.BeautifulSoup(six.text_type(soup))
+ node.replaceWith(BeautifulSoup(text, features="xml"))
+ soup = BeautifulSoup(six.text_type(soup), features="xml")
# replace all <br/> tag by newline character
for node in soup.findAll('br'):
node.replaceWith("\n")
- soup = BeautifulSoup.BeautifulSoup(six.text_type(soup))
+ soup = BeautifulSoup(six.text_type(soup), features="xml")
return soup
def is_bold(self, node):
@@ -143,7 +142,7 @@ def process_into(self, node, obj):
Process a BeautifulSoup node and fill its elements into a pyth
base object.
"""
- if isinstance(node, BeautifulSoup.NavigableString):
+ if isinstance(node, NavigableString):
text = self.process_text(node)
if text:
obj.append(text)
diff --git a/pyth/plugins/xhtml/writer.py b/pyth/plugins/xhtml/writer.py
index eeb5edb..772b09d 100644
--- a/pyth/plugins/xhtml/writer.py
+++ b/pyth/plugins/xhtml/writer.py
@@ -1,6 +1,8 @@
"""
Render documents as XHTML fragments
"""
+import os
+
from pyth import document
from pyth.format import PythWriter
@@ -17,11 +19,11 @@
class XHTMLWriter(PythWriter):
@classmethod
- def write(klass, document, target=None, cssClasses=True, pretty=False):
+ def write(klass, document, target=None, cssClasses=True, pretty=False, newline=os.linesep):
if target is None:
target = six.BytesIO()
- writer = XHTMLWriter(document, target, cssClasses, pretty)
+ writer = XHTMLWriter(document, target, cssClasses, pretty, newline)
final = writer.go()
final.seek(0)
@@ -37,12 +39,12 @@ def write(klass, document, target=None, cssClasses=True, pretty=False):
return final
-
- def __init__(self, doc, target, cssClasses=True, pretty=False):
+ def __init__(self, doc, target, cssClasses=True, pretty=False, newline=os.linesep):
self.document = doc
self.target = target
self.cssClasses = cssClasses
self.pretty = pretty
+ self.newline = newline
self.paragraphDispatch = {
document.List: self._list,
document.Paragraph: self._paragraph
@@ -154,7 +156,7 @@ def render(self, target):
if self.tag is not None:
target.write(('%s>' % self.tag).encode("utf-8"))
-
+
def attrString(self):
return " ".join(
diff --git a/setup.py b/setup.py
index fd92b06..b69aee7 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
from setuptools import setup, find_packages
setup(name="pyth3",
- version="0.7",
+ version="0.7.1",
packages = find_packages(),
zip_safe = False,
diff --git a/tests/test_readrtf15.py b/tests/test_readrtf15.py
index 0acecf5..a45c8f8 100644
--- a/tests/test_readrtf15.py
+++ b/tests/test_readrtf15.py
@@ -6,6 +6,8 @@
"""
from __future__ import absolute_import
from __future__ import print_function
+from __future__ import unicode_literals
+
import glob
import os
import os.path
@@ -15,6 +17,10 @@
from pyth.plugins.xhtml.writer import XHTMLWriter, write_html_file
from pyth.plugins.plaintext.writer import PlaintextWriter
+
+TEST_LINE_SEP = '\r\n' # Reference Outputs use CRLF
+
+
class TestRtfHTML(unittest.TestCase):
pass # will be filled dynamically now:
@@ -45,7 +51,7 @@ def testmethod(self): # the test method to be added
write_html_file(outputfilename, the_testoutput, print_msg=False)
elif writer == 'txt':
with open(outputfilename, "wt") as f:
- PlaintextWriter.write(document, f)
+ PlaintextWriter.write(document, f, newline=TEST_LINE_SEP)
#--- compute test output:
with open(outputfilename, "rb") as input:
diff --git a/tests/test_readxhtml.py b/tests/test_readxhtml.py
index 5038834..6e0019b 100644
--- a/tests/test_readxhtml.py
+++ b/tests/test_readxhtml.py
@@ -18,8 +18,8 @@ def test_basic(self):
"""
xhtml = "
p0
p1
p2