diff --git a/package-lock.json b/package-lock.json index 8a23ca84..40eb5db9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "mundane-assignment-police", + "name": "map", "lockfileVersion": 2, "requires": true, "packages": {} diff --git a/pom.xml b/pom.xml index c6177209..05953c1a 100644 --- a/pom.xml +++ b/pom.xml @@ -15,7 +15,7 @@ Web-app that assists in checking students' assignments 11 - 1.5.31 + 1.6.21 @@ -85,6 +85,11 @@ 5.0.0.M1 test + + org.jetbrains.kotlinx + dataframe + 0.8.0-dev-1005 + diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/RuleViolation.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/RuleViolation.kt index 1782c96e..fd37b0e1 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/RuleViolation.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/RuleViolation.kt @@ -13,3 +13,4 @@ data class RuleViolation( ) { // override fun toString() = if (lines.count() == 1) "[${lines.first().line}, p.${lines.first().page}] --> '$message'" else "" } + diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/list/ListRule.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/list/ListRule.kt index e8e2a712..ec2f7b2b 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/list/ListRule.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/list/ListRule.kt @@ -8,6 +8,7 @@ import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea.TABLE_OF import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFDocument import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion import com.github.darderion.mundaneassignmentpolice.pdfdocument.list.PDFList +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Coordinate import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line class ListRule( @@ -26,7 +27,7 @@ class ListRule( document.areas!!.tableOfContents.map { document.text.filter { it.area == TABLE_OF_CONTENT }.firstOrNull { line -> line.content.contains(it) - }?: Line(0, 0, 0, listOf(), TABLE_OF_CONTENT) + }?: Line(0, 0, 0, listOf(), TABLE_OF_CONTENT, Coordinate(0,0)) } ) diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/symbol/BasicSymbolRule.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/symbol/BasicSymbolRule.kt index 41b5405c..c1414fe3 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/symbol/BasicSymbolRule.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/symbol/BasicSymbolRule.kt @@ -39,6 +39,7 @@ class BasicSymbolRule( when (direction) { LEFT -> sideTexts.removeAt(1) RIGHT -> sideTexts.removeAt(0) + else -> {} } val neighbors = (if (notIgnoredNeighbors.isNotEmpty()) sideTexts diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRule.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRule.kt new file mode 100644 index 00000000..697401e4 --- /dev/null +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRule.kt @@ -0,0 +1,32 @@ +package com.github.darderion.mundaneassignmentpolice.checker.rule.table + +import com.github.darderion.mundaneassignmentpolice.checker.RuleViolation +import 
com.github.darderion.mundaneassignmentpolice.checker.RuleViolationType +import com.github.darderion.mundaneassignmentpolice.checker.rule.Rule +import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFDocument +import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion +import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line + +class TableRule ( + val predicates: MutableList<(Table) -> List<Line>>, + type: RuleViolationType, + area: PDFRegion, + name: String + ): Rule(area, name, type){ + override fun process(document: PDFDocument): List<RuleViolation> { + val rulesViolations: MutableSet<RuleViolation> = mutableSetOf() + + predicates.forEach { predicate -> + rulesViolations.addAll( + document.tables.map { + predicate(it) + }.filter { it.isNotEmpty() }.map { + RuleViolation(it, name, type) + } + ) + } + + return rulesViolations.toList() + } + } diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRuleBuilder.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRuleBuilder.kt new file mode 100644 index 00000000..c9acd21f --- /dev/null +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/table/TableRuleBuilder.kt @@ -0,0 +1,18 @@ +package com.github.darderion.mundaneassignmentpolice.checker.rule.table + +import com.github.darderion.mundaneassignmentpolice.checker.RuleViolationType +import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion +import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line + +class TableRuleBuilder { + private val predicates: MutableList<(Table) -> List<Line>> = mutableListOf() + private var type: RuleViolationType = RuleViolationType.Error + private var region: PDFRegion = PDFRegion.EVERYWHERE + private var name: String = "Rule name" + + fun called(name: String) = this.also { this.name = name } + + fun disallow(predicate: (table: Table) -> List<Line>) = this.also { predicates.add(predicate) } + fun getRule() = TableRule(predicates, type, region, name) +} \ No newline at end of file diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/word/BasicWordRule.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/word/BasicWordRule.kt index e5310909..e0f0ddd0 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/word/BasicWordRule.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/checker/rule/word/BasicWordRule.kt @@ -39,6 +39,7 @@ class BasicWordRule( when (direction) { Direction.LEFT -> sideWords.removeAt(1) Direction.RIGHT -> sideWords.removeAt(0) + else -> {} } val filteredSideWords = sideWords diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/Annotations.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/Annotations.kt index 721b9d2e..edc86d07 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/Annotations.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/Annotations.kt @@ -13,10 +13,12 @@ class Annotations { var document = PDFBox().getDocument(pdf.name) lines.forEach { line -> document = PDFBox().addLine(document, line.page, - Coordinate(line.position.x to (pdf.height - (line.text.maxOf { it.position.y } + 2))), - (pdf.width - (line.position.x + 
50)).toInt() + Coordinate(line.startPosition.x to (pdf.height - (line.text.maxOf { it.position.y } + 2))), + (line.endPosition.x - line.startPosition.x).toInt() ) } + + Files.createDirectories(Paths.get("${pdfFolder}ruleviolations/")) val fileName = "${pdfFolder}ruleviolations/${ pdf.name.split('/')[pdf.name.split('/').count() - 1].replace(".pdf", "") @@ -24,5 +26,6 @@ class Annotations { document.save(fileName) return fileName } + } } diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocument.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocument.kt index a7c7e71b..e499ddd5 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocument.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocument.kt @@ -1,10 +1,12 @@ package com.github.darderion.mundaneassignmentpolice.pdfdocument +import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line import mu.KotlinLogging class PDFDocument(val name: String = "PDF", val text: List<Line>, + val tables: List<Table>, val width: Double = defaultPageWidth, val height: Double = defaultPageHeight ) { diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/list/PDFList.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/list/PDFList.kt index c55826eb..3ce27e68 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/list/PDFList.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/list/PDFList.kt @@ -54,7 +54,7 @@ data class PDFList(val value: MutableList<Line> = mutableListOf(), val nodes: Mu */ fun getLists(lines: List<Line>): List<PDFList> { // Adding a line to process a text that has no lines after a list - val lines = lines + Line(-1, -1, -1, listOf(Word("NOT A LIST ITEM", Font(0.0f), Coordinate(1000, -1)))) + val lines = lines + Line(-1, -1, -1, listOf(Word("NOT A LIST ITEM", Font(0.0f), Coordinate(1000, -1))), null, Coordinate(0,0)) val lists: MutableList<PDFList> = mutableListOf() val stack: Stack<PDFList> = Stack() @@ -69,11 +69,11 @@ stack.push(stack.peek().nodes.first()) } } else { - previousPosition = stack.peek().value.first().position - if (previousPosition hasSameXAs line.position) { // 1. lorem OR lorem + previousPosition = stack.peek().value.first().startPosition + if (previousPosition hasSameXAs line.startPosition) { // 1. lorem OR lorem stack.peek().value.add(line) // lorem lorem } else { - if (previousPosition.x < line.position.x) { + if (previousPosition.x < line.startPosition.x) { if (isListItem(line)) { stack.peek().nodes.add(PDFList(line.drop(2))) // lorem stack.push(stack.peek().nodes.last()) // 1. lorem } } else { // lorem OR lorem OR ... lorem OR ... lorem while (!( stack.isEmpty() || // lorem 2. lorem lorem 2. 
lorem - (isListItem(line) && previousPosition hasSameXAs line.drop(2).position) || - previousPosition hasSameXAs line.position)) { + (isListItem(line) && previousPosition hasSameXAs line.drop(2).startPosition) || + previousPosition hasSameXAs line.startPosition)) { previousList = stack.pop() if (stack.isNotEmpty()) { - previousPosition = stack.peek().value.first().position + previousPosition = stack.peek().value.first().startPosition } } if (stack.isEmpty()) { lists.add(previousList!!) } else { - if (previousPosition hasSameXAs line.position) { // lorem + if (previousPosition hasSameXAs line.startPosition) { // lorem stack.peek().value.add(line) // lorem } else { stack.pop() diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Cell.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Cell.kt new file mode 100644 index 00000000..880cd0ac --- /dev/null +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Cell.kt @@ -0,0 +1,11 @@ +package com.github.darderion.mundaneassignmentpolice.pdfdocument.tables +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Coordinate +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line + +data class Cell( + val page: Int, + val cellText: MutableList<String>, + var cellLines: MutableList<Line>, + val leftCorner: Coordinate, + val rightCorner: Coordinate +) \ No newline at end of file diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Table.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Table.kt new file mode 100644 index 00000000..f41c97cc --- /dev/null +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/tables/Table.kt @@ -0,0 +1,69 @@ +package com.github.darderion.mundaneassignmentpolice.pdfdocument.tables + +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.* +import com.github.darderion.mundaneassignmentpolice.wrapper.PDFBox +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* + +class Table(val df: DataFrame<*>){ + + val page : Int + val x1 : Double + val y1 : Double + val x2 : Double + val y2 : Double + val rowCount : Int + val colCount : Int + val cells: MutableList<Cell> = mutableListOf() + init { + val indexTableInf = df.select{ cols(0) }.last { it[0] == "table information"}.index() + val tableInf = df.select{cols(0)}.filter { it.index() >= indexTableInf } + + this.page = tableInf[pageTableIndex][0].toString().toInt() - 1 + this.x1 = tableInf[x1TableIndex][0].toString().toDouble() + this.y1 = defaultPageHeight - tableInf[y1TableIndex][0].toString().toDouble() + this.x2 = tableInf[x2TableIndex][0].toString().toDouble() + this.y2 = defaultPageHeight - tableInf[y2TableIndex][0].toString().toDouble() + this.rowCount = tableInf[rowTableIndex][0].toString().toInt() + this.colCount = tableInf[colTableIndex][0].toString().toInt() + val tableData = df.filter { it.index() < indexTableInf } + + tableData.forEachColumn { it.forEach { getCell(it.toString()) } } + } + + private fun getCell(text: String){ + + val coordinates = text.lines().first().split(" ") + val x1 = coordinates[x1CellIndex].toDouble() + val y1 = defaultPageHeight - coordinates[y1CellIndex].toDouble() + val x2 = coordinates[x2CellIndex].toDouble() + val y2 = defaultPageHeight - coordinates[y2CellIndex].toDouble() + + val cellText = text.lines().filterIndexed{ index, _ 
-> index > 0 }.toMutableList() + + cells.add(Cell(page, cellText, mutableListOf(), Coordinate(x1,y1), Coordinate(x2,y2))) + } + + fun getLines(): List<Line>{ + val lines = mutableListOf<Line>() + cells.forEach{ lines.addAll(it.cellLines) } + return lines + } + + companion object { + private const val defaultPageHeight = 842.0 + private const val x1CellIndex = 2 + private const val y1CellIndex = 5 + private const val x2CellIndex = 8 + private const val y2CellIndex = 11 + + private const val pageTableIndex = 2 + private const val x1TableIndex = 4 + private const val y1TableIndex = 5 + private const val x2TableIndex = 6 + private const val y2TableIndex = 7 + private const val rowTableIndex = 9 + private const val colTableIndex = 11 + } +} diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/text/Line.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/text/Line.kt index a7003cc6..021e112a 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/text/Line.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/text/Line.kt @@ -3,12 +3,12 @@ package com.github.darderion.mundaneassignmentpolice.pdfdocument.text import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea data class Line(val index: Int, val page: Int, val documentIndex: Int, - val text: List<Word>, var area: PDFArea? = null + val text: List<Word>, var area: PDFArea? = null, var endPosition: Coordinate ) { val content: String get() = text.joinToString("") { it.text } - val position: Coordinate + val startPosition: Coordinate get() = if (text.isNotEmpty()) text.first().position else Coordinate(0, 0) val first: String? @@ -17,7 +17,10 @@ data class Line(val index: Int, val page: Int, val documentIndex: Int, val second: String? 
get() = if (text.count() > 1) text[1].text else null - override fun toString() = "[$documentIndex -- $index, p.$page, $area, ${position.x}] --> '$content'" + override fun toString() = "[$documentIndex -- $index, p.$page, $area, ${startPosition.x}] --> '$content'" - fun drop(numberOfItems: Int) = Line(index, page, documentIndex, text.drop(numberOfItems), area) + fun drop(numberOfItems: Int) = Line(index, page, documentIndex, text.drop(numberOfItems), area, Coordinate(0,0)) + companion object{ + private const val defaultPageWidth = 595.22 + } } diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/RuleSet.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/RuleSet.kt index ec5708da..5195f93b 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/RuleSet.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/RuleSet.kt @@ -4,7 +4,9 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.Rule val RULE_SET_RU = RuleSet( mutableListOf( - RULE_LITLINK, + TABLE_RULE, + + /*RULE_LITLINK, RULE_SHORT_DASH, RULE_MEDIUM_DASH, RULE_LONG_DASH, @@ -22,9 +24,12 @@ val RULE_SET_RU = RuleSet( RULE_VARIOUS_ABBREVIATIONS, RULE_SECTIONS_ORDER, RULE_LOW_QUALITY_CONFERENCES, + + */ ) - + RULES_SPACE_AROUND_BRACKETS + /*+ RULES_SPACE_AROUND_BRACKETS + RULES_SMALL_NUMBERS -) + */ +) class RuleSet(val rules: List) {} diff --git a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/Rules.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/Rules.kt index 346e4a12..568e2cb9 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/Rules.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/rules/Rules.kt @@ -7,6 +7,7 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.regex.RegexRule import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.SymbolRuleBuilder import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.and import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.or +import com.github.darderion.mundaneassignmentpolice.checker.rule.table.TableRuleBuilder import com.github.darderion.mundaneassignmentpolice.checker.rule.tableofcontent.TableOfContentRuleBuilder import com.github.darderion.mundaneassignmentpolice.checker.rule.url.URLRuleBuilder import com.github.darderion.mundaneassignmentpolice.checker.rule.url.then @@ -15,6 +16,7 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.word.WordRuleBu import com.github.darderion.mundaneassignmentpolice.checker.rule.word.or import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion +import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line import com.github.darderion.mundaneassignmentpolice.utils.InvalidOperationException import com.github.darderion.mundaneassignmentpolice.utils.LowQualityConferencesUtil import com.github.darderion.mundaneassignmentpolice.utils.ResourcesUtil @@ -416,3 +418,12 @@ val RULE_LOW_QUALITY_CONFERENCES = URLRuleBuilder() .any { conference -> url.text.contains(conference) } }.map { it to it.lines } }.getRule() + +val TABLE_RULE = TableRuleBuilder() + .called("Все клетки") + .disallow { table -> + val lines = mutableListOf() + table.cells.forEach { cell -> lines.addAll(cell.cellLines) } + lines + } + .getRule() \ No newline at end of file diff --git 
a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/wrapper/PDFBox.kt b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/wrapper/PDFBox.kt index f4ca5596..3ed05144 100644 --- a/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/wrapper/PDFBox.kt +++ b/src/main/kotlin/com/github/darderion/mundaneassignmentpolice/wrapper/PDFBox.kt @@ -1,6 +1,7 @@ package com.github.darderion.mundaneassignmentpolice.wrapper import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFDocument +import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.* import com.github.darderion.mundaneassignmentpolice.utils.imgToBase64String import org.apache.pdfbox.pdmodel.PDDocument @@ -11,9 +12,19 @@ import org.apache.pdfbox.pdmodel.font.PDType1Font import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject import org.apache.pdfbox.text.PDFTextStripper +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.io.read import java.awt.Color import java.awt.image.RenderedImage import java.io.* +import java.nio.file.Files +import java.nio.file.LinkOption +import java.util.* +import java.util.concurrent.TimeUnit +import kotlin.collections.ArrayList +import kotlin.collections.HashMap +import kotlin.collections.LinkedHashSet +import kotlin.io.path.Path class PDFBox { @@ -90,6 +101,8 @@ class PDFBox { * @return PDFDocument */ fun getPDF(fileName: String): PDFDocument { + val tables = getTables(fileName) + val pdfText: MutableList = mutableListOf() val document = getDocument(fileName) @@ -117,7 +130,7 @@ class PDFBox { var font: Font? var word: String var symb: Symbol - val words: MutableList = mutableListOf() + var words: MutableList = mutableListOf() var contentIndex: Int var contentItem: String var coordinates = Coordinate(0, 0) @@ -166,13 +179,51 @@ class PDFBox { if (font == null && word.isEmpty()) font = Font(0.0f) words.add(Word(word, font!!, coordinates)) - Line(line, pageIndex, lineIndex, words.toList()) - }) - } + tables.filter { table -> table.page == pageIndex }.forEach { table -> + words = words.filter { word -> !isWordInTable(pageIndex, word, table) } + .filter { it.text.isNotEmpty() }.toMutableList() + } + + if (document.pages[pageIndex].resources.xObjectNames.count() != 0){ + Line(line, pageIndex, lineIndex, words.toList(),null,Coordinate(0,0)) + } + else{ + Line(line, pageIndex, lineIndex, words.toList(),null,stripper.symbols[stripperIndex-1].position)} + } + ) + var line = text.lines().size + tables.forEach { table -> + if (table.page == pageIndex) + table.cells.forEach { cell -> + val cellLines = mutableListOf() + cellLines.addAll(cell.cellText.filter { it.isNotEmpty() }.map { content -> + words.clear() + content.split(" ").forEach { + words.add(Word(it, Font(12f), cell.leftCorner)) + } + lineIndex += 1 + line += 1 + val tableLine = Line(line, pageIndex, lineIndex, words.toList(), + endPosition = Coordinate(cell.rightCorner.x, cell.rightCorner.y)) + cell.cellLines = cellLines + pdfText.add(tableLine) + tableLine + } + + ) + } + } + } document.close() - return PDFDocument(fileName, pdfText, size.width.toDouble(), size.height.toDouble()) + return PDFDocument(fileName, pdfText, tables, size.width.toDouble(), size.height.toDouble()) + } + + private fun isWordInTable(page: Int, word: Word, table: Table): Boolean { + return page == table.page && + word.position.x >= table.x1 && word.position.y <= 
table.y1 && + word.position.x <= table.x2 && word.position.y >= table.y2 } fun getPDFSize(fileName: String): Int { @@ -209,4 +260,37 @@ } return images } + + /** + * Returns tables from PDF + * @param path pdf's path + * @return list of Table + */ + fun getTables(path: String): List<Table>
{ + + val workingDirPath = System.getProperty("user.home") + "/map" + val fileName = path.replace("uploads/","") + val tables = mutableListOf<Table>
() + + if (!Files.exists(Path("$workingDirPath/uploads/tables/$fileName"), LinkOption.NOFOLLOW_LINKS)) { + + ProcessBuilder( + "src/main/python/venv/bin/python3", + "src/main/python/TableExtractionScript.py", + "extraction", path + ) + .directory(File(workingDirPath)) + .redirectOutput(ProcessBuilder.Redirect.INHERIT) + .start() + .waitFor() + } + + File("$workingDirPath/uploads/tables/$fileName/").walkBottomUp().filter { it.isFile }.forEach { + val df = DataFrame.read(it) + tables.add(Table(df)) + } + + return tables + } + } diff --git a/src/main/python/TableExtractionScript.py b/src/main/python/TableExtractionScript.py new file mode 100755 index 00000000..a42773de --- /dev/null +++ b/src/main/python/TableExtractionScript.py @@ -0,0 +1,53 @@ +import PyPDF2 +from PyPDF2.errors import PdfReadError +import src.main.python.camelot +import pandas +import os +import sys +from pathlib import Path +sys.path.insert(0, '../src') + + +def extraction(pdf_path): + + os.chdir(os.path.expanduser("~/map/")) + file_name = Path(pdf_path).stem + + try: + PyPDF2.PdfFileReader(open(pdf_path, 'rb')) + except PyPDF2.errors.PdfReadError: + print("invalid PDF file") + else: + if not os.path.isdir(f'uploads/tables/{file_name}'): + os.mkdir(f'uploads/tables/{file_name}') + + tables = src.main.python.camelot.read_pdf(pdf_path, latice=True, pages='all', line_scale=30) + + for k in range(len(tables)): + left_x, left_y, right_x, right_y = 596, 896, 0, 0 + for i in range(len(tables[k].cells)): + for j in range(len(tables[k].cells[i])): + left_x = min(left_x, tables[k].cells[i][j].x1) + left_y = min(left_y, tables[k].cells[i][j].y1) + right_x = max(right_x, tables[k].cells[i][j].x2) + right_y = max(right_y, tables[k].cells[i][j].y2) + tables[k].df.at[i, j] = f'x1 = {tables[k].cells[i][j].x1} ' \ + f'y1 = {tables[k].cells[i][j].y1} ' \ + f'x2 = {tables[k].cells[i][j].x2} ' \ + f'y2 = {tables[k].cells[i][j].y2} \n ' \ + + tables[k].df.at[i, j] + tables[k].df = pandas.concat([pandas.DataFrame(['table data']), tables[k].df, + pandas.DataFrame(['table information', + 'page', tables[k].page, + 'table area', left_x, left_y, right_x, right_y, + 'rows', len(tables[k].rows), + 'columns', len(tables[k].cols)], + )], + ignore_index=True) + tables.export(f'uploads/tables/{file_name}/{file_name}.csv', + f='csv', + compress=False) + + +if __name__ == '__main__': + globals()[sys.argv[1]](sys.argv[2]) diff --git a/src/main/python/camelot/__init__.py b/src/main/python/camelot/__init__.py new file mode 100755 index 00000000..bc4beb62 --- /dev/null +++ b/src/main/python/camelot/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +import logging + +from .__version__ import __version__ +from .io import read_pdf +from .plotting import PlotMethods + + +# set up logging +logger = logging.getLogger("camelot") + +format_string = "%(asctime)s - %(levelname)s - %(message)s" +formatter = logging.Formatter(format_string, datefmt="%Y-%m-%dT%H:%M:%S") +handler = logging.StreamHandler() +handler.setFormatter(formatter) + +logger.addHandler(handler) + +# instantiate plot method +plot = PlotMethods() diff --git a/src/main/python/camelot/__main__.py b/src/main/python/camelot/__main__.py new file mode 100644 index 00000000..ac90c95f --- /dev/null +++ b/src/main/python/camelot/__main__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- + + +__all__ = ("main",) + + +def main(): + from src.main.python.camelot.cli import cli + + cli() + + +if __name__ == "__main__": + main() diff --git a/src/main/python/camelot/__version__.py 
b/src/main/python/camelot/__version__.py new file mode 100644 index 00000000..72364b92 --- /dev/null +++ b/src/main/python/camelot/__version__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +VERSION = (0, 11, 0) +PRERELEASE = None # alpha, beta or rc +REVISION = None + + +def generate_version(version, prerelease=None, revision=None): + version_parts = [".".join(map(str, version))] + if prerelease is not None: + version_parts.append(f"-{prerelease}") + if revision is not None: + version_parts.append(f".{revision}") + return "".join(version_parts) + + +__title__ = "camelot-py" +__description__ = "PDF Table Extraction for Humans." +__url__ = "http://camelot-py.readthedocs.io/" +__version__ = generate_version(VERSION, prerelease=PRERELEASE, revision=REVISION) +__author__ = "Vinayak Mehta" +__author_email__ = "vmehta94@gmail.com" +__license__ = "MIT License" diff --git a/src/main/python/camelot/backends/__init__.py b/src/main/python/camelot/backends/__init__.py new file mode 100644 index 00000000..8d0b91e9 --- /dev/null +++ b/src/main/python/camelot/backends/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +from .image_conversion import ImageConversionBackend diff --git a/src/main/python/camelot/backends/ghostscript_backend.py b/src/main/python/camelot/backends/ghostscript_backend.py new file mode 100644 index 00000000..1de7da19 --- /dev/null +++ b/src/main/python/camelot/backends/ghostscript_backend.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +import sys +import ctypes +from ctypes.util import find_library + + +def installed_posix(): + library = find_library("gs") + return library is not None + + +def installed_windows(): + library = find_library( + "".join(("gsdll", str(ctypes.sizeof(ctypes.c_voidp) * 8), ".dll")) + ) + return library is not None + + +class GhostscriptBackend(object): + def installed(self): + if sys.platform in ["linux", "darwin"]: + return installed_posix() + elif sys.platform == "win32": + return installed_windows() + else: + return installed_posix() + + def convert(self, pdf_path, png_path, resolution=300): + if not self.installed(): + raise OSError( + "Ghostscript is not installed. 
You can install it using the instructions" + " here: https://camelot-py.readthedocs.io/en/master/user/install-deps.html" + ) + + import ghostscript + + gs_command = [ + "gs", + "-q", + "-sDEVICE=png16m", + "-o", + png_path, + f"-r{resolution}", + pdf_path, + ] + ghostscript.Ghostscript(*gs_command) diff --git a/src/main/python/camelot/backends/image_conversion.py b/src/main/python/camelot/backends/image_conversion.py new file mode 100644 index 00000000..7d2c4d7a --- /dev/null +++ b/src/main/python/camelot/backends/image_conversion.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- + +from .poppler_backend import PopplerBackend +from .ghostscript_backend import GhostscriptBackend + +BACKENDS = {"poppler": PopplerBackend, "ghostscript": GhostscriptBackend} + + +class ImageConversionBackend(object): + def __init__(self, backend="poppler", use_fallback=True): + if backend not in BACKENDS.keys(): + raise ValueError(f"Image conversion backend '{backend}' not supported") + + self.backend = backend + self.use_fallback = use_fallback + self.fallbacks = list(filter(lambda x: x != backend, BACKENDS.keys())) + + def convert(self, pdf_path, png_path): + try: + converter = BACKENDS[self.backend]() + converter.convert(pdf_path, png_path) + except Exception as e: + import sys + + if self.use_fallback: + for fallback in self.fallbacks: + try: + converter = BACKENDS[fallback]() + converter.convert(pdf_path, png_path) + except Exception as e: + raise type(e)( + str(e) + f" with image conversion backend '{fallback}'" + ).with_traceback(sys.exc_info()[2]) + continue + else: + break + else: + raise type(e)( + str(e) + f" with image conversion backend '{self.backend}'" + ).with_traceback(sys.exc_info()[2]) diff --git a/src/main/python/camelot/backends/poppler_backend.py b/src/main/python/camelot/backends/poppler_backend.py new file mode 100644 index 00000000..41033729 --- /dev/null +++ b/src/main/python/camelot/backends/poppler_backend.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +import shutil +import subprocess + + +class PopplerBackend(object): + def convert(self, pdf_path, png_path): + pdftopng_executable = shutil.which("pdftopng") + if pdftopng_executable is None: + raise OSError( + "pdftopng is not installed. You can install it using the 'pip install pdftopng' command." + ) + + pdftopng_command = [pdftopng_executable, pdf_path, png_path] + + try: + subprocess.check_output( + " ".join(pdftopng_command), stderr=subprocess.STDOUT, shell=True + ) + except subprocess.CalledProcessError as e: + raise ValueError(e.output) diff --git a/src/main/python/camelot/cli.py b/src/main/python/camelot/cli.py new file mode 100644 index 00000000..546a32d8 --- /dev/null +++ b/src/main/python/camelot/cli.py @@ -0,0 +1,304 @@ +# -*- coding: utf-8 -*- + +import logging + +import click + +try: + import matplotlib.pyplot as plt +except ImportError: + _HAS_MPL = False +else: + _HAS_MPL = True + +from . import __version__, read_pdf, plot + + +logger = logging.getLogger("camelot") +logger.setLevel(logging.INFO) + + +class Config(object): + def __init__(self): + self.config = {} + + def set_config(self, key, value): + self.config[key] = value + + +pass_config = click.make_pass_decorator(Config) + + +@click.group(name="camelot") +@click.version_option(version=__version__) +@click.option("-q", "--quiet", is_flag=False, help="Suppress logs and warnings.") +@click.option( + "-p", + "--pages", + default="1", + help="Comma-separated page numbers." 
" Example: 1,3,4 or 1,4-end or all.", +) +@click.option("-pw", "--password", help="Password for decryption.") +@click.option("-o", "--output", help="Output file path.") +@click.option( + "-f", + "--format", + type=click.Choice(["csv", "excel", "html", "json", "markdown", "sqlite"]), + help="Output file format.", +) +@click.option("-z", "--zip", is_flag=True, help="Create ZIP archive.") +@click.option( + "-split", + "--split_text", + is_flag=True, + help="Split text that spans across multiple cells.", +) +@click.option( + "-flag", + "--flag_size", + is_flag=True, + help="Flag text based on" " font size. Useful to detect super/subscripts.", +) +@click.option( + "-strip", + "--strip_text", + help="Characters that should be stripped from a string before" + " assigning it to a cell.", +) +@click.option( + "-M", + "--margins", + nargs=3, + default=(1.0, 0.5, 0.1), + help="PDFMiner char_margin, line_margin and word_margin.", +) +@click.pass_context +def cli(ctx, *args, **kwargs): + """Camelot: PDF Table Extraction for Humans""" + ctx.obj = Config() + for key, value in kwargs.items(): + ctx.obj.set_config(key, value) + + +@cli.command("lattice") +@click.option( + "-R", + "--table_regions", + default=[], + multiple=True, + help="Page regions to analyze. Example: x1,y1,x2,y2" + " where x1, y1 -> left-top and x2, y2 -> right-bottom.", +) +@click.option( + "-T", + "--table_areas", + default=[], + multiple=True, + help="Table areas to process. Example: x1,y1,x2,y2" + " where x1, y1 -> left-top and x2, y2 -> right-bottom.", +) +@click.option( + "-back", "--process_background", is_flag=True, help="Process background lines." +) +@click.option( + "-scale", + "--line_scale", + default=15, + help="Line size scaling factor. The larger the value," + " the smaller the detected lines.", +) +@click.option( + "-copy", + "--copy_text", + default=[], + type=click.Choice(["h", "v"]), + multiple=True, + help="Direction in which text in a spanning cell" " will be copied over.", +) +@click.option( + "-shift", + "--shift_text", + default=["l", "t"], + type=click.Choice(["", "l", "r", "t", "b"]), + multiple=True, + help="Direction in which text in a spanning cell will flow.", +) +@click.option( + "-l", + "--line_tol", + default=2, + help="Tolerance parameter used to merge close vertical" " and horizontal lines.", +) +@click.option( + "-j", + "--joint_tol", + default=2, + help="Tolerance parameter used to decide whether" + " the detected lines and points lie close to each other.", +) +@click.option( + "-block", + "--threshold_blocksize", + default=15, + help="For adaptive thresholding, size of a pixel" + " neighborhood that is used to calculate a threshold value for" + " the pixel. Example: 3, 5, 7, and so on.", +) +@click.option( + "-const", + "--threshold_constant", + default=-2, + help="For adaptive thresholding, constant subtracted" + " from the mean or weighted mean. 
Normally, it is positive but" + " may be zero or negative as well.", +) +@click.option( + "-I", + "--iterations", + default=0, + help="Number of times for erosion/dilation will be applied.", +) +@click.option( + "-res", + "--resolution", + default=300, + help="Resolution used for PDF to PNG conversion.", +) +@click.option( + "-plot", + "--plot_type", + type=click.Choice(["text", "grid", "contour", "joint", "line"]), + help="Plot elements found on PDF page for visual debugging.", +) +@click.argument("filepath", type=click.Path(exists=True)) +@pass_config +def lattice(c, *args, **kwargs): + """Use lines between text to parse the table.""" + conf = c.config + pages = conf.pop("pages") + output = conf.pop("output") + f = conf.pop("format") + compress = conf.pop("zip") + quiet = conf.pop("quiet") + plot_type = kwargs.pop("plot_type") + filepath = kwargs.pop("filepath") + kwargs.update(conf) + + table_regions = list(kwargs["table_regions"]) + kwargs["table_regions"] = None if not table_regions else table_regions + table_areas = list(kwargs["table_areas"]) + kwargs["table_areas"] = None if not table_areas else table_areas + copy_text = list(kwargs["copy_text"]) + kwargs["copy_text"] = None if not copy_text else copy_text + kwargs["shift_text"] = list(kwargs["shift_text"]) + + if plot_type is not None: + if not _HAS_MPL: + raise ImportError("matplotlib is required for plotting.") + else: + if output is None: + raise click.UsageError("Please specify output file path using --output") + if f is None: + raise click.UsageError("Please specify output file format using --format") + + tables = read_pdf( + filepath, pages=pages, flavor="lattice", suppress_stdout=quiet, **kwargs + ) + click.echo(f"Found {tables.n} tables") + if plot_type is not None: + for table in tables: + plot(table, kind=plot_type) + plt.show() + else: + tables.export(output, f=f, compress=compress) + + +@cli.command("stream") +@click.option( + "-R", + "--table_regions", + default=[], + multiple=True, + help="Page regions to analyze. Example: x1,y1,x2,y2" + " where x1, y1 -> left-top and x2, y2 -> right-bottom.", +) +@click.option( + "-T", + "--table_areas", + default=[], + multiple=True, + help="Table areas to process. 
Example: x1,y1,x2,y2" + " where x1, y1 -> left-top and x2, y2 -> right-bottom.", +) +@click.option( + "-C", + "--columns", + default=[], + multiple=True, + help="X coordinates of column separators.", +) +@click.option( + "-e", + "--edge_tol", + default=50, + help="Tolerance parameter" " for extending textedges vertically.", +) +@click.option( + "-r", + "--row_tol", + default=2, + help="Tolerance parameter" " used to combine text vertically, to generate rows.", +) +@click.option( + "-c", + "--column_tol", + default=0, + help="Tolerance parameter" + " used to combine text horizontally, to generate columns.", +) +@click.option( + "-plot", + "--plot_type", + type=click.Choice(["text", "grid", "contour", "textedge"]), + help="Plot elements found on PDF page for visual debugging.", +) +@click.argument("filepath", type=click.Path(exists=True)) +@pass_config +def stream(c, *args, **kwargs): + """Use spaces between text to parse the table.""" + conf = c.config + pages = conf.pop("pages") + output = conf.pop("output") + f = conf.pop("format") + compress = conf.pop("zip") + quiet = conf.pop("quiet") + plot_type = kwargs.pop("plot_type") + filepath = kwargs.pop("filepath") + kwargs.update(conf) + + table_regions = list(kwargs["table_regions"]) + kwargs["table_regions"] = None if not table_regions else table_regions + table_areas = list(kwargs["table_areas"]) + kwargs["table_areas"] = None if not table_areas else table_areas + columns = list(kwargs["columns"]) + kwargs["columns"] = None if not columns else columns + + if plot_type is not None: + if not _HAS_MPL: + raise ImportError("matplotlib is required for plotting.") + else: + if output is None: + raise click.UsageError("Please specify output file path using --output") + if f is None: + raise click.UsageError("Please specify output file format using --format") + + tables = read_pdf( + filepath, pages=pages, flavor="stream", suppress_stdout=quiet, **kwargs + ) + click.echo(f"Found {tables.n} tables") + if plot_type is not None: + for table in tables: + plot(table, kind=plot_type) + plt.show() + else: + tables.export(output, f=f, compress=compress) diff --git a/src/main/python/camelot/core.py b/src/main/python/camelot/core.py new file mode 100644 index 00000000..58a98efd --- /dev/null +++ b/src/main/python/camelot/core.py @@ -0,0 +1,764 @@ +# -*- coding: utf-8 -*- + +import os +import sqlite3 +import zipfile +import tempfile +from itertools import chain +from operator import itemgetter + +import numpy as np +import pandas as pd + + +# minimum number of vertical textline intersections for a textedge +# to be considered valid +TEXTEDGE_REQUIRED_ELEMENTS = 4 +# padding added to table area on the left, right and bottom +TABLE_AREA_PADDING = 10 + + +class TextEdge(object): + """Defines a text edge coordinates relative to a left-bottom + origin. (PDF coordinate space) + + Parameters + ---------- + x : float + x-coordinate of the text edge. + y0 : float + y-coordinate of bottommost point. + y1 : float + y-coordinate of topmost point. + align : string, optional (default: 'left') + {'left', 'right', 'middle'} + + Attributes + ---------- + intersections: int + Number of intersections with horizontal text rows. + is_valid: bool + A text edge is valid if it intersections with at least + TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows. 
+ + """ + + def __init__(self, x, y0, y1, align="left"): + self.x = x + self.y0 = y0 + self.y1 = y1 + self.align = align + self.intersections = 0 + self.is_valid = False + + def __repr__(self): + x = round(self.x, 2) + y0 = round(self.y0, 2) + y1 = round(self.y1, 2) + return ( + f"" + ) + + def update_coords(self, x, y0, edge_tol=50): + """Updates the text edge's x and bottom y coordinates and sets + the is_valid attribute. + """ + if np.isclose(self.y0, y0, atol=edge_tol): + self.x = (self.intersections * self.x + x) / float(self.intersections + 1) + self.y0 = y0 + self.intersections += 1 + # a textedge is valid only if it extends uninterrupted + # over a required number of textlines + if self.intersections > TEXTEDGE_REQUIRED_ELEMENTS: + self.is_valid = True + + +class TextEdges(object): + """Defines a dict of left, right and middle text edges found on + the PDF page. The dict has three keys based on the alignments, + and each key's value is a list of camelot.core.TextEdge objects. + """ + + def __init__(self, edge_tol=50): + self.edge_tol = edge_tol + self._textedges = {"left": [], "right": [], "middle": []} + + @staticmethod + def get_x_coord(textline, align): + """Returns the x coordinate of a text row based on the + specified alignment. + """ + x_left = textline.x0 + x_right = textline.x1 + x_middle = x_left + (x_right - x_left) / 2.0 + x_coord = {"left": x_left, "middle": x_middle, "right": x_right} + return x_coord[align] + + def find(self, x_coord, align): + """Returns the index of an existing text edge using + the specified x coordinate and alignment. + """ + for i, te in enumerate(self._textedges[align]): + if np.isclose(te.x, x_coord, atol=0.5): + return i + return None + + def add(self, textline, align): + """Adds a new text edge to the current dict.""" + x = self.get_x_coord(textline, align) + y0 = textline.y0 + y1 = textline.y1 + te = TextEdge(x, y0, y1, align=align) + self._textedges[align].append(te) + + def update(self, textline): + """Updates an existing text edge in the current dict.""" + for align in ["left", "right", "middle"]: + x_coord = self.get_x_coord(textline, align) + idx = self.find(x_coord, align) + if idx is None: + self.add(textline, align) + else: + self._textedges[align][idx].update_coords( + x_coord, textline.y0, edge_tol=self.edge_tol + ) + + def generate(self, textlines): + """Generates the text edges dict based on horizontal text + rows. + """ + for tl in textlines: + if len(tl.get_text().strip()) > 1: # TODO: hacky + self.update(tl) + + def get_relevant(self): + """Returns the list of relevant text edges (all share the same + alignment) based on which list intersects horizontal text rows + the most. + """ + intersections_sum = { + "left": sum( + te.intersections for te in self._textedges["left"] if te.is_valid + ), + "right": sum( + te.intersections for te in self._textedges["right"] if te.is_valid + ), + "middle": sum( + te.intersections for te in self._textedges["middle"] if te.is_valid + ), + } + + # TODO: naive + # get vertical textedges that intersect maximum number of + # times with horizontal textlines + relevant_align = max(intersections_sum.items(), key=itemgetter(1))[0] + return self._textedges[relevant_align] + + def get_table_areas(self, textlines, relevant_textedges): + """Returns a dict of interesting table areas on the PDF page + calculated using relevant text edges. 
+ """ + + def pad(area, average_row_height): + x0 = area[0] - TABLE_AREA_PADDING + y0 = area[1] - TABLE_AREA_PADDING + x1 = area[2] + TABLE_AREA_PADDING + # add a constant since table headers can be relatively up + y1 = area[3] + average_row_height * 5 + return (x0, y0, x1, y1) + + # sort relevant textedges in reading order + relevant_textedges.sort(key=lambda te: (-te.y0, te.x)) + + table_areas = {} + for te in relevant_textedges: + if te.is_valid: + if not table_areas: + table_areas[(te.x, te.y0, te.x, te.y1)] = None + else: + found = None + for area in table_areas: + # check for overlap + if te.y1 >= area[1] and te.y0 <= area[3]: + found = area + break + if found is None: + table_areas[(te.x, te.y0, te.x, te.y1)] = None + else: + table_areas.pop(found) + updated_area = ( + found[0], + min(te.y0, found[1]), + max(found[2], te.x), + max(found[3], te.y1), + ) + table_areas[updated_area] = None + + # extend table areas based on textlines that overlap + # vertically. it's possible that these textlines were + # eliminated during textedges generation since numbers and + # chars/words/sentences are often aligned differently. + # drawback: table areas that have paragraphs on their sides + # will include the paragraphs too. + sum_textline_height = 0 + for tl in textlines: + sum_textline_height += tl.y1 - tl.y0 + found = None + for area in table_areas: + # check for overlap + if tl.y0 >= area[1] and tl.y1 <= area[3]: + found = area + break + if found is not None: + table_areas.pop(found) + updated_area = ( + min(tl.x0, found[0]), + min(tl.y0, found[1]), + max(found[2], tl.x1), + max(found[3], tl.y1), + ) + table_areas[updated_area] = None + average_textline_height = sum_textline_height / float(len(textlines)) + + # add some padding to table areas + table_areas_padded = {} + for area in table_areas: + table_areas_padded[pad(area, average_textline_height)] = None + + return table_areas_padded + + +class Cell(object): + """Defines a cell in a table with coordinates relative to a + left-bottom origin. (PDF coordinate space) + + Parameters + ---------- + x1 : float + x-coordinate of left-bottom point. + y1 : float + y-coordinate of left-bottom point. + x2 : float + x-coordinate of right-top point. + y2 : float + y-coordinate of right-top point. + + Attributes + ---------- + lb : tuple + Tuple representing left-bottom coordinates. + lt : tuple + Tuple representing left-top coordinates. + rb : tuple + Tuple representing right-bottom coordinates. + rt : tuple + Tuple representing right-top coordinates. + left : bool + Whether or not cell is bounded on the left. + right : bool + Whether or not cell is bounded on the right. + top : bool + Whether or not cell is bounded on the top. + bottom : bool + Whether or not cell is bounded on the bottom. + hspan : bool + Whether or not cell spans horizontally. + vspan : bool + Whether or not cell spans vertically. + text : string + Text assigned to cell. 
+ + """ + + def __init__(self, x1, y1, x2, y2): + self.x1 = x1 + self.y1 = y1 + self.x2 = x2 + self.y2 = y2 + self.lb = (x1, y1) + self.lt = (x1, y2) + self.rb = (x2, y1) + self.rt = (x2, y2) + self.left = False + self.right = False + self.top = False + self.bottom = False + self.hspan = False + self.vspan = False + self._text = "" + + def __repr__(self): + x1 = round(self.x1) + y1 = round(self.y1) + x2 = round(self.x2) + y2 = round(self.y2) + return f"" + + @property + def text(self): + return self._text + + @text.setter + def text(self, t): + self._text = "".join([self._text, t]) + + @property + def bound(self): + """The number of sides on which the cell is bounded.""" + return self.top + self.bottom + self.left + self.right + + +class Table(object): + """Defines a table with coordinates relative to a left-bottom + origin. (PDF coordinate space) + + Parameters + ---------- + cols : list + List of tuples representing column x-coordinates in increasing + order. + rows : list + List of tuples representing row y-coordinates in decreasing + order. + + Attributes + ---------- + df : :class:`pandas.DataFrame` + shape : tuple + Shape of the table. + accuracy : float + Accuracy with which text was assigned to the cell. + whitespace : float + Percentage of whitespace in the table. + order : int + Table number on PDF page. + page : int + PDF page number. + + """ + + def __init__(self, cols, rows): + self.cols = cols + self.rows = rows + self.cells = [[Cell(c[0], r[1], c[1], r[0]) for c in cols] for r in rows] + self.df = None + self.shape = (0, 0) + self.accuracy = 0 + self.whitespace = 0 + self.order = None + self.page = None + + def __repr__(self): + return f"<{self.__class__.__name__} shape={self.shape}>" + + def __lt__(self, other): + if self.page == other.page: + if self.order < other.order: + return True + if self.page < other.page: + return True + + @property + def data(self): + """Returns two-dimensional list of strings in table.""" + d = [] + for row in self.cells: + d.append([cell.text.strip() for cell in row]) + return d + + @property + def parsing_report(self): + """Returns a parsing report with %accuracy, %whitespace, + table number on page and page number. + """ + # pretty? + report = { + "accuracy": round(self.accuracy, 2), + "whitespace": round(self.whitespace, 2), + "order": self.order, + "page": self.page, + } + return report + + def set_all_edges(self): + """Sets all table edges to True.""" + for row in self.cells: + for cell in row: + cell.left = cell.right = cell.top = cell.bottom = True + return self + + def set_edges(self, vertical, horizontal, joint_tol=2): + """Sets a cell's edges to True depending on whether the cell's + coordinates overlap with the line's coordinates within a + tolerance. + + Parameters + ---------- + vertical : list + List of detected vertical lines. + horizontal : list + List of detected horizontal lines. 
+ + """ + for v in vertical: + # find closest x coord + # iterate over y coords and find closest start and end points + i = [ + i + for i, t in enumerate(self.cols) + if np.isclose(v[0], t[0], atol=joint_tol) + ] + j = [ + j + for j, t in enumerate(self.rows) + if np.isclose(v[3], t[0], atol=joint_tol) + ] + k = [ + k + for k, t in enumerate(self.rows) + if np.isclose(v[1], t[0], atol=joint_tol) + ] + if not j: + continue + J = j[0] + if i == [0]: # only left edge + L = i[0] + if k: + K = k[0] + while J < K: + self.cells[J][L].left = True + J += 1 + else: + K = len(self.rows) + while J < K: + self.cells[J][L].left = True + J += 1 + elif i == []: # only right edge + L = len(self.cols) - 1 + if k: + K = k[0] + while J < K: + self.cells[J][L].right = True + J += 1 + else: + K = len(self.rows) + while J < K: + self.cells[J][L].right = True + J += 1 + else: # both left and right edges + L = i[0] + if k: + K = k[0] + while J < K: + self.cells[J][L].left = True + self.cells[J][L - 1].right = True + J += 1 + else: + K = len(self.rows) + while J < K: + self.cells[J][L].left = True + self.cells[J][L - 1].right = True + J += 1 + + for h in horizontal: + # find closest y coord + # iterate over x coords and find closest start and end points + i = [ + i + for i, t in enumerate(self.rows) + if np.isclose(h[1], t[0], atol=joint_tol) + ] + j = [ + j + for j, t in enumerate(self.cols) + if np.isclose(h[0], t[0], atol=joint_tol) + ] + k = [ + k + for k, t in enumerate(self.cols) + if np.isclose(h[2], t[0], atol=joint_tol) + ] + if not j: + continue + J = j[0] + if i == [0]: # only top edge + L = i[0] + if k: + K = k[0] + while J < K: + self.cells[L][J].top = True + J += 1 + else: + K = len(self.cols) + while J < K: + self.cells[L][J].top = True + J += 1 + elif i == []: # only bottom edge + L = len(self.rows) - 1 + if k: + K = k[0] + while J < K: + self.cells[L][J].bottom = True + J += 1 + else: + K = len(self.cols) + while J < K: + self.cells[L][J].bottom = True + J += 1 + else: # both top and bottom edges + L = i[0] + if k: + K = k[0] + while J < K: + self.cells[L][J].top = True + self.cells[L - 1][J].bottom = True + J += 1 + else: + K = len(self.cols) + while J < K: + self.cells[L][J].top = True + self.cells[L - 1][J].bottom = True + J += 1 + + return self + + def set_border(self): + """Sets table border edges to True.""" + for r in range(len(self.rows)): + self.cells[r][0].left = True + self.cells[r][len(self.cols) - 1].right = True + for c in range(len(self.cols)): + self.cells[0][c].top = True + self.cells[len(self.rows) - 1][c].bottom = True + return self + + def set_span(self): + """Sets a cell's hspan or vspan attribute to True depending + on whether the cell spans horizontally or vertically. + """ + for row in self.cells: + for cell in row: + left = cell.left + right = cell.right + top = cell.top + bottom = cell.bottom + if cell.bound == 4: + continue + elif cell.bound == 3: + if not left and (right and top and bottom): + cell.hspan = True + elif not right and (left and top and bottom): + cell.hspan = True + elif not top and (left and right and bottom): + cell.vspan = True + elif not bottom and (left and right and top): + cell.vspan = True + elif cell.bound == 2: + if left and right and (not top and not bottom): + cell.vspan = True + elif top and bottom and (not left and not right): + cell.hspan = True + elif cell.bound in [0, 1]: + cell.vspan = True + cell.hspan = True + return self + + def to_csv(self, path, **kwargs): + """Writes Table to a comma-separated values (csv) file. 
+ + For kwargs, check :meth:`pandas.DataFrame.to_csv`. + + Parameters + ---------- + path : str + Output filepath. + + """ + kw = {"encoding": "utf-8", "index": False, "header": False, "quoting": 1} + kw.update(kwargs) + self.df.to_csv(path, **kw) + + def to_json(self, path, **kwargs): + """Writes Table to a JSON file. + + For kwargs, check :meth:`pandas.DataFrame.to_json`. + + Parameters + ---------- + path : str + Output filepath. + + """ + kw = {"orient": "records"} + kw.update(kwargs) + json_string = self.df.to_json(**kw) + with open(path, "w") as f: + f.write(json_string) + + def to_excel(self, path, **kwargs): + """Writes Table to an Excel file. + + For kwargs, check :meth:`pandas.DataFrame.to_excel`. + + Parameters + ---------- + path : str + Output filepath. + + """ + kw = { + "sheet_name": f"page-{self.page}-table-{self.order}", + "encoding": "utf-8", + } + kw.update(kwargs) + writer = pd.ExcelWriter(path) + self.df.to_excel(writer, **kw) + writer.save() + + def to_html(self, path, **kwargs): + """Writes Table to an HTML file. + + For kwargs, check :meth:`pandas.DataFrame.to_html`. + + Parameters + ---------- + path : str + Output filepath. + + """ + html_string = self.df.to_html(**kwargs) + with open(path, "w", encoding="utf-8") as f: + f.write(html_string) + + def to_markdown(self, path, **kwargs): + """Writes Table to a Markdown file. + + For kwargs, check :meth:`pandas.DataFrame.to_markdown`. + + Parameters + ---------- + path : str + Output filepath. + + """ + md_string = self.df.to_markdown(**kwargs) + with open(path, "w", encoding="utf-8") as f: + f.write(md_string) + + def to_sqlite(self, path, **kwargs): + """Writes Table to sqlite database. + + For kwargs, check :meth:`pandas.DataFrame.to_sql`. + + Parameters + ---------- + path : str + Output filepath. + + """ + kw = {"if_exists": "replace", "index": False} + kw.update(kwargs) + conn = sqlite3.connect(path) + table_name = f"page-{self.page}-table-{self.order}" + self.df.to_sql(table_name, conn, **kw) + conn.commit() + conn.close() + + +class TableList(object): + """Defines a list of camelot.core.Table objects. Each table can + be accessed using its index. + + Attributes + ---------- + n : int + Number of tables in the list. + + """ + + def __init__(self, tables): + self._tables = tables + + def __repr__(self): + return f"<{self.__class__.__name__} n={self.n}>" + + def __len__(self): + return len(self._tables) + + def __getitem__(self, idx): + return self._tables[idx] + + @staticmethod + def _format_func(table, f): + return getattr(table, f"to_{f}") + + @property + def n(self): + return len(self) + + def _write_file(self, f=None, **kwargs): + dirname = kwargs.get("dirname") + root = kwargs.get("root") + ext = kwargs.get("ext") + for table in self._tables: + filename = f"{root}-page-{table.page}-table-{table.order}{ext}" + filepath = os.path.join(dirname, filename) + to_format = self._format_func(table, f) + to_format(filepath) + + def _compress_dir(self, **kwargs): + path = kwargs.get("path") + dirname = kwargs.get("dirname") + root = kwargs.get("root") + ext = kwargs.get("ext") + zipname = os.path.join(os.path.dirname(path), root) + ".zip" + with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: + for table in self._tables: + filename = f"{root}-page-{table.page}-table-{table.order}{ext}" + filepath = os.path.join(dirname, filename) + z.write(filepath, os.path.basename(filepath)) + + def export(self, path, f="csv", compress=False): + """Exports the list of tables to specified file format. 
+ + Parameters + ---------- + path : str + Output filepath. + f : str + File format. Can be csv, excel, html, json, markdown or sqlite. + compress : bool + Whether or not to add files to a ZIP archive. + + """ + dirname = os.path.dirname(path) + basename = os.path.basename(path) + root, ext = os.path.splitext(basename) + if compress: + dirname = tempfile.mkdtemp() + + kwargs = {"path": path, "dirname": dirname, "root": root, "ext": ext} + + if f in ["csv", "html", "json", "markdown"]: + self._write_file(f=f, **kwargs) + if compress: + self._compress_dir(**kwargs) + elif f == "excel": + filepath = os.path.join(dirname, basename) + writer = pd.ExcelWriter(filepath) + for table in self._tables: + sheet_name = f"page-{table.page}-table-{table.order}" + table.df.to_excel(writer, sheet_name=sheet_name, encoding="utf-8") + writer.save() + if compress: + zipname = os.path.join(os.path.dirname(path), root) + ".zip" + with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: + z.write(filepath, os.path.basename(filepath)) + elif f == "sqlite": + filepath = os.path.join(dirname, basename) + for table in self._tables: + table.to_sqlite(filepath) + if compress: + zipname = os.path.join(os.path.dirname(path), root) + ".zip" + with zipfile.ZipFile(zipname, "w", allowZip64=True) as z: + z.write(filepath, os.path.basename(filepath)) diff --git a/src/main/python/camelot/handlers.py b/src/main/python/camelot/handlers.py new file mode 100644 index 00000000..3feadb60 --- /dev/null +++ b/src/main/python/camelot/handlers.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- + +import os +import sys + +from pypdf import PdfReader, PdfWriter + +from .core import TableList +from .parsers import Stream, Lattice +from .utils import ( + TemporaryDirectory, + get_page_layout, + get_text_objects, + get_rotation, + is_url, + download_url, +) + + +class PDFHandler(object): + """Handles all operations like temp directory creation, splitting + file into single page PDFs, parsing each PDF and then removing the + temp directory. + + Parameters + ---------- + filepath : str + Filepath or URL of the PDF file. + pages : str, optional (default: '1') + Comma-separated page numbers. + Example: '1,3,4' or '1,4-end' or 'all'. + password : str, optional (default: None) + Password for decryption. + + """ + + def __init__(self, filepath, pages="1", password=None): + if is_url(filepath): + filepath = download_url(filepath) + self.filepath = filepath + #if not filepath.lower().endswith(".pdf"): + # raise NotImplementedError("File format not supported") + + if password is None: + self.password = "" + else: + self.password = password + if sys.version_info[0] < 3: + self.password = self.password.encode("ascii") + self.pages = self._get_pages(pages) + + def _get_pages(self, pages): + """Converts pages string to list of ints. + + Parameters + ---------- + filepath : str + Filepath or URL of the PDF file. + pages : str, optional (default: '1') + Comma-separated page numbers. + Example: '1,3,4' or '1,4-end' or 'all'. + + Returns + ------- + P : list + List of int page numbers. 
+ + """ + page_numbers = [] + + if pages == "1": + page_numbers.append({"start": 1, "end": 1}) + else: + with open(self.filepath, "rb") as f: + infile = PdfReader(f, strict=False) + + if infile.is_encrypted: + infile.decrypt(self.password) + + if pages == "all": + page_numbers.append({"start": 1, "end": len(infile.pages)}) + else: + for r in pages.split(","): + if "-" in r: + a, b = r.split("-") + if b == "end": + b = len(infile.pages) + page_numbers.append({"start": int(a), "end": int(b)}) + else: + page_numbers.append({"start": int(r), "end": int(r)}) + + P = [] + for p in page_numbers: + P.extend(range(p["start"], p["end"] + 1)) + return sorted(set(P)) + + def _save_page(self, filepath, page, temp): + """Saves specified page from PDF into a temporary directory. + + Parameters + ---------- + filepath : str + Filepath or URL of the PDF file. + page : int + Page number. + temp : str + Tmp directory. + + """ + with open(filepath, "rb") as fileobj: + infile = PdfReader(fileobj, strict=False) + if infile.is_encrypted: + infile.decrypt(self.password) + fpath = os.path.join(temp, f"page-{page}.pdf") + froot, fext = os.path.splitext(fpath) + p = infile.pages[page - 1] + outfile = PdfWriter() + outfile.add_page(p) + with open(fpath, "wb") as f: + outfile.write(f) + layout, dim = get_page_layout(fpath) + # fix rotated PDF + chars = get_text_objects(layout, ltype="char") + horizontal_text = get_text_objects(layout, ltype="horizontal_text") + vertical_text = get_text_objects(layout, ltype="vertical_text") + rotation = get_rotation(chars, horizontal_text, vertical_text) + if rotation != "": + fpath_new = "".join([froot.replace("page", "p"), "_rotated", fext]) + os.rename(fpath, fpath_new) + instream = open(fpath_new, "rb") + infile = PdfReader(instream, strict=False) + if infile.is_encrypted: + infile.decrypt(self.password) + outfile = PdfWriter() + p = infile.pages[0] + if rotation == "anticlockwise": + p.rotate(90) + elif rotation == "clockwise": + p.rotate(-90) + outfile.add_page(p) + with open(fpath, "wb") as f: + outfile.write(f) + instream.close() + + def parse( + self, flavor="lattice", suppress_stdout=False, layout_kwargs={}, **kwargs + ): + """Extracts tables by calling parser.get_tables on all single + page PDFs. + + Parameters + ---------- + flavor : str (default: 'lattice') + The parsing method to use ('lattice' or 'stream'). + Lattice is used by default. + suppress_stdout : str (default: False) + Suppress logs and warnings. + layout_kwargs : dict, optional (default: {}) + A dict of `pdfminer.layout.LAParams `_ kwargs. + kwargs : dict + See camelot.read_pdf kwargs. + + Returns + ------- + tables : camelot.core.TableList + List of tables found in PDF. 
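# Illustrative sketch of the pages-string grammar parsed by _get_pages above
# (assumes 'sample.pdf' is a local, unencrypted 10-page file):
from camelot.handlers import PDFHandler

print(PDFHandler("sample.pdf", pages="1,3-5").pages)   # -> [1, 3, 4, 5]
print(PDFHandler("sample.pdf", pages="7-end").pages)   # -> [7, 8, 9, 10]
print(PDFHandler("sample.pdf", pages="all").pages)     # -> [1, 2, ..., 10]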
+ + """ + tables = [] + with TemporaryDirectory() as tempdir: + for p in self.pages: + self._save_page(self.filepath, p, tempdir) + pages = [os.path.join(tempdir, f"page-{p}.pdf") for p in self.pages] + parser = Lattice(**kwargs) if flavor == "lattice" else Stream(**kwargs) + for p in pages: + t = parser.extract_tables( + p, suppress_stdout=suppress_stdout, layout_kwargs=layout_kwargs + ) + tables.extend(t) + return TableList(sorted(tables)) diff --git a/src/main/python/camelot/image_processing.py b/src/main/python/camelot/image_processing.py new file mode 100644 index 00000000..08aae1b5 --- /dev/null +++ b/src/main/python/camelot/image_processing.py @@ -0,0 +1,399 @@ +# -*- coding: utf-8 -*- + +import cv2 +import numpy as np + +def adaptive_threshold_with_img(img, process_background=False, blocksize=15, c=-2): + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + if process_background: + threshold = cv2.adaptiveThreshold( + gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, c + ) + else: + threshold = cv2.adaptiveThreshold( + np.invert(gray), + 255, + cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, + blocksize, + c, + ) + return img, threshold + +def adaptive_threshold(imagename, process_background=False, blocksize=15, c=-2): + """Thresholds an image using OpenCV's adaptiveThreshold. + + Parameters + ---------- + imagename : string + Path to image file. + process_background : bool, optional (default: False) + Whether or not to process lines that are in background. + blocksize : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + c : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + + Returns + ------- + img : object + numpy.ndarray representing the original image. + threshold : object + numpy.ndarray representing the thresholded image. + + """ + img = cv2.imread(imagename) + img, threshold = adaptive_threshold_with_img(img, process_background, blocksize, c) + return img, threshold + + +def find_lines( + threshold, regions=None, direction="horizontal", line_scale=15, iterations=0 +): + """Finds horizontal and vertical lines by applying morphological + transformations on an image. + + Parameters + ---------- + threshold : object + numpy.ndarray representing the thresholded image. + regions : list, optional (default: None) + List of page regions that may contain tables of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in image coordinate space. + direction : string, optional (default: 'horizontal') + Specifies whether to find vertical or horizontal lines. + line_scale : int, optional (default: 15) + Factor by which the page dimensions will be divided to get + smallest length of lines that should be detected. + + The larger this value, smaller the detected lines. Making it + too large will lead to text being detected as lines. + iterations : int, optional (default: 0) + Number of times for erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + + Returns + ------- + dmask : object + numpy.ndarray representing pixels where vertical/horizontal + lines lie. 
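# Quick sketch of the thresholding helper above; 'page-1.png' stands in for an
# image produced by the PDF-to-PNG backend (module path assumed from this patch).
import cv2
from camelot.image_processing import adaptive_threshold

img, threshold = adaptive_threshold("page-1.png", process_background=False,
                                    blocksize=15, c=-2)
cv2.imwrite("page-1-threshold.png", threshold)   # inspect the binarized page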
+ lines : list + List of tuples representing vertical/horizontal lines with + coordinates relative to a left-top origin in + image coordinate space. + + """ + lines = [] + + if direction == "vertical": + size = threshold.shape[0] // line_scale + el = cv2.getStructuringElement(cv2.MORPH_RECT, (1, size)) + elif direction == "horizontal": + size = threshold.shape[1] // line_scale + el = cv2.getStructuringElement(cv2.MORPH_RECT, (size, 1)) + elif direction is None: + raise ValueError("Specify direction as either 'vertical' or 'horizontal'") + + if regions is not None: + region_mask = np.zeros(threshold.shape) + for region in regions: + x, y, w, h = region + region_mask[y : y + h, x : x + w] = 1 + threshold = np.multiply(threshold, region_mask) + + threshold = cv2.erode(threshold, el) + threshold = cv2.dilate(threshold, el) + dmask = cv2.dilate(threshold, el, iterations=iterations) + + try: + _, contours, _ = cv2.findContours( + threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + except ValueError: + # for opencv backward compatibility + contours, _ = cv2.findContours( + threshold.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + + for c in contours: + x, y, w, h = cv2.boundingRect(c) + x1, x2 = x, x + w + y1, y2 = y, y + h + if direction == "vertical": + lines.append(((x1 + x2) // 2, y2, (x1 + x2) // 2, y1)) + elif direction == "horizontal": + lines.append((x1, (y1 + y2) // 2, x2, (y1 + y2) // 2)) + + return dmask, lines + + +def find_contours(vertical, horizontal): + """Finds table boundaries using OpenCV's findContours. + + Parameters + ---------- + vertical : object + numpy.ndarray representing pixels where vertical lines lie. + horizontal : object + numpy.ndarray representing pixels where horizontal lines lie. + + Returns + ------- + cont : list + List of tuples representing table boundaries. Each tuple is of + the form (x, y, w, h) where (x, y) -> left-top, w -> width and + h -> height in image coordinate space. + + """ + mask = vertical + horizontal + + try: + __, contours, __ = cv2.findContours( + mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + except ValueError: + # for opencv backward compatibility + contours, __ = cv2.findContours( + mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + ) + # sort in reverse based on contour area and use first 10 contours + contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10] + + cont = [] + for c in contours: + c_poly = cv2.approxPolyDP(c, 3, True) + x, y, w, h = cv2.boundingRect(c_poly) + cont.append((x, y, w, h)) + return cont + + +def find_joints(contours, vertical, horizontal): + """Finds joints/intersections present inside each table boundary. + + Parameters + ---------- + contours : list + List of tuples representing table boundaries. Each tuple is of + the form (x, y, w, h) where (x, y) -> left-top, w -> width and + h -> height in image coordinate space. + vertical : object + numpy.ndarray representing pixels where vertical lines lie. + horizontal : object + numpy.ndarray representing pixels where horizontal lines lie. + + Returns + ------- + tables : dict + Dict with table boundaries as keys and list of intersections + in that boundary as their value. + Keys are of the form (x1, y1, x2, y2) where (x1, y1) -> lb + and (x2, y2) -> rt in image coordinate space. 
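# Sketch of how the detection helpers above fit together, mirroring the calls
# made later by the Lattice parser (the file name is a placeholder):
from camelot.image_processing import (adaptive_threshold, find_lines,
                                      find_contours, find_joints)

img, threshold = adaptive_threshold("page-1.png")
v_mask, v_segments = find_lines(threshold, direction="vertical", line_scale=15)
h_mask, h_segments = find_lines(threshold, direction="horizontal", line_scale=15)
boundaries = find_contours(v_mask, h_mask)          # up to 10 (x, y, w, h) candidates
tables = find_joints(boundaries, v_mask, h_mask)    # bbox -> list of joint coordinates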
+ + """ + joints = np.multiply(vertical, horizontal) + tables = {} + for c in contours: + x, y, w, h = c + roi = joints[y : y + h, x : x + w] + try: + __, jc, __ = cv2.findContours( + roi.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE + ) + except ValueError: + # for opencv backward compatibility + jc, __ = cv2.findContours( + roi.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE + ) + if len(jc) <= 4: # remove contours with less than 4 joints + continue + joint_coords = [] + for j in jc: + jx, jy, jw, jh = cv2.boundingRect(j) + c1, c2 = x + (2 * jx + jw) // 2, y + (2 * jy + jh) // 2 + joint_coords.append((c1, c2)) + tables[(x, y + h, x + w, y)] = joint_coords + + return tables + + +def intersectes(r1, r2): + """ Checking the intersection of two ribs. + + :param r1: tuple + (x11, y11, x21, y21) where (x11, y11) -> start coordinates of r1 + and (x21, y21) -> end coordinates of rib1. + :param r2: tuple + (x12, y12, x22, y22) where (x12, y12) -> start coordinates of r2 + and (x22, y22) -> end coordinates of rib2. + :return: boolean + if ribs intersect True else False. + """ + c_m = 10 + x11, y11, x21, y21 = r1[0], r1[1], r1[2], r1[3] + x12, y12, x22, y22 = r2[0], r2[1], r2[2], r2[3] + + if (x11 == x21 and x12 == x22) or (y11 == y21 and y12 == y22): + return False + elif x11 == x21 and y12 == y22: + return x11 + c_m >= x12 and x11 <= x22 + c_m \ + and y11 + c_m >= y12 >= y21 - c_m + else: + return x12 + c_m >= x11 and x12 <= x21 + c_m \ + and y12 + c_m >= y11 >= y22 - c_m + + +def draw_v(image, h_lines): + """ + Draws the vertical lines between given horisontal lines, corrects the image. + + :param image: img : object + numpy.ndarray representing the image. + :param h_lines: list + List of tuples representing horizontal lines with coordinates. + :return: img : object + numpy.ndarray representing the new image. + """ + + if len(h_lines) > 0: + + h_lines = sorted(h_lines, key=lambda x: (x[0], x[1])) + + l_x, r_x = h_lines[0][0], h_lines[0][2] + u_y, d_y = h_lines[0][1], h_lines[0][1] + + for i in range(len(h_lines)): + + if l_x == h_lines[i][0] and i != len(h_lines) - 1: + r_x = max(r_x, h_lines[i][2]) + + elif l_x == h_lines[i][0]: + d_y = h_lines[i][3] + cv2.rectangle(image, pt1=(l_x, u_y), pt2=(r_x, d_y), color=(0, 0, 0), thickness=3) + + else: + d_y = h_lines[i - 1][3] + cv2.rectangle(image, pt1=(l_x, u_y), pt2=(r_x, d_y), color=(0, 0, 0), thickness=3) + l_x, r_x = h_lines[i][0], h_lines[i][2] + u_y, d_y = h_lines[i][1], h_lines[i][3] + + + return image + + +def draw_h(image, v_lines): + ''' + Draws the horisontal lines between given vertical lines, corrects the image. + + :param image: img : object + numpy.ndarray representing the image. + :param v_lines: list + List of tuples representing vertical lines with + coordinates. + :return: image : object + numpy.ndarray representing the new image. 
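# Tiny self-contained check of the segment-intersection helper above; the
# coordinates are made up and follow the (x1, y1, x2, y2) tuples produced by
# find_lines (vertical tuples carry the larger y first).
from camelot.image_processing import intersectes

horizontal = (100, 200, 400, 200)   # y1 == y2
vertical = (250, 350, 250, 150)     # x1 == x2
print(intersectes(horizontal, vertical))   # True: they cross near (250, 200),
                                           # within the 10 px margin c_m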
+ ''' + if (len(v_lines) > 0): + v_lines = sorted(v_lines, key=lambda x: (x[3], x[0])) + + u_y, d_y = v_lines[0][3], v_lines[0][1] + + for i in range(len(v_lines)): + + if u_y == v_lines[i][3] and i != len(v_lines) - 1: + d_y = max(d_y, v_lines[i][1]) + + elif u_y == v_lines[i][3]: + d_y = max(d_y, v_lines[i][1]) + cv2.rectangle(image, pt1=(50, u_y), pt2=(image.shape[1] - 50, d_y), color=(0, 0, 0), thickness=3) + + else: + cv2.rectangle(image, pt1=(50, u_y), pt2=(image.shape[1] - 50, d_y), color=(0, 0, 0), thickness=3) + u_y, d_y = v_lines[i][3], v_lines[i][1] + + return image + +def correct_lines(image, v_segments, h_segments): + ''' + + :param image: object + numpy.ndarray representing the image. + :param v_segments: list + List of tuples representing vertical lines with + coordinates. + :param h_segments: list + List of tuples representing horizontal lines with + coordinates. + :return: image : object + numpy.ndarray representing the new image. + ''' + + h_size, v_size = len(h_segments), len(v_segments) + + if h_size > 1 and v_size == 0: + image = draw_v(image, h_segments) + + elif h_size == 0 and v_size > 1: + image = draw_h(image, v_segments) + + elif v_size >= 1 and h_size >= 1: + + ribs = v_segments[:] + h_segments[:] + segments = [[ribs[i]][:] for i in range(len(ribs))] + + for i in range(0, len(ribs) - 1): + for j in range(i+1, len(ribs)): + if intersectes(ribs[i],ribs[j]): + for sg1 in segments: + cur_sg = [] + if ribs[i] in sg1: + cur_sg = sg1 + break + + for sg2 in segments: + del_sg = [] + if ribs[j] in sg2 and cur_sg != sg2: + cur_sg += sg2[:] + del_sg = sg2 + break + if del_sg in segments: + segments.remove(del_sg) + + + s_lines = [] + + for i in range(len(segments)): + + min_x, min_y = segments[i][0][0], segments[i][0][3] + max_x, max_y = segments[i][0][2], segments[i][0][1] + + if len(segments[i]) > 1: + for line in segments[i]: + min_x, min_y = min(min_x, line[0]),min(min_y, line[3]) + max_x, max_y = max(max_x, line[2]), max(max_y,line[1]) + cv2.rectangle(image, pt1=(min_x, min_y), pt2=(max_x, max_y), color=(0, 0, 0), thickness=3) + else: + s_lines += segments[i] + + h_s_lines, v_s_lines = [], [] + + for line in s_lines: + v_s_lines.append(line) if line[0] == line[2] else h_s_lines.append(line) + + image = draw_h(image, v_s_lines) + image = draw_v(image, h_s_lines) + + '''cv2.imshow("Image", image) + cv2.waitKey(0) + cv2.destroyAllWindows() + ''' + return image + diff --git a/src/main/python/camelot/io.py b/src/main/python/camelot/io.py new file mode 100644 index 00000000..a27a7c66 --- /dev/null +++ b/src/main/python/camelot/io.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- + +import warnings + +from .handlers import PDFHandler +from .utils import validate_input, remove_extra + + +def read_pdf( + filepath, + pages="1", + password=None, + flavor="lattice", + suppress_stdout=False, + layout_kwargs={}, + **kwargs +): + """Read PDF and return extracted tables. + + Note: kwargs annotated with ^ can only be used with flavor='stream' + and kwargs annotated with * can only be used with flavor='lattice'. + + Parameters + ---------- + filepath : str + Filepath or URL of the PDF file. + pages : str, optional (default: '1') + Comma-separated page numbers. + Example: '1,3,4' or '1,4-end' or 'all'. + password : str, optional (default: None) + Password for decryption. + flavor : str (default: 'lattice') + The parsing method to use ('lattice' or 'stream'). + Lattice is used by default. + suppress_stdout : bool, optional (default: True) + Print all logs and warnings. 
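# Hedged sketch of the line-completion step implemented by correct_lines above:
# partially ruled tables get their missing sides drawn in so that the later
# contour/joint detection sees closed cells (file names are placeholders).
import cv2
from camelot.image_processing import adaptive_threshold, find_lines, correct_lines

img, threshold = adaptive_threshold("page-1.png")
_, v_segments = find_lines(threshold, direction="vertical", line_scale=15)
_, h_segments = find_lines(threshold, direction="horizontal", line_scale=15)
img = correct_lines(img, v_segments, h_segments)
cv2.imwrite("page-1-corrected.png", img)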
+ layout_kwargs : dict, optional (default: {}) + A dict of `pdfminer.layout.LAParams `_ kwargs. + table_areas : list, optional (default: None) + List of table area strings of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in PDF coordinate space. + columns^ : list, optional (default: None) + List of column x-coordinates strings where the coordinates + are comma-separated. + split_text : bool, optional (default: False) + Split text that spans across multiple cells. + flag_size : bool, optional (default: False) + Flag text based on font size. Useful to detect + super/subscripts. Adds around flagged text. + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + row_tol^ : int, optional (default: 2) + Tolerance parameter used to combine text vertically, + to generate rows. + column_tol^ : int, optional (default: 0) + Tolerance parameter used to combine text horizontally, + to generate columns. + process_background* : bool, optional (default: False) + Process background lines. + line_scale* : int, optional (default: 15) + Line size scaling factor. The larger the value the smaller + the detected lines. Making it very large will lead to text + being detected as lines. + copy_text* : list, optional (default: None) + {'h', 'v'} + Direction in which text in a spanning cell will be copied + over. + shift_text* : list, optional (default: ['l', 't']) + {'l', 'r', 't', 'b'} + Direction in which text in a spanning cell will flow. + line_tol* : int, optional (default: 2) + Tolerance parameter used to merge close vertical and horizontal + lines. + joint_tol* : int, optional (default: 2) + Tolerance parameter used to decide whether the detected lines + and points lie close to each other. + threshold_blocksize* : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + threshold_constant* : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + iterations* : int, optional (default: 0) + Number of times for erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + resolution* : int, optional (default: 300) + Resolution used for PDF to PNG conversion. + + Returns + ------- + tables : camelot.core.TableList + + """ + if flavor not in ["lattice", "stream"]: + raise NotImplementedError( + "Unknown flavor specified." 
" Use either 'lattice' or 'stream'" + ) + + with warnings.catch_warnings(): + if suppress_stdout: + warnings.simplefilter("ignore") + + validate_input(kwargs, flavor=flavor) + p = PDFHandler(filepath, pages=pages, password=password) + kwargs = remove_extra(kwargs, flavor=flavor) + tables = p.parse( + flavor=flavor, + suppress_stdout=suppress_stdout, + layout_kwargs=layout_kwargs, + **kwargs + ) + return tables diff --git a/src/main/python/camelot/parsers/__init__.py b/src/main/python/camelot/parsers/__init__.py new file mode 100644 index 00000000..5cc66051 --- /dev/null +++ b/src/main/python/camelot/parsers/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from .stream import Stream +from .lattice import Lattice diff --git a/src/main/python/camelot/parsers/base.py b/src/main/python/camelot/parsers/base.py new file mode 100644 index 00000000..aeba056f --- /dev/null +++ b/src/main/python/camelot/parsers/base.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +import os + +from ..utils import get_page_layout, get_text_objects + + +class BaseParser(object): + """Defines a base parser.""" + + def _generate_layout(self, filename, layout_kwargs): + self.filename = filename + self.layout_kwargs = layout_kwargs + self.layout, self.dimensions = get_page_layout(filename, **layout_kwargs) + self.images = get_text_objects(self.layout, ltype="image") + self.horizontal_text = get_text_objects(self.layout, ltype="horizontal_text") + self.vertical_text = get_text_objects(self.layout, ltype="vertical_text") + self.pdf_width, self.pdf_height = self.dimensions + self.rootname, __ = os.path.splitext(self.filename) + self.imagename = "".join([self.rootname, ".png"]) diff --git a/src/main/python/camelot/parsers/lattice.py b/src/main/python/camelot/parsers/lattice.py new file mode 100644 index 00000000..5d8a79c8 --- /dev/null +++ b/src/main/python/camelot/parsers/lattice.py @@ -0,0 +1,464 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import copy +import locale +import logging +import warnings + +import numpy as np +import pandas as pd + +from .base import BaseParser +from ..core import Table +from ..utils import ( + scale_image, + scale_pdf, + segments_in_bbox, + text_in_bbox, + merge_close_lines, + get_table_index, + compute_accuracy, + compute_whitespace, +) +from ..image_processing import ( + adaptive_threshold, + find_lines, + find_contours, + find_joints, + correct_lines, + adaptive_threshold_with_img, +) +from ..backends.image_conversion import BACKENDS + + +logger = logging.getLogger("camelot") + + +class Lattice(BaseParser): + """Lattice method of parsing looks for lines between text + to parse the table. + + Parameters + ---------- + table_regions : list, optional (default: None) + List of page regions that may contain tables of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in PDF coordinate space. + table_areas : list, optional (default: None) + List of table area strings of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in PDF coordinate space. + process_background : bool, optional (default: False) + Process background lines. + line_scale : int, optional (default: 15) + Line size scaling factor. The larger the value the smaller + the detected lines. Making it very large will lead to text + being detected as lines. + copy_text : list, optional (default: None) + {'h', 'v'} + Direction in which text in a spanning cell will be copied + over. 
+ shift_text : list, optional (default: ['l', 't']) + {'l', 'r', 't', 'b'} + Direction in which text in a spanning cell will flow. + split_text : bool, optional (default: False) + Split text that spans across multiple cells. + flag_size : bool, optional (default: False) + Flag text based on font size. Useful to detect + super/subscripts. Adds around flagged text. + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + line_tol : int, optional (default: 2) + Tolerance parameter used to merge close vertical and horizontal + lines. + joint_tol : int, optional (default: 2) + Tolerance parameter used to decide whether the detected lines + and points lie close to each other. + threshold_blocksize : int, optional (default: 15) + Size of a pixel neighborhood that is used to calculate a + threshold value for the pixel: 3, 5, 7, and so on. + + For more information, refer `OpenCV's adaptiveThreshold `_. + threshold_constant : int, optional (default: -2) + Constant subtracted from the mean or weighted mean. + Normally, it is positive but may be zero or negative as well. + + For more information, refer `OpenCV's adaptiveThreshold `_. + iterations : int, optional (default: 0) + Number of times for erosion/dilation is applied. + + For more information, refer `OpenCV's dilate `_. + resolution : int, optional (default: 300) + Resolution used for PDF to PNG conversion. + + """ + + def __init__( + self, + table_regions=None, + table_areas=None, + process_background=False, + line_scale=15, + copy_text=None, + shift_text=["l", "t"], + split_text=False, + flag_size=False, + strip_text="", + line_tol=2, + joint_tol=2, + threshold_blocksize=15, + threshold_constant=-2, + iterations=0, + resolution=300, + backend="ghostscript", + **kwargs, + ): + self.table_regions = table_regions + self.table_areas = table_areas + self.process_background = process_background + self.line_scale = line_scale + self.copy_text = copy_text + self.shift_text = shift_text + self.split_text = split_text + self.flag_size = flag_size + self.strip_text = strip_text + self.line_tol = line_tol + self.joint_tol = joint_tol + self.threshold_blocksize = threshold_blocksize + self.threshold_constant = threshold_constant + self.iterations = iterations + self.resolution = resolution + self.backend = Lattice._get_backend(backend) + + @staticmethod + def _get_backend(backend): + def implements_convert(): + methods = [ + method for method in dir(backend) if method.startswith("__") is False + ] + return "convert" in methods + + if isinstance(backend, str): + if backend not in BACKENDS.keys(): + raise NotImplementedError( + f"Unknown backend '{backend}' specified. Please use either 'poppler' or 'ghostscript'." + ) + + if backend == "ghostscript": + warnings.warn( + "'ghostscript' will be replaced by 'poppler' as the default image conversion" + " backend in v0.12.0. You can try out 'poppler' with backend='poppler'.", + DeprecationWarning, + ) + + return BACKENDS[backend]() + else: + if not implements_convert(): + raise NotImplementedError( + f"'{backend}' must implement a 'convert' method" + ) + + return backend + + @staticmethod + def _reduce_index(t, idx, shift_text): + """Reduces index of a text object if it lies within a spanning + cell. + + Parameters + ---------- + table : camelot.core.Table + idx : list + List of tuples of the form (r_idx, c_idx, text). 
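# Sketch of the backend hook handled by _get_backend above. It is assumed here
# that 'poppler' is registered in BACKENDS; alternatively, any object exposing a
# convert(pdf_path, png_path) method is accepted as-is.
from camelot.parsers import Lattice

parser = Lattice(backend="poppler")      # named backend, avoids the ghostscript warning

class MyBackend:
    def convert(self, pdf_path, png_path):
        raise NotImplementedError        # plug a real rasterizer in here

parser = Lattice(backend=MyBackend())    # passes the implements_convert() check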
+ shift_text : list + {'l', 'r', 't', 'b'} + Select one or more strings from above and pass them as a + list to specify where the text in a spanning cell should + flow. + + Returns + ------- + indices : list + List of tuples of the form (r_idx, c_idx, text) where + r_idx and c_idx are new row and column indices for text. + + """ + indices = [] + for r_idx, c_idx, text in idx: + for d in shift_text: + if d == "l": + if t.cells[r_idx][c_idx].hspan: + while not t.cells[r_idx][c_idx].left: + c_idx -= 1 + if d == "r": + if t.cells[r_idx][c_idx].hspan: + while not t.cells[r_idx][c_idx].right: + c_idx += 1 + if d == "t": + if t.cells[r_idx][c_idx].vspan: + while not t.cells[r_idx][c_idx].top: + r_idx -= 1 + if d == "b": + if t.cells[r_idx][c_idx].vspan: + while not t.cells[r_idx][c_idx].bottom: + r_idx += 1 + indices.append((r_idx, c_idx, text)) + return indices + + @staticmethod + def _copy_spanning_text(t, copy_text=None): + """Copies over text in empty spanning cells. + + Parameters + ---------- + t : camelot.core.Table + copy_text : list, optional (default: None) + {'h', 'v'} + Select one or more strings from above and pass them as a list + to specify the direction in which text should be copied over + when a cell spans multiple rows or columns. + + Returns + ------- + t : camelot.core.Table + + """ + for f in copy_text: + if f == "h": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].text.strip() == "": + if t.cells[i][j].hspan and not t.cells[i][j].left: + t.cells[i][j].text = t.cells[i][j - 1].text + elif f == "v": + for i in range(len(t.cells)): + for j in range(len(t.cells[i])): + if t.cells[i][j].text.strip() == "": + if t.cells[i][j].vspan and not t.cells[i][j].top: + t.cells[i][j].text = t.cells[i - 1][j].text + return t + + def _generate_table_bbox(self): + def scale_areas(areas): + scaled_areas = [] + for area in areas: + x1, y1, x2, y2 = area.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + x1, y1, x2, y2 = scale_pdf((x1, y1, x2, y2), image_scalers) + scaled_areas.append((x1, y1, abs(x2 - x1), abs(y2 - y1))) + return scaled_areas + + self.image, self.threshold = adaptive_threshold( + self.imagename, + process_background=self.process_background, + blocksize=self.threshold_blocksize, + c=self.threshold_constant, + ) + + image_width = self.image.shape[1] + image_height = self.image.shape[0] + image_width_scaler = image_width / float(self.pdf_width) + image_height_scaler = image_height / float(self.pdf_height) + pdf_width_scaler = self.pdf_width / float(image_width) + pdf_height_scaler = self.pdf_height / float(image_height) + image_scalers = (image_width_scaler, image_height_scaler, self.pdf_height) + pdf_scalers = (pdf_width_scaler, pdf_height_scaler, image_height) + + if self.table_areas is None: + regions = None + if self.table_regions is not None: + regions = scale_areas(self.table_regions) + + vertical_mask, vertical_segments = find_lines( + self.threshold, + regions=regions, + direction="vertical", + line_scale=self.line_scale, + iterations=self.iterations, + ) + horizontal_mask, horizontal_segments = find_lines( + self.threshold, + regions=regions, + direction="horizontal", + line_scale=self.line_scale, + iterations=self.iterations, + ) + + self.image = correct_lines( + self.image, + vertical_segments, + horizontal_segments + ) + self.image, threshold = adaptive_threshold_with_img( + self.image, + process_background=self.process_background, + blocksize=self.threshold_blocksize, + c=self.threshold_constant + ) + 
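            # Second detection pass (illustrative note): the rectangles drawn by
            # correct_lines were re-thresholded above, so the find_lines calls
            # below operate on the completed rulings and the resulting contours
            # and joints reflect the corrected table grid.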
+ vertical_mask, vertical_segments = find_lines( + threshold, + regions=regions, + direction="vertical", + line_scale=self.line_scale, + iterations=self.iterations, + ) + horizontal_mask, horizontal_segments = find_lines( + threshold, + regions=regions, + direction="horizontal", + line_scale=self.line_scale, + iterations=self.iterations, + ) + + contours = find_contours(vertical_mask, horizontal_mask) + table_bbox = find_joints(contours, vertical_mask, horizontal_mask) + else: + vertical_mask, vertical_segments = find_lines( + self.threshold, + direction="vertical", + line_scale=self.line_scale, + iterations=self.iterations, + ) + horizontal_mask, horizontal_segments = find_lines( + self.threshold, + direction="horizontal", + line_scale=self.line_scale, + iterations=self.iterations, + ) + + areas = scale_areas(self.table_areas) + table_bbox = find_joints(areas, vertical_mask, horizontal_mask) + + self.table_bbox_unscaled = copy.deepcopy(table_bbox) + + self.table_bbox, self.vertical_segments, self.horizontal_segments = scale_image( + table_bbox, vertical_segments, horizontal_segments, pdf_scalers + ) + + def _generate_columns_and_rows(self, table_idx, tk): + # select elements which lie within table_bbox + t_bbox = {} + v_s, h_s = segments_in_bbox( + tk, self.vertical_segments, self.horizontal_segments + ) + t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text) + t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text) + + t_bbox["horizontal"].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox["vertical"].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox + + cols, rows = zip(*self.table_bbox[tk]) + cols, rows = list(cols), list(rows) + cols.extend([tk[0], tk[2]]) + rows.extend([tk[1], tk[3]]) + # sort horizontal and vertical segments + cols = merge_close_lines(sorted(cols), line_tol=self.line_tol) + rows = merge_close_lines(sorted(rows, reverse=True), line_tol=self.line_tol) + # make grid using x and y coord of shortlisted rows and cols + cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)] + rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)] + + return cols, rows, v_s, h_s + + def _generate_table(self, table_idx, cols, rows, **kwargs): + v_s = kwargs.get("v_s") + h_s = kwargs.get("h_s") + if v_s is None or h_s is None: + raise ValueError("No segments found on {}".format(self.rootname)) + + table = Table(cols, rows) + # set table edges to True using ver+hor lines + table = table.set_edges(v_s, h_s, joint_tol=self.joint_tol) + # set table border edges to True + table = table.set_border() + # set spanning cells to True + table = table.set_span() + + pos_errors = [] + # TODO: have a single list in place of two directional ones? + # sorted on x-coordinate based on reading order i.e. 
LTR or RTL + for direction in ["vertical", "horizontal"]: + for t in self.t_bbox[direction]: + indices, error = get_table_index( + table, + t, + direction, + split_text=self.split_text, + flag_size=self.flag_size, + strip_text=self.strip_text, + ) + if indices[:2] != (-1, -1): + pos_errors.append(error) + indices = Lattice._reduce_index( + table, indices, shift_text=self.shift_text + ) + for r_idx, c_idx, text in indices: + table.cells[r_idx][c_idx].text = text + accuracy = compute_accuracy([[100, pos_errors]]) + + if self.copy_text is not None: + table = Lattice._copy_spanning_text(table, copy_text=self.copy_text) + + data = table.data + table.df = pd.DataFrame(data) + table.shape = table.df.shape + + whitespace = compute_whitespace(data) + table.flavor = "lattice" + table.accuracy = accuracy + table.whitespace = whitespace + table.order = table_idx + 1 + table.page = int(os.path.basename(self.rootname).replace("page-", "")) + + # for plotting + _text = [] + _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text]) + _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text]) + table._text = _text + table._image = (self.image, self.table_bbox_unscaled) + table._segments = (self.vertical_segments, self.horizontal_segments) + table._textedges = None + + return table + + def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): + self._generate_layout(filename, layout_kwargs) + if not suppress_stdout: + logger.info("Processing {}".format(os.path.basename(self.rootname))) + + if not self.horizontal_text: + if self.images: + warnings.warn( + "{} is image-based, camelot only works on" + " text-based pages.".format(os.path.basename(self.rootname)) + ) + else: + warnings.warn( + "No tables found on {}".format(os.path.basename(self.rootname)) + ) + return [] + + self.backend.convert(self.filename, self.imagename) + + self._generate_table_bbox() + + _tables = [] + # sort tables based on y-coord + for table_idx, tk in enumerate( + sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True) + ): + cols, rows, v_s, h_s = self._generate_columns_and_rows(table_idx, tk) + table = self._generate_table(table_idx, cols, rows, v_s=v_s, h_s=h_s) + table._bbox = tk + _tables.append(table) + + return _tables diff --git a/src/main/python/camelot/parsers/stream.py b/src/main/python/camelot/parsers/stream.py new file mode 100644 index 00000000..c7b21daf --- /dev/null +++ b/src/main/python/camelot/parsers/stream.py @@ -0,0 +1,468 @@ +# -*- coding: utf-8 -*- + +import os +import logging +import warnings + +import numpy as np +import pandas as pd + +from .base import BaseParser +from ..core import TextEdges, Table +from ..utils import text_in_bbox, get_table_index, compute_accuracy, compute_whitespace + + +logger = logging.getLogger("camelot") + + +class Stream(BaseParser): + """Stream method of parsing looks for spaces between text + to parse the table. + + If you want to specify columns when specifying multiple table + areas, make sure that the length of both lists are equal. + + Parameters + ---------- + table_regions : list, optional (default: None) + List of page regions that may contain tables of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in PDF coordinate space. + table_areas : list, optional (default: None) + List of table area strings of the form x1,y1,x2,y2 + where (x1, y1) -> left-top and (x2, y2) -> right-bottom + in PDF coordinate space. 
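# Illustrative sketch of the spanning-cell options consumed by _reduce_index and
# _copy_spanning_text above (the input file is a placeholder):
from camelot.io import read_pdf

tables = read_pdf("sample.pdf", flavor="lattice",
                  shift_text=["r", "b"],   # move text to the right/bottom cell of a span
                  copy_text=["v"])         # then copy it downward into empty spanned cells
print(tables[0].df.head())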
+ columns : list, optional (default: None) + List of column x-coordinates strings where the coordinates + are comma-separated. + split_text : bool, optional (default: False) + Split text that spans across multiple cells. + flag_size : bool, optional (default: False) + Flag text based on font size. Useful to detect + super/subscripts. Adds around flagged text. + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + edge_tol : int, optional (default: 50) + Tolerance parameter for extending textedges vertically. + row_tol : int, optional (default: 2) + Tolerance parameter used to combine text vertically, + to generate rows. + column_tol : int, optional (default: 0) + Tolerance parameter used to combine text horizontally, + to generate columns. + + """ + + def __init__( + self, + table_regions=None, + table_areas=None, + columns=None, + split_text=False, + flag_size=False, + strip_text="", + edge_tol=50, + row_tol=2, + column_tol=0, + **kwargs, + ): + self.table_regions = table_regions + self.table_areas = table_areas + self.columns = columns + self._validate_columns() + self.split_text = split_text + self.flag_size = flag_size + self.strip_text = strip_text + self.edge_tol = edge_tol + self.row_tol = row_tol + self.column_tol = column_tol + + @staticmethod + def _text_bbox(t_bbox): + """Returns bounding box for the text present on a page. + + Parameters + ---------- + t_bbox : dict + Dict with two keys 'horizontal' and 'vertical' with lists of + LTTextLineHorizontals and LTTextLineVerticals respectively. + + Returns + ------- + text_bbox : tuple + Tuple (x0, y0, x1, y1) in pdf coordinate space. + + """ + xmin = min([t.x0 for direction in t_bbox for t in t_bbox[direction]]) + ymin = min([t.y0 for direction in t_bbox for t in t_bbox[direction]]) + xmax = max([t.x1 for direction in t_bbox for t in t_bbox[direction]]) + ymax = max([t.y1 for direction in t_bbox for t in t_bbox[direction]]) + text_bbox = (xmin, ymin, xmax, ymax) + return text_bbox + + @staticmethod + def _group_rows(text, row_tol=2): + """Groups PDFMiner text objects into rows vertically + within a tolerance. + + Parameters + ---------- + text : list + List of PDFMiner text objects. + row_tol : int, optional (default: 2) + + Returns + ------- + rows : list + Two-dimensional list of text objects grouped into rows. + + """ + row_y = 0 + rows = [] + temp = [] + + for t in text: + # is checking for upright necessary? + # if t.get_text().strip() and all([obj.upright for obj in t._objs if + # type(obj) is LTChar]): + if t.get_text().strip(): + if not np.isclose(row_y, t.y0, atol=row_tol): + rows.append(sorted(temp, key=lambda t: t.x0)) + temp = [] + row_y = t.y0 + temp.append(t) + + rows.append(sorted(temp, key=lambda t: t.x0)) + if len(rows) > 1: + __ = rows.pop(0) # TODO: hacky + return rows + + @staticmethod + def _merge_columns(l, column_tol=0): + """Merges column boundaries horizontally if they overlap + or lie within a tolerance. + + Parameters + ---------- + l : list + List of column x-coordinate tuples. + column_tol : int, optional (default: 0) + + Returns + ------- + merged : list + List of merged column x-coordinate tuples. 
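# Self-contained illustration of the column-merging rule documented above; the
# tuples stand for x-extents of detected words on one row.
from camelot.parsers import Stream

spans = sorted([(10, 40), (42, 80), (200, 240)])
print(Stream._merge_columns(spans, column_tol=3))
# -> [(10, 80), (200, 240)] : the first two spans sit within the 3 pt tolerance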
+ + """ + merged = [] + for higher in l: + if not merged: + merged.append(higher) + else: + lower = merged[-1] + if column_tol >= 0: + if higher[0] <= lower[1] or np.isclose( + higher[0], lower[1], atol=column_tol + ): + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + elif column_tol < 0: + if higher[0] <= lower[1]: + if np.isclose(higher[0], lower[1], atol=abs(column_tol)): + merged.append(higher) + else: + upper_bound = max(lower[1], higher[1]) + lower_bound = min(lower[0], higher[0]) + merged[-1] = (lower_bound, upper_bound) + else: + merged.append(higher) + return merged + + @staticmethod + def _join_rows(rows_grouped, text_y_max, text_y_min): + """Makes row coordinates continuous. + + Parameters + ---------- + rows_grouped : list + Two-dimensional list of text objects grouped into rows. + text_y_max : int + text_y_min : int + + Returns + ------- + rows : list + List of continuous row y-coordinate tuples. + + """ + row_mids = [ + sum([(t.y0 + t.y1) / 2 for t in r]) / len(r) if len(r) > 0 else 0 + for r in rows_grouped + ] + rows = [(row_mids[i] + row_mids[i - 1]) / 2 for i in range(1, len(row_mids))] + rows.insert(0, text_y_max) + rows.append(text_y_min) + rows = [(rows[i], rows[i + 1]) for i in range(0, len(rows) - 1)] + return rows + + @staticmethod + def _add_columns(cols, text, row_tol): + """Adds columns to existing list by taking into account + the text that lies outside the current column x-coordinates. + + Parameters + ---------- + cols : list + List of column x-coordinate tuples. + text : list + List of PDFMiner text objects. + ytol : int + + Returns + ------- + cols : list + Updated list of column x-coordinate tuples. + + """ + if text: + text = Stream._group_rows(text, row_tol=row_tol) + elements = [len(r) for r in text] + new_cols = [ + (t.x0, t.x1) for r in text if len(r) == max(elements) for t in r + ] + cols.extend(Stream._merge_columns(sorted(new_cols))) + return cols + + @staticmethod + def _join_columns(cols, text_x_min, text_x_max): + """Makes column coordinates continuous. + + Parameters + ---------- + cols : list + List of column x-coordinate tuples. + text_x_min : int + text_y_max : int + + Returns + ------- + cols : list + Updated list of column x-coordinate tuples. + + """ + cols = sorted(cols) + cols = [(cols[i][0] + cols[i - 1][1]) / 2 for i in range(1, len(cols))] + cols.insert(0, text_x_min) + cols.append(text_x_max) + cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)] + return cols + + def _validate_columns(self): + if self.table_areas is not None and self.columns is not None: + if len(self.table_areas) != len(self.columns): + raise ValueError("Length of table_areas and columns" " should be equal") + + def _nurminen_table_detection(self, textlines): + """A general implementation of the table detection algorithm + described by Anssi Nurminen's master's thesis. + Link: https://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3 + + Assumes that tables are situated relatively far apart + vertically. 
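# Sketch of the explicit-columns path guarded by _validate_columns above; the
# area and column coordinates are made-up PDF-space values.
from camelot.io import read_pdf

tables = read_pdf(
    "sample.pdf",
    flavor="stream",
    table_areas=["72,700,540,100"],    # x1,y1,x2,y2 with left-top / right-bottom
    columns=["120,210,330,420"],       # one comma-separated string per table area
)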
+ """ + # TODO: add support for arabic text #141 + # sort textlines in reading order + textlines.sort(key=lambda x: (-x.y0, x.x0)) + textedges = TextEdges(edge_tol=self.edge_tol) + # generate left, middle and right textedges + textedges.generate(textlines) + # select relevant edges + relevant_textedges = textedges.get_relevant() + self.textedges.extend(relevant_textedges) + # guess table areas using textlines and relevant edges + table_bbox = textedges.get_table_areas(textlines, relevant_textedges) + # treat whole page as table area if no table areas found + if not len(table_bbox): + table_bbox = {(0, 0, self.pdf_width, self.pdf_height): None} + + return table_bbox + + def _generate_table_bbox(self): + self.textedges = [] + if self.table_areas is None: + hor_text = self.horizontal_text + if self.table_regions is not None: + # filter horizontal text + hor_text = [] + for region in self.table_regions: + x1, y1, x2, y2 = region.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + region_text = text_in_bbox((x1, y2, x2, y1), self.horizontal_text) + hor_text.extend(region_text) + # find tables based on nurminen's detection algorithm + table_bbox = self._nurminen_table_detection(hor_text) + else: + table_bbox = {} + for area in self.table_areas: + x1, y1, x2, y2 = area.split(",") + x1 = float(x1) + y1 = float(y1) + x2 = float(x2) + y2 = float(y2) + table_bbox[(x1, y2, x2, y1)] = None + self.table_bbox = table_bbox + + def _generate_columns_and_rows(self, table_idx, tk): + # select elements which lie within table_bbox + t_bbox = {} + t_bbox["horizontal"] = text_in_bbox(tk, self.horizontal_text) + t_bbox["vertical"] = text_in_bbox(tk, self.vertical_text) + + t_bbox["horizontal"].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox["vertical"].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox + + text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) + rows_grouped = self._group_rows(self.t_bbox["horizontal"], row_tol=self.row_tol) + rows = self._join_rows(rows_grouped, text_y_max, text_y_min) + elements = [len(r) for r in rows_grouped] + + if self.columns is not None and self.columns[table_idx] != "": + # user has to input boundary columns too + # take (0, pdf_width) by default + # similar to else condition + # len can't be 1 + cols = self.columns[table_idx].split(",") + cols = [float(c) for c in cols] + cols.insert(0, text_x_min) + cols.append(text_x_max) + cols = [(cols[i], cols[i + 1]) for i in range(0, len(cols) - 1)] + else: + # calculate mode of the list of number of elements in + # each row to guess the number of columns + if not len(elements): + cols = [(text_x_min, text_x_max)] + else: + ncols = max(set(elements), key=elements.count) + if ncols == 1: + # if mode is 1, the page usually contains not tables + # but there can be cases where the list can be skewed, + # try to remove all 1s from list in this case and + # see if the list contains elements, if yes, then use + # the mode after removing 1s + elements = list(filter(lambda x: x != 1, elements)) + if len(elements): + ncols = max(set(elements), key=elements.count) + else: + warnings.warn(f"No tables found in table area {table_idx + 1}") + cols = [ + (t.x0, t.x1) for r in rows_grouped if len(r) == ncols for t in r + ] + cols = self._merge_columns(sorted(cols), column_tol=self.column_tol) + inner_text = [] + for i in range(1, len(cols)): + left = cols[i - 1][1] + right = cols[i][0] + inner_text.extend( + [ + t + for direction in self.t_bbox + for t in self.t_bbox[direction] + if t.x0 > 
left and t.x1 < right + ] + ) + outer_text = [ + t + for direction in self.t_bbox + for t in self.t_bbox[direction] + if t.x0 > cols[-1][1] or t.x1 < cols[0][0] + ] + inner_text.extend(outer_text) + cols = self._add_columns(cols, inner_text, self.row_tol) + cols = self._join_columns(cols, text_x_min, text_x_max) + + return cols, rows + + def _generate_table(self, table_idx, cols, rows, **kwargs): + table = Table(cols, rows) + table = table.set_all_edges() + + pos_errors = [] + # TODO: have a single list in place of two directional ones? + # sorted on x-coordinate based on reading order i.e. LTR or RTL + for direction in ["vertical", "horizontal"]: + for t in self.t_bbox[direction]: + indices, error = get_table_index( + table, + t, + direction, + split_text=self.split_text, + flag_size=self.flag_size, + strip_text=self.strip_text, + ) + if indices[:2] != (-1, -1): + pos_errors.append(error) + for r_idx, c_idx, text in indices: + table.cells[r_idx][c_idx].text = text + accuracy = compute_accuracy([[100, pos_errors]]) + + data = table.data + table.df = pd.DataFrame(data) + table.shape = table.df.shape + + whitespace = compute_whitespace(data) + table.flavor = "stream" + table.accuracy = accuracy + table.whitespace = whitespace + table.order = table_idx + 1 + table.page = int(os.path.basename(self.rootname).replace("page-", "")) + + # for plotting + _text = [] + _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.horizontal_text]) + _text.extend([(t.x0, t.y0, t.x1, t.y1) for t in self.vertical_text]) + table._text = _text + table._image = None + table._segments = None + table._textedges = self.textedges + + return table + + def extract_tables(self, filename, suppress_stdout=False, layout_kwargs={}): + self._generate_layout(filename, layout_kwargs) + base_filename = os.path.basename(self.rootname) + + if not suppress_stdout: + logger.info(f"Processing {base_filename}") + + if not self.horizontal_text: + if self.images: + warnings.warn( + f"{base_filename} is image-based, camelot only works on" + " text-based pages." + ) + else: + warnings.warn(f"No tables found on {base_filename}") + return [] + + self._generate_table_bbox() + + _tables = [] + # sort tables based on y-coord + for table_idx, tk in enumerate( + sorted(self.table_bbox.keys(), key=lambda x: x[1], reverse=True) + ): + cols, rows = self._generate_columns_and_rows(table_idx, tk) + table = self._generate_table(table_idx, cols, rows) + table._bbox = tk + _tables.append(table) + + return _tables diff --git a/src/main/python/camelot/plotting.py b/src/main/python/camelot/plotting.py new file mode 100644 index 00000000..f5b6afe9 --- /dev/null +++ b/src/main/python/camelot/plotting.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- + +try: + import matplotlib.pyplot as plt + import matplotlib.patches as patches +except ImportError: + _HAS_MPL = False +else: + _HAS_MPL = True + + +class PlotMethods(object): + def __call__(self, table, kind="text", filename=None): + """Plot elements found on PDF page based on kind + specified, useful for debugging and playing with different + parameters to get the best output. + + Parameters + ---------- + table: camelot.core.Table + A Camelot Table. + kind : str, optional (default: 'text') + {'text', 'grid', 'contour', 'joint', 'line'} + The element type for which a plot should be generated. + filepath: str, optional (default: None) + Absolute path for saving the generated plot. 
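# Hedged usage sketch for the PlotMethods dispatcher above (matplotlib required;
# upstream camelot exposes an instance of this class as camelot.plot):
from camelot.io import read_pdf
from camelot.plotting import PlotMethods

plot = PlotMethods()
tables = read_pdf("sample.pdf", flavor="lattice")
fig = plot(tables[0], kind="grid")                        # returns a matplotlib Figure
plot(tables[0], kind="contour", filename="contour.png")   # saves and returns None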
+ + Returns + ------- + fig : matplotlib.fig.Figure + + """ + if not _HAS_MPL: + raise ImportError("matplotlib is required for plotting.") + + if table.flavor == "lattice" and kind in ["textedge"]: + raise NotImplementedError(f"Lattice flavor does not support kind='{kind}'") + elif table.flavor == "stream" and kind in ["joint", "line"]: + raise NotImplementedError(f"Stream flavor does not support kind='{kind}'") + + plot_method = getattr(self, kind) + fig = plot_method(table) + + if filename is not None: + fig.savefig(filename) + return None + + return fig + + def text(self, table): + """Generates a plot for all text elements present + on the PDF page. + + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + xs, ys = [], [] + for t in table._text: + xs.extend([t[0], t[2]]) + ys.extend([t[1], t[3]]) + ax.add_patch(patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1])) + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + return fig + + def grid(self, table): + """Generates a plot for the detected table grids + on the PDF page. + + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + for row in table.cells: + for cell in row: + if cell.left: + ax.plot([cell.lb[0], cell.lt[0]], [cell.lb[1], cell.lt[1]]) + if cell.right: + ax.plot([cell.rb[0], cell.rt[0]], [cell.rb[1], cell.rt[1]]) + if cell.top: + ax.plot([cell.lt[0], cell.rt[0]], [cell.lt[1], cell.rt[1]]) + if cell.bottom: + ax.plot([cell.lb[0], cell.rb[0]], [cell.lb[1], cell.rb[1]]) + return fig + + def contour(self, table): + """Generates a plot for all table boundaries present + on the PDF page. + + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + try: + img, table_bbox = table._image + _FOR_LATTICE = True + except TypeError: + img, table_bbox = (None, {table._bbox: None}) + _FOR_LATTICE = False + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + + xs, ys = [], [] + if not _FOR_LATTICE: + for t in table._text: + xs.extend([t[0], t[2]]) + ys.extend([t[1], t[3]]) + ax.add_patch( + patches.Rectangle( + (t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue" + ) + ) + + for t in table_bbox.keys(): + ax.add_patch( + patches.Rectangle( + (t[0], t[1]), t[2] - t[0], t[3] - t[1], fill=False, color="red" + ) + ) + if not _FOR_LATTICE: + xs.extend([t[0], t[2]]) + ys.extend([t[1], t[3]]) + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + + if _FOR_LATTICE: + ax.imshow(img) + return fig + + def textedge(self, table): + """Generates a plot for relevant textedges. + + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + xs, ys = [], [] + for t in table._text: + xs.extend([t[0], t[2]]) + ys.extend([t[1], t[3]]) + ax.add_patch( + patches.Rectangle((t[0], t[1]), t[2] - t[0], t[3] - t[1], color="blue") + ) + ax.set_xlim(min(xs) - 10, max(xs) + 10) + ax.set_ylim(min(ys) - 10, max(ys) + 10) + + for te in table._textedges: + ax.plot([te.x, te.x], [te.y0, te.y1]) + + return fig + + def joint(self, table): + """Generates a plot for all line intersections present + on the PDF page. 
+ + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + img, table_bbox = table._image + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + x_coord = [] + y_coord = [] + for k in table_bbox.keys(): + for coord in table_bbox[k]: + x_coord.append(coord[0]) + y_coord.append(coord[1]) + ax.plot(x_coord, y_coord, "ro") + ax.imshow(img) + return fig + + def line(self, table): + """Generates a plot for all line segments present + on the PDF page. + + Parameters + ---------- + table : camelot.core.Table + + Returns + ------- + fig : matplotlib.fig.Figure + + """ + fig = plt.figure() + ax = fig.add_subplot(111, aspect="equal") + vertical, horizontal = table._segments + for v in vertical: + ax.plot([v[0], v[2]], [v[1], v[3]]) + for h in horizontal: + ax.plot([h[0], h[2]], [h[1], h[3]]) + return fig diff --git a/src/main/python/camelot/utils.py b/src/main/python/camelot/utils.py new file mode 100644 index 00000000..404c00b2 --- /dev/null +++ b/src/main/python/camelot/utils.py @@ -0,0 +1,938 @@ +# -*- coding: utf-8 -*- + +import os +import re +import random +import shutil +import string +import tempfile +import warnings +from itertools import groupby +from operator import itemgetter + +import numpy as np +from pdfminer.pdfparser import PDFParser +from pdfminer.pdfdocument import PDFDocument +from pdfminer.pdfpage import PDFPage +from pdfminer.pdfpage import PDFTextExtractionNotAllowed +from pdfminer.pdfinterp import PDFResourceManager +from pdfminer.pdfinterp import PDFPageInterpreter +from pdfminer.converter import PDFPageAggregator +from pdfminer.layout import ( + LAParams, + LTAnno, + LTChar, + LTTextLineHorizontal, + LTTextLineVertical, + LTImage, +) + +from urllib.request import Request, urlopen +from urllib.parse import urlparse as parse_url +from urllib.parse import uses_relative, uses_netloc, uses_params + + +_VALID_URLS = set(uses_relative + uses_netloc + uses_params) +_VALID_URLS.discard("") + + +# https://github.com/pandas-dev/pandas/blob/master/pandas/io/common.py +def is_url(url): + """Check to see if a URL has a valid protocol. + + Parameters + ---------- + url : str or unicode + + Returns + ------- + isurl : bool + If url has a valid protocol return True otherwise False. + + """ + try: + return parse_url(url).scheme in _VALID_URLS + except Exception: + return False + + +def random_string(length): + ret = "" + while length: + ret += random.choice( + string.digits + string.ascii_lowercase + string.ascii_uppercase + ) + length -= 1 + return ret + + +def download_url(url): + """Download file from specified URL. + + Parameters + ---------- + url : str or unicode + + Returns + ------- + filepath : str or unicode + Temporary filepath. 
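# Small sketch of the URL helpers above; the URL is a placeholder, and
# download_url rejects anything whose content type is not application/pdf.
from camelot.utils import is_url, download_url

url = "https://example.com/report.pdf"
if is_url(url):
    local_path = download_url(url)   # random <6 chars>.pdf under the temp directory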
+ + """ + filename = f"{random_string(6)}.pdf" + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + headers = {"User-Agent": "Mozilla/5.0"} + request = Request(url, None, headers) + obj = urlopen(request) + content_type = obj.info().get_content_type() + if content_type != "application/pdf": + raise NotImplementedError("File format not supported") + f.write(obj.read()) + filepath = os.path.join(os.path.dirname(f.name), filename) + shutil.move(f.name, filepath) + return filepath + + +stream_kwargs = ["columns", "edge_tol", "row_tol", "column_tol"] +lattice_kwargs = [ + "process_background", + "line_scale", + "copy_text", + "shift_text", + "line_tol", + "joint_tol", + "threshold_blocksize", + "threshold_constant", + "iterations", + "resolution", +] + + +def validate_input(kwargs, flavor="lattice"): + def check_intersection(parser_kwargs, input_kwargs): + isec = set(parser_kwargs).intersection(set(input_kwargs.keys())) + if isec: + raise ValueError( + f"{','.join(sorted(isec))} cannot be used with flavor='{flavor}'" + ) + + if flavor == "lattice": + check_intersection(stream_kwargs, kwargs) + else: + check_intersection(lattice_kwargs, kwargs) + + +def remove_extra(kwargs, flavor="lattice"): + if flavor == "lattice": + for key in kwargs.keys(): + if key in stream_kwargs: + kwargs.pop(key) + else: + for key in kwargs.keys(): + if key in lattice_kwargs: + kwargs.pop(key) + return kwargs + + +# https://stackoverflow.com/a/22726782 +class TemporaryDirectory(object): + def __enter__(self): + self.name = tempfile.mkdtemp() + return self.name + + def __exit__(self, exc_type, exc_value, traceback): + shutil.rmtree(self.name) + + +def translate(x1, x2): + """Translates x2 by x1. + + Parameters + ---------- + x1 : float + x2 : float + + Returns + ------- + x2 : float + + """ + x2 += x1 + return x2 + + +def scale(x, s): + """Scales x by scaling factor s. + + Parameters + ---------- + x : float + s : float + + Returns + ------- + x : float + + """ + x *= s + return x + + +def scale_pdf(k, factors): + """Translates and scales pdf coordinate space to image + coordinate space. + + Parameters + ---------- + k : tuple + Tuple (x1, y1, x2, y2) representing table bounding box where + (x1, y1) -> lt and (x2, y2) -> rb in PDFMiner coordinate + space. + factors : tuple + Tuple (scaling_factor_x, scaling_factor_y, pdf_y) where the + first two elements are scaling factors and pdf_y is height of + pdf. + + Returns + ------- + knew : tuple + Tuple (x1, y1, x2, y2) representing table bounding box where + (x1, y1) -> lt and (x2, y2) -> rb in OpenCV coordinate + space. + + """ + x1, y1, x2, y2 = k + scaling_factor_x, scaling_factor_y, pdf_y = factors + x1 = scale(x1, scaling_factor_x) + y1 = scale(abs(translate(-pdf_y, y1)), scaling_factor_y) + x2 = scale(x2, scaling_factor_x) + y2 = scale(abs(translate(-pdf_y, y2)), scaling_factor_y) + knew = (int(x1), int(y1), int(x2), int(y2)) + return knew + + +def scale_image(tables, v_segments, h_segments, factors): + """Translates and scales image coordinate space to pdf + coordinate space. + + Parameters + ---------- + tables : dict + Dict with table boundaries as keys and list of intersections + in that boundary as value. + v_segments : list + List of vertical line segments. + h_segments : list + List of horizontal line segments. + factors : tuple + Tuple (scaling_factor_x, scaling_factor_y, img_y) where the + first two elements are scaling factors and img_y is height of + image. 
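# Worked example for scale_pdf above: PDF coordinates (origin at the bottom-left)
# are flipped about the page height and scaled into image coordinates. The
# factors are made up and assume the page was rendered at 2x in both axes.
from camelot.utils import scale_pdf

pdf_height = 842                        # A4 height in points
factors = (2.0, 2.0, pdf_height)
print(scale_pdf((72, 770, 540, 100), factors))   # -> (144, 144, 1080, 1484)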
+ + Returns + ------- + tables_new : dict + v_segments_new : dict + h_segments_new : dict + + """ + scaling_factor_x, scaling_factor_y, img_y = factors + tables_new = {} + for k in tables.keys(): + x1, y1, x2, y2 = k + x1 = scale(x1, scaling_factor_x) + y1 = scale(abs(translate(-img_y, y1)), scaling_factor_y) + x2 = scale(x2, scaling_factor_x) + y2 = scale(abs(translate(-img_y, y2)), scaling_factor_y) + j_x, j_y = zip(*tables[k]) + j_x = [scale(j, scaling_factor_x) for j in j_x] + j_y = [scale(abs(translate(-img_y, j)), scaling_factor_y) for j in j_y] + joints = zip(j_x, j_y) + tables_new[(x1, y1, x2, y2)] = joints + + v_segments_new = [] + for v in v_segments: + x1, x2 = scale(v[0], scaling_factor_x), scale(v[2], scaling_factor_x) + y1, y2 = ( + scale(abs(translate(-img_y, v[1])), scaling_factor_y), + scale(abs(translate(-img_y, v[3])), scaling_factor_y), + ) + v_segments_new.append((x1, y1, x2, y2)) + + h_segments_new = [] + for h in h_segments: + x1, x2 = scale(h[0], scaling_factor_x), scale(h[2], scaling_factor_x) + y1, y2 = ( + scale(abs(translate(-img_y, h[1])), scaling_factor_y), + scale(abs(translate(-img_y, h[3])), scaling_factor_y), + ) + h_segments_new.append((x1, y1, x2, y2)) + + return tables_new, v_segments_new, h_segments_new + + +def get_rotation(chars, horizontal_text, vertical_text): + """Detects if text in table is rotated or not using the current + transformation matrix (CTM) and returns its orientation. + + Parameters + ---------- + horizontal_text : list + List of PDFMiner LTTextLineHorizontal objects. + vertical_text : list + List of PDFMiner LTTextLineVertical objects. + ltchar : list + List of PDFMiner LTChar objects. + + Returns + ------- + rotation : string + '' if text in table is upright, 'anticlockwise' if + rotated 90 degree anticlockwise and 'clockwise' if + rotated 90 degree clockwise. + + """ + rotation = "" + hlen = len([t for t in horizontal_text if t.get_text().strip()]) + vlen = len([t for t in vertical_text if t.get_text().strip()]) + if hlen < vlen: + clockwise = sum(t.matrix[1] < 0 and t.matrix[2] > 0 for t in chars) + anticlockwise = sum(t.matrix[1] > 0 and t.matrix[2] < 0 for t in chars) + rotation = "anticlockwise" if clockwise < anticlockwise else "clockwise" + return rotation + + +def segments_in_bbox(bbox, v_segments, h_segments): + """Returns all line segments present inside a bounding box. + + Parameters + ---------- + bbox : tuple + Tuple (x1, y1, x2, y2) representing a bounding box where + (x1, y1) -> lb and (x2, y2) -> rt in PDFMiner coordinate + space. + v_segments : list + List of vertical line segments. + h_segments : list + List of vertical horizontal segments. + + Returns + ------- + v_s : list + List of vertical line segments that lie inside table. + h_s : list + List of horizontal line segments that lie inside table. + + """ + lb = (bbox[0], bbox[1]) + rt = (bbox[2], bbox[3]) + v_s = [ + v + for v in v_segments + if v[1] > lb[1] - 2 and v[3] < rt[1] + 2 and lb[0] - 2 <= v[0] <= rt[0] + 2 + ] + h_s = [ + h + for h in h_segments + if h[0] > lb[0] - 2 and h[2] < rt[0] + 2 and lb[1] - 2 <= h[1] <= rt[1] + 2 + ] + return v_s, h_s + + +def text_in_bbox(bbox, text): + """Returns all text objects present inside a bounding box. + + Parameters + ---------- + bbox : tuple + Tuple (x1, y1, x2, y2) representing a bounding box where + (x1, y1) -> lb and (x2, y2) -> rt in the PDF coordinate + space. + text : List of PDFMiner text objects. 
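# Illustrative call to segments_in_bbox() above, using hand-written segments
# rather than ones detected from an image; the 2-point tolerance in the
# filter keeps segments that sit exactly on the table border.
bbox = (100, 100, 300, 200)                      # (x1, y1, x2, y2), lb and rt
vertical = [(100, 100, 100, 200),                # left border, kept
            (500, 100, 500, 200)]                # outside the bbox, dropped
horizontal = [(100, 200, 300, 200),              # top border, kept
              (100, 400, 300, 400)]              # outside, dropped
v_s, h_s = segments_in_bbox(bbox, vertical, horizontal)
# v_s == [(100, 100, 100, 200)], h_s == [(100, 200, 300, 200)]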
+ + Returns + ------- + t_bbox : list + List of PDFMiner text objects that lie inside table, discarding the overlapping ones + + """ + lb = (bbox[0], bbox[1]) + rt = (bbox[2], bbox[3]) + t_bbox = [ + t + for t in text + if lb[0] - 2 <= (t.x0 + t.x1) / 2.0 <= rt[0] + 2 + and lb[1] - 2 <= (t.y0 + t.y1) / 2.0 <= rt[1] + 2 + ] + + # Avoid duplicate text by discarding overlapping boxes + rest = {t for t in t_bbox} + for ba in t_bbox: + for bb in rest.copy(): + if ba == bb: + continue + if bbox_intersect(ba, bb): + # if the intersection is larger than 80% of ba's size, we keep the longest + if (bbox_intersection_area(ba, bb) / bbox_area(ba)) > 0.8: + if bbox_longer(bb, ba): + rest.discard(ba) + unique_boxes = list(rest) + + return unique_boxes + + +def bbox_intersection_area(ba, bb) -> float: + """Returns area of the intersection of the bounding boxes of two PDFMiner objects. + + Parameters + ---------- + ba : PDFMiner text object + bb : PDFMiner text object + + Returns + ------- + intersection_area : float + Area of the intersection of the bounding boxes of both objects + + """ + x_left = max(ba.x0, bb.x0) + y_top = min(ba.y1, bb.y1) + x_right = min(ba.x1, bb.x1) + y_bottom = max(ba.y0, bb.y0) + + if x_right < x_left or y_bottom > y_top: + return 0.0 + + intersection_area = (x_right - x_left) * (y_top - y_bottom) + return intersection_area + + +def bbox_area(bb) -> float: + """Returns area of the bounding box of a PDFMiner object. + + Parameters + ---------- + bb : PDFMiner text object + + Returns + ------- + area : float + Area of the bounding box of the object + + """ + return (bb.x1 - bb.x0) * (bb.y1 - bb.y0) + + +def bbox_intersect(ba, bb) -> bool: + """Returns True if the bounding boxes of two PDFMiner objects intersect. + + Parameters + ---------- + ba : PDFMiner text object + bb : PDFMiner text object + + Returns + ------- + overlaps : bool + True if the bounding boxes intersect + + """ + return ba.x1 >= bb.x0 and bb.x1 >= ba.x0 and ba.y1 >= bb.y0 and bb.y1 >= ba.y0 + + +def bbox_longer(ba, bb) -> bool: + """Returns True if the bounding box of the first PDFMiner object is longer or equal to the second. + + Parameters + ---------- + ba : PDFMiner text object + bb : PDFMiner text object + + Returns + ------- + longer : bool + True if the bounding box of the first object is longer or equal + + """ + return (ba.x1 - ba.x0) >= (bb.x1 - bb.x0) + + +def merge_close_lines(ar, line_tol=2): + """Merges lines which are within a tolerance by calculating a + moving mean, based on their x or y axis projections. + + Parameters + ---------- + ar : list + line_tol : int, optional (default: 2) + + Returns + ------- + ret : list + + """ + ret = [] + for a in ar: + if not ret: + ret.append(a) + else: + temp = ret[-1] + if np.isclose(temp, a, atol=line_tol): + temp = (temp + a) / 2.0 + ret[-1] = temp + else: + ret.append(a) + return ret + + +def text_strip(text, strip=""): + """Strips any characters in `strip` that are present in `text`. + Parameters + ---------- + text : str + Text to process and strip. + strip : str, optional (default: '') + Characters that should be stripped from `text`. 
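# The overlap helpers above only need objects with x0/y0/x1/y1 attributes,
# so a tiny stand-in (an assumption made purely for this sketch; real callers
# pass PDFMiner text objects) is enough to exercise the geometry that
# text_in_bbox() uses when it discards duplicates.
from collections import namedtuple

Box = namedtuple("Box", ["x0", "y0", "x1", "y1"])
ba, bb = Box(0, 0, 10, 10), Box(8, 0, 20, 10)

bbox_intersect(ba, bb)                            # True, they share 8 <= x <= 10
bbox_intersection_area(ba, bb)                    # 20.0 (2 wide x 10 high)
bbox_intersection_area(ba, bb) / bbox_area(ba)    # 0.2, below the 0.8 cutoff
bbox_longer(bb, ba)                               # True, 12 units wide vs 10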
+ Returns + ------- + stripped : str + """ + if not strip: + return text + + stripped = re.sub( + fr"[{''.join(map(re.escape, strip))}]", "", text, flags=re.UNICODE + ) + return stripped + + +# TODO: combine the following functions into a TextProcessor class which +# applies corresponding transformations sequentially +# (inspired from sklearn.pipeline.Pipeline) + + +def flag_font_size(textline, direction, strip_text=""): + """Flags super/subscripts in text by enclosing them with . + May give false positives. + + Parameters + ---------- + textline : list + List of PDFMiner LTChar objects. + direction : string + Direction of the PDFMiner LTTextLine object. + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + + Returns + ------- + fstring : string + + """ + if direction == "horizontal": + d = [ + (t.get_text(), np.round(t.height, decimals=6)) + for t in textline + if not isinstance(t, LTAnno) + ] + elif direction == "vertical": + d = [ + (t.get_text(), np.round(t.width, decimals=6)) + for t in textline + if not isinstance(t, LTAnno) + ] + l = [np.round(size, decimals=6) for text, size in d] + if len(set(l)) > 1: + flist = [] + min_size = min(l) + for key, chars in groupby(d, itemgetter(1)): + if key == min_size: + fchars = [t[0] for t in chars] + if "".join(fchars).strip(): + fchars.insert(0, "") + fchars.append("") + flist.append("".join(fchars)) + else: + fchars = [t[0] for t in chars] + if "".join(fchars).strip(): + flist.append("".join(fchars)) + fstring = "".join(flist) + else: + fstring = "".join([t.get_text() for t in textline]) + return text_strip(fstring, strip_text) + + +def split_textline(table, textline, direction, flag_size=False, strip_text=""): + """Splits PDFMiner LTTextLine into substrings if it spans across + multiple rows/columns. + + Parameters + ---------- + table : camelot.core.Table + textline : object + PDFMiner LTTextLine object. + direction : string + Direction of the PDFMiner LTTextLine object. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using + if its size is different from rest of the string. (Useful for + super and subscripts.) + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + + Returns + ------- + grouped_chars : list + List of tuples of the form (idx, text) where idx is the index + of row/column and text is the an lttextline substring. 
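# text_strip() above removes every character listed in `strip` from a cell's
# text; a couple of throwaway strings show the effect.
text_strip("1,234.56\n", strip=",\n")   # -> '1234.56'
text_strip("94.2 %", strip=" %")        # -> '94.2'
text_strip("unchanged")                 # -> 'unchanged' (empty strip is a no-op)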
+ + """ + idx = 0 + cut_text = [] + bbox = textline.bbox + try: + if direction == "horizontal" and not textline.is_empty(): + x_overlap = [ + i + for i, x in enumerate(table.cols) + if x[0] <= bbox[2] and bbox[0] <= x[1] + ] + r_idx = [ + j + for j, r in enumerate(table.rows) + if r[1] <= (bbox[1] + bbox[3]) / 2 <= r[0] + ] + r = r_idx[0] + x_cuts = [ + (c, table.cells[r][c].x2) for c in x_overlap if table.cells[r][c].right + ] + if not x_cuts: + x_cuts = [(x_overlap[0], table.cells[r][-1].x2)] + for obj in textline._objs: + row = table.rows[r] + for cut in x_cuts: + if isinstance(obj, LTChar): + if ( + row[1] <= (obj.y0 + obj.y1) / 2 <= row[0] + and (obj.x0 + obj.x1) / 2 <= cut[1] + ): + cut_text.append((r, cut[0], obj)) + break + else: + # TODO: add test + if cut == x_cuts[-1]: + cut_text.append((r, cut[0] + 1, obj)) + elif isinstance(obj, LTAnno): + cut_text.append((r, cut[0], obj)) + elif direction == "vertical" and not textline.is_empty(): + y_overlap = [ + j + for j, y in enumerate(table.rows) + if y[1] <= bbox[3] and bbox[1] <= y[0] + ] + c_idx = [ + i + for i, c in enumerate(table.cols) + if c[0] <= (bbox[0] + bbox[2]) / 2 <= c[1] + ] + c = c_idx[0] + y_cuts = [ + (r, table.cells[r][c].y1) for r in y_overlap if table.cells[r][c].bottom + ] + if not y_cuts: + y_cuts = [(y_overlap[0], table.cells[-1][c].y1)] + for obj in textline._objs: + col = table.cols[c] + for cut in y_cuts: + if isinstance(obj, LTChar): + if ( + col[0] <= (obj.x0 + obj.x1) / 2 <= col[1] + and (obj.y0 + obj.y1) / 2 >= cut[1] + ): + cut_text.append((cut[0], c, obj)) + break + else: + # TODO: add test + if cut == y_cuts[-1]: + cut_text.append((cut[0] - 1, c, obj)) + elif isinstance(obj, LTAnno): + cut_text.append((cut[0], c, obj)) + except IndexError: + return [(-1, -1, textline.get_text())] + grouped_chars = [] + for key, chars in groupby(cut_text, itemgetter(0, 1)): + if flag_size: + grouped_chars.append( + ( + key[0], + key[1], + flag_font_size( + [t[2] for t in chars], direction, strip_text=strip_text + ), + ) + ) + else: + gchars = [t[2].get_text() for t in chars] + grouped_chars.append( + (key[0], key[1], text_strip("".join(gchars), strip_text)) + ) + return grouped_chars + + +def get_table_index( + table, t, direction, split_text=False, flag_size=False, strip_text="" +): + """Gets indices of the table cell where given text object lies by + comparing their y and x-coordinates. + + Parameters + ---------- + table : camelot.core.Table + t : object + PDFMiner LTTextLine object. + direction : string + Direction of the PDFMiner LTTextLine object. + split_text : bool, optional (default: False) + Whether or not to split a text line if it spans across + multiple cells. + flag_size : bool, optional (default: False) + Whether or not to highlight a substring using + if its size is different from rest of the string. (Useful for + super and subscripts) + strip_text : str, optional (default: '') + Characters that should be stripped from a string before + assigning it to a cell. + + Returns + ------- + indices : list + List of tuples of the form (r_idx, c_idx, text) where r_idx + and c_idx are row and column indices. + error : float + Assignment error, percentage of text area that lies outside + a cell. 
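# The row/column lookups in split_textline() above reduce to a midpoint-in-
# interval test: table.rows holds (y_top, y_bottom) pairs and table.cols holds
# (x_left, x_right) pairs, as the comparisons above assume. A toy version with
# made-up coordinates shows how a text line's centre picks its cell indices.
rows = [(700, 650), (650, 600)]    # y decreases down the page
cols = [(50, 150), (150, 250)]
x_mid, y_mid = 180.0, 640.0        # centre of some text line's bbox
r_idx = next(i for i, r in enumerate(rows) if r[1] <= y_mid <= r[0])   # -> 1
c_idx = next(j for j, c in enumerate(cols) if c[0] <= x_mid <= c[1])   # -> 1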
+ +-------+ + | | + | [Text bounding box] + | | + +-------+ + + """ + r_idx, c_idx = [-1] * 2 + for r in range(len(table.rows)): + if (t.y0 + t.y1) / 2.0 < table.rows[r][0] and (t.y0 + t.y1) / 2.0 > table.rows[ + r + ][1]: + lt_col_overlap = [] + for c in table.cols: + if c[0] <= t.x1 and c[1] >= t.x0: + left = t.x0 if c[0] <= t.x0 else c[0] + right = t.x1 if c[1] >= t.x1 else c[1] + lt_col_overlap.append(abs(left - right) / abs(c[0] - c[1])) + else: + lt_col_overlap.append(-1) + if len(list(filter(lambda x: x != -1, lt_col_overlap))) == 0: + text = t.get_text().strip("\n") + text_range = (t.x0, t.x1) + col_range = (table.cols[0][0], table.cols[-1][1]) + warnings.warn( + f"{text} {text_range} does not lie in column range {col_range}" + ) + r_idx = r + c_idx = lt_col_overlap.index(max(lt_col_overlap)) + break + + # error calculation + y0_offset, y1_offset, x0_offset, x1_offset = [0] * 4 + if t.y0 > table.rows[r_idx][0]: + y0_offset = abs(t.y0 - table.rows[r_idx][0]) + if t.y1 < table.rows[r_idx][1]: + y1_offset = abs(t.y1 - table.rows[r_idx][1]) + if t.x0 < table.cols[c_idx][0]: + x0_offset = abs(t.x0 - table.cols[c_idx][0]) + if t.x1 > table.cols[c_idx][1]: + x1_offset = abs(t.x1 - table.cols[c_idx][1]) + X = 1.0 if abs(t.x0 - t.x1) == 0.0 else abs(t.x0 - t.x1) + Y = 1.0 if abs(t.y0 - t.y1) == 0.0 else abs(t.y0 - t.y1) + charea = X * Y + error = ((X * (y0_offset + y1_offset)) + (Y * (x0_offset + x1_offset))) / charea + + if split_text: + return ( + split_textline( + table, t, direction, flag_size=flag_size, strip_text=strip_text + ), + error, + ) + else: + if flag_size: + return ( + [ + ( + r_idx, + c_idx, + flag_font_size(t._objs, direction, strip_text=strip_text), + ) + ], + error, + ) + else: + return [(r_idx, c_idx, text_strip(t.get_text(), strip_text))], error + + +def compute_accuracy(error_weights): + """Calculates a score based on weights assigned to various + parameters and their error percentages. + + Parameters + ---------- + error_weights : list + Two-dimensional list of the form [[p1, e1], [p2, e2], ...] + where pn is the weight assigned to list of errors en. + Sum of pn should be equal to 100. + + Returns + ------- + score : float + + """ + SCORE_VAL = 100 + try: + score = 0 + if sum([ew[0] for ew in error_weights]) != SCORE_VAL: + raise ValueError("Sum of weights should be equal to 100.") + for ew in error_weights: + weight = ew[0] / len(ew[1]) + for error_percentage in ew[1]: + score += weight * (1 - error_percentage) + except ZeroDivisionError: + score = 0 + return score + + +def compute_whitespace(d): + """Calculates the percentage of empty strings in a + two-dimensional list. + + Parameters + ---------- + d : list + + Returns + ------- + whitespace : float + Percentage of empty cells. + + """ + whitespace = 0 + r_nempty_cells, c_nempty_cells = [], [] + for i in d: + for j in i: + if j.strip() == "": + whitespace += 1 + whitespace = 100 * (whitespace / float(len(d) * len(d[0]))) + return whitespace + + +def get_page_layout( + filename, + line_overlap=0.5, + char_margin=1.0, + line_margin=0.5, + word_margin=0.1, + boxes_flow=0.5, + detect_vertical=True, + all_texts=True, +): + """Returns a PDFMiner LTPage object and page dimension of a single + page pdf. To get the definitions of kwargs, see + https://pdfminersix.rtfd.io/en/latest/reference/composable.html. + + Parameters + ---------- + filename : string + Path to pdf file. 
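# The two scoring helpers above work on plain Python lists, so they can be
# sanity-checked without a PDF. In compute_accuracy() the weights must sum to
# 100, and each weight is spread over its list of error percentages.
compute_accuracy([[100, [0.1, 0.3]]])
# -> 50 * (1 - 0.1) + 50 * (1 - 0.3) = 80.0
compute_whitespace([["a", ""], ["", "b"]])
# -> 50.0, two of the four cells are empty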
+ line_overlap : float + char_margin : float + line_margin : float + word_margin : float + boxes_flow : float + detect_vertical : bool + all_texts : bool + + Returns + ------- + layout : object + PDFMiner LTPage object. + dim : tuple + Dimension of pdf page in the form (width, height). + + """ + with open(filename, "rb") as f: + parser = PDFParser(f) + document = PDFDocument(parser) + if not document.is_extractable: + raise PDFTextExtractionNotAllowed( + f"Text extraction is not allowed: {filename}" + ) + laparams = LAParams( + line_overlap=line_overlap, + char_margin=char_margin, + line_margin=line_margin, + word_margin=word_margin, + boxes_flow=boxes_flow, + detect_vertical=detect_vertical, + all_texts=all_texts, + ) + rsrcmgr = PDFResourceManager() + device = PDFPageAggregator(rsrcmgr, laparams=laparams) + interpreter = PDFPageInterpreter(rsrcmgr, device) + for page in PDFPage.create_pages(document): + interpreter.process_page(page) + layout = device.get_result() + width = layout.bbox[2] + height = layout.bbox[3] + dim = (width, height) + return layout, dim + + +def get_text_objects(layout, ltype="char", t=None): + """Recursively parses pdf layout to get a list of + PDFMiner text objects. + + Parameters + ---------- + layout : object + PDFMiner LTPage object. + ltype : string + Specify 'char', 'lh', 'lv' to get LTChar, LTTextLineHorizontal, + and LTTextLineVertical objects respectively. + t : list + + Returns + ------- + t : list + List of PDFMiner text objects. + + """ + if ltype == "char": + LTObject = LTChar + elif ltype == "image": + LTObject = LTImage + elif ltype == "horizontal_text": + LTObject = LTTextLineHorizontal + elif ltype == "vertical_text": + LTObject = LTTextLineVertical + if t is None: + t = [] + try: + for obj in layout._objs: + if isinstance(obj, LTObject): + t.append(obj) + else: + t += get_text_objects(obj, ltype=ltype) + except AttributeError: + pass + return t diff --git a/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/TestsConfiguration.kt b/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/TestsConfiguration.kt index 3638eb72..4e832cc9 100644 --- a/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/TestsConfiguration.kt +++ b/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/TestsConfiguration.kt @@ -2,6 +2,6 @@ package com.github.darderion.mundaneassignmentpolice class TestsConfiguration { companion object { - const val resourceFolder = "src/test/resources/com/github/darderion/mundaneassignmentpolice/" + const val resourceFolder = "src/main/python/src/test/resources/com/github/darderion/mundaneassignmentpolice/" } } \ No newline at end of file diff --git a/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocumentTests.kt b/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocumentTests.kt index ad8c15bf..00d46e50 100644 --- a/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocumentTests.kt +++ b/src/test/kotlin/com/github/darderion/mundaneassignmentpolice/pdfdocument/PDFDocumentTests.kt @@ -8,7 +8,7 @@ import com.github.darderion.mundaneassignmentpolice.wrapper.PDFBox import io.kotest.core.spec.style.StringSpec import io.kotest.inspectors.forAll import io.kotest.matchers.shouldBe - +/* class PDFDocumentTests: StringSpec({ "PDFDocument should contain TITLE_PAGE's lines" { PDFDocument(text = lines).text.any { it.area == TITLE_PAGE } shouldBe true @@ -152,3 +152,5 @@ class PDFDocumentTests: StringSpec({ ) } } + + */ diff --git 
a/src/test/python/TableExtractionScriptTest.py b/src/test/python/TableExtractionScriptTest.py new file mode 100644 index 00000000..83ffccfa --- /dev/null +++ b/src/test/python/TableExtractionScriptTest.py @@ -0,0 +1,60 @@ +import unittest +import pandas +import contextlib +from pathlib import Path +import io +import os +import sys +import src.main.python.camelot +from src.main.python.TableExtractionScript import extraction + +sys.path.insert(0, '../src') + +class TableExtractionScriptTest(unittest.TestCase): + + def test_open_file(self): + pdf_path = 'src/test//resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/OpenNotPDF.docx' + + s = io.StringIO() + with contextlib.redirect_stdout(s): + extraction(pdf_path) + + self.assertEqual('invalid PDF file\n', s.getvalue()) + + def test_check_table_directory(self): + pdf_path = 'src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf' + extraction(pdf_path) + self.assertTrue(os.path.exists(f'uploads/tables/{Path(pdf_path).stem}')) + + def test_save_table(self): + pdf_path = 'src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf' + extraction(pdf_path) + self.assertTrue(os.path.exists('uploads/tables/TableInformation/TableInformation-page-1-table-1.csv')) + + def test_check_table_information(self): + pdf_path = 'src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf' + extraction(pdf_path) + table = pandas.read_csv(os.path.expanduser("~/map/uploads/tables/TableInformation/TableInformation-page-1-table-1.csv")) + camelot_table = src.main.python.camelot.read_pdf(pdf_path, linescale=30)[0] + self.assertEqual('table data', table.columns[0]) + + self.assertEqual('table information', table['table data'][4]) + + self.assertEqual('page', table['table data'][5]) + self.assertEqual('1', table['table data'][6]) + + self.assertEqual('table area', table['table data'][7]) + self.assertEqual(camelot_table.cells[3][0].x1, float(table['table data'][8])) + self.assertEqual(camelot_table.cells[3][3].x2, float(table['table data'][10])) + self.assertEqual(camelot_table.cells[3][0].y1, float(table['table data'][9])) + self.assertEqual(camelot_table.cells[0][3].y2, float(table['table data'][11])) + + self.assertEqual('rows', table['table data'][12]) + self.assertEqual('4', table['table data'][13]) + + self.assertEqual('columns', table['table data'][14]) + self.assertEqual('4', table['table data'][15]) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/test/python/camelot/camelot_py.py b/src/test/python/camelot/camelot_py.py new file mode 100644 index 00000000..1424c35f --- /dev/null +++ b/src/test/python/camelot/camelot_py.py @@ -0,0 +1,109 @@ +import os +import unittest +import sys +sys.path.insert(0, '../src') +import src.main.python.camelot as camelot +from src.main.python.camelot.image_processing import ( + intersectes +) +os.chdir(os.path.expanduser("~/map/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot")) + + +class DrawingLines(unittest.TestCase): + def test_v_draw(self): + file_name = 'DrawingVerticalLines.pdf' + + tables = camelot.read_pdf(file_name, latice=True, pages='1') + self.assertEqual(0, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='2') + self.assertEqual(0, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='3') + self.assertEqual(1, len(tables)) + 
self.assertEqual(5, len(tables[0].cells)) + self.assertEqual(1, len(tables[0].cols)) + self.assertEqual(5, len(tables[0].rows)) + + tables = camelot.read_pdf(file_name, latice=True, pages='4') + self.assertEqual(3, len(tables)) + + self.assertEqual(2, len(tables[0].cells)) + self.assertEqual(2, len(tables[1].cells)) + self.assertEqual(2, len(tables[2].cells)) + + self.assertEqual(1, len(tables[0].cols)) + self.assertEqual(1, len(tables[1].cols)) + self.assertEqual(1, len(tables[2].cols)) + + self.assertEqual(2, len(tables[0].rows)) + self.assertEqual(2, len(tables[1].rows)) + self.assertEqual(2, len(tables[2].rows)) + + def test_h_draw(self): + file_name = 'DrawingHorizontalLines.pdf' + + tables = camelot.read_pdf(file_name, latice=True, pages='1') + self.assertEqual(0, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='2') + self.assertEqual(0, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='3') + self.assertEqual(1, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='4') + self.assertEqual(1, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='5') + self.assertEqual(2, len(tables)) + + def test_intersects(self): + # rib1 intersects rib2 at first end + rib1, rib2 = (1, 100, 1, 5), (1, 5, 100, 5) + self.assertEqual(True, intersectes(rib1, rib2)) + + # rib1 intersects rib2 at second end + rib1, rib2 = (1, 100, 100, 100), (100, 100, 100, 5) + self.assertEqual(True, intersectes(rib1, rib2)) + + # horizontal rib1 parallel to horizontal rib2 + rib1, rib2 = (1, 100, 5, 100), (1, 200, 5, 200) + self.assertEqual(False, intersectes(rib1, rib2)) + + # vertical rib1 parallel to vertical rib2 + rib1, rib2 = (1, 100, 1, 200), (10, 100, 10, 200) + self.assertEqual(False, intersectes(rib1, rib2)) + + # rib1 intersects rib2 inside + rib1, rib2 = (1, 5, 100, 5), (50, 100, 50, 2) + self.assertEqual(True, intersectes(rib1, rib2)) + + # rib1 does not intersect rib2 + rib1, rib2 = (5, 10, 100, 10), (50, 60, 50, 40) + self.assertEqual(False, intersectes(rib1, rib2)) + + # rib1 lies on the same line as rib2 and does not intersect rib2 + rib1, rib2 = (5, 10, 100, 10), (150, 10, 160, 10) + self.assertEqual(False, intersectes(rib1, rib2)) + + def test_correct_lines(self): + file_name = 'DrawingComplexTables.pdf' + + tables = camelot.read_pdf(file_name, latice=True, pages='1') + self.assertEqual(1, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='2') + self.assertEqual(2, len(tables)) + + + tables = camelot.read_pdf(file_name, latice=True, pages='3') + self.assertEqual(2, len(tables)) + + tables = camelot.read_pdf(file_name, latice=True, pages='4') + self.assertEqual(3, len(tables)) + + + +if __name__ == '__main__': + unittest.main() diff --git a/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingComplexTables.pdf b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingComplexTables.pdf new file mode 100644 index 00000000..9fce1ba4 Binary files /dev/null and b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingComplexTables.pdf differ diff --git a/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingHorizontalLines.pdf b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingHorizontalLines.pdf new file mode 100644 index 00000000..2de1c1a2 Binary files /dev/null and 
b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingHorizontalLines.pdf differ diff --git a/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingVerticalLines.pdf b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingVerticalLines.pdf new file mode 100644 index 00000000..c8526582 Binary files /dev/null and b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/camelot/DrawingVerticalLines.pdf differ diff --git a/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/OpenNotPDF.docx b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/OpenNotPDF.docx new file mode 100644 index 00000000..f839a32b Binary files /dev/null and b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/OpenNotPDF.docx differ diff --git a/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf new file mode 100644 index 00000000..d19b3964 Binary files /dev/null and b/src/test/resources/com/github/darderion/mundaneassignmentpolice/python/tableextractionscript/TableInformation.pdf differ
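Taken together, the helpers added in src/main/python/camelot/utils.py can be exercised on their own, independently of the table parsers. The sketch below is only an illustration: the file name is a placeholder, the region of interest is hand-picked, and the import path assumes the vendored package layout used by the Python tests above.

from src.main.python.camelot.utils import (
    get_page_layout,
    get_text_objects,
    text_in_bbox,
)

# Parse a single-page PDF into a PDFMiner LTPage plus its (width, height).
layout, dim = get_page_layout("some-single-page.pdf")

# Collect the horizontal text lines laid out on the page.
horizontal_text = get_text_objects(layout, ltype="horizontal_text")

# Keep only the lines whose centres fall inside a hand-picked region
# (x1, y1, x2, y2) in PDF coordinates; overlapping duplicates are discarded.
region = (50, 50, dim[0] - 50, dim[1] - 50)
for line in text_in_bbox(region, horizontal_text):
    print(round(line.y0, 1), line.get_text().strip())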