Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<description>Web-app that assists in checking students&apos; assignments</description>
<properties>
<java.version>11</java.version>
<kotlin.version>1.5.31</kotlin.version>
<kotlin.version>1.6.21</kotlin.version>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

</properties>
<dependencies>
<dependency>
Expand Down Expand Up @@ -85,6 +85,11 @@
<version>5.0.0.M1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlinx</groupId>
<artifactId>dataframe</artifactId>
<version>0.8.0-dev-1005</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ data class RuleViolation(
) {
// override fun toString() = if (lines.count() == 1) "[${lines.first().line}, p.${lines.first().page}] --> '$message'" else ""
}

Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea.TABLE_OF
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFDocument
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion
import com.github.darderion.mundaneassignmentpolice.pdfdocument.list.PDFList
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Coordinate
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line

class ListRule(
Expand All @@ -26,7 +27,7 @@ class ListRule(
document.areas!!.tableOfContents.map {
document.text.filter { it.area == TABLE_OF_CONTENT }.firstOrNull { line ->
line.content.contains(it)
}?: Line(0, 0, 0, listOf(), TABLE_OF_CONTENT)
}?: Line(0, 0, 0, listOf(), TABLE_OF_CONTENT, Coordinate(0,0))
}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class BasicSymbolRule(
when (direction) {
LEFT -> sideTexts.removeAt(1)
RIGHT -> sideTexts.removeAt(0)
else -> {}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

}

val neighbors = (if (notIgnoredNeighbors.isNotEmpty()) sideTexts
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package com.github.darderion.mundaneassignmentpolice.checker.rule.table

import com.github.darderion.mundaneassignmentpolice.checker.RuleViolation
import com.github.darderion.mundaneassignmentpolice.checker.RuleViolationType
import com.github.darderion.mundaneassignmentpolice.checker.rule.Rule
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFDocument
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion
import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line

/**
 * Rule that runs a set of predicates over every table of a document.
 *
 * Each predicate receives a [Table] and returns the lines it objects to.
 * Every non-empty result is reported as one [RuleViolation] carrying this
 * rule's name and type; duplicate violations are reported once.
 *
 * Note: [process] itself does not consult the `area` passed to [Rule].
 *
 * @property predicates checks applied to each table, in order.
 */
class TableRule(
    val predicates: MutableList<(Table) -> List<Line>>,
    type: RuleViolationType,
    area: PDFRegion,
    name: String
) : Rule(area, name, type) {
    /**
     * Applies every predicate to every table in [document].
     *
     * @return the distinct violations, in the order they were first produced
     *         (predicate by predicate, table by table).
     */
    override fun process(document: PDFDocument): List<RuleViolation> {
        // An insertion-ordered set removes duplicates while keeping discovery order.
        val uniqueViolations = predicates.flatMapTo(mutableSetOf<RuleViolation>()) { check ->
            document.tables
                .map { table -> check(table) }
                .filter { offendingLines -> offendingLines.isNotEmpty() }
                .map { offendingLines -> RuleViolation(offendingLines, name, type) }
        }
        return uniqueViolations.toList()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.github.darderion.mundaneassignmentpolice.checker.rule.table

import com.github.darderion.mundaneassignmentpolice.checker.RuleViolationType
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion
import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line

/**
 * Fluent builder for [TableRule].
 *
 * Defaults: violation type [RuleViolationType.Error], region
 * [PDFRegion.EVERYWHERE], name `"Rule name"`. This builder currently exposes
 * no setters for the type or the region, so built rules always use those defaults.
 */
class TableRuleBuilder {
    private val predicates: MutableList<(Table) -> List<Line>> = mutableListOf()
    private var type: RuleViolationType = RuleViolationType.Error
    private var region: PDFRegion = PDFRegion.EVERYWHERE
    private var name: String = "Rule name"

    /** Sets the name of the rule being built and returns this builder. */
    fun called(name: String) = this.also { this.name = name }

    /**
     * Registers a predicate that returns the offending lines of a table
     * (an empty list means the table is fine) and returns this builder.
     */
    fun disallow(predicate: (table: Table) -> List<Line>) = this.also { predicates.add(predicate) }

    /**
     * Builds a [TableRule] from the current builder state.
     *
     * The rule receives a snapshot copy of the predicates, so calling
     * [disallow] on this builder afterwards does not alter rules that
     * were already built.
     */
    fun getRule() = TableRule(predicates.toMutableList(), type, region, name)
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class BasicWordRule(
when (direction) {
Direction.LEFT -> sideWords.removeAt(1)
Direction.RIGHT -> sideWords.removeAt(0)
else -> {}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

}

val filteredSideWords = sideWords
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ class Annotations {
var document = PDFBox().getDocument(pdf.name)
lines.forEach { line ->
document = PDFBox().addLine(document, line.page,
Coordinate(line.position.x to (pdf.height - (line.text.maxOf { it.position.y } + 2))),
(pdf.width - (line.position.x + 50)).toInt()
Coordinate(line.startPosition.x to (pdf.height - (line.text.maxOf { it.position.y } + 2))),
(line.endPosition.x - line.startPosition.x).toInt()
)
}


Files.createDirectories(Paths.get("${pdfFolder}ruleviolations/"))
val fileName = "${pdfFolder}ruleviolations/${
pdf.name.split('/')[pdf.name.split('/').count() - 1].replace(".pdf", "")
}${lines.first().index}-${lines.last().index}(${lines[0].page}).pdf"
document.save(fileName)
return fileName
}

}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package com.github.darderion.mundaneassignmentpolice.pdfdocument

import com.github.darderion.mundaneassignmentpolice.pdfdocument.tables.Table
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line
import mu.KotlinLogging

class PDFDocument(val name: String = "PDF",
val text: List<Line>,
val tables: List<Table>,
val width: Double = defaultPageWidth,
val height: Double = defaultPageHeight
) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ data class PDFList<T>(val value: MutableList<T> = mutableListOf(), val nodes: Mu
*/
fun getLists(lines: List<Line>): List<PDFList<Line>> {
// Adding a line to process a text that has no lines after a list
val lines = lines + Line(-1, -1, -1, listOf(Word("NOT A LIST ITEM", Font(0.0f), Coordinate(1000, -1))))
val lines = lines + Line(-1, -1, -1, listOf(Word("NOT A LIST ITEM", Font(0.0f), Coordinate(1000, -1))), null, Coordinate(0,0))

val lists: MutableList<PDFList<Line>> = mutableListOf()
val stack: Stack<PDFList<Line>> = Stack()
Expand All @@ -69,11 +69,11 @@ data class PDFList<T>(val value: MutableList<T> = mutableListOf(), val nodes: Mu
stack.push(stack.peek().nodes.first())
}
} else {
previousPosition = stack.peek().value.first().position
if (previousPosition hasSameXAs line.position) { // 1. lorem OR lorem
previousPosition = stack.peek().value.first().startPosition
if (previousPosition hasSameXAs line.startPosition) { // 1. lorem OR lorem
stack.peek().value.add(line) // lorem lorem
} else {
if (previousPosition.x < line.position.x) {
if (previousPosition.x < line.startPosition.x) {
if (isListItem(line)) {
stack.peek().nodes.add(PDFList(line.drop(2))) // lorem
stack.push(stack.peek().nodes.last()) // 1. lorem
Expand All @@ -83,17 +83,17 @@ data class PDFList<T>(val value: MutableList<T> = mutableListOf(), val nodes: Mu
}
} else { // lorem OR lorem OR ... lorem OR ... lorem
while (!( stack.isEmpty() || // lorem 2. lorem lorem 2. lorem
(isListItem(line) && previousPosition hasSameXAs line.drop(2).position) ||
previousPosition hasSameXAs line.position)) {
(isListItem(line) && previousPosition hasSameXAs line.drop(2).startPosition) ||
previousPosition hasSameXAs line.startPosition)) {
previousList = stack.pop()
if (stack.isNotEmpty()) {
previousPosition = stack.peek().value.first().position
previousPosition = stack.peek().value.first().startPosition
}
}
if (stack.isEmpty()) {
lists.add(previousList!!)
} else {
if (previousPosition hasSameXAs line.position) { // lorem
if (previousPosition hasSameXAs line.startPosition) { // lorem
stack.peek().value.add(line) // lorem
} else {
stack.pop()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.github.darderion.mundaneassignmentpolice.pdfdocument.tables
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Coordinate
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line

data class Cell(
val page: Int,
val cellText: MutableList<String>,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

var cellLines: MutableList<Line>,
val leftCorner: Coordinate,
val rightCorner: Coordinate
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package com.github.darderion.mundaneassignmentpolice.pdfdocument.tables

import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.*
import com.github.darderion.mundaneassignmentpolice.wrapper.PDFBox
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.*

/**
 * A table reconstructed from a [DataFrame].
 *
 * The rows of the first column starting at the last `"table information"`
 * marker are read as table metadata (page, bounding box, row and column
 * counts) using the fixed row offsets in the companion object. Every value
 * in the rows above that marker is parsed into a [Cell].
 *
 * Y coordinates are stored as `defaultPageHeight - y`.
 * NOTE(review): presumably this flips the Y axis of the extraction tool's
 * output to match the rest of the project — confirm against the producer.
 *
 * @property df the raw data frame this table was built from.
 */
class Table(val df: DataFrame<Any?>) {

    val page: Int
    val x1: Double
    val y1: Double
    val x2: Double
    val y2: Double
    val rowCount: Int
    val colCount: Int

    // Must be declared before the init block: getCell() appends to it during construction.
    val cells: MutableList<Cell> = mutableListOf()

    init {
        val firstColumn = df.select { cols(0) }
        val metadataStart = firstColumn.last { it[0] == "table information" }.index()
        val metadata = firstColumn.filter { it.index() >= metadataStart }

        // Reads the metadata entry at the given row offset as text.
        fun metadataValue(rowIndex: Int): String = metadata[rowIndex][0].toString()

        // The stored page is one less than the value found in the metadata.
        this.page = metadataValue(pageTableIndex).toInt() - 1
        this.x1 = metadataValue(x1TableIndex).toDouble()
        this.y1 = defaultPageHeight - metadataValue(y1TableIndex).toDouble()
        this.x2 = metadataValue(x2TableIndex).toDouble()
        this.y2 = defaultPageHeight - metadataValue(y2TableIndex).toDouble()
        this.rowCount = metadataValue(rowTableIndex).toInt()
        this.colCount = metadataValue(colTableIndex).toInt()

        // Everything above the metadata marker is cell data.
        val cellData = df.filter { it.index() < metadataStart }
        cellData.forEachColumn { column ->
            column.forEach { value -> getCell(value.toString()) }
        }
    }

    /**
     * Parses one raw cell value and appends the resulting [Cell] to [cells].
     *
     * The first line of [text] is split on single spaces and the corner
     * coordinates are taken from fixed token positions; the remaining lines
     * become the cell's text. The cell's line list starts out empty.
     */
    private fun getCell(text: String) {
        val textLines = text.lines()
        val tokens = textLines.first().split(" ")

        val cellX1 = tokens[x1CellIndex].toDouble()
        val cellY1 = defaultPageHeight - tokens[y1CellIndex].toDouble()
        val cellX2 = tokens[x2CellIndex].toDouble()
        val cellY2 = defaultPageHeight - tokens[y2CellIndex].toDouble()

        cells.add(
            Cell(
                page,
                textLines.drop(1).toMutableList(),
                mutableListOf(),
                Coordinate(cellX1, cellY1),
                Coordinate(cellX2, cellY2)
            )
        )
    }

    /** Returns the lines of all cells, concatenated in cell order, as a new list. */
    fun getLines(): List<Line> = cells.flatMap { cell -> cell.cellLines }

    companion object {
        // Page height subtracted from when converting Y coordinates.
        private const val defaultPageHeight = 842.0

        // Token positions of the corner coordinates in a cell's first line.
        private const val x1CellIndex = 2
        private const val y1CellIndex = 5
        private const val x2CellIndex = 8
        private const val y2CellIndex = 11

        // Row offsets of the metadata entries, counted from the "table information" marker.
        private const val pageTableIndex = 2
        private const val x1TableIndex = 4
        private const val y1TableIndex = 5
        private const val x2TableIndex = 6
        private const val y2TableIndex = 7
        private const val rowTableIndex = 9
        private const val colTableIndex = 11
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ package com.github.darderion.mundaneassignmentpolice.pdfdocument.text
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea

data class Line(val index: Int, val page: Int, val documentIndex: Int,
val text: List<Word>, var area: PDFArea? = null
val text: List<Word>, var area: PDFArea? = null, var endPosition: Coordinate
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

) {
val content: String
get() = text.joinToString("") { it.text }

val position: Coordinate
val startPosition: Coordinate
get() = if (text.isNotEmpty()) text.first().position else Coordinate(0, 0)

val first: String?
Expand All @@ -17,7 +17,10 @@ data class Line(val index: Int, val page: Int, val documentIndex: Int,
val second: String?
get() = if (text.count() > 1) text[1].text else null

override fun toString() = "[$documentIndex -- $index, p.$page, $area, ${position.x}] --> '$content'"
override fun toString() = "[$documentIndex -- $index, p.$page, $area, ${startPosition.x}] --> '$content'"

fun drop(numberOfItems: Int) = Line(index, page, documentIndex, text.drop(numberOfItems), area)
fun drop(numberOfItems: Int) = Line(index, page, documentIndex, text.drop(numberOfItems), area, Coordinate(0,0))
companion object{
private const val defaultPageWidth = 595.22
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.Rule

val RULE_SET_RU = RuleSet(
mutableListOf(
RULE_LITLINK,
TABLE_RULE,

/*RULE_LITLINK,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

RULE_SHORT_DASH,
RULE_MEDIUM_DASH,
RULE_LONG_DASH,
Expand All @@ -22,9 +24,12 @@ val RULE_SET_RU = RuleSet(
RULE_VARIOUS_ABBREVIATIONS,
RULE_SECTIONS_ORDER,
RULE_LOW_QUALITY_CONFERENCES,

*/
)
+ RULES_SPACE_AROUND_BRACKETS
/*+ RULES_SPACE_AROUND_BRACKETS
+ RULES_SMALL_NUMBERS
)

*/
)
/**
 * A collection of rules.
 *
 * @property rules the rules contained in this set.
 */
class RuleSet(val rules: List<Rule>)
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.regex.RegexRule
import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.SymbolRuleBuilder
import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.and
import com.github.darderion.mundaneassignmentpolice.checker.rule.symbol.or
import com.github.darderion.mundaneassignmentpolice.checker.rule.table.TableRuleBuilder
import com.github.darderion.mundaneassignmentpolice.checker.rule.tableofcontent.TableOfContentRuleBuilder
import com.github.darderion.mundaneassignmentpolice.checker.rule.url.URLRuleBuilder
import com.github.darderion.mundaneassignmentpolice.checker.rule.url.then
Expand All @@ -15,6 +16,7 @@ import com.github.darderion.mundaneassignmentpolice.checker.rule.word.WordRuleBu
import com.github.darderion.mundaneassignmentpolice.checker.rule.word.or
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFArea
import com.github.darderion.mundaneassignmentpolice.pdfdocument.PDFRegion
import com.github.darderion.mundaneassignmentpolice.pdfdocument.text.Line
import com.github.darderion.mundaneassignmentpolice.utils.InvalidOperationException
import com.github.darderion.mundaneassignmentpolice.utils.LowQualityConferencesUtil
import com.github.darderion.mundaneassignmentpolice.utils.ResourcesUtil
Expand Down Expand Up @@ -416,3 +418,12 @@ val RULE_LOW_QUALITY_CONFERENCES = URLRuleBuilder()
.any { conference -> url.text.contains(conference) }
}.map { it to it.lines }
}.getRule()

val TABLE_RULE = TableRuleBuilder()
.called("Все клетки")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

.disallow { table ->
val lines = mutableListOf<Line>()
table.cells.forEach { cell -> lines.addAll(cell.cellLines) }
lines
}
.getRule()
Loading