Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,10 @@ package net.mamoe.yamlkt

import kotlinx.serialization.SerializationException

public class YamlDecodingException(message: String, cause: Throwable? = null) : SerializationException(message, cause)
/**
 * A [SerializationException] subtype used for YAML decoding errors.
 *
 * @param message the exception message, forwarded to [SerializationException].
 * @param cause the optional underlying cause, forwarded to [SerializationException]; defaults to `null`.
 */
public class YamlDecodingException(message: String, cause: Throwable? = null) : SerializationException(message, cause) {
// Runs on every construction: hands the freshly created exception to the platform-specific [cleanStack] hook.
init {
cleanStack()
}
}

/**
 * Platform-specific hook invoked from the initializer of [YamlDecodingException].
 *
 * Declared with `expect`, so every compilation target must supply its own `actual` implementation.
 * NOTE(review): judging by the name this presumably trims the exception's stack trace — confirm against the `actual` declarations.
 */
public expect fun YamlDecodingException.cleanStack()
24 changes: 23 additions & 1 deletion yamlkt/src/commonMain/kotlin/net.mamoe.yamlkt/internal/Escape.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ package net.mamoe.yamlkt.internal
import net.mamoe.yamlkt.YamlBuilder
import net.mamoe.yamlkt.YamlBuilder.CharSerialization
import net.mamoe.yamlkt.YamlBuilder.StringSerialization.*
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.InvocationKind
import kotlin.contracts.contract
import kotlin.jvm.JvmMultifileClass
import kotlin.jvm.JvmName
import kotlin.jvm.JvmStatic
Expand Down Expand Up @@ -153,15 +156,19 @@ internal fun TokenStream.readUnquotedString(stopOnComma: Boolean, begin: Char):
whileNotEOFWithBegin(begin) { char ->

if (char.isLineSeparator()) {
currentIndent = 0
// There is no need to set `currentIndent=0` here, as this is the responsibility of the external TokenStream.
append(source, startCur, cur - 2)
escapedOnce = true
if (!runNewLineSkippingAndEscapingForUnquoted(startingIndent)) {
cur-- // roll back `cur` before line break for external processor
return takeStringBufTrimEnd()
}
startCur = cur
} else when (char) {
':' -> {
if (!source[cur].isWhitespace()) {
return@whileNotEOFWithBegin
}
reuseToken(Token.COLON)
return doEnd()
}
Expand Down Expand Up @@ -598,6 +605,21 @@ internal inline fun <R> TokenStream.peekNext(block: (ch: Char) -> R?): R? {
if (endOfInput) return null
return source[cur + 1].let(block)
}
/**
 * Rejects the current string buffer when it is longer than [maxLength].
 *
 * Does nothing when `strBuff` is `null` or its length does not exceed [maxLength]. Otherwise the
 * buffer is passed to [errorMessage] and the resulting text is thrown via `contextualDecodingException`.
 *
 * The limit is measured with `String.length`, i.e. in 16-bit `Char` units. A character that needs
 * more than 16 bits (such as a basic emoji) therefore counts more than once, so this is only an
 * approximation of a limit expressed in Unicode characters.
 *
 * @param maxLength maximum accepted buffer length, in `Char` units; defaults to 1024.
 * @param errorMessage builds the exception message from the offending buffer; called at most once.
 */
@OptIn(ExperimentalContracts::class)
internal inline fun TokenStream.validateStrBuffLength(
    maxLength: Int = 1024,
    crossinline errorMessage: (String) -> String
) {
    contract {
        callsInPlace(errorMessage, InvocationKind.AT_MOST_ONCE)
    }
    val pending = strBuff
    // Nothing buffered, or the buffer is within the limit: accept silently.
    if (pending == null || pending.length <= maxLength) return
    throw contextualDecodingException(errorMessage(pending))
}

/**
 * Returns `true` when this character is an ASCII hexadecimal digit:
 * `0`-`9`, `a`-`f` or `A`-`F`.
 */
internal fun Char.isHexDigit(): Boolean = when (this) {
    in '0'..'9', in 'a'..'f', in 'A'..'F' -> true
    else -> false
}

Expand Down
161 changes: 117 additions & 44 deletions yamlkt/src/commonMain/kotlin/net.mamoe.yamlkt/internal/TokenStream.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
@file:JvmMultifileClass
@file:JvmName("YamlUtils")
@file:Suppress("NOTHING_TO_INLINE")

package net.mamoe.yamlkt.internal

Expand All @@ -22,6 +23,8 @@ internal enum class Token(val value: Char) {
MAP_BEGIN('{'),
MAP_END('}'),

//explicit-key
EXPLICIT_MAPPING_KEY('?'),

STRING(' '),
STRING_NULL(' ');
Expand Down Expand Up @@ -123,6 +126,8 @@ internal open class StringBufHolder {
internal class TokenStream(
@JvmField val source: String
) : StringBufHolder() {
private var stateFlags: Int = 0

@JvmField
var cur: Int = 0

Expand All @@ -147,20 +152,32 @@ internal class TokenStream(
/**
* Whether the current string is quoted
*/
@JvmField
var quoted: Boolean = false
var quoted: Boolean
get() = hasAllFlags(FLAG_QUOTED)
set(value) = setFlags(FLAG_QUOTED, value)

/**
* Whether the current string is decoded after a newline.
*/
@JvmField
var newLined: Boolean = false
var newLined: Boolean
get() = hasAllFlags(FLAG_NEW_LINED)
set(value) = setFlags(FLAG_NEW_LINED, value)

var isLatestFailedAsToken: Boolean
get() = hasAllFlags(FLAG_LATEST_FAILED_AS_TOKEN)
set(value) = setFlags(FLAG_LATEST_FAILED_AS_TOKEN, value)

inline val endOfInput: Boolean get() = cur == source.length

@JvmField
var escapeCount = 0

/**
* Character index to skip during reading. Automatically resets to null after being ignored.
*/
@JvmField
var ignoreIndex: Int? = null


fun subSourceTrimEnd(offset: Int, endIndex: Int): String {
for (i in endIndex - 1 downTo offset) {
Expand Down Expand Up @@ -209,13 +226,30 @@ internal class TokenStream(


/**
* Pop the last element of [reuseTokenStack] if possible, or read a token or a string from [source]
* Attempts to pop the top element from [reuseTokenStack]. When the stack is empty,
* calls [nextTokenWithoutStack] to read a token from the [source] string.
*
* Returns [END_OF_FILE] if end of file is reached
* Returns [END_OF_FILE] if end of file is reached.
*
* If [Token.STRING] is returned, [strBuff] will also be updated
* If [Token.STRING] is returned, [strBuff] will also be updated.
*
* ### Implementation Details
* First, it continuously consumes the stream (advancing `cur`) until the first non-whitespace character is found.
* During this process, if [ignoreIndex] is set, it skips characters at [ignoreIndex] (resets to `null` after skipping) and increments [currentIndent].
*
* Then it attempts to match this character to a valid [Token]. If no match is found, it is parsed as a string.
*
* If the matched [Token] is one of [Token.COLON], [Token.COMMA], [Token.MULTILINE_LIST_FLAG], or [Token.EXPLICIT_MAPPING_KEY],
* **it requires the next character to also be whitespace**; **otherwise**, it is parsed as a string and the [isLatestFailedAsToken] flag is set.
*
* ### Warning
* If expecting a [Token] from the set [Token.COLON], [Token.COMMA], [Token.MULTILINE_LIST_FLAG], or [Token.EXPLICIT_MAPPING_KEY],
* **you must** check the stream state when [Token.STRING] is returned.
* When the character following these [Token] characters is not whitespace, the [isLatestFailedAsToken] flag is set and the remaining content is stored in [strBuff].
*/
fun nextToken(stopOnComma: Boolean): Token? {
isLatestFailedAsToken = false

val reuse = reuseTokenStack.popOrNull()
if (reuse != null) {
return if (reuse is String) {
Expand All @@ -226,56 +260,66 @@ internal class TokenStream(
reuse as Token
}
}
return nextTokenWithoutStack(stopOnComma)
}


/**
* Always read a token or a string from [source]
*
* @see nextToken
* */
private fun nextTokenWithoutStack(stopOnComma: Boolean): Token? {
newLined = false
leadingSpace = 0
//currentIndent = 0
whileNotEOF { char ->
whileNotEOFIndexed { char, index ->
fun withIncreaseIndent(result: Token): Token {
currentIndent++
return result
}
fun requireWhitespaceResult(result: Token): Token {
if (source.getOrNull(index)?.isWhitespace() == true) {
return withIncreaseIndent(result)
}
isLatestFailedAsToken = true
val str = prepareStringAndNextToken(stopOnComma, char) ?: return Token.STRING_NULL
this.strBuff = str
if (newLined) {
cur--
newLined = false
}
return Token.STRING
}

if (ignoreIndex != null && index == ignoreIndex) {
ignoreIndex = null
currentIndent++
Debugging.logCustom { "[TokenStream.nextToken] Skipped '$char' at $index (currentIndent=$currentIndent)" }
return@whileNotEOFIndexed
}
if (char == ' ') {
currentIndent++
leadingSpace++
} else when (char) {
':' -> {
currentIndent++
return Token.COLON
}
}else when(char) {
':' -> return requireWhitespaceResult(Token.COLON)
',' -> return if (stopOnComma) withIncreaseIndent(Token.COMMA)
else requireWhitespaceResult(Token.COMMA)
'-' -> return requireWhitespaceResult(Token.MULTILINE_LIST_FLAG)
'?' -> return requireWhitespaceResult(Token.EXPLICIT_MAPPING_KEY)

'{' -> return withIncreaseIndent(Token.MAP_BEGIN)
'}' -> return withIncreaseIndent(Token.MAP_END)
'[' -> return withIncreaseIndent(Token.LIST_BEGIN)
']' -> return withIncreaseIndent(Token.LIST_END)


'\n', '\r' -> {
newLined = true
currentIndent = 0
leadingSpace = 0
}

',' -> {
currentIndent++
return Token.COMMA
}

'{' -> {
currentIndent++
return Token.MAP_BEGIN
}

'}' -> {
currentIndent++
return Token.MAP_END
}

'[' -> {
currentIndent++
return Token.LIST_BEGIN
}

']' -> {
currentIndent++
return Token.LIST_END
}

'-' -> {
currentIndent++
return Token.MULTILINE_LIST_FLAG
}

'#' -> {
this.skipLine()
currentIndent = 0
Expand Down Expand Up @@ -313,8 +357,25 @@ internal class TokenStream(
readUnquotedString(stopOnComma, begin).optimizeNull()
}
}
}

///////////////////////////////////////////////////////////
//////////////// BIT STATE /////////////////////////////////
///////////////////////////////////////////////////////////

/**
 * Turns every bit of [flags] on (when [value] is `true`) or off (when it is `false`)
 * in `stateFlags`, leaving all other bits untouched.
 */
inline fun setFlags(flags: Int, value: Boolean) {
    stateFlags = when {
        value -> stateFlags or flags
        else -> stateFlags and flags.inv()
    }
}
/**
 * Returns `true` only when every bit of [flags] is currently set in `stateFlags`.
 */
inline fun hasAllFlags(flags: Int): Boolean = flags == (stateFlags and flags)
}
// Bit masks for TokenStream's packed `stateFlags` field. Each mask uses its own bit,
// so the three boolean states can be stored and toggled independently.

// Backs `TokenStream.quoted`.
private const val FLAG_QUOTED = 1 shl 0 // 0b00000001
// Backs `TokenStream.newLined`.
private const val FLAG_NEW_LINED = 1 shl 1 // 0b00000010
// Backs `TokenStream.isLatestFailedAsToken`.
private const val FLAG_LATEST_FAILED_AS_TOKEN = 1 shl 2 // 0b00000100
// The single-quote character (').
internal const val SINGLE_QUOTATION_CHAR = '\''
// The double-quote character (").
internal const val DOUBLE_QUOTATION_CHAR = '"'

Expand All @@ -329,6 +390,17 @@ internal inline fun TokenStream.whileNotEOF(block: (char: Char) -> Unit): Nothin
return null
}

/**
 * Feeds the remaining characters of [TokenStream.source] to [block], one at a time, until
 * [TokenStream.endOfInput] becomes `true`.
 *
 * For each step [block] receives the character at the current position and, as `idx`, the value of
 * [TokenStream.cur] *after* it has been advanced past that character, i.e. the index of the
 * character that follows, not the index of `char` itself.
 *
 * Because the function is `inline`, [block] may leave the loop early with a non-local `return`.
 *
 * @return always `null`, and only when the loop ran to the end of input.
 */
@OptIn(ExperimentalContracts::class)
internal inline fun TokenStream.whileNotEOFIndexed(block: (char: Char, idx: Int) -> Unit): Nothing? {
contract {
callsInPlace(block, InvocationKind.UNKNOWN)
}
while (!endOfInput) {
// Arguments are evaluated left to right: `source[cur++]` reads the character and bumps `cur`,
// then the second argument reads the already-incremented `cur`. Do not reorder or split
// this call in a way that changes which value of `cur` is passed as `idx`.
block(source[cur++], cur) // don't change
}
return null
}

/**
* Move [TokenStream.cur] to the last unsatisfying point
*/
Expand Down Expand Up @@ -409,3 +481,4 @@ private class Stack<T> {
return content[--cur] as T
}
}

Loading
Loading