All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.space.kscience.dataforge.io.ioMisc.kt Maven / Gradle / Ivy

package space.kscience.dataforge.io

import kotlinx.io.*
import kotlinx.io.bytestring.ByteString
import kotlinx.io.bytestring.decodeToString
import kotlinx.io.bytestring.encodeToByteString
import space.kscience.dataforge.meta.Meta
import space.kscience.dataforge.misc.DFExperimental
import kotlin.math.min

/**
 * Convert a string literal, containing only ASCII characters to a [ByteString].
 * Throws an error if there are non-ASCII characters.
 */
public fun String.toAsciiByteString(): ByteString {
    val bytes = ByteArray(length) {
        val char = get(it)
        val code = char.code
        if (code > Byte.MAX_VALUE) error("Symbol $char is not ASCII symbol") else code.toByte()
    }
    return ByteString(bytes)
}

public inline fun Buffer(block: Sink.() -> Unit): Buffer = Buffer().apply(block)

//public fun Source.readSafeUtf8Line(): String = readUTF8Line() ?: error("Line not found")

public inline fun ByteArray(block: Sink.() -> Unit): ByteArray =
    Buffer(block).readByteArray()

public inline fun Binary(block: Sink.() -> Unit): Binary =
    ByteArray(block).asBinary()

public operator fun Binary.get(range: IntRange): Binary = view(range.first, range.last - range.first)

/**
 * Return inferred [EnvelopeFormat] if only one format could read given file. If no format accepts the binary, return null. If
 * multiple formats accept binary, throw an error.
 */
public fun IOPlugin.peekBinaryEnvelopeFormat(binary: Binary): EnvelopeFormat? {
    val formats = envelopeFormatFactories.mapNotNull { factory ->
        factory.peekFormat(this@peekBinaryEnvelopeFormat, binary)
    }

    return when (formats.size) {
        0 -> null
        1 -> formats.first()
        else -> error("Envelope format binary recognition clash: $formats")
    }
}

/**
 * A zero-copy read from
 */
@DFExperimental
public fun IOPlugin.readEnvelope(
    binary: Binary,
    readNonEnvelopes: Boolean = false,
    formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
): Envelope = formatPicker(binary)?.readFrom(binary) ?: if (readNonEnvelopes) {
    // if no format accepts file, read it as binary
    Envelope(Meta.EMPTY, binary)
} else error("Can't infer format for $binary")

@DFExperimental
public fun IOPlugin.readEnvelope(
    string: String,
    readNonEnvelopes: Boolean = false,
    formatPicker: IOPlugin.(Binary) -> EnvelopeFormat? = IOPlugin::peekBinaryEnvelopeFormat,
): Envelope = readEnvelope(string.encodeToByteArray().asBinary(), readNonEnvelopes, formatPicker)


private class RingByteArray(
    private val buffer: ByteArray,
    private var startIndex: Int = 0,
    var size: Int = 0,
) {
    operator fun get(index: Int): Byte {
        require(index >= 0) { "Index must be positive" }
        require(index < size) { "Index $index is out of circular buffer size $size" }
        return buffer[startIndex.forward(index)]
    }

    fun isFull(): Boolean = size == buffer.size

    fun push(element: Byte) {
        buffer[startIndex.forward(size)] = element
        if (isFull()) startIndex++ else size++

    }

    private fun Int.forward(n: Int): Int = (this + n) % (buffer.size)

    fun contentEquals(inputArray: ByteArray): Boolean = when {
        inputArray.size != buffer.size -> false
        size < buffer.size -> false
        else -> inputArray.indices.all { inputArray[it] == get(it) }
    }

    fun contentEquals(byteString: ByteString): Boolean = when {
        byteString.size != buffer.size -> false
        size < buffer.size -> false
        else -> (0 until byteString.size).all { byteString[it] == get(it) }
    }

}

private fun RingByteArray.toArray(): ByteArray = ByteArray(size) { get(it) }

/**
 * Read [Source] into [output] until designated multibyte [separator] and optionally continues until
 * the end of the line after it. Throw error if [separator] not found and [atMost] bytes are read.
 * Also fails if [separator] not found until the end of input.
 *
 * The Separator itself is not read into [Sink].
 *
 * @param errorOnEof if true error is thrown if separator is never encountered
 *
 * @return bytes actually being read, including separator
 */
public fun Source.readWithSeparatorTo(
    output: Sink?,
    separator: ByteString,
    atMost: Int = Int.MAX_VALUE,
    errorOnEof: Boolean = false,
): Int {
    var counter = 0
    val rb = RingByteArray(ByteArray(separator.size))

    while (!exhausted()) {
        val byte = readByte()
        counter++
        if (counter >= atMost) error("Maximum number of bytes to be read $atMost reached.")
        rb.push(byte)
        if (rb.contentEquals(separator)) {
            return counter
        } else if (rb.isFull()) {
            output?.writeByte(rb[0])
        }
    }

    if (errorOnEof) {
        error("Read to the end of input without encountering ${separator.decodeToString()}")
    } else {
        for (i in 1 until rb.size) {
            output?.writeByte(rb[i])
        }
        counter += (rb.size - 1)
        return counter
    }
}

/**
 * Discard all bytes until [separator] is encountered. Separator is discarded sa well.
 * Return the total number of bytes read.
 */
public fun Source.discardWithSeparator(
    separator: ByteString,
    atMost: Int = Int.MAX_VALUE,
    errorOnEof: Boolean = false,
): Int = readWithSeparatorTo(null, separator, atMost, errorOnEof)

/**
 * Discard all symbol until newline is discovered. Carriage return is not discarded.
 */
public fun Source.discardLine(
    atMost: Int = Int.MAX_VALUE,
    errorOnEof: Boolean = false,
): Int = discardWithSeparator("\n".encodeToByteString(), atMost, errorOnEof)


/**
 * A [Source] based on [ByteArray]
 */
internal class ByteArraySource(
    private val byteArray: ByteArray,
    private val offset: Int = 0,
    private val size: Int = byteArray.size - offset,
) : RawSource {

    init {
        require(offset >= 0) { "Offset must be positive" }
        require(offset + size <= byteArray.size) { "End index is ${offset + size}, but the array size is ${byteArray.size}" }
    }

    private var pointer = offset

    override fun close() {
        // Do nothing
    }

    override fun readAtMostTo(sink: Buffer, byteCount: Long): Long {
        if (pointer == offset + size) return -1
        val byteRead = min(byteCount.toInt(), (size + offset - pointer))
        sink.write(byteArray, pointer, pointer + byteRead)
        pointer += byteRead
        return byteRead.toLong()
    }
}

/**
 * A [Source] based on [String]
 */
public class StringSource(
    public val string: String,
    public val offset: Int = 0,
    public val size: Int = string.length - offset,
) : RawSource {

    private var pointer = offset

    override fun close() {
        // Do nothing
    }

    override fun readAtMostTo(sink: Buffer, byteCount: Long): Long {
        if (pointer == offset + size) return -1
        val byteRead = min(byteCount.toInt(), (size + offset - pointer))
        sink.writeString(string, pointer, pointer + byteRead)
        pointer += byteRead
        return byteRead.toLong()
    }
}

public fun Sink.writeDouble(value: Double) {
    writeLong(value.toBits())
}

public fun Source.readDouble(): Double = Double.fromBits(readLong())

public fun Sink.writeFloat(value: Float) {
    writeInt(value.toBits())
}

public fun Source.readFloat(): Float = Float.fromBits(readInt())




© 2015 - 2024 Weber Informatics LLC | Privacy Policy