Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements for Source Line-Start-End Functionality (LC-196) #1184

Merged
merged 4 commits into from
Apr 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ object PropsKeyEnum extends Enum[PropsKeyEntry] {
case object ReadStartLine extends PropsKeyEntry("read.text.start.line")
case object ReadEndLine extends PropsKeyEntry("read.text.end.line")

// Boolean KCQL property for the start/end line read mode. When it is set to true, a record that is
// still open when the end of the file is reached is treated as complete instead of being dropped.
case object ReadLastEndLineMissing extends PropsKeyEntry("read.text.last.end.line.missing")
stheppi marked this conversation as resolved.
Show resolved Hide resolved

case object ReadTrimLine extends PropsKeyEntry("read.text.trim")

case object StoreEnvelope extends PropsKeyEntry(DataStorageSettings.StoreEnvelopeKey)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ object ReadTextMode {
startLine <- props.getString(PropsKeyEnum.ReadStartLine)
endLine <- props.getString(PropsKeyEnum.ReadEndLine)
trim <- props.getOptionalBoolean(PropsKeyEnum.ReadTrimLine).toOption.flatten.orElse(Some(false))
} yield StartEndLineReadTextMode(startLine, endLine, trim)
lastEndLineMissing <- props.getOptionalBoolean(PropsKeyEnum.ReadLastEndLineMissing).toOption.flatten.orElse(
Some(false),
)
} yield StartEndLineReadTextMode(startLine, endLine, trim, lastEndLineMissing)
case None => Option.empty
}
}
Expand All @@ -76,7 +79,8 @@ case class StartEndTagReadTextMode(startTag: String, endTag: String, buffer: Int
}
}

case class StartEndLineReadTextMode(startLine: String, endLine: String, trim: Boolean) extends ReadTextMode {
case class StartEndLineReadTextMode(startLine: String, endLine: String, trim: Boolean, lastEndLineMissing: Boolean)
extends ReadTextMode {
override def createStreamReader(
input: InputStream,
): CloudDataIterator[String] = {
Expand All @@ -85,6 +89,7 @@ case class StartEndLineReadTextMode(startLine: String, endLine: String, trim: Bo
startLine,
endLine,
trim,
lastEndLineMissing,
)
new CustomTextStreamReader(() => lineReader.next(), () => lineReader.close())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@ import scala.jdk.CollectionConverters.MapHasAsScala
object CloudSourcePropsSchema {

private[source] val keys = Map[PropsKeyEntry, PropsSchema](
ReadTextMode -> EnumPropsSchema(ReadTextModeEnum),
ReadRegex -> StringPropsSchema,
ReadStartTag -> StringPropsSchema,
ReadEndTag -> StringPropsSchema,
ReadStartLine -> StringPropsSchema,
ReadEndLine -> StringPropsSchema,
BufferSize -> IntPropsSchema,
ReadTrimLine -> BooleanPropsSchema,
StoreEnvelope -> BooleanPropsSchema,
ReadTextMode -> EnumPropsSchema(ReadTextModeEnum),
ReadRegex -> StringPropsSchema,
ReadStartTag -> StringPropsSchema,
ReadEndTag -> StringPropsSchema,
ReadStartLine -> StringPropsSchema,
ReadEndLine -> StringPropsSchema,
ReadLastEndLineMissing -> BooleanPropsSchema,
BufferSize -> IntPropsSchema,
ReadTrimLine -> BooleanPropsSchema,
StoreEnvelope -> BooleanPropsSchema,
)

val schema = KcqlPropsSchema(PropsKeyEnum, keys)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,20 @@ class ReadTextModeTestFormatSelection extends AnyFlatSpec with Matchers {
PropsKeyEnum.ReadEndLine.entryName -> "",
),
),
) should be(Some(StartEndLineReadTextMode("SSM", "", false)))
) should be(Some(StartEndLineReadTextMode("SSM", "", false, false)))
}

// Verifies that enabling the last-end-line-missing property is carried through to the
// StartEndLineReadTextMode produced from the properties, while trim keeps its default of false.
"ReadTextMode" should "set the end of line missing" in {
  val settings = Map(
    PropsKeyEnum.ReadTextMode.entryName           -> ReadTextModeEnum.StartEndLine.entryName,
    PropsKeyEnum.ReadStartLine.entryName          -> "SSM",
    PropsKeyEnum.ReadEndLine.entryName            -> "",
    PropsKeyEnum.ReadLastEndLineMissing.entryName -> "true",
  )
  val expected = StartEndLineReadTextMode("SSM", "", trim = false, lastEndLineMissing = true)

  ReadTextMode(readProps(settings)) should be(Some(expected))
}

"ReadTextMode" should "return start and end line when configured with trim enabled" in {
Expand All @@ -113,7 +126,7 @@ class ReadTextModeTestFormatSelection extends AnyFlatSpec with Matchers {
PropsKeyEnum.ReadTrimLine.entryName -> "true",
),
),
) should be(Some(StartEndLineReadTextMode("SSM", "", true)))
) should be(Some(StartEndLineReadTextMode("SSM", "", true, false)))
}

"ReadTextMode" should "return none when no start or end line is configured" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,19 @@ import java.io.InputStreamReader
* end is found. The start and end lines are included in the record.
* If the file ends and there is no end, the record is ignored, unless lastEndLineMissing is true, in which case the record is returned
*
* @param input
* @param start
* @param end
* @param input the input stream
* @param start the record is considered to start when a line matching start is found
* @param end the record is considered complete when a line matching end is found
* @param trim if true, the record is trimmed
* @param lastEndLineMissing if true, the record is considered complete when end of file is reached
*/
class LineStartLineEndReader(input: InputStream, start: String, end: String, trim: Boolean = false) extends LineReader {
class LineStartLineEndReader(
input: InputStream,
start: String,
end: String,
trim: Boolean = false,
lastEndLineMissing: Boolean = false,
) extends LineReader {
private val br = new BufferedReader(new InputStreamReader(input))

//Returns the next record or None if there are no more
Expand Down Expand Up @@ -60,10 +68,19 @@ class LineStartLineEndReader(input: InputStream, start: String, end: String, tri
builder.append(line)
line = br.readLine()
}
Option(line).map { _ =>
builder.append(System.lineSeparator())
builder.append(end)
builder.toString()
Option(line) match {
case Some(_) =>
builder.append(System.lineSeparator())
builder.append(end)
Some(builder.toString())
case None =>
if (lastEndLineMissing) {
builder.append(System.lineSeparator())
builder.append(end)
Some(builder.toString())
} else {
None
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -234,5 +234,58 @@ class LineStartLineEndReaderTest extends AnyFunSuite with Matchers {
|x""".stripMargin,
)
}

// Two records: the first is closed by the blank line (the end marker is the empty string),
// the second is still open at end of file and must be returned because lastEndLineMissing is set.
test("when lastEndLineMissing=true, return the record if the end line is missing") {
  val content =
    """
      |start
      |a
      |b
      |c
      |
      |start
      |x""".stripMargin
  val reader = new LineStartLineEndReader(
    createInputStream(content),
    "start",
    "",
    trim               = true,
    lastEndLineMissing = true,
  )

  val firstRecord =
    """start
      |a
      |b
      |c""".stripMargin
  val secondRecord =
    """start
      |x""".stripMargin

  reader.next() shouldBe Some(firstRecord)
  reader.next() shouldBe Some(secondRecord)
}

// The input never contains a blank line (the end marker is the empty string), so everything from
// the first "start" to end of file, including the second "start" line, is expected as one record.
test("when lastEndLineMissing=true, return the record if the end line is missing all file is a message") {
  val content =
    """
      |start
      |a
      |b
      |c
      |start
      |x""".stripMargin
  val reader = new LineStartLineEndReader(
    createInputStream(content),
    "start",
    "",
    trim               = true,
    lastEndLineMissing = true,
  )

  val wholeFileRecord =
    """start
      |a
      |b
      |c
      |start
      |x""".stripMargin

  reader.next() shouldBe Some(wholeFileRecord)
}
// Wraps the given text in an in-memory stream so a test can hand it to the reader under test.
private def createInputStream(data: String): InputStream = {
  val bytes = data.getBytes
  new ByteArrayInputStream(bytes)
}
}
Loading