Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ boolean isDelimiter(final int ch) throws IOException {
isLastTokenDelimiter = true;
return true;
}
Arrays.fill(delimiterBuf, '\0');
reader.peek(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
if (delimiterBuf[i] != delimiter[i + 1]) {
Expand Down Expand Up @@ -274,7 +275,6 @@ Token nextToken(final Token token) throws IOException {
token.type = Token.Type.COMMENT;
return token;
}
Arrays.fill(delimiterBuf, '\0');
// Important: make sure a new char gets consumed in each iteration
while (token.type == Token.Type.INVALID) {
// ignore whitespaces at beginning of a token
Expand Down
15 changes: 15 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,21 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}

/**
* A truncated multi-character delimiter at EOF must not be completed from the look-ahead buffer left dirty by an
* earlier non-matching peek in the same token.
*/
@Test
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
try (CSVParser parser = format.parse(new StringReader("x[a][|"))) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rootvector2

Thank you for your update.

In the future, please extract magic strings like "x[a][|" into a local variable, so that the input and the assertion below are guaranteed to use the same value and nobody has to check whether they differ.

final CSVRecord record = parser.nextRecord();
assertEquals("x[a][|", record.get(0));
assertEquals(1, record.size());
}
}

@Test
void testProvidedHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/org/apache/commons/csv/LexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,19 @@ void testPartialMultiCharacterDelimiterAtEOF() throws IOException {
}
}

/**
* A truncated multi-character delimiter at EOF must not be accepted by reusing the look-ahead buffer left dirty by an
* earlier non-matching peek in the same token (CSV-324 only cleared the buffer once per token).
*/
@Test
void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get();
// The "[a]" peek leaves ']' in the look-ahead buffer; the trailing "[|" must not match "[|]".
try (Lexer lexer = createLexer("x[a][|", format)) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rootvector2

Thank you for your update.

In the future, please extract magic strings like "x[a][|" into a local variable, so that the input and the assertion below are guaranteed to use the same value and nobody has to check whether they differ.

assertNextToken(EOF, "x[a][|", lexer);
}
}

@Test
void testReadEscapeBackspace() throws IOException {
try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
Expand Down
Loading