//
// Copyright (c) 2010, Brian Frank and Andy Frank
// Licensed under the Academic Free License version 3.0
//
// History:
// 19 Mar 10 Brian Frank Creation
//
**
** CsvInStream is used to read delimiter-separated values
** as specified by RFC 4180. Format details:
** - rows are delimited by a newline
** - cells are separated by `delimiter` char
** - cells may be quoted with '"' character
** - quoted cells may contain the delimiter
** - quoted cells may contain newlines (always normalized to "\n")
** - quoted cells must escape '"' with '""'
** - the `trim` flag trims leading/trailing whitespace from non-quoted
** cells (note that RFC 4180 specifies that whitespace is significant)
**
** Also see `CsvOutStream`.
**
@Js
class CsvInStream : InStream
{
**
** Wrap the underlying input stream.
**
new make(InStream in) : super(in) {}
**
** Delimiter character; defaults to comma.
**
Int delimiter := ','
**
** Configures whether unqualified whitespace around a cell
** is automatically trimmed. If a field is enclosed by
** quotes then it is never trimmed.
**
Bool trim := true
**
** Read the entire table of rows into memory.
** The input stream is guaranteed to be closed upon completion.
**
Str[][] readAllRows()
{
rows := Str[][,]
eachRow |row| { rows.add(row) }
return rows
}
**
** Iterate through all the lines parsing each one into
** delimited-separated strings and calling the given
** callback functions. The input stream is guaranteed
** to be closed upon completion.
**
Void eachRow(|Str[]| f)
{
try
{
while (true)
{
row := readRow
if (row == null) break
f(row)
}
}
finally close
}
**
** Read the next line as a row of delimiter-separated
** strings. Return null if at end of stream.
**
virtual Str[]? readRow()
{
// read in next line
this.line = readLine(null)
if (line == null) return null
// allocate cells based on last width
cells := Str[,]
cells.capacity = rowWidth
// parse the cells
this.pos = 0
while (pos < line.size) cells.add(parseCell)
// handle if last character was delimiter
if (!line.isEmpty && line[-1] == delimiter) cells.add("")
// save away width and return cells
this.rowWidth = cells.size
return cells
}
private Str parseCell()
{
// if trim enabled, skip any leading whitespace
if (trim)
{
while(pos < line.size && line[pos].isSpace) pos++
if (pos >= line.size) return ""
}
// parse quoted or non-quoted cell
if (line[pos] != '"')
return parseNonQuotedCell
else
return parseQuotedCell
}
private Str parseNonQuotedCell()
{
// find pos of delimiter or end of line
start := pos
while (pos < line.size && line[pos] != delimiter) ++pos
// if trimming, then backtrack to find last non-whitespace
end := pos - 1
if (trim)
{
while (end > start && line[end].isSpace) --end
}
// skip delimiter and return result
++pos
if (end < start) return ""
return line[start..end]
}
private Str parseQuotedCell()
{
s := StrBuf()
pos += 1 // skip opening quote
while (true)
{
// next char
ch := line.getSafe(pos++, 0)
// if we've reached the end of a line, then this quoted
// cell spans multiple lines so consume all empty lines
// and the next non-empty line
while (ch == 0)
{
this.pos = 0
this.line = readLine
if (line == null) throw IOErr("Unexpected end of file in multi-line quoted cell")
s.addChar('\n')
ch = line.getSafe(pos++, 0)
}
// if not quote, add it to our cell string
if (ch != '"') { s.addChar(ch); continue }
// if its "" then add ", otherwise end of cell
ch = line.getSafe(pos++)
if (ch == '"') { s.addChar(ch); continue }
// skip everything to next delimiter
while (ch != delimiter) ch = line.getSafe(pos++, delimiter)
break
}
return s.toStr
}
private Int rowWidth := 10
private Str? line
private Int pos
}