mirror of
https://github.com/crazy-max/diun.git
synced 2024-12-22 19:38:28 +00:00
174 lines
4.0 KiB
Go
174 lines
4.0 KiB
Go
// Package csvvalue provides an efficient parser for a single-line CSV value.
// It is more efficient than the standard library csv package when parsing many
// small values. For multi-line CSV parsing, the standard library is recommended.
package csvvalue
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"errors"
|
|
"io"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
|
|
|
|
var defaultParser = NewParser()
|
|
|
|
// Fields parses the line with default parser and returns
|
|
// slice of fields for the record. If dst is nil, a new slice is allocated.
|
|
func Fields(inp string, dst []string) ([]string, error) {
|
|
return defaultParser.Fields(inp, dst)
|
|
}
|
|
|
|
// Parser parses a single line holding one CSV record.
type Parser struct {
	// Comma is the field delimiter. Fields rejects the NUL rune, the double
	// quote, carriage return, newline and invalid runes.
	Comma rune

	// LazyQuotes, when set, lets a quote appear inside an unquoted field and
	// lets a non-doubled quote appear inside a quoted field.
	LazyQuotes bool

	// TrimLeadingSpace, when set, skips leading white space (as defined by
	// unicode.IsSpace) at the start of every field.
	TrimLeadingSpace bool
}
|
|
|
|
// NewParser returns a new Parser with default settings.
|
|
func NewParser() *Parser {
|
|
return &Parser{Comma: ','}
|
|
}
|
|
|
|
// Fields parses the line and returns slice of fields for the record.
|
|
// If dst is nil, a new slice is allocated.
|
|
// For backward compatibility, a trailing newline is allowed.
|
|
func (r *Parser) Fields(line string, dst []string) ([]string, error) {
|
|
if !validDelim(r.Comma) {
|
|
return nil, errInvalidDelim
|
|
}
|
|
|
|
if cap(dst) == 0 {
|
|
// imprecise estimate, strings.Count is fast
|
|
dst = make([]string, 0, 1+strings.Count(line, string(r.Comma)))
|
|
} else {
|
|
dst = dst[:0]
|
|
}
|
|
|
|
const quoteLen = len(`"`)
|
|
var (
|
|
pos int
|
|
commaLen = utf8.RuneLen(r.Comma)
|
|
trim = r.TrimLeadingSpace
|
|
)
|
|
|
|
// allow trailing newline for compatibility
|
|
if n := len(line); n > 0 && line[n-1] == '\n' {
|
|
if n > 1 && line[n-2] == '\r' {
|
|
line = line[:n-2]
|
|
} else {
|
|
line = line[:n-1]
|
|
}
|
|
}
|
|
|
|
if len(line) == 0 {
|
|
return nil, io.EOF
|
|
}
|
|
|
|
parseField:
|
|
for {
|
|
if trim {
|
|
i := strings.IndexFunc(line, func(r rune) bool {
|
|
return !unicode.IsSpace(r)
|
|
})
|
|
if i < 0 {
|
|
i = len(line)
|
|
}
|
|
line = line[i:]
|
|
pos += i
|
|
}
|
|
if len(line) == 0 || line[0] != '"' {
|
|
// Non-quoted string field
|
|
i := strings.IndexRune(line, r.Comma)
|
|
var field string
|
|
if i >= 0 {
|
|
field = line[:i]
|
|
} else {
|
|
field = line
|
|
}
|
|
// Check to make sure a quote does not appear in field.
|
|
if !r.LazyQuotes {
|
|
if j := strings.IndexRune(field, '"'); j >= 0 {
|
|
return nil, parseErr(pos+j, csv.ErrBareQuote)
|
|
}
|
|
}
|
|
dst = append(dst, field)
|
|
if i >= 0 {
|
|
line = line[i+commaLen:]
|
|
pos += i + commaLen
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
// Quoted string field
|
|
line = line[quoteLen:]
|
|
pos += quoteLen
|
|
halfOpen := false
|
|
for {
|
|
i := strings.IndexRune(line, '"')
|
|
if i >= 0 {
|
|
// Hit next quote.
|
|
if !halfOpen {
|
|
dst = append(dst, line[:i])
|
|
} else {
|
|
appendToLast(dst, line[:i])
|
|
}
|
|
halfOpen = false
|
|
line = line[i+quoteLen:]
|
|
pos += i + quoteLen
|
|
switch rn := nextRune(line); {
|
|
case rn == '"':
|
|
// `""` sequence (append quote).
|
|
appendToLast(dst, "\"")
|
|
line = line[quoteLen:]
|
|
pos += quoteLen
|
|
halfOpen = true
|
|
case rn == r.Comma:
|
|
// `",` sequence (end of field).
|
|
line = line[commaLen:]
|
|
pos += commaLen
|
|
continue parseField
|
|
case len(line) == 0:
|
|
break parseField
|
|
case r.LazyQuotes:
|
|
// `"` sequence (bare quote).
|
|
appendToLast(dst, "\"")
|
|
halfOpen = true
|
|
default:
|
|
// `"*` sequence (invalid non-escaped quote).
|
|
return nil, parseErr(pos-quoteLen, csv.ErrQuote)
|
|
}
|
|
} else {
|
|
if !r.LazyQuotes {
|
|
return nil, parseErr(pos, csv.ErrQuote)
|
|
}
|
|
// Hit end of line (copy all data so far).
|
|
dst = append(dst, line)
|
|
break parseField
|
|
}
|
|
}
|
|
}
|
|
return dst, nil
|
|
}
|
|
|
|
// validDelim reports whether r can serve as a field delimiter: it must be a
// valid rune other than NUL, the double quote, carriage return, newline and
// the Unicode replacement character.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
|
|
|
|
// appendToLast concatenates s onto the final element of dst in place.
// dst must not be empty.
func appendToLast(dst []string, s string) {
	last := len(dst) - 1
	dst[last] = dst[last] + s
}
|
|
|
|
// nextRune decodes and returns the first rune of b. It yields utf8.RuneError
// when b is empty or starts with an invalid UTF-8 sequence.
func nextRune(b string) rune {
	if b == "" {
		return utf8.RuneError
	}
	first, _ := utf8.DecodeRuneInString(b)
	return first
}
|
|
|
|
// parseErr builds a *csv.ParseError for err located at the zero-based byte
// offset pos. The line numbers are always 1 and the column is pos+1.
func parseErr(pos int, err error) error {
	pe := new(csv.ParseError)
	pe.StartLine, pe.Line = 1, 1
	pe.Column = pos + 1
	pe.Err = err
	return pe
}
|