Black Lives Matter. Support the Equal Justice Initiative.

Source file src/encoding/csv/reader_test.go

Documentation: encoding/csv

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type readTest struct {
    18  	Name      string
    19  	Input     string
    20  	Output    [][]string
    21  	Positions [][][2]int
    22  	Errors    []error
    23  
    24  	// These fields are copied into the Reader
    25  	Comma              rune
    26  	Comment            rune
    27  	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
    28  	FieldsPerRecord    int
    29  	LazyQuotes         bool
    30  	TrimLeadingSpace   bool
    31  	ReuseRecord        bool
    32  }
    33  
// readTests is the table of cases run by TestRead.
//
// In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
// the start of a field, a record boundary and the position of an error respectively.
// They are removed before parsing (see makePositions) and are used to verify the
// position information reported by FieldPos.
//
// Errors, when present, is indexed by record number; a nil entry means that
// record is expected to be read without error. Cases that leave Comma at zero
// keep the Comma chosen by NewReader, and cases that leave UseFieldsPerRecord
// false run with FieldsPerRecord set to -1.
var readTests = []readTest{{
	Name:   "Simple",
	Input:  "§a,§b,§c\n",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "CRLF",
	Input:  "§a,§b\r\n¶§c,§d\r\n",
	Output: [][]string{{"a", "b"}, {"c", "d"}},
}, {
	Name:   "BareCR",
	Input:  "§a,§b\rc,§d\r\n",
	Output: [][]string{{"a", "b\rc", "d"}},
}, {
	Name: "RFC4180test",
	Input: `§#field1,§field2,§field3
¶§"aaa",§"bb
b",§"ccc"
¶§"a,a",§"b""bb",§"ccc"
¶§zzz,§yyy,§xxx
`,
	Output: [][]string{
		{"#field1", "field2", "field3"},
		{"aaa", "bb\nb", "ccc"},
		{"a,a", `b"bb`, "ccc"},
		{"zzz", "yyy", "xxx"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:   "NoEOLTest",
	Input:  "§a,§b,§c",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "Semicolon",
	Input:  "§a;§b;§c\n",
	Output: [][]string{{"a", "b", "c"}},
	Comma:  ';',
}, {
	Name: "MultiLine",
	Input: `§"two
line",§"one line",§"three
line
field"`,
	Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
}, {
	Name:  "BlankLine",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
}, {
	Name:  "BlankLineFieldCount",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:             "TrimSpace",
	Input:            " §a,  §b,   §c\n",
	Output:           [][]string{{"a", "b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "LeadingSpace",
	Input:  "§ a,§  b,§   c\n",
	Output: [][]string{{" a", "  b", "   c"}},
}, {
	Name:    "Comment",
	Input:   "#1,2,3\n§a,§b,§c\n#comment",
	Output:  [][]string{{"a", "b", "c"}},
	Comment: '#',
}, {
	Name:   "NoComment",
	Input:  "§#1,§2,§3\n¶§a,§b,§c",
	Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
}, {
	Name:       "LazyQuotes",
	Input:      `§a "word",§"1"2",§a",§"b`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	LazyQuotes: true,
}, {
	Name:       "BareQuotes",
	Input:      `§a "word",§"1"2",§a"`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	LazyQuotes: true,
}, {
	Name:       "BareDoubleQuotes",
	Input:      `§a""b,§c`,
	Output:     [][]string{{`a""b`, `c`}},
	LazyQuotes: true,
}, {
	Name:   "BadDoubleQuotes",
	Input:  `§a∑""b,c`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:             "TrimQuote",
	Input:            ` §"a",§" b",§c`,
	Output:           [][]string{{"a", " b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "BadBareQuote",
	Input:  `§a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "BadTrailingQuote",
	Input:  `§"a word",b∑"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "ExtraneousQuote",
	Input:  `§"a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:               "BadFieldCount",
	Input:              "§a,§b,§c\n¶∑§d,§e",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCountMultiple",
	Input:              "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCount1",
	Input:              `§∑a,§b,§c`,
	Errors:             []error{&ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    2,
}, {
	Name:   "FieldCount",
	Input:  "§a,§b,§c\n¶§d,§e",
	Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, {
	Name:   "TrailingCommaEOF",
	Input:  "§a,§b,§c,§",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:   "TrailingCommaEOL",
	Input:  "§a,§b,§c,§\n",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:             "TrailingCommaSpaceEOF",
	Input:            "§a,§b,§c, §",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaSpaceEOL",
	Input:            "§a,§b,§c, §\n",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaLine3",
	Input:            "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
	Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	TrimLeadingSpace: true,
}, {
	Name:   "NotTrailingComma3",
	Input:  "§a,§b,§c,§ \n",
	Output: [][]string{{"a", "b", "c", " "}},
}, {
	Name: "CommaFieldTest",
	Input: `§x,§y,§z,§w
¶§x,§y,§z,§
¶§x,§y,§,§
¶§x,§,§,§
¶§,§,§,§
¶§"x",§"y",§"z",§"w"
¶§"x",§"y",§"z",§""
¶§"x",§"y",§"",§""
¶§"x",§"",§"",§""
¶§"",§"",§"",§""
`,
	Output: [][]string{
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
	},
}, {
	Name:  "TrailingCommaIneffective1",
	Input: "§a,§b,§\n¶§c,§d,§e",
	Output: [][]string{
		{"a", "b", ""},
		{"c", "d", "e"},
	},
	TrimLeadingSpace: true,
}, {
	Name:  "ReadAllReuseRecord",
	Input: "§a,§b\n¶§c,§d",
	Output: [][]string{
		{"a", "b"},
		{"c", "d"},
	},
	ReuseRecord: true,
}, {
	Name:   "StartLine1", // Issue 19019
	Input:  "§a,\"b\nc∑\"d,e",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "StartLine2",
	Input:  "§a,§b\n¶§\"d\n\n,e∑",
	Errors: []error{nil, &ParseError{Err: ErrQuote}},
	Output: [][]string{{"a", "b"}},
}, {
	Name:  "CRLFInQuotedField", // Issue 21201
	Input: "§A,§\"Hello\r\nHi\",§B\r\n",
	Output: [][]string{
		{"A", "Hello\nHi", "B"},
	},
}, {
	Name:   "BinaryBlobField", // Issue 19410
	Input:  "§x09\x41\xb4\x1c,§aktau",
	Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
}, {
	Name:   "TrailingCR",
	Input:  "§field1,§field2\r",
	Output: [][]string{{"field1", "field2"}},
}, {
	Name:   "QuotedTrailingCR",
	Input:  "§\"field\"\r",
	Output: [][]string{{"field"}},
}, {
	Name:   "QuotedTrailingCRCR",
	Input:  "§\"field∑\"\r\r",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "FieldCR",
	Input:  "§field\rfield\r",
	Output: [][]string{{"field\rfield"}},
}, {
	Name:   "FieldCRCR",
	Input:  "§field\r\rfield\r\r",
	Output: [][]string{{"field\r\rfield\r"}},
}, {
	Name:   "FieldCRCRLF",
	Input:  "§field\r\r\n¶§field\r\r\n",
	Output: [][]string{{"field\r"}, {"field\r"}},
}, {
	Name:   "FieldCRCRLFCR",
	Input:  "§field\r\r\n¶§\rfield\r\r\n\r",
	Output: [][]string{{"field\r"}, {"\rfield\r"}},
}, {
	Name:   "FieldCRCRLFCRCR",
	Input:  "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
	Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
}, {
	Name:  "MultiFieldCRCRLFCRCR",
	Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
	Output: [][]string{
		{"field1", "field2\r"},
		{"\r\rfield1", "field2\r"},
		{"\r\r", ""},
	},
}, {
	Name:             "NonASCIICommaAndComment",
	Input:            "§a£§b,c£ \t§d,e\n€ comment\n",
	Output:           [][]string{{"a", "b,c", "d,e"}},
	TrimLeadingSpace: true,
	Comma:            '£',
	Comment:          '€',
}, {
	Name:    "NonASCIICommaAndCommentWithQuotes",
	Input:   "§a€§\"  b,\"€§ c\nλ comment\n",
	Output:  [][]string{{"a", "  b,", " c"}},
	Comma:   '€',
	Comment: 'λ',
}, {
	// λ and θ start with the same byte.
	// This tests that the parser doesn't confuse such characters.
	Name:    "NonASCIICommaConfusion",
	Input:   "§\"abθcd\"λ§efθgh",
	Output:  [][]string{{"abθcd", "efθgh"}},
	Comma:   'λ',
	Comment: '€',
}, {
	Name:    "NonASCIICommentConfusion",
	Input:   "§λ\n¶§λ\nθ\n¶§λ\n",
	Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
	Comment: 'θ',
}, {
	Name:   "QuotedFieldMultipleLF",
	Input:  "§\"\n\n\n\n\"",
	Output: [][]string{{"\n\n\n\n"}},
}, {
	Name:  "MultipleCRLF",
	Input: "\r\n\r\n\r\n\r\n",
}, {
	// The implementation may read each line in several chunks if it doesn't fit entirely
	// in the read buffer, so we should test the code to handle that condition.
	Name:    "HugeLines",
	Input:   strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
	Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
	Comment: '#',
}, {
	Name:   "QuoteWithTrailingCRLF",
	Input:  "§\"foo∑\"bar\"\r\n",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyQuoteWithTrailingCRLF",
	Input:      "§\"foo\"bar\"\r\n",
	Output:     [][]string{{`foo"bar`}},
	LazyQuotes: true,
}, {
	Name:   "DoubleQuoteWithTrailingCRLF",
	Input:  "§\"foo\"\"bar\"\r\n",
	Output: [][]string{{`foo"bar`}},
}, {
	Name:   "EvenQuotes",
	Input:  `§""""""""`,
	Output: [][]string{{`"""`}},
}, {
	Name:   "OddQuotes",
	Input:  `§"""""""∑`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyOddQuotes",
	Input:      `§"""""""`,
	Output:     [][]string{{`"""`}},
	LazyQuotes: true,
}, {
	Name:   "BadComma1",
	Comma:  '\n',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma2",
	Comma:  '\r',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma3",
	Comma:  '"',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma4",
	Comma:  utf8.RuneError,
	Errors: []error{errInvalidDelim},
}, {
	Name:    "BadComment1",
	Comment: '\n',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment2",
	Comment: '\r',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment3",
	Comment: utf8.RuneError,
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadCommaComment",
	Comma:   'X',
	Comment: 'X',
	Errors:  []error{errInvalidDelim},
}}
   405  
   406  func TestRead(t *testing.T) {
   407  	newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int) {
   408  		positions, errPositions, input := makePositions(tt.Input)
   409  		r := NewReader(strings.NewReader(input))
   410  
   411  		if tt.Comma != 0 {
   412  			r.Comma = tt.Comma
   413  		}
   414  		r.Comment = tt.Comment
   415  		if tt.UseFieldsPerRecord {
   416  			r.FieldsPerRecord = tt.FieldsPerRecord
   417  		} else {
   418  			r.FieldsPerRecord = -1
   419  		}
   420  		r.LazyQuotes = tt.LazyQuotes
   421  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   422  		r.ReuseRecord = tt.ReuseRecord
   423  		return r, positions, errPositions
   424  	}
   425  
   426  	for _, tt := range readTests {
   427  		t.Run(tt.Name, func(t *testing.T) {
   428  			r, positions, errPositions := newReader(tt)
   429  			out, err := r.ReadAll()
   430  			if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
   431  				if !reflect.DeepEqual(err, wantErr) {
   432  					t.Fatalf("ReadAll() error mismatch:\ngot  %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
   433  				}
   434  				if out != nil {
   435  					t.Fatalf("ReadAll() output:\ngot  %q\nwant nil", out)
   436  				}
   437  			} else {
   438  				if err != nil {
   439  					t.Fatalf("unexpected Readall() error: %v", err)
   440  				}
   441  				if !reflect.DeepEqual(out, tt.Output) {
   442  					t.Fatalf("ReadAll() output:\ngot  %q\nwant %q", out, tt.Output)
   443  				}
   444  			}
   445  
   446  			// Check field and error positions.
   447  			r, _, _ = newReader(tt)
   448  			for recNum := 0; ; recNum++ {
   449  				rec, err := r.Read()
   450  				var wantErr error
   451  				if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
   452  					wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
   453  				} else if recNum >= len(tt.Output) {
   454  					wantErr = io.EOF
   455  				}
   456  				if !reflect.DeepEqual(err, wantErr) {
   457  					t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
   458  				}
   459  				// ErrFieldCount is explicitly non-fatal.
   460  				if err != nil && !errors.Is(err, ErrFieldCount) {
   461  					if recNum < len(tt.Output) {
   462  						t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
   463  					}
   464  					break
   465  				}
   466  				if got, want := rec, tt.Output[recNum]; !reflect.DeepEqual(got, want) {
   467  					t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
   468  				}
   469  				pos := positions[recNum]
   470  				if len(pos) != len(rec) {
   471  					t.Fatalf("mismatched position length at record %d", recNum)
   472  				}
   473  				for i := range rec {
   474  					line, col := r.FieldPos(i)
   475  					if got, want := [2]int{line, col}, pos[i]; got != want {
   476  						t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
   477  					}
   478  				}
   479  			}
   480  		})
   481  	}
   482  }
   483  
   484  // firstError returns the first non-nil error in errs,
   485  // with the position adjusted according to the error's
   486  // index inside positions.
   487  func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
   488  	for i, err := range errs {
   489  		if err != nil {
   490  			return errorWithPosition(err, i, positions, errPositions)
   491  		}
   492  	}
   493  	return nil
   494  }
   495  
   496  func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
   497  	parseErr, ok := err.(*ParseError)
   498  	if !ok {
   499  		return err
   500  	}
   501  	if recNum >= len(positions) {
   502  		panic(fmt.Errorf("no positions found for error at record %d", recNum))
   503  	}
   504  	errPos, ok := errPositions[recNum]
   505  	if !ok {
   506  		panic(fmt.Errorf("no error position found for error at record %d", recNum))
   507  	}
   508  	parseErr1 := *parseErr
   509  	parseErr1.StartLine = positions[recNum][0][0]
   510  	parseErr1.Line = errPos[0]
   511  	parseErr1.Column = errPos[1]
   512  	return &parseErr1
   513  }
   514  
   515  // makePositions returns the expected field positions of all
   516  // the fields in text, the positions of any errors, and the text with the position markers
   517  // removed.
   518  //
   519  // The start of each field is marked with a § symbol;
   520  // CSV lines are separated by ¶ symbols;
   521  // Error positions are marked with ∑ symbols.
   522  func makePositions(text string) ([][][2]int, map[int][2]int, string) {
   523  	buf := make([]byte, 0, len(text))
   524  	var positions [][][2]int
   525  	errPositions := make(map[int][2]int)
   526  	line, col := 1, 1
   527  	recNum := 0
   528  
   529  	for len(text) > 0 {
   530  		r, size := utf8.DecodeRuneInString(text)
   531  		switch r {
   532  		case '\n':
   533  			line++
   534  			col = 1
   535  			buf = append(buf, '\n')
   536  		case '§':
   537  			if len(positions) == 0 {
   538  				positions = append(positions, [][2]int{})
   539  			}
   540  			positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
   541  		case '¶':
   542  			positions = append(positions, [][2]int{})
   543  			recNum++
   544  		case '∑':
   545  			errPositions[recNum] = [2]int{line, col}
   546  		default:
   547  			buf = append(buf, text[:size]...)
   548  			col += size
   549  		}
   550  		text = text[size:]
   551  	}
   552  	return positions, errPositions, string(buf)
   553  }
   554  
   555  // nTimes is an io.Reader which yields the string s n times.
   556  type nTimes struct {
   557  	s   string
   558  	n   int
   559  	off int
   560  }
   561  
   562  func (r *nTimes) Read(p []byte) (n int, err error) {
   563  	for {
   564  		if r.n <= 0 || r.s == "" {
   565  			return n, io.EOF
   566  		}
   567  		n0 := copy(p, r.s[r.off:])
   568  		p = p[n0:]
   569  		n += n0
   570  		r.off += n0
   571  		if r.off == len(r.s) {
   572  			r.off = 0
   573  			r.n--
   574  		}
   575  		if len(p) == 0 {
   576  			return
   577  		}
   578  	}
   579  }
   580  
   581  // benchmarkRead measures reading the provided CSV rows data.
   582  // initReader, if non-nil, modifies the Reader before it's used.
   583  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   584  	b.ReportAllocs()
   585  	r := NewReader(&nTimes{s: rows, n: b.N})
   586  	if initReader != nil {
   587  		initReader(r)
   588  	}
   589  	for {
   590  		_, err := r.Read()
   591  		if err == io.EOF {
   592  			break
   593  		}
   594  		if err != nil {
   595  			b.Fatal(err)
   596  		}
   597  	}
   598  }
   599  
   600  const benchmarkCSVData = `x,y,z,w
   601  x,y,z,
   602  x,y,,
   603  x,,,
   604  ,,,
   605  "x","y","z","w"
   606  "x","y","z",""
   607  "x","y","",""
   608  "x","","",""
   609  "","","",""
   610  `
   611  
   612  func BenchmarkRead(b *testing.B) {
   613  	benchmarkRead(b, nil, benchmarkCSVData)
   614  }
   615  
   616  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   617  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   618  }
   619  
   620  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   621  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   622  }
   623  
   624  func BenchmarkReadLargeFields(b *testing.B) {
   625  	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   626  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   627  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   628  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   629  `, 3))
   630  }
   631  
   632  func BenchmarkReadReuseRecord(b *testing.B) {
   633  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   634  }
   635  
   636  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   637  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   638  }
   639  
   640  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   641  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   642  }
   643  
   644  func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
   645  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   646  xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
   647  ,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   648  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
   649  `, 3))
   650  }
   651  

View as plain text