Parsing CSV data when a field has commas

Table of Contents

Problem

I am parsing CSV data and a string field contains commas.

For example:

TITLE,YEAR,RATING Prisoners,2013,R I, Tonya,2017,R The Wolf Of Wall Street,2013,R

The split string for the 3rd line will produce the array:

[I, Tonya, 2017, R]

The solution is to merge the slots that belong to the string field. In the example above, you would merge indexes 0 and 1.

Code

using System; using System.Collections.Generic; using System.Linq; namespace ParsingCSVWhenFieldHasCommas { public class MovieCSVParser { private const int EXPECTED_COLUMNS = 3; public List<Movie> ParseMoviesCSV(string MoviesCSV, bool HasHeader) { var movies = new List<Movie>(); foreach(var line in SplitByNewLine(MoviesCSV, HasHeader)) { var columns = line.Split(',').ToList(); if (columns.Count > EXPECTED_COLUMNS) { MergeFieldWithCommas(columns); } movies.Add(new Movie() { Title = columns[0], Rating = columns[1], Year = columns[2] }); } return movies; } private void MergeFieldWithCommas(List<string> columns) { int mergeCount = 1 + (columns.Count - EXPECTED_COLUMNS); var mergedString = string.Join(",", columns.Take(mergeCount)); columns.RemoveRange(0, mergeCount); columns.Insert(0, mergedString); } private IEnumerable<string> SplitByNewLine(string toSplit, bool HasHeader) { return toSplit.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries).Skip(HasHeader ? 1 : 0); } } public class Movie { public string Title { get; set; } public string Rating { get; set; } public string Year { get; set; } } }

Tests

using Microsoft.VisualStudio.TestTools.UnitTesting; using System.Linq; using System.Text; namespace ParsingCSVWhenFieldHasCommas.Tests { [TestClass()] public class ParserTests { [TestMethod()] public void WhenTitleDoesNotHaveCommas_ReturnsTitleWithoutCommas() { //arrange string movieTitle = "Prisoners"; string csv = $"{movieTitle},2013,R"; MovieCSVParser parser = new MovieCSVParser(); //act var movie = parser.ParseMoviesCSV(csv, HasHeader: false).First(); //assert Assert.AreEqual(movieTitle, movie.Title); } [TestMethod()] public void WhenTitleHasCommas_ReturnsTitleWithCommas() { //arrange string movieTitle = "I, Tonya"; string csv = $"{movieTitle},2017,R"; MovieCSVParser parser = new MovieCSVParser(); //act var movie = parser.ParseMoviesCSV(csv, HasHeader: false).First(); //assert Assert.AreEqual(movieTitle, movie.Title); } [TestMethod()] public void WhenHasHeader_DoesNotParseTheHeader() { //arrange StringBuilder csvWithHeader = new StringBuilder(); csvWithHeader.AppendLine("TITLE,YEAR,RATING"); string movieTitle = "Prisoners"; csvWithHeader.AppendLine($"{movieTitle},2013,R"); MovieCSVParser parser = new MovieCSVParser(); //act var movies = parser.ParseMoviesCSV(csvWithHeader.ToString(), HasHeader: true); //assert Assert.AreEqual(1, movies.Count); Assert.AreEqual(movieTitle, movies.First().Title); } } }

Leave a Comment