Parsing CSV data when a field has commas

Problem

I am parsing CSV data and a string field contains commas.

For example:

TITLE,YEAR,RATING
Prisoners,2013,R
I, Tonya,2017,R
The Wolf Of Wall Street,2013,R

The split string for the 3rd line will produce the array:

[I, Tonya, 2017, R]

The solution is to merge the slots that belong to the string field. In the example above, you would merge indexes 0 and 1.

Code

using System;
using System.Collections.Generic;
using System.Linq;

namespace ParsingCSVWhenFieldHasCommas
{

    public class MovieCSVParser
    {
        private const int EXPECTED_COLUMNS = 3;

        public List<Movie> ParseMoviesCSV(string MoviesCSV, bool HasHeader)
        {
            var movies = new List<Movie>();
            foreach(var line in SplitByNewLine(MoviesCSV, HasHeader))
            {
                var columns = line.Split(',').ToList();

                if (columns.Count > EXPECTED_COLUMNS)
                {
                    MergeFieldWithCommas(columns);
                }

                movies.Add(new Movie()
                {
                    Title = columns[0],
                    Rating = columns[1],
                    Year = columns[2]
                });
            }
            return movies;
        }

        private void MergeFieldWithCommas(List<string> columns)
        {
            int mergeCount = 1 + (columns.Count - EXPECTED_COLUMNS);
            var mergedString = string.Join(",", columns.Take(mergeCount));
            columns.RemoveRange(0, mergeCount);
            columns.Insert(0, mergedString);
        }
        private IEnumerable<string> SplitByNewLine(string toSplit, bool HasHeader)
        {
            return toSplit.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries).Skip(HasHeader ? 1 : 0);
        }
    }
    public class Movie
    {
        public string Title { get; set; }
        public string Rating { get; set; }
        public string Year { get; set; }
    }
}

Tests

using Microsoft.VisualStudio.TestTools.UnitTesting;
using System.Linq;
using System.Text;

namespace ParsingCSVWhenFieldHasCommas.Tests
{
    [TestClass()]
    public class ParserTests
    {
        [TestMethod()]
        public void WhenTitleDoesNotHaveCommas_ReturnsTitleWithoutCommas()
        {
            //arrange
            string movieTitle = "Prisoners";
            string csv = $"{movieTitle},2013,R";
            MovieCSVParser parser = new MovieCSVParser();

            //act
            var movie = parser.ParseMoviesCSV(csv, HasHeader: false).First();


            //assert
            Assert.AreEqual(movieTitle, movie.Title);
        }
        [TestMethod()]
        public void WhenTitleHasCommas_ReturnsTitleWithCommas()
        {
            //arrange
            string movieTitle = "I, Tonya";
            string csv = $"{movieTitle},2017,R";
            MovieCSVParser parser = new MovieCSVParser();

            //act
            var movie = parser.ParseMoviesCSV(csv, HasHeader: false).First();


            //assert
            Assert.AreEqual(movieTitle, movie.Title);
        }
        [TestMethod()]
        public void WhenHasHeader_DoesNotParseTheHeader()
        {
            //arrange
            StringBuilder csvWithHeader = new StringBuilder();
            csvWithHeader.AppendLine("TITLE,YEAR,RATING");
            string movieTitle = "Prisoners";
            csvWithHeader.AppendLine($"{movieTitle},2013,R");
            MovieCSVParser parser = new MovieCSVParser();

            //act
            var movies = parser.ParseMoviesCSV(csvWithHeader.ToString(), HasHeader: true);


            //assert
            Assert.AreEqual(1, movies.Count);
            Assert.AreEqual(movieTitle, movies.First().Title);
        }
    }
}

Leave a Comment