When parsing CSV (Comma-Separated Values) strings in JavaScript, a naive `text.split(',')` breaks as soon as a quoted field itself contains a comma: the input `'string, duppi, du', 23, lala` should yield three values, not five. Here is a detailed, regex-based solution that handles this scenario:
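To ensure the input string is a valid CSV string, we first define a validation regex. It is shown here in free-spacing (verbose) notation with comments for readability; JavaScript regex literals have no such mode, so in actual JavaScript code the pattern must be written on a single line with the whitespace and comments removed: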
re_valid = r"""
# Validate a CSV string having single, double or un-quoted values.
# Free-spacing notation: whitespace and #-comments are not part of the
# pattern, so it must be compiled in verbose/extended mode. Each comment
# must stay on its own line, otherwise it swallows the rest of the pattern.
# Inside a quoted value, a backslash escapes the character that follows it.
^                                   # Anchor to start of string.
\s*                                 # Allow whitespace before value.
(?:                                 # Group for value alternatives.
  '[^'\\]*(?:\\[\S\s][^'\\]*)*'     # Either Single quoted string,
| "[^"\\]*(?:\\[\S\s][^"\\]*)*"     # or Double quoted string,
| [^,'"\s\\]*(?:\s+[^,'"\s\\]+)*    # or Non-comma, non-quote stuff.
)                                   # End group of value alternatives.
\s*                                 # Allow whitespace after value.
(?:                                 # Zero or more additional values
  ,                                 # Values separated by a comma.
  \s*                               # Allow whitespace before value.
  (?:                               # Group for value alternatives.
    '[^'\\]*(?:\\[\S\s][^'\\]*)*'   # Either Single quoted string,
  | "[^"\\]*(?:\\[\S\s][^"\\]*)*"   # or Double quoted string,
  | [^,'"\s\\]*(?:\s+[^,'"\s\\]+)*  # or Non-comma, non-quote stuff.
  )                                 # End group of value alternatives.
  \s*                               # Allow whitespace after value.
)*                                  # Zero or more additional values
$                                   # Anchor to end of string.
"""
To parse individual values from the validated CSV string, we utilize the following regex:
re_value = r"""
# Match one value in valid CSV string.
# Free-spacing notation: whitespace and #-comments are not part of the
# pattern, so it must be compiled in verbose/extended mode. Each comment
# must stay on its own line, otherwise it swallows the rest of the pattern.
# Capture group 1 holds a single quoted value, group 2 a double quoted
# value and group 3 an unquoted value; only one of them participates.
(?!\s*$)                            # Don't match empty last value.
\s*                                 # Strip whitespace before value.
(?:                                 # Group for value alternatives.
  '([^'\\]*(?:\\[\S\s][^'\\]*)*)'   # Either $1: Single quoted string,
| "([^"\\]*(?:\\[\S\s][^"\\]*)*)"   # or $2: Double quoted string,
| ([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)  # or $3: Non-comma, non-quote stuff.
)                                   # End group of value alternatives.
\s*                                 # Strip whitespace after value.
(?:,|$)                             # Field ends on comma or EOS.
"""
With these regexes defined, we can implement a function to parse the CSV string:
/**
 * Parse a CSV string into an array of string values.
 *
 * Each value may be single-quoted, double-quoted, or unquoted. Quoted values
 * may contain commas, and a backslash inside them escapes the next character.
 * Unquoted values may not contain commas, quotes, or backslashes. Whitespace
 * outside the quotes (or around an unquoted value) is stripped; whitespace
 * inside the quotes is preserved.
 *
 * Only the escaped quote matching the enclosing quote type is unescaped
 * (\' in single-quoted values, \" in double-quoted values); any other
 * backslash sequence is returned unchanged.
 *
 * @param {string} text - The CSV string to parse.
 * @returns {string[] | null} The parsed values, or null when `text` is not a
 *     string or is not a well-formed CSV string.
 */
function CSVtoArray(text) {
  if (typeof text !== 'string') return null;

  // JavaScript has no free-spacing regex mode, so both patterns are written
  // in compact single-line form and kept local so the function is
  // self-contained.
  // reValid matches a whole string made of comma-separated values.
  const reValid = /^\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*(?:,\s*(?:'[^'\\]*(?:\\[\S\s][^'\\]*)*'|"[^"\\]*(?:\\[\S\s][^"\\]*)*"|[^,'"\s\\]*(?:\s+[^,'"\s\\]+)*)\s*)*$/;
  // reValue matches one value plus its trailing comma (or end of string).
  // Groups: 1 = single-quoted, 2 = double-quoted, 3 = unquoted content.
  const reValue = /(?!\s*$)\s*(?:'([^'\\]*(?:\\[\S\s][^'\\]*)*)'|"([^"\\]*(?:\\[\S\s][^"\\]*)*)"|([^,'"\s\\]*(?:\s+[^,'"\s\\]+)*))\s*(?:,|$)/g;

  // Return null if input string is not a well-formed CSV string.
  if (!reValid.test(text)) return null;

  const values = [];
  let match;
  // Walk the string one value at a time. Every match consumes at least one
  // character (the negative lookahead rules out a trailing empty match), so
  // the loop always advances and terminates.
  while ((match = reValue.exec(text)) !== null) {
    const singleQuoted = match[1];
    const doubleQuoted = match[2];
    const unquoted = match[3];
    if (singleQuoted !== undefined) {
      // Remove backslash from \' in single quoted values.
      values.push(singleQuoted.replace(/\\'/g, "'"));
    } else if (doubleQuoted !== undefined) {
      // Remove backslash from \" in double quoted values.
      values.push(doubleQuoted.replace(/\\"/g, '"'));
    } else {
      values.push(unquoted);
    }
  }

  // reValue deliberately refuses to match an empty last value, so a trailing
  // comma needs its empty value appended explicitly.
  if (/,\s*$/.test(text)) values.push('');
  return values;
}
Here are some examples of input CSV strings and their corresponding parsed outputs:
// Usage examples for CSVtoArray. Backslashes that must reach the parser are
// doubled, because a single backslash before a quote or comma inside a
// JavaScript string literal is consumed by the literal itself.
let result;

// Test string from original question
result = CSVtoArray("'string, duppi, du', 23, lala");
console.log(result); // ['string, duppi, du', '23', 'lala']

// Empty CSV string
result = CSVtoArray("");
console.log(result); // []

// CSV string with two empty values
result = CSVtoArray(",");
console.log(result); // ['', '']

// Double quoted CSV string having single quoted values
result = CSVtoArray("'one','two with escaped \\' single quote', 'three, with, commas'");
console.log(result); // ['one', "two with escaped ' single quote", 'three, with, commas']

// Single quoted CSV string having double quoted values
result = CSVtoArray('"one","two with escaped \\" double quote", "three, with, commas"');
console.log(result); // ['one', 'two with escaped " double quote', 'three, with, commas']

// CSV string with whitespace in and around empty and non-empty values.
// Whitespace inside quotes is kept, and the trailing comma yields a final
// empty value.
result = CSVtoArray(" one , 'two' , , ' four' ,, 'six ', ' seven ' , ");
console.log(result); // ['one', 'two', '', ' four', '', 'six ', ' seven ', '']

// Not valid: an unquoted value may not contain a quote or a backslash
result = CSVtoArray("one, that's me!, escaped \\, comma");
console.log(result); // null
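This solution parses CSV strings accurately within a clearly defined format: values may be single-quoted, double-quoted, or unquoted; quoted values may contain commas and backslash-escaped quotes; whitespace around values is ignored; and input that does not fit this format is rejected by returning null rather than being parsed incorrectly.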
The above is the detailed content of "How Can JavaScript Effectively Parse CSV Strings with Commas in Quoted Fields?" For more information, please follow other related articles on the PHP Chinese website!