Last active
November 27, 2023 08:53
-
-
Save iwfan/370ce99e5b44b669df1a5c1b7c1ab8c4 to your computer and use it in GitHub Desktop.
wild csv parse function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Text that separates two adjacent fields within a row. */
const DELIMITER = ","

/** Characters that open a quoted field; the same character must close it. */
const GROUP_QUOTES = ['"', "'"]

/**
 * Splits CSV-like text into rows of raw field strings.
 *
 * The input is trimmed, split on LF / CRLF, and every row is split on
 * `DELIMITER`. A field whose very first character is one of `GROUP_QUOTES`
 * is treated as quoted: it runs up to the next occurrence of that same quote
 * character immediately followed by `DELIMITER`, so delimiters inside the
 * quotes do not split the field. Quote characters are kept in the returned
 * field text and nothing is unescaped or trimmed.
 *
 * Limitations kept from the original design:
 * - whitespace before an opening quote disables quote handling for that field;
 * - a quoted field with no closing quote + delimiter swallows the rest of the row.
 *
 * @param input Raw CSV text.
 * @returns One array of fields per row; an empty row yields an empty array.
 */
function parse(input: string): string[][] {
  return input
    .trim()
    .split(/\r?\n/)
    .map((row) => parseRow(row))
}

/** Splits one row into fields; each loop iteration consumes exactly one field. */
function parseRow(row: string): string[] {
  const fields: string[] = []
  if (row.length === 0) {
    return fields
  }

  let pos = 0
  for (;;) {
    const char = row.charAt(pos)

    if (GROUP_QUOTES.includes(char)) {
      // Search from pos + 1 so the opening quote itself can never be taken
      // for the closing one (e.g. a quoted field that starts with a delimiter).
      const closing = row.indexOf(char + DELIMITER, pos + 1)
      if (closing < 0) {
        // Either the last field of the row or an unterminated quote:
        // in both cases the remainder of the row is a single field.
        fields.push(row.slice(pos))
        return fields
      }
      fields.push(row.slice(pos, closing + 1))
      pos = closing + 1 + DELIMITER.length
      continue
    }

    const next = row.indexOf(DELIMITER, pos)
    if (next < 0) {
      // No more delimiters: the remainder (possibly empty, right after a
      // trailing delimiter) is the last field.
      fields.push(row.slice(pos))
      return fields
    }
    fields.push(row.slice(pos, next))
    pos = next + DELIMITER.length
  }
}
// Sample rows for manual inspection: plain fields, double- and single-quoted
// fields with embedded delimiters, quotes in the middle of a field, typographic
// quotes (‘ ’, which are not in GROUP_QUOTES), and rows with trailing delimiters.
const csv_input = `
line_1, column_2
line_2, "column_2,2"
line_3, "column_\"3,2\""
line_4, column_2,a
line_5, 'column_2',a
line_6, "column_2"2",a
line_7,"column_3'3',a
line_8,"column_3'3',a'
line_9,‘1,2,3’
line_10,'1,2,3'
line_11,'content,a,b'
line_12,"content,a,b"
line_13,"name:'sy'",3
line_14,"name:'sy',age:18",4
awdwa, "adwadwd","dawdaw"
colum12,colum2,colum3
"content,a",1,
‘1,2,3’,2,
"name:'sy'",3,
"name:'sy',age:18",4,
colum12,colum2,colum3,
"content,a",1,,
‘1,2,3’,2,,
"name:'sy'",3,,,
"name:'sy',age:18",4,,,5,6,
`

// Print the parsed rows as pretty-printed JSON.
console.log(JSON.stringify(parse(csv_input), null, 2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment