Skip to content

Instantly share code, notes, and snippets.

@iwfan
Last active November 27, 2023 08:53
Show Gist options
  • Save iwfan/370ce99e5b44b669df1a5c1b7c1ab8c4 to your computer and use it in GitHub Desktop.
Save iwfan/370ce99e5b44b669df1a5c1b7c1ab8c4 to your computer and use it in GitHub Desktop.
wild csv parse function
/** Separator placed between two fields of a row. */
const DELIMITER = ","
/** Characters that may open (and must then close) a quoted field. */
const GROUP_QUOTES = ['"', "'"]

/**
 * Splits a single row into its fields.
 *
 * A field whose first character is one of GROUP_QUOTES is a quoted field: it
 * runs up to the next occurrence of that same quote immediately followed by
 * DELIMITER, so it may contain delimiters itself. The surrounding quotes are
 * kept in the returned value. A quote must be the very first character of the
 * field to count (no leading whitespace is skipped). If no closing
 * quote + delimiter pair exists, the rest of the row becomes the field.
 *
 * @param row one line of input, without its line terminator
 * @returns the fields of the row; an empty row yields an empty array
 */
function parseRow(row: string): string[] {
  const fields: string[] = []
  // A blank line carries no fields at all.
  if (row.length === 0) {
    return fields
  }
  let fieldStart = 0
  for (;;) {
    // The previous field was terminated by a delimiter that is the last
    // character of the row: that delimiter introduces one final empty field.
    if (fieldStart >= row.length) {
      fields.push("")
      break
    }
    const firstChar = row.charAt(fieldStart)
    let delimiterIndex: number
    if (GROUP_QUOTES.indexOf(firstChar) >= 0) {
      // Quoted field. Search starts after the opening quote so that the
      // opening quote can never be mistaken for its own closing quote.
      const closingIndex = row.indexOf(firstChar + DELIMITER, fieldStart + 1)
      delimiterIndex = closingIndex < 0 ? -1 : closingIndex + firstChar.length
    } else {
      delimiterIndex = row.indexOf(DELIMITER, fieldStart)
    }
    if (delimiterIndex < 0) {
      // No further delimiter (or unterminated quote): the rest is the last field.
      fields.push(row.slice(fieldStart))
      break
    }
    fields.push(row.slice(fieldStart, delimiterIndex))
    fieldStart = delimiterIndex + DELIMITER.length
  }
  return fields
}

/**
 * Parses loosely formatted CSV text into rows of raw field strings.
 *
 * The input is trimmed as a whole, split on LF / CRLF line breaks, and every
 * line is split by parseRow. Fields are returned verbatim: quotes around
 * quoted fields and any whitespace inside a field are preserved.
 *
 * @param input the CSV text
 * @returns one array of fields per input line
 */
function parse(input: string): string[][] {
  return input
    .trim()
    .split(/\r?\n/)
    .map((row) => parseRow(row))
}
// Sample input exercising the parser: plain unquoted fields, single- and
// double-quoted fields that contain the delimiter, a space before an opening
// quote, unmatched quotes, typographic quotes (‘ ’), and rows ending in one or
// more trailing delimiters.
// Note: `\"` inside a template literal evaluates to a bare `"`, so the line_3
// row contains unescaped double quotes at runtime.
const csv_input = `
line_1, column_2
line_2, "column_2,2"
line_3, "column_\"3,2\""
line_4, column_2,a
line_5, 'column_2',a
line_6, "column_2"2",a
line_7,"column_3'3',a
line_8,"column_3'3',a'
line_9,‘1,2,3’
line_10,'1,2,3'
line_11,'content,a,b'
line_12,"content,a,b"
line_13,"name:'sy'",3
line_14,"name:'sy',age:18",4
awdwa, "adwadwd","dawdaw"
colum12,colum2,colum3
"content,a",1,
‘1,2,3’,2,
"name:'sy'",3,
"name:'sy',age:18",4,
colum12,colum2,colum3,
"content,a",1,,
‘1,2,3’,2,,
"name:'sy'",3,,,
"name:'sy',age:18",4,,,5,6,
`
// Parse the sample and print the resulting rows as JSON with 2-space indentation.
console.log(JSON.stringify(parse(csv_input), null, 2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment