Skip to content

Instantly share code, notes, and snippets.

@iwfan
Last active November 27, 2023 08:53

Revisions

  1. iwfan revised this gist Nov 27, 2023. 1 changed file with 4 additions and 1 deletion.
    5 changes: 4 additions & 1 deletion wild-csv-parse.ts
    Original file line number Diff line number Diff line change
    @@ -34,7 +34,10 @@ function parse(input: string) {
    cursor = start = next_quote_char_index + 2
    }
    } else if (char === DELIMITER) {
    if (start === end) break
    if (start === end) {
    fields.push(row.slice(start, start))
    break
    }
    fields.push(row.slice(cursor, start))
    cursor = start = start + 1
    } else {
  2. iwfan revised this gist Nov 21, 2023. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion wild-csv-parse.ts
    Original file line number Diff line number Diff line change
    @@ -14,7 +14,6 @@ function parse(input: string) {
    const quote_char_index = GROUP_QUOTES.indexOf(char)
    // 当前字符为单引号/双引号
    if (quote_char_index >= 0 && cursor === start) {

    const quote_char = GROUP_QUOTES[quote_char_index]
    // 查找下一个引号+分隔符的位置, 默认引号后没有空格
    const next_quote_char_index = row.indexOf(`${quote_char}${DELIMITER}`, start)
  3. iwfan revised this gist Nov 21, 2023. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion wild-csv-parse.ts
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,8 @@ function parse(input: string) {

    const quote_char_index = GROUP_QUOTES.indexOf(char)
    // 当前字符为单引号/双引号
    if (quote_char_index >= 0) {
    if (quote_char_index >= 0 && cursor === start) {

    const quote_char = GROUP_QUOTES[quote_char_index]
    // 查找下一个引号+分隔符的位置, 默认引号后没有空格
    const next_quote_char_index = row.indexOf(`${quote_char}${DELIMITER}`, start)
  4. iwfan revised this gist Nov 20, 2023. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion wild-csv-parse.ts
    Original file line number Diff line number Diff line change
    @@ -42,7 +42,8 @@ function parse(input: string) {
    }

    if (start === end) {
    fields.push(row.slice(cursor))
    const regexp = new RegExp(`${DELIMITER}$`)
    fields.push(row.slice(cursor).replace(regexp, ""))
    }
    }

    @@ -71,6 +72,11 @@ colum12,colum2,colum3
    ‘1,2,3’,2,
    "name:'sy'",3,
    "name:'sy',age:18",4,
    colum12,colum2,colum3,
    "content,a",1,,
    ‘1,2,3’,2,,
    "name:'sy'",3,,,
    "name:'sy',age:18",4,,,5,6,
    `

    console.log(JSON.stringify(parse(csv_input), null, 2))
  5. iwfan revised this gist Nov 20, 2023. 1 changed file with 9 additions and 6 deletions.
    15 changes: 9 additions & 6 deletions wild-csv-parse.ts
    Original file line number Diff line number Diff line change
    @@ -34,18 +34,15 @@ function parse(input: string) {
    cursor = start = next_quote_char_index + 2
    }
    } else if (char === DELIMITER) {
    if (start === end) break
    fields.push(row.slice(cursor, start))
    cursor = start = start + 1
    } else {
    start += 1
    }

    if (start === end) {
    if (cursor < end) {
    fields.push(row.slice(cursor))
    } else if (cursor === end) {
    fields.push(row.charAt(cursor))
    }
    fields.push(row.slice(cursor))
    }
    }

    @@ -69,6 +66,12 @@ line_12,"content,a,b"
    line_13,"name:'sy'",3
    line_14,"name:'sy',age:18",4
    awdwa, "adwadwd","dawdaw"
    colum12,colum2,colum3
    "content,a",1,
    ‘1,2,3’,2,
    "name:'sy'",3,
    "name:'sy',age:18",4,
    `

    console.log(JSON.stringify(parse(csv_input), null, 2))
    console.log(JSON.stringify(parse(csv_input), null, 2))

  6. iwfan renamed this gist Nov 20, 2023. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  7. iwfan revised this gist Nov 20, 2023. 1 changed file with 40 additions and 36 deletions.
    76 changes: 40 additions & 36 deletions wild-csv-parse.js
    Original file line number Diff line number Diff line change
    @@ -1,49 +1,50 @@
    const DELIMITER = ","
    const GROUP_QUOTES = ['"', "'"]

    /**
    *
    * @param {string} input
    * @returns
    */
    function parse(input) {
    function parse(input: string) {
    const rows = input.trim().split(/\r?\n/)
    return rows.map(row => {
    let start = 0
    let end = row.length - 1
    let quote_char_count = [, 0]
    const fields = []
    return rows.map((row, rowIndex) => {
    let [start, cursor, end] = [0, 0, row.length - 1]

    for (let cursor = 0; cursor < row.length; cursor++) {
    const char = row.charAt(cursor)
    const quote_char_index = GROUP_QUOTES.findIndex(quote_char => quote_char === char)
    const fields: string[] = []

    while (start <= end) {
    const char = row.charAt(start)

    const quote_char_index = GROUP_QUOTES.indexOf(char)
    // 当前字符为单引号/双引号
    if (quote_char_index >= 0) {
    const quote_char = GROUP_QUOTES[quote_char_index]
    if (quote_char_count[0]) {
    if (quote_char === quote_char_count[0]) {
    quote_char_count[1] = quote_char_count[1] + 1
    // 查找下一个引号+分隔符的位置, 默认引号后没有空格
    const next_quote_char_index = row.indexOf(`${quote_char}${DELIMITER}`, start)
    // 没有找到
    if (next_quote_char_index < 0) {
    // 可能是因为最后一个field后没有分隔符,判断一下最后一个字符能否匹配上引号
    if (row.charAt(end) === quote_char) {
    fields.push(row.slice(start))
    break // 本行已经遍历结束
    } else {
    // throw new Error(`括号无法匹配 ${rowIndex}: ${row}`)
    fields.push(row.slice(start))
    break
    }
    } else {
    quote_char_count[0] === quote_char
    quote_char_count[1] = quote_char_count[1] + 1
    }
    }

    if (char === DELIMITER) {
    const quoteCount = quote_char_count[1]
    if (quoteCount === 0 || (quoteCount % 2 === 0 && row.charAt(cursor - 1) === GROUP_QUOTES)) {
    fields.push(row.slice(start, cursor))
    start = cursor + 1
    quote_char_count = [, 0]
    // 找到了对应的字符
    fields.push(row.slice(start, next_quote_char_index + 1))
    cursor = start = next_quote_char_index + 2
    }
    } else if (char === DELIMITER) {
    fields.push(row.slice(cursor, start))
    cursor = start = start + 1
    } else {
    start += 1
    }

    if (cursor === end) {
    if (start < end) {
    fields.push(row.slice(start))
    } else if (start === end) {
    fields.push(char)
    if (start === end) {
    if (cursor < end) {
    fields.push(row.slice(cursor))
    } else if (cursor === end) {
    fields.push(row.charAt(cursor))
    }
    }
    }
    @@ -57,14 +58,17 @@ line_1, column_2
    line_2, "column_2,2"
    line_3, "column_\"3,2\""
    line_4, column_2,a
    line_5, 'column_2' ,a
    line_6, "column_2"2" ,a
    line_5, 'column_2',a
    line_6, "column_2"2",a
    line_7,"column_3'3',a
    line_8,"column_3'3',a'
    line_9,‘1,2,3’
    line_10,'1,2,3'
    line_11,'content,a,b'
    line_12,"content,a,b"
    line_13,"name:'sy'",3
    line_14,"name:'sy',age:18",4
    awdwa, "adwadwd","dawdaw"
    `
    console.log(parse(csv_input))

    console.log(JSON.stringify(parse(csv_input), null, 2))
  8. iwfan created this gist Nov 20, 2023.
    70 changes: 70 additions & 0 deletions wild-csv-parse.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,70 @@
    // Character that separates fields within a row.
    const DELIMITER = ","
    // Characters that may wrap a field so that it can contain DELIMITER.
    const GROUP_QUOTES = ['"', "'"]

    /**
     * Splits loosely formatted ("wild") CSV text into rows of raw fields.
     *
     * The input is trimmed and split on LF / CRLF; every line becomes one row.
     * Within a row a field may be wrapped in one of GROUP_QUOTES. The first
     * quote character met in a field opens the group, and only further
     * occurrences of that same character are counted. A DELIMITER ends the
     * field when no quote has been seen in it, or when the quotes are balanced
     * (even count) and the character right before the delimiter is the
     * opening quote character, i.e. the group was just closed.
     *
     * Fields are returned verbatim: surrounding quotes and whitespace are kept.
     * A trailing DELIMITER produces a trailing empty field; an empty line
     * produces a row with no fields.
     *
     * @param {string} input CSV text, one record per line.
     * @returns {string[][]} One array of field strings per input line.
     */
    function parse(input) {
      const rows = input.trim().split(/\r?\n/)
      return rows.map(row => {
        const fields = []
        let start = 0 // index at which the current field begins
        let openQuote = "" // quote character that opened the current group, "" if none yet
        let quoteCount = 0 // occurrences of openQuote seen in the current field

        for (let cursor = 0; cursor < row.length; cursor++) {
          const char = row.charAt(cursor)

          if (GROUP_QUOTES.includes(char)) {
            if (openQuote === "") {
              // First quote of the field: remember which kind opened the group.
              openQuote = char
              quoteCount = 1
            } else if (char === openQuote) {
              quoteCount += 1
            }
            // A quote of the other kind inside an open group is ordinary content.
          }

          if (char === DELIMITER) {
            const outsideQuotes = quoteCount === 0
            // Balanced quotes alone are not enough: the delimiter must directly
            // follow the closing quote, otherwise it is still part of the field.
            const groupJustClosed =
              quoteCount % 2 === 0 && row.charAt(cursor - 1) === openQuote
            if (outsideQuotes || groupJustClosed) {
              fields.push(row.slice(start, cursor))
              start = cursor + 1
              openQuote = ""
              quoteCount = 0
            }
          }
        }

        // Whatever follows the last split is the final field. It is empty when
        // the row ends with a delimiter. A completely empty row has no fields.
        if (row.length > 0) {
          fields.push(row.slice(start))
        }

        return fields
      })
    }

    // Sample input for parse(): one record per line. The rows mix unquoted
    // fields, fields wrapped in ' or " that contain the delimiter, quotes that
    // are unbalanced or appear mid-field, and typographic quotes (‘ ’), which
    // are not listed in GROUP_QUOTES.
    // NOTE: inside a template literal `\"` is an escape sequence for `"`, so the
    // backslashes written on the line_3 row are not part of the resulting string.
    // parse() trims its input, so the newline right after the opening backtick
    // and the one before the closing backtick do not add empty rows.
    const csv_input = `
    line_1, column_2
    line_2, "column_2,2"
    line_3, "column_\"3,2\""
    line_4, column_2,a
    line_5, 'column_2' ,a
    line_6, "column_2"2" ,a
    line_7,"column_3'3',a
    line_8,"column_3'3',a'
    line_9,‘1,2,3’
    line_10,'1,2,3'
    line_11,'content,a,b'
    line_12,"content,a,b"
    `
    // Print the parsed rows (an array of field arrays) for manual inspection.
    console.log(parse(csv_input))