Last active
December 19, 2020 20:22
-
-
Save vaclavdekanovsky/d2f6cebfd661565222b24ee043634b05 to your computer and use it in GitHub Desktop.
Header parameter of Julia's CSV reader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Header parameter of the Julia CSV parser\n", | |
"Written in [Julia](https://julialang.org/). See [CSV.jl](https://csv.juliadata.org/stable/) and [DataFrames.jl](https://dataframes.juliadata.org/stable/) for more details" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"using CSV, DataFrames" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We will parse the following data, wrapping the string in an `IOBuffer` so that it can be passed to the CSV reader." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"\"c|c|c|d\\n1|2|3|4\\n\\\"1\\\"|2|c|1.5\\n\\\"C|D\\\"|16|x|2.33\\n\"" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data = \"\"\"\n", | |
"c|c|c|d\n", | |
"1|2|3|4\n", | |
"\"1\"|2|c|1.5\n", | |
"\"C|D\"|16|x|2.33\n", | |
"\"\"\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Default header on the first row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>c</th><th>c_1</th><th>c_2</th><th>d</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>2</td><td>3</td><td>4.0</td></tr><tr><th>2</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>3</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& c & c\\_1 & c\\_2 & d\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & 1 & 2 & 3 & 4.0 \\\\\n", | |
"\t2 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t3 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"3×4 DataFrame\n", | |
"│ Row │ c │ c_1 │ c_2 │ d │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ 1 │ 2 │ 3 │ 4.0 │\n", | |
"│ 2 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 3 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# by default the header is the first row; duplicate column names get a numeric suffix (c, c_1, c_2)\n", | |
"df = CSV.read(IOBuffer(data), DataFrame)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## No header" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>Column1</th><th>Column2</th><th>Column3</th><th>Column4</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& Column1 & Column2 & Column3 & Column4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ Column1 │ Column2 │ Column3 │ Column4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼─────────┼─────────┼─────────┼─────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Julia convention is to separate keyword arguments (kwargs) from positional ones with a semicolon (;), though a comma works as well\n", | |
"CSV.read(IOBuffer(data), DataFrame; header=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>Column1</th><th>Column2</th><th>Column3</th><th>Column4</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& Column1 & Column2 & Column3 & Column4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ Column1 │ Column2 │ Column3 │ Column4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼─────────┼─────────┼─────────┼─────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"CSV.read(IOBuffer(data), DataFrame; header=false)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Specify own column names using headers" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"You can pass a vector of column names (as strings or symbols) to specify the header. The first row of the data is then read as a regular data row." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>first</th><th>second</th><th>third</th><th>fourth</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& first & second & third & fourth\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ first │ second │ third │ fourth │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼────────┼────────┼────────┼────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"CSV.read(IOBuffer(data), DataFrame; header=[\"first\",\"second\",\"third\",\"fourth\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>a</th><th>b</th><th>c</th><th>d</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& a & b & c & d\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ a │ b │ c │ d │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼────────┼────────┼────────┼────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"CSV.read(IOBuffer(data), DataFrame; header=[:a,:b,:c,:d])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>z</th><th>zy</th><th>zyx</th><th>zyxw</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& z & zy & zyx & zyxw\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ z │ zy │ zyx │ zyxw │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼────────┼────────┼────────┼────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"CSV.read(IOBuffer(data), DataFrame; header=[Symbol(\"z\"),Symbol(\"zy\"),Symbol(\"zyx\"),Symbol(\"zyxw\")])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Header on the first row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>c</th><th>c_1</th><th>c_2</th><th>d</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>3 rows × 4 columns</p><tr><th>1</th><td>1</td><td>2</td><td>3</td><td>4.0</td></tr><tr><th>2</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>3</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& c & c\\_1 & c\\_2 & d\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & 1 & 2 & 3 & 4.0 \\\\\n", | |
"\t2 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t3 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"3×4 DataFrame\n", | |
"│ Row │ c │ c_1 │ c_2 │ d │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ 1 │ 2 │ 3 │ 4.0 │\n", | |
"│ 2 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 3 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Remember that Julia starts indexing at 1\n", | |
"CSV.read(IOBuffer(data), DataFrame; header=1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Header on the x-th row\n", | |
"Everything above the header is ignored" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>1</th><th>2</th><th>3</th><th>4</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>2 rows × 4 columns</p><tr><th>1</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>2</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& 1 & 2 & 3 & 4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t2 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"2×4 DataFrame\n", | |
"│ Row │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 2 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"CSV.read(IOBuffer(data), DataFrame; header=2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Multirow header\n", | |
"A multirow header can be specified using a range, for example `1:2`, or a vector, `[1,2]`. Some rows can even be skipped, e.g. `[1,3]`. The column names are the concatenation of the values on these rows, joined by an underscore." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>Column1</th><th>Column2</th><th>Column3</th><th>Column4</th></tr><tr><th></th><th>String</th><th>String</th><th>String</th><th>String</th></tr></thead><tbody><p>4 rows × 4 columns</p><tr><th>1</th><td>c</td><td>c</td><td>c</td><td>d</td></tr><tr><th>2</th><td>1</td><td>2</td><td>3</td><td>4</td></tr><tr><th>3</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>4</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& Column1 & Column2 & Column3 & Column4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & String & String & String\\\\\n", | |
"\t\\hline\n", | |
"\t1 & c & c & c & d \\\\\n", | |
"\t2 & 1 & 2 & 3 & 4 \\\\\n", | |
"\t3 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t4 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"4×4 DataFrame\n", | |
"│ Row │ Column1 │ Column2 │ Column3 │ Column4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n", | |
"├─────┼─────────┼─────────┼─────────┼─────────┤\n", | |
"│ 1 │ c │ c │ c │ d │\n", | |
"│ 2 │ 1 │ 2 │ 3 │ 4 │\n", | |
"│ 3 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 4 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# no header, as a reminder of what the raw data looks like\n", | |
"df = CSV.read(IOBuffer(data), DataFrame; header=false)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>c_1</th><th>c_2</th><th>c_3</th><th>d_4</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>2 rows × 4 columns</p><tr><th>1</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>2</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& c\\_1 & c\\_2 & c\\_3 & d\\_4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t2 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"2×4 DataFrame\n", | |
"│ Row │ c_1 │ c_2 │ c_3 │ d_4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 2 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# column names are formed by concatenating the values of rows 1 through 2\n", | |
"CSV.read(IOBuffer(data), DataFrame; header=1:2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>c_1</th><th>c_2</th><th>c_3</th><th>d_4</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>2 rows × 4 columns</p><tr><th>1</th><td>1</td><td>2</td><td>c</td><td>1.5</td></tr><tr><th>2</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& c\\_1 & c\\_2 & c\\_3 & d\\_4\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & 1 & 2 & c & 1.5 \\\\\n", | |
"\t2 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"2×4 DataFrame\n", | |
"│ Row │ c_1 │ c_2 │ c_3 │ d_4 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ 1 │ 2 │ c │ 1.5 │\n", | |
"│ 2 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# the same using a vector of row numbers\n", | |
"CSV.read(IOBuffer(data), DataFrame; header=[1,2])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table class=\"data-frame\"><thead><tr><th></th><th>c_1</th><th>c_2</th><th>c_c</th><th>d_1.5</th></tr><tr><th></th><th>String</th><th>Int64</th><th>String</th><th>Float64</th></tr></thead><tbody><p>1 rows × 4 columns</p><tr><th>1</th><td>C|D</td><td>16</td><td>x</td><td>2.33</td></tr></tbody></table>" | |
], | |
"text/latex": [ | |
"\\begin{tabular}{r|cccc}\n", | |
"\t& c\\_1 & c\\_2 & c\\_c & d\\_1.5\\\\\n", | |
"\t\\hline\n", | |
"\t& String & Int64 & String & Float64\\\\\n", | |
"\t\\hline\n", | |
"\t1 & C|D & 16 & x & 2.33 \\\\\n", | |
"\\end{tabular}\n" | |
], | |
"text/plain": [ | |
"1×4 DataFrame\n", | |
"│ Row │ c_1 │ c_2 │ c_c │ d_1.5 │\n", | |
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n", | |
"├─────┼────────┼───────┼────────┼─────────┤\n", | |
"│ 1 │ C|D │ 16 │ x │ 2.33 │" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# skipping row number 2\n", | |
"CSV.read(IOBuffer(data), DataFrame; header=[1,3])" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Julia 1.4.1", | |
"language": "julia", | |
"name": "julia-1.4" | |
}, | |
"language_info": { | |
"file_extension": ".jl", | |
"mimetype": "application/julia", | |
"name": "julia", | |
"version": "1.4.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment