Maecenas · May 3, 2018 02:25
diff --git a/data_processing.py b/data_processing.py
 # 时间索引
 >>> import pandas as pd
 >>> index = pd.date_range('20160503', periods=5)
 >>> index
 DatetimeIndex(['2016-05-03', '2016-05-04', '2016-05-05', '2016-05-06',
               '2016-05-07'],
              dtype='datetime64[ns]', freq='D')


 # 数据显示
 >>> import numpy as np
 >>> dates = pd.DataFrame(np.random.randn(5,3),index=index,columns=list('ABC'))
 >>> dates
                   A         B         C
 2016-05-03  0.320522 -0.854196 -0.477794
 2016-05-04 -0.736698  0.220931  0.878340
 2016-05-05  1.002439 -0.422472 -1.027093
 2016-05-06  1.792256  0.322563  0.285416
 2016-05-07 -2.291344  0.633183  1.120919
 >>> dates.index # 获取索引
 DatetimeIndex(['2016-05-03', '2016-05-04', '2016-05-05', '2016-05-06',
               '2016-05-07'],
              dtype='datetime64[ns]', freq='D')
 >>> dates.columns # 列名
 Index([u'A', u'B', u'C'], dtype='object')
 >>> dates.values # 返回数组，从这里可以看出来DataFrame与Series都是以ndarray为基础的
 array([[ 0.32052247, -0.85419619, -0.47779368],
       [-0.73669815,  0.22093113,  0.87834037],
       [ 1.00243859, -0.42247204, -1.02709299],
       [ 1.79225566,  0.32256317,  0.28541609],
       [-2.2913443 ,  0.63318336,  1.12091912]])
 >>> dates.describe # 描述
 <bound method DataFrame.describe of                    A         B         C
 2016-05-03  0.320522 -0.854196 -0.477794
 2016-05-04 -0.736698  0.220931  0.878340
 2016-05-05  1.002439 -0.422472 -1.027093
 2016-05-06  1.792256  0.322563  0.285416
 2016-05-07 -2.291344  0.633183  1.120919>


 # 数据筛选
 >>> dates.head(1) # 切片，前一行
                   A         B         C
 2016-05-03  0.320522 -0.854196 -0.477794
 >>> dates.tail(1) # 最后一行
                   A         B         C
 2016-05-07 -2.291344  0.633183  1.120919
 >>> dates['2016-05-03':'2016-05-05'] # 通过索引切片
                   A         B         C
 2016-05-03  0.320522 -0.854196 -0.477794
 2016-05-04 -0.736698  0.220931  0.878340
 2016-05-05  1.002439 -0.422472 -1.027093
 >>> dates.loc['2016-05-03':'2016-05-05', ['A', 'B']] # 区域选择，定义索引，字段名
                   A         B
 2016-05-03  0.320522 -0.854196
 2016-05-04 -0.736698  0.220931
 2016-05-05  1.002439 -0.422472
 >>> dates = pd.DataFrame(np.random.randn(5,3))
 >>> dates
          0         1         2
 0 -0.814773  0.828948  1.279713
 1  1.010236 -1.063719 -1.010884
 2 -0.206223 -1.756636 -0.777366
 3 -0.253049  0.955689 -1.735237
 4 -1.269495 -0.321743  0.929069
 >>> dates.at[0,0] # 定位到单元格
 -0.81477290050615703
 >>> dates.iloc[1:4,[1,]] # 通过数字切片loc
                   B
 2016-05-04 -1.002059
 2016-05-05 -0.298642
 2016-05-06 -1.042664
 >>> dates.iat[1,1] # 数字索引定位到单元格
 -1.0020589880335706
 >>> dates[dates.index > '2016-05-05'] # 条件筛选
                   A         B         C
 2016-05-06 -0.334060 -1.042664  0.999093
 2016-05-07 -2.051357  0.713025 -0.792198
 >>> dates[(dates.index > '2016-05-05') & (dates.A < 0)] # 组合筛选
                   A         B         C
 2016-05-06 -0.334060 -1.042664  0.999093
 2016-05-07 -2.051357  0.713025 -0.792198
 >>> dates[dates.A > 0].B # 返回Series
 2016-05-06   -0.139405
 2016-05-07   -1.437701
 Freq: D, Name: B, dtype: float64
 >>> dates[dates.A>0].B.mean() # 计算平均值
 -0.78855298332911494
 >>> len(dates) # 行数
 5
 >>> import numpy as np
 >>> np.sign(np.diff(dates.A))
 array([ 1., -1.,  1., -1.])
 >>> status = np.sign(np.diff(dates.A))
 >>> status[np.where(status == 1.)].size # 算变更类型未1的数量
 2
 >>> dates.sort(columns='A') # 排序
 __main__:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
                   A         B         C
 2016-05-03 -0.695635  0.077077  0.694779
 2016-05-05 -0.641996  1.420200  0.086330
 2016-05-04 -0.594039 -0.842575 -0.046909
 2016-05-07  0.667998 -1.437701 -0.196523
 2016-05-06  1.176916 -0.139405 -0.592849
 >>> dates.groupby('A').count() # 分组，分组的字段作为索引
           B  C
 A              
 -0.695635  1  1
 -0.641996  1  1
 -0.594039  1  1
 0.667998  1  1
 1.176916  1  1
 >>> dates.groupby('A').sum() # 分组计算和
                  B         C
 A                            
 -0.695635  0.077077  0.694779
 -0.641996  1.420200  0.086330
 -0.594039 -0.842575 -0.046909
 0.667998 -1.437701 -0.196523
 1.176916 -0.139405 -0.592849
 sum


 # 数据合并
 >>> bdates = pd.DataFrame(np.random.randn(2,3),columns=list('ABC'))
 >>> bdates
          A         B         C
 0 -0.043208  0.332842 -0.390034
 1 -0.318745  0.013284  0.147836
 >>> dates
                   A         B         C
 2016-05-03 -0.695635  0.077077  0.694779
 2016-05-04 -0.594039 -0.842575 -0.046909
 2016-05-05 -0.641996  1.420200  0.086330
 2016-05-06  1.176916 -0.139405 -0.592849
 2016-05-07  0.667998 -1.437701 -0.196523
 >>> dates.append(bdates) # 追加
                            A         B         C
 2016-05-03 00:00:00 -0.695635  0.077077  0.694779
 2016-05-04 00:00:00 -0.594039 -0.842575 -0.046909
 2016-05-05 00:00:00 -0.641996  1.420200  0.086330
 2016-05-06 00:00:00  1.176916 -0.139405 -0.592849
 2016-05-07 00:00:00  0.667998 -1.437701 -0.196523
 0                   -0.043208  0.332842 -0.390034
 1                   -0.318745  0.013284  0.147836
 >>> pieces = [dates[:2], dates[len(dates)-1:]
 >>> pd.concat(pieces) # 合并片段
                   A         B         C
 2016-05-03 -0.695635  0.077077  0.694779
 2016-05-04 -0.594039 -0.842575 -0.046909
 2016-05-07  0.667998 -1.437701 -0.196523
 >>> pd.concat(pieces, ignore_index=True) # 忽略不想同的字段，用NaN填充
          A         B         C
 0 -0.695635  0.077077  0.694779
 1 -0.594039 -0.842575 -0.046909
 2  0.667998 -1.437701 -0.196523
 >>> pd.merge(dates, bdates, on='A') # join合并，类比sql语句
 Empty DataFrame
 Columns: [A, B_x, C_x, B_y, C_y]
 Index: []
diff --git a/data_structure.py b/data_structure.py
 # ndarray 多维数组
 >>> import numpy as np
 >>> np.ones((3,4)) # 自动生成3*4的二维数组，全为1
 array([[ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.]])
 >>> np.array([[1,2], [3,4]])
 array([[1, 2],
       [3, 4]])
 >>> np.zeros((1,2)) # 生成全为0
 array([[ 0.,  0.]])
 >>> np.arange(1,10,1) # 类似于range生成列表
 array([1, 2, 3, 4, 5, 6, 7, 8, 9])


 # ndarray 运算
 >>> aarr = np.array([[2,2], [3,3]])
 >>> barr = np.array([[3,4], [5,6]])
 >>> aarr * barr # 直接对同位置的进行运算，+ - * / % +=等
 array([[ 6,  8],
       [15, 18]])
 >>> barr > 5 # bool运算
 array([[False, False],
       [False,  True]], dtype=bool)
 >>> barr.shape # 获取数组行列数结构
 (2, 2)
 >>> barr.reshape(1,4) # 重排数据结构
 array([[3, 4, 5, 6]])
 >>> barr.sum() # 算和
 18
 >>> barr.sum(axis=0) # 算列的和
 array([ 8, 10])
 >>> barr
 array([[3, 4],
       [5, 6]])
 >>> barr.sum(axis=1) # 算行的和 axis 0 纵轴 1 横轴
 array([ 7, 11])
 >>> c = np.array([[3,4], [5,6]])
 >>> np.where(c > 5, aarr, barr) # 筛选组合，判断c的同位置元素如果大于5则选aarr同位置的，否则选barr的
 array([[3, 4],
       [5, 3]])
 >>> def func(i, j):
 ...     return (i+1) * (j+1)
 ...
 >>> np.fromfunction(func, (9,9)) # 通过1个函数来创建数组，第二个参数为数组的行与列数，传给func的参数行列的索引
 array([[  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.],
       [  2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.],
       [  3.,   6.,   9.,  12.,  15.,  18.,  21.,  24.,  27.],
       [  4.,   8.,  12.,  16.,  20.,  24.,  28.,  32.,  36.],
       [  5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.],
       [  6.,  12.,  18.,  24.,  30.,  36.,  42.,  48.,  54.],
       [  7.,  14.,  21.,  28.,  35.,  42.,  49.,  56.,  63.],
       [  8.,  16.,  24.,  32.,  40.,  48.,  56.,  64.,  72.],
       [  9.,  18.,  27.,  36.,  45.,  54.,  63.,  72.,  81.]])
 >>> np.add(aarr, barr) # ufunc的数组+
 array([[5, 6],
       [8, 9]])
 >>> np.add.accumulate([1,2,3]) # reduce的方式对数组求和，并返回求和后的新的数组
 array([1, 3, 6])


 # Series 列 有序字段
 >>> import pandas as pd
 >>> pd.Series([1,2.0,'a']) # 自动增加索引，类似与字典的key-value结构
 0    1
 1    2
 2    a
 dtype: object
 >>> aser = pd.Series([1,2,3], index=['a', 'b', 'c']) # 自定义索引名称
 >>> aser
 a    1
 b    2
 c    3
 dtype: int64
 >>> aser.index
 Index([u'a', u'b', u'c'], dtype='object') # 获取Series的索引
 >>> aser.values
 array([1, 2, 3]) # 获取Series的值，返回数组对象
 >>> aser['b'] # 通过索引取值
 2
 >>> aser * 2 # 运算，可类比ndarray的运算
 a    2
 b    4
 c    6
 dtype: int64
 >>> import numpy as np
 >>> np.exp(aser) # 计算自然对数的n次方
 a     2.718282
 b     7.389056
 c    20.085537
 dtype: float64
 >>> data = {'a':1, 'b':2}
 >>> index = ['a', 'b', 'c'] # 数据对齐，如果没有对应索引的数据，自动填充NaN
 >>> bser = pd.Series(data, index=index)
 >>> bser
 a    1.0
 b    2.0
 c    NaN
 dtype: float64
 >>> pd.isnull(bser) # 判断是否有NaN元素，返回一个新的结果Series
 a    False
 b    False
 c     True
 dtype: bool
 >>> cser = pd.Series({'a':2,'b':5,'d':10})
 >>> bser + cser # 运算时如果有不相称的索引也会对齐
 a    3.0
 b    7.0
 c    NaN
 d    NaN
 dtype: float64
 >>> aser.name # 名称属性，分别对应Series的名称与索引名称
 >>> aser.index.name
 >>> aser.name = 'names'
 >>> aser.index.name = 'col'
 >>> aser
 col
 a    1
 b    2
 c    3
 Name: names, dtype: int64


 # DataFrame 数据帧 表
 >>> data = {'col1': [1,2,3], 'col2': [3,4,5]}
 >>> adf = pd.DataFrame(data)
 >>> adf
   col1  col2
 0     1     3
 1     2     4
 2     3     5
 >>> adf['col1'] # 通过列名获取单列数据，返回Series对象
 0    1
 1    2
 2    3
 Name: col1, dtype: int64
 >>> adf.col1 # . 也可以用
 0    1
 1    2
 2    3
 Name: col1, dtype: int64
 >>> adf.ix[0] # 通过索引获取一行数据，返回Series
 col1    1
 col2    3
 Name: 0, dtype: int64
 >>> adf.col1 = 'admin' # 重写一列的值
 >>> adf
    col1  col2
 0  admin     3
 1  admin     4
 2  admin     5
 >>> adf['col1'] = [10,9,8]
 >>> adf
   col1  col2
 0    10     3
 1     9     4
 2     8     5
 >>> del adf['col1'] # 删除一列
 >>> adf
   col2
 0     3
 1     4
 2     5
 >>> adf.index.name = 'index' # 索引名
 >>> adf
       col2
 index      
 0         3
 1         4
 2         5
	# 时间索引
	>>> import pandas as pd
	>>> index = pd.date_range('20160503', periods=5)
	>>> index
	DatetimeIndex(['2016-05-03', '2016-05-04', '2016-05-05', '2016-05-06',
	'2016-05-07'],
	dtype='datetime64[ns]', freq='D')


	# 数据显示
	>>> import numpy as np
	>>> dates = pd.DataFrame(np.random.randn(5,3),index=index,columns=list('ABC'))
	>>> dates
	A B C
	2016-05-03 0.320522 -0.854196 -0.477794
	2016-05-04 -0.736698 0.220931 0.878340
	2016-05-05 1.002439 -0.422472 -1.027093
	2016-05-06 1.792256 0.322563 0.285416
	2016-05-07 -2.291344 0.633183 1.120919
	>>> dates.index # 获取索引
	DatetimeIndex(['2016-05-03', '2016-05-04', '2016-05-05', '2016-05-06',
	'2016-05-07'],
	dtype='datetime64[ns]', freq='D')
	>>> dates.columns # 列名
	Index([u'A', u'B', u'C'], dtype='object')
	>>> dates.values # 返回数组，从这里可以看出来DataFrame与Series都是以ndarray为基础的
	array([[ 0.32052247, -0.85419619, -0.47779368],
	[-0.73669815, 0.22093113, 0.87834037],
	[ 1.00243859, -0.42247204, -1.02709299],
	[ 1.79225566, 0.32256317, 0.28541609],
	[-2.2913443 , 0.63318336, 1.12091912]])
	>>> dates.describe # 描述
	<bound method DataFrame.describe of A B C
	2016-05-03 0.320522 -0.854196 -0.477794
	2016-05-04 -0.736698 0.220931 0.878340
	2016-05-05 1.002439 -0.422472 -1.027093
	2016-05-06 1.792256 0.322563 0.285416
	2016-05-07 -2.291344 0.633183 1.120919>


	# 数据筛选
	>>> dates.head(1) # 切片，前一行
	A B C
	2016-05-03 0.320522 -0.854196 -0.477794
	>>> dates.tail(1) # 最后一行
	A B C
	2016-05-07 -2.291344 0.633183 1.120919
	>>> dates['2016-05-03':'2016-05-05'] # 通过索引切片
	A B C
	2016-05-03 0.320522 -0.854196 -0.477794
	2016-05-04 -0.736698 0.220931 0.878340
	2016-05-05 1.002439 -0.422472 -1.027093
	>>> dates.loc['2016-05-03':'2016-05-05', ['A', 'B']] # 区域选择，定义索引，字段名
	A B
	2016-05-03 0.320522 -0.854196
	2016-05-04 -0.736698 0.220931
	2016-05-05 1.002439 -0.422472
	>>> dates = pd.DataFrame(np.random.randn(5,3))
	>>> dates
	0 1 2
	0 -0.814773 0.828948 1.279713
	1 1.010236 -1.063719 -1.010884
	2 -0.206223 -1.756636 -0.777366
	3 -0.253049 0.955689 -1.735237
	4 -1.269495 -0.321743 0.929069
	>>> dates.at[0,0] # 定位到单元格
	-0.81477290050615703
	>>> dates.iloc[1:4,[1,]] # 通过数字切片loc
	B
	2016-05-04 -1.002059
	2016-05-05 -0.298642
	2016-05-06 -1.042664
	>>> dates.iat[1,1] # 数字索引定位到单元格
	-1.0020589880335706
	>>> dates[dates.index > '2016-05-05'] # 条件筛选
	A B C
	2016-05-06 -0.334060 -1.042664 0.999093
	2016-05-07 -2.051357 0.713025 -0.792198
	>>> dates[(dates.index > '2016-05-05') & (dates.A < 0)] # 组合筛选
	A B C
	2016-05-06 -0.334060 -1.042664 0.999093
	2016-05-07 -2.051357 0.713025 -0.792198
	>>> dates[dates.A > 0].B # 返回Series
	2016-05-06 -0.139405
	2016-05-07 -1.437701
	Freq: D, Name: B, dtype: float64
	>>> dates[dates.A>0].B.mean() # 计算平均值
	-0.78855298332911494
	>>> len(dates) # 行数
	5
	>>> import numpy as np
	>>> np.sign(np.diff(dates.A))
	array([ 1., -1., 1., -1.])
	>>> status = np.sign(np.diff(dates.A))
	>>> status[np.where(status == 1.)].size # 算变更类型未1的数量
	2
	>>> dates.sort(columns='A') # 排序
	__main__:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
	A B C
	2016-05-03 -0.695635 0.077077 0.694779
	2016-05-05 -0.641996 1.420200 0.086330
	2016-05-04 -0.594039 -0.842575 -0.046909
	2016-05-07 0.667998 -1.437701 -0.196523
	2016-05-06 1.176916 -0.139405 -0.592849
	>>> dates.groupby('A').count() # 分组，分组的字段作为索引
	B C
	A
	-0.695635 1 1
	-0.641996 1 1
	-0.594039 1 1
	0.667998 1 1
	1.176916 1 1
	>>> dates.groupby('A').sum() # 分组计算和
	B C
	A
	-0.695635 0.077077 0.694779
	-0.641996 1.420200 0.086330
	-0.594039 -0.842575 -0.046909
	0.667998 -1.437701 -0.196523
	1.176916 -0.139405 -0.592849
	sum


	# 数据合并
	>>> bdates = pd.DataFrame(np.random.randn(2,3),columns=list('ABC'))
	>>> bdates
	A B C
	0 -0.043208 0.332842 -0.390034
	1 -0.318745 0.013284 0.147836
	>>> dates
	A B C
	2016-05-03 -0.695635 0.077077 0.694779
	2016-05-04 -0.594039 -0.842575 -0.046909
	2016-05-05 -0.641996 1.420200 0.086330
	2016-05-06 1.176916 -0.139405 -0.592849
	2016-05-07 0.667998 -1.437701 -0.196523
	>>> dates.append(bdates) # 追加
	A B C
	2016-05-03 00:00:00 -0.695635 0.077077 0.694779
	2016-05-04 00:00:00 -0.594039 -0.842575 -0.046909
	2016-05-05 00:00:00 -0.641996 1.420200 0.086330
	2016-05-06 00:00:00 1.176916 -0.139405 -0.592849
	2016-05-07 00:00:00 0.667998 -1.437701 -0.196523
	0 -0.043208 0.332842 -0.390034
	1 -0.318745 0.013284 0.147836
	>>> pieces = [dates[:2], dates[len(dates)-1:]
	>>> pd.concat(pieces) # 合并片段
	A B C
	2016-05-03 -0.695635 0.077077 0.694779
	2016-05-04 -0.594039 -0.842575 -0.046909
	2016-05-07 0.667998 -1.437701 -0.196523
	>>> pd.concat(pieces, ignore_index=True) # 忽略不想同的字段，用NaN填充
	A B C
	0 -0.695635 0.077077 0.694779
	1 -0.594039 -0.842575 -0.046909
	2 0.667998 -1.437701 -0.196523
	>>> pd.merge(dates, bdates, on='A') # join合并，类比sql语句
	Empty DataFrame
	Columns: [A, B_x, C_x, B_y, C_y]
	Index: []
	# ndarray 多维数组
	>>> import numpy as np
	>>> np.ones((3,4)) # 自动生成3*4的二维数组，全为1
	array([[ 1., 1., 1., 1.],
	[ 1., 1., 1., 1.],
	[ 1., 1., 1., 1.]])
	>>> np.array([[1,2], [3,4]])
	array([[1, 2],
	[3, 4]])
	>>> np.zeros((1,2)) # 生成全为0
	array([[ 0., 0.]])
	>>> np.arange(1,10,1) # 类似于range生成列表
	array([1, 2, 3, 4, 5, 6, 7, 8, 9])


	# ndarray 运算
	>>> aarr = np.array([[2,2], [3,3]])
	>>> barr = np.array([[3,4], [5,6]])
	>>> aarr * barr # 直接对同位置的进行运算，+ - * / % +=等
	array([[ 6, 8],
	[15, 18]])
	>>> barr > 5 # bool运算
	array([[False, False],
	[False, True]], dtype=bool)
	>>> barr.shape # 获取数组行列数结构
	(2, 2)
	>>> barr.reshape(1,4) # 重排数据结构
	array([[3, 4, 5, 6]])
	>>> barr.sum() # 算和
	18
	>>> barr.sum(axis=0) # 算列的和
	array([ 8, 10])
	>>> barr
	array([[3, 4],
	[5, 6]])
	>>> barr.sum(axis=1) # 算行的和 axis 0 纵轴 1 横轴
	array([ 7, 11])
	>>> c = np.array([[3,4], [5,6]])
	>>> np.where(c > 5, aarr, barr) # 筛选组合，判断c的同位置元素如果大于5则选aarr同位置的，否则选barr的
	array([[3, 4],
	[5, 3]])
	>>> def func(i, j):
	... return (i+1) * (j+1)
	...
	>>> np.fromfunction(func, (9,9)) # 通过1个函数来创建数组，第二个参数为数组的行与列数，传给func的参数行列的索引
	array([[ 1., 2., 3., 4., 5., 6., 7., 8., 9.],
	[ 2., 4., 6., 8., 10., 12., 14., 16., 18.],
	[ 3., 6., 9., 12., 15., 18., 21., 24., 27.],
	[ 4., 8., 12., 16., 20., 24., 28., 32., 36.],
	[ 5., 10., 15., 20., 25., 30., 35., 40., 45.],
	[ 6., 12., 18., 24., 30., 36., 42., 48., 54.],
	[ 7., 14., 21., 28., 35., 42., 49., 56., 63.],
	[ 8., 16., 24., 32., 40., 48., 56., 64., 72.],
	[ 9., 18., 27., 36., 45., 54., 63., 72., 81.]])
	>>> np.add(aarr, barr) # ufunc的数组+
	array([[5, 6],
	[8, 9]])
	>>> np.add.accumulate([1,2,3]) # reduce的方式对数组求和，并返回求和后的新的数组
	array([1, 3, 6])


	# Series 列有序字段
	>>> import pandas as pd
	>>> pd.Series([1,2.0,'a']) # 自动增加索引，类似与字典的key-value结构
	0 1
	1 2
	2 a
	dtype: object
	>>> aser = pd.Series([1,2,3], index=['a', 'b', 'c']) # 自定义索引名称
	>>> aser
	a 1
	b 2
	c 3
	dtype: int64
	>>> aser.index
	Index([u'a', u'b', u'c'], dtype='object') # 获取Series的索引
	>>> aser.values
	array([1, 2, 3]) # 获取Series的值，返回数组对象
	>>> aser['b'] # 通过索引取值
	2
	>>> aser * 2 # 运算，可类比ndarray的运算
	a 2
	b 4
	c 6
	dtype: int64
	>>> import numpy as np
	>>> np.exp(aser) # 计算自然对数的n次方
	a 2.718282
	b 7.389056
	c 20.085537
	dtype: float64
	>>> data = {'a':1, 'b':2}
	>>> index = ['a', 'b', 'c'] # 数据对齐，如果没有对应索引的数据，自动填充NaN
	>>> bser = pd.Series(data, index=index)
	>>> bser
	a 1.0
	b 2.0
	c NaN
	dtype: float64
	>>> pd.isnull(bser) # 判断是否有NaN元素，返回一个新的结果Series
	a False
	b False
	c True
	dtype: bool
	>>> cser = pd.Series({'a':2,'b':5,'d':10})
	>>> bser + cser # 运算时如果有不相称的索引也会对齐
	a 3.0
	b 7.0
	c NaN
	d NaN
	dtype: float64
	>>> aser.name # 名称属性，分别对应Series的名称与索引名称
	>>> aser.index.name
	>>> aser.name = 'names'
	>>> aser.index.name = 'col'
	>>> aser
	col
	a 1
	b 2
	c 3
	Name: names, dtype: int64


	# DataFrame 数据帧表
	>>> data = {'col1': [1,2,3], 'col2': [3,4,5]}
	>>> adf = pd.DataFrame(data)
	>>> adf
	col1 col2
	0 1 3
	1 2 4
	2 3 5
	>>> adf['col1'] # 通过列名获取单列数据，返回Series对象
	0 1
	1 2
	2 3
	Name: col1, dtype: int64
	>>> adf.col1 # . 也可以用
	0 1
	1 2
	2 3
	Name: col1, dtype: int64
	>>> adf.ix[0] # 通过索引获取一行数据，返回Series
	col1 1
	col2 3
	Name: 0, dtype: int64
	>>> adf.col1 = 'admin' # 重写一列的值
	>>> adf
	col1 col2
	0 admin 3
	1 admin 4
	2 admin 5
	>>> adf['col1'] = [10,9,8]
	>>> adf
	col1 col2
	0 10 3
	1 9 4
	2 8 5
	>>> del adf['col1'] # 删除一列
	>>> adf
	col2
	0 3
	1 4
	2 5
	>>> adf.index.name = 'index' # 索引名
	>>> adf
	col2
	index
	0 3
	1 4
	2 5