# 【6.1.1】numpy的ndarray与pandas的series和dataframe、list、dict之间互转

data可以为list、Series、dict

## 一、初始化数据

#### 创建Series数据

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Three rows of [year, state, pop], one per index label used below.
# NOTE(review): the third row was missing from these notes (the list literal
# was never closed). pop=2.4 matches the output shown in section 2.3; the
# year and state of that row are placeholders -- confirm against the real data.
data = [[2000, 'Ohino', 1.5],
        [2001, 'Ohino', 1.7],
        [2002, 'Ohino', 2.4]]
# Each inner list is stored as one object-dtype element of the Series.
ser = Series(data, index=['one', 'two', 'three'])

# print(...) with a single argument behaves the same on Python 2 and 3.
print('Ser的结果:\n')
print(ser)


Ser的结果:

one       [2000, Ohino, 1.5]
two       [2001, Ohino, 1.7]
dtype: object


#### 创建dataframe：

# Build a frame from the row lists: one row per index label, one column per
# field of each inner list.
df = DataFrame(data, index=['one', 'two', 'three'], columns=['year', 'state', 'pop'])

# print(...) with a single argument behaves the same on Python 2 and 3.
print('\ndataframe的结果:\n')
print(df)


dataframe的结果:

year   state  pop
one    2000   Ohino  1.5
two    2001   Ohino  1.7


## 二、数据转换

### 2.1 series 转matrix

# .values exposes the Series' underlying ndarray. Series.as_matrix() was
# deprecated in pandas 0.23 and removed in 1.0, so it is avoided here.
foo = ser.values

# print(...) with a single argument behaves the same on Python 2 and 3.
print('\n ser to matrix的结果:\n')
print(foo)


 ser to matrix的结果:

[list([2000, 'Ohino', 1.5])
list([2001, 'Ohino', 1.7])


### 2.2 Series转frame

type(month)
pandas.core.series.Series


import pandas as pd

# Turn the Series into a two-column frame: its index goes into 'month' and
# its values into 'numbers'.
dict_month = dict(month=month.index, numbers=month.values)
df_month = pd.DataFrame(data=dict_month)


### 2.2 dataframe转matrix

# .values returns the frame's contents as a 2-D ndarray. DataFrame.as_matrix()
# was deprecated in pandas 0.23 and removed in 1.0, so it is avoided here.
foo = df.values

# print(...) with a single argument behaves the same on Python 2 and 3.
print('\n dataframe to matrix的结果:\n')
print(foo)


 dataframe to matrix的结果:

[[2000 'Ohino' 1.5]
[2001 'Ohino' 1.7]


### 2.3 dataframe转array

# np.array() accepts a DataFrame directly and yields a 2-D ndarray of its cells.
foo_2 = np.array(df)

# print(...) with a single argument behaves the same on Python 2 and 3.
print('\n dataframe to array的结果:\n')
print(foo_2)

[[2000 'Ohino' 1.5]
[2001 'Ohino' 1.7]


# Select the column with a list so the result stays 2-D (one column wide),
# matching what as_matrix(['pop']) returned before it was removed in pandas 1.0.
foo_3 = df[['pop']].values
# print(...) with a single argument behaves the same on Python 2 and 3.
print('\n dataframe to array的结果:\n')
print(foo_3)


 dataframe to array的结果:

[[1.5]
[1.7]
[2.4]]


### 2.4 转成list

import pandas as pd

>>> df = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})

>>> df['a'].values.tolist()
[1, 3, 5, 7, 4, 5, 6, 4, 7, 8, 9]
or you can just use

>>> df['a'].tolist()
[1, 3, 5, 7, 4, 5, 6, 4, 7, 8, 9]
To drop duplicates you can do one of the following:

>>> df['a'].drop_duplicates().values.tolist()
[1, 3, 5, 7, 4, 6, 8, 9]

>>> list(set(df['a'])) # as pointed out by EdChum
[1, 3, 4, 5, 6, 7, 8, 9]

# convert df to list[list]
>>> df.values.tolist()

# convert series to list
>>> Series.tolist()


### 2.5 dataframe 转dict

import pandas as pd
# Small two-column frame with string row labels.
df = pd.DataFrame({'col1': [1, 2],
                   'col2': [0.5, 0.75]},
                  index=['row1', 'row2'])

# Re-index by 'col1' so its values become the keys of the inner dict:
# {column -> {col1 value -> cell}}.
df_2 = df.set_index('col1').to_dict()
# print(...) with a single argument behaves the same on Python 2 and 3.
print(df_2)

# Picking the single remaining column gives a flat {col1 value -> col2 value} dict.
print(df.set_index('col1')['col2'].to_dict())


{'col2': {1: 0.5, 2: 0.75}}

{1: 0.5, 2: 0.75}


>>> df = pd.DataFrame({'col1': [1, 2],
...                    'col2': [0.5, 0.75]},
...                   index=['row1', 'row2'])
>>> df
col1  col2
row1     1  0.50
row2     2  0.75
>>> df.to_dict()
{'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
You can specify the return orientation.

>>> df.to_dict('series')
{'col1': row1    1
row2    2
Name: col1, dtype: int64,
'col2': row1    0.50
row2    0.75
Name: col2, dtype: float64}

>>> df.to_dict('split')
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
'data': [[1, 0.5], [2, 0.75]]}

>>> df.to_dict('records')
[{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]

>>> df.to_dict('index')
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
You can also specify the mapping type.

>>> from collections import OrderedDict, defaultdict
>>> df.to_dict(into=OrderedDict)
OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])

If you want a defaultdict, you need to initialize it:

>>> dd = defaultdict(list)
>>> df.to_dict('records', into=dd)
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]


In [1]: import pandas as pd

In [2]: import numpy as np

In [3]: df = pd.DataFrame({'colA' : list('AABCA'), 'colB' : ['X',np.nan,'Ya','Xb','Xa'],'colC' : [100,50,30,50,20], 'colD': [90,60,60,80,50]})

In [4]: df
Out[4]:
colA colB  colC  colD
0    A    X   100    90
1    A  NaN    50    60
2    B   Ya    30    60
3    C   Xb    50    80
4    A   Xa    20    50

In [5]: df.to_dict(orient='dict')
Out[5]:
{'colA': {0: 'A', 1: 'A', 2: 'B', 3: 'C', 4: 'A'},
'colB': {0: 'X', 1: nan, 2: 'Ya', 3: 'Xb', 4: 'Xa'},
'colC': {0: 100, 1: 50, 2: 30, 3: 50, 4: 20},
'colD': {0: 90, 1: 60, 2: 60, 3: 80, 4: 50}}

In [6]: df.to_dict(orient='list')
Out[6]:
{'colA': ['A', 'A', 'B', 'C', 'A'],
'colB': ['X', nan, 'Ya', 'Xb', 'Xa'],
'colC': [100, 50, 30, 50, 20],
'colD': [90, 60, 60, 80, 50]}

In [7]: df.to_dict(orient='series')
Out[7]:
{'colA': 0    A
1    A
2    B
3    C
4    A
Name: colA, dtype: object, 'colB': 0      X
1    NaN
2     Ya
3     Xb
4     Xa
Name: colB, dtype: object, 'colC': 0    100
1     50
2     30
3     50
4     20
Name: colC, dtype: int64, 'colD': 0    90
1    60
2    60
3    80
4    50
Name: colD, dtype: int64}

In [8]: df.to_dict(orient='split')
Out[8]:
{'columns': ['colA', 'colB', 'colC', 'colD'],
'data': [['A', 'X', 100, 90],
['A', nan, 50, 60],
['B', 'Ya', 30, 60],
['C', 'Xb', 50, 80],
['A', 'Xa', 20, 50]],
'index': [0, 1, 2, 3, 4]}

In [9]: df.to_dict(orient='records')
Out[9]:
[{'colA': 'A', 'colB': 'X', 'colC': 100, 'colD': 90},
{'colA': 'A', 'colB': nan, 'colC': 50, 'colD': 60},
{'colA': 'B', 'colB': 'Ya', 'colC': 30, 'colD': 60},
{'colA': 'C', 'colB': 'Xb', 'colC': 50, 'colD': 80},
{'colA': 'A', 'colB': 'Xa', 'colC': 20, 'colD': 50}]

In [10]: df.to_dict(orient='index')
Out[10]:
{0: {'colA': 'A', 'colB': 'X', 'colC': 100, 'colD': 90},
1: {'colA': 'A', 'colB': nan, 'colC': 50, 'colD': 60},
2: {'colA': 'B', 'colB': 'Ya', 'colC': 30, 'colD': 60},
3: {'colA': 'C', 'colB': 'Xb', 'colC': 50, 'colD': 80},
4: {'colA': 'A', 'colB': 'Xa', 'colC': 20, 'colD': 50}}