# 【3】数据分析--10--科学计算--Pandas--8--numpy的ndarray与pandas的series和dataframe、list、dict之间互转

data可以为list、series、dict

## 一、初始化数据

#### 创建Series数据

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Sample records: one [year, state, pop] list per row.
# NOTE(review): the original literal was cut off after the second row and was
# never closed, although three index labels are supplied below. The third row
# is reconstructed: pop 2.4 matches the single-column output shown in
# section 2.3; the year 2002 is an assumption -- confirm against the source.
data = [
    [2000, 'Ohino', 1.5],
    [2001, 'Ohino', 1.7],
    [2002, 'Ohino', 2.4],
]

# A list of lists yields an object-dtype Series whose elements are the inner
# lists themselves (one whole row per index label).
ser = Series(data, index=['one', 'two', 'three'])

# print(...) with a single argument behaves the same on Python 2 and 3.
print('Ser的结果:\n')
print(ser)


Ser的结果:

one       [2000, Ohino, 1.5]
two       [2001, Ohino, 1.7]
dtype: object


#### 创建dataframe：

# Build a DataFrame from the same rows: one index label per row and one
# column name per position inside a row.
df = DataFrame(data, index=['one', 'two', 'three'],
               columns=['year', 'state', 'pop'])

# print(...) with a single argument behaves the same on Python 2 and 3.
print('\ndataframe的结果:\n')
print(df)


dataframe的结果:

year   state  pop
one    2000   Ohino  1.5
two    2001   Ohino  1.7


## 二、数据转换

### 2.1 series 转matrix

# as_matrix() was deprecated in pandas 0.23 and removed in 1.0; the
# .values attribute returns the same underlying numpy ndarray.
foo = ser.values

print('\n ser to matrix的结果:\n')
print(foo)


 ser to matrix的结果:

[list([2000, 'Ohino', 1.5])
list([2001, 'Ohino', 1.7])


### 2.2 dataframe转matrix

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the frame's contents as a 2-D numpy ndarray.
foo = df.values

print('\n dataframe to matrix的结果:\n')
print(foo)


 dataframe to matrix的结果:

[[2000 'Ohino' 1.5]
[2001 'Ohino' 1.7]


### 2.3 dataframe转array

# np.array() accepts a DataFrame directly and copies its cell values into a
# new 2-D ndarray (index and column labels are dropped).
foo_2 = np.array(df)

print('\n dataframe to array的结果:\n')
print(foo_2)

[[2000 'Ohino' 1.5]
[2001 'Ohino' 1.7]


# as_matrix(columns) was removed in pandas 1.0. Selecting with a list of
# column names keeps the result two-dimensional, so .values has shape (n, 1).
foo_3 = df[['pop']].values
print('\n dataframe to array的结果:\n')
print(foo_3)


 dataframe to array的结果:

[[1.5]
[1.7]
[2.4]]


### 2.4 转成list

import pandas as pd

>>> df = pd.DataFrame({'a':[1,3,5,7,4,5,6,4,7,8,9],
'b':[3,5,6,2,4,6,7,8,7,8,9]})

>>> df['a'].values.tolist()
[1, 3, 5, 7, 4, 5, 6, 4, 7, 8, 9]
or you can just use

>>> df['a'].tolist()
[1, 3, 5, 7, 4, 5, 6, 4, 7, 8, 9]
To drop duplicates you can do one of the following:

>>> df['a'].drop_duplicates().values.tolist()
[1, 3, 5, 7, 4, 6, 8, 9]

>>> list(set(df['a'])) # as pointed out by EdChum
[1, 3, 4, 5, 6, 7, 8, 9]

# convert df to list[list]
>>> df.values.tolist()

# convert series to list
>>> df['a'].tolist()  # tolist() must be called on a Series instance


### 2.5 dataframe 转dict

import pandas as pd
df = pd.DataFrame(
    {'col1': [1, 2], 'col2': [0.5, 0.75]},
    index=['row1', 'row2'],
)

# With col1 promoted to the index, to_dict() nests the result as
# {column_name: {index_value: cell}}.
df_2 = df.set_index('col1').to_dict()
print(df_2)

# Selecting the one remaining column first gives a flat {col1: col2} mapping.
print(df.set_index('col1')['col2'].to_dict())


{'col2': {1: 0.5, 2: 0.75}}

{1: 0.5, 2: 0.75}


>>> df = pd.DataFrame({'col1': [1, 2],
...                    'col2': [0.5, 0.75]},
...                   index=['row1', 'row2'])
>>> df
col1  col2
row1     1  0.50
row2     2  0.75
>>> df.to_dict()
{'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
You can specify the return orientation.

>>> df.to_dict('series')
{'col1': row1    1
row2    2
Name: col1, dtype: int64,
'col2': row1    0.50
row2    0.75
Name: col2, dtype: float64}

>>> df.to_dict('split')
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
'data': [[1, 0.5], [2, 0.75]]}

>>> df.to_dict('records')
[{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]

>>> df.to_dict('index')
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
You can also specify the mapping type.

>>> from collections import OrderedDict, defaultdict
>>> df.to_dict(into=OrderedDict)
OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])

If you want a defaultdict, you need to initialize it:

>>> dd = defaultdict(list)
>>> df.to_dict('records', into=dd)
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]