# 【3】数据分析-5-实用的数据类型--collections

1. namedtuple(): 生成可以使用名字来访问元素内容的tuple子类
2. deque: 双端队列，可以快速的从另外一侧追加和推出对象
3. Counter: 计数器，主要用来计数
4. OrderedDict: 有序字典
5. defaultdict: 带有默认值的字典

## 一、OrderedDict

>>> from collections import OrderedDict
>>> d = dict([('a', 1), ('b', 2), ('c', 3)])
>>> d # dict的Key是无序的
{'a': 1, 'c': 3, 'b': 2}
>>> od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
>>> od # OrderedDict的Key是有序的
OrderedDict([('a', 1), ('b', 2), ('c', 3)])


>>> od = OrderedDict()
>>> od['z'] = 1
>>> od['y'] = 2
>>> od['x'] = 3
>>> od.keys() # 按照插入的Key的顺序返回
['z', 'y', 'x']


from collections import OrderedDict as od


OrderedDict可以实现一个FIFO（先进先出）的dict，当容量超出限制时，先删除最早添加的Key

## 二、namedtuple()

namedtuple主要用来产生可以使用名称来访问元素的数据对象，通常用来增强代码的可读性， 在访问一些tuple类型的数据时尤其好用

# -*- coding: utf-8 -*-
"""

"""
from collections import namedtuple
websites = [
('Sina', 'http://www.sina.com.cn/', u'王志东'),
('163', 'http://www.163.com/', u'丁磊')
]
Website = namedtuple('Website', ['name', 'url', 'founder'])
for website in websites:
website = Website._make(website)
print website
# Result:
Website(name='Sina', url='http://www.sina.com.cn/', founder=u'\u738b\u5fd7\u4e1c')
Website(name='163', url='http://www.163.com/', founder=u'\u4e01\u78ca')


## 三、deque

deque其实是 double-ended queue 的缩写，翻译过来就是双端队列，它最大的好处就是实现了从队列 头部快速增加和取出对象: .popleft(), .appendleft() 。 你可能会说，原生的list也可以从头部添加和取出对象啊？就像这样：

l.insert(0, v)
l.pop(0)


# -*- coding: utf-8 -*-
"""

"""
import sys
import time
from collections import deque
while True:
sys.stdout.flush()
time.sleep(0.08)
# Result:
# 一个无尽循环的跑马灯
------------->-------


## 四、Counter

# -*- coding: utf-8 -*-
"""

"""
from collections import Counter
s = '''A Counter is a dict subclass for counting hashable objects. It is an unordered collection where elements are stored as dictionary keys and their counts are stored as dictionary values. Counts are allowed to be any integer value including zero or negative counts. The Counter class is similar to bags or multisets in other languages.'''.lower()
c = Counter(s)
# 获取出现频率最高的5个字符
print c.most_common(5)
# Result:
[(' ', 54), ('e', 32), ('s', 25), ('a', 24), ('t', 24)]


## 五、defaultdict

import collections
s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
# defaultdict
d = collections.defaultdict(list)
for k, v in s:
d[k].append(v)
# Use dict and setdefault
g = {}
for k, v in s:
g.setdefault(k, []).append(v)

# Use dict
e = {}
for k, v in s:
e[k] = v
##list(d.items())
##list(g.items())
##list(e.items())


list(d.items())
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
>>> list(g.items())
[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]
>>> list(e.items())
[('blue', 4), ('red', 1), ('yellow', 3)]
>>> d
defaultdict(<class 'list'>, {'blue': [2, 4], 'red': [1], 'yellow': [1, 3]})
>>> g
{'blue': [2, 4], 'red': [1], 'yellow': [1, 3]}
>>> e
{'blue': 4, 'red': 1, 'yellow': 3}
>>> d.items()
dict_items([('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])])
>>> d["blue"]
[2, 4]
>>> d.keys()
dict_keys(['blue', 'red', 'yellow'])
>>> d.default_factory
<class 'list'>
>>> d.values()
dict_values([[2, 4], [1], [1, 3]])


def get_counts(sequence):
counts = {}
for x in sequence:
if x in counts:
counts[x] += 1
else:
counts[x] = 1
return counts


from collections import defaultdict
def get_counts2(sequence):
counts = defaultdict(int) # 所有的值都被初始化为0
for x in sequence:
counts[x] +=1
return counts