# 【3】数据分析-3-python迭代工具--itertools

## 一、 无限迭代器

### 1.count()

count(start[, step])

>>> import itertools
>>> seq=itertools.count(4)
>>> for i in seq:
print i

4
5
6
7
8
9
10
…


### 2.cycle()

>>> cs=itertools.cycle("abcd")
>>> for c in cs:
print c

a
b
c
d
a
b
c
d
…


### 3. repeat()

>>> ns = itertools.repeat('A', 10)
>>> for n in ns:
... print n
...



### 4. takewhile()

>>> natuals = itertools.count(1)
>>> ns = itertools.takewhile(lambda x: x <= 10, natuals)
>>> for n in ns:
... print n
...



## 二、其他的几个迭代函数

### 5. chain()

>>> for c in itertools.chain("abc","123"):
print c

a
b
c
1
2
3


### 6. groupby()

groupby()把迭代器中相邻的重复元素挑出来放在一起：

groupby(iterable[, keyfunc])

>>> for key, group in itertools.groupby('AAABBBCCAAA'):
... print key, list(group) # 为什么这里要用list()函数呢？
...
A ['A', 'A', 'A']
B ['B', 'B', 'B']
C ['C', 'C']
A ['A', 'A', 'A']


>>> for key, group in itertools.groupby('AaaBBbcCAAa', lambda c: c.upper()):
... print key, list(group)
...
A ['A', 'a', 'a']
B ['B', 'B', 'b']
C ['c', 'C']
A ['A', 'A', 'a']

from itertools import *
a = ['aa', 'ab', 'abc', 'bcd', 'abcde']
for i, k in groupby(a, len):#按照字符串的长度对a的每个元素进行分组
    for m in k:
        print m,
    print i

aa ab 2
abc bcd 3
abcde 5


### 7. imap()

imap()和map()的区别在于，imap()返回的是一个迭代对象，只有在迭代时才逐个计算结果，而map()会立即计算并返回完整的list；因此imap()可以作用于无穷序列（如下例中的count(1)），并在最短的序列耗尽时停止。

>>> for x in itertools.imap(lambda x, y: x * y, [10, 20, 30], itertools.count(1)):
... print x
...
10
40
90


>>> r = map(lambda x: x*x, [1, 2, 3])
>>> r # r已经计算出来了
[1, 4, 9]

>>> r = itertools.imap(lambda x: x*x, [1, 2, 3])
>>> r
<itertools.imap object at 0x103d3ff90>
# r只是一个迭代对象

>>> for x in r:
... print x
...
1
4
9


### 9. islice()

islice(seq[, start], stop[, step])


Python读取指定行数

from itertools import islice

input_file = open("C:\\Python34\\test.csv")
for line in islice(input_file, 1, None):


for each_line in islice(data, 0, 2):
print each_line


### 10. izip()

izip(*iterator)
import itertools
listone = ['a','b','c']
listtwo = ['11','22','abc']
listthree = listone + listtwo
for item in itertools.izip(listone,listtwo):
print item,



### 11. product()：从每个list中各选择一个元素

list(itertools.product([1, 5, 8], [0.5, 4]))

[(1, 0.5), (1, 4), (5, 0.5), (5, 4), (8, 0.5), (8, 4)]


aa =[[1, 5, 8], [0.5, 4],['a','m']]
list(itertools.product(*aa))

[(1, 0.5, 'a'),
(1, 0.5, 'm'),
(1, 4, 'a'),
(1, 4, 'm'),
(5, 0.5, 'a'),
(5, 0.5, 'm'),
(5, 4, 'a'),
(5, 4, 'm'),
(8, 0.5, 'a'),
(8, 0.5, 'm'),
(8, 4, 'a'),
(8, 4, 'm')]


## PS

product('ABCD', repeat=2)       AA AB AC AD BA BB BC BD CA CB CC CD DA DB DC DD
permutations('ABCD', 2)     AB AC AD BA BC BD CA CB CD DA DB DC
combinations('ABCD', 2)     AB AC AD BC BD CD
combinations_with_replacement('ABCD', 2)        AA AB AC AD BB BC BD CC CD DD


## 参考资料

http://blog.csdn.net/vernice/article/details/46501885

http://blog.csdn.net/xiaocaiju/article/details/6968123

http://www.wklken.me/posts/2013/08/20/python-extra-itertools.html

http://www.ibm.com/developerworks/cn/linux/l-cpyiter/

http://www.liaoxuefeng.com/wiki/001374738125095c955c1e6d8bb493182103fac9270762a000/001415616001996f6b32d80b6454caca3d33c965a07611f000

http://outofmemory.cn/code-snippet/2390/python-itertools-module-learn-note