# index - python list unique count

## 如何在保持順序的同時從列表中刪除重複項? (20)

使用 `reduce` 的變體，速度快 5 倍，但更複雜

``````>>> l = [5, 6, 6, 1, 1, 2, 2, 3, 4]
>>> reduce(lambda r, v: v in r[1] and r or (r[0].append(v) or r[1].add(v)) or r, l, ([], set()))[0]
[5, 6, 1, 2, 3, 4]
``````

``````default = (list(), set())
# use list to keep order
# use set to make lookup faster

def reducer(result, item):
    """Fold step for ``reduce`` that collects first occurrences in order.

    Args:
        result: A ``(ordered, seen)`` pair -- a list holding the unique items
            in first-seen order and a set used for fast membership tests.
        item: The next element of the input sequence; must be hashable.

    Returns:
        The same ``result`` pair, mutated in place, so it can be passed on as
        the accumulator for the next step.
    """
    ordered, seen = result
    if item not in seen:
        ordered.append(item)
        # Record the item; without this the membership test never succeeds
        # and every duplicate would be appended again.
        seen.add(item)
    return result

>>> reduce(reducer, l, default)[0]
[5, 6, 1, 2, 3, 4]
``````

``````python
def uniq(input):
    output = []
    for x in input:
        if x not in output:
            output.append(x)
    return output
``````

（感謝 unwind 提供這個代碼示例。）

MizardX的答案提供了多種方法的好集合。

``````mylist = [x for i,x in enumerate(mylist) if x not in mylist[i+1:]]
``````

``````>>> from  more_itertools import unique_everseen
>>> items = [1, 2, 0, 1, 3, 2]
>>> list(unique_everseen(items))
[1, 2, 0, 3]
``````

``````python
from itertools import filterfalse


def unique_everseen(iterable, key=None):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen = set()
    seen_add = seen.add
    if key is None:
        for element in filterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element
``````

``````>>> from collections import OrderedDict
>>> items = [1, 2, 0, 1, 3, 2]
>>> list(OrderedDict.fromkeys(items))
[1, 2, 0, 3]
``````

``````seen = set()
[x for x in seq if x not in seen and not seen.add(x)]
``````

``````not seen.add(x)
``````

``````python
def uniquefy_list(a):
    return uniquefy_list(a[1:]) if a[0] in a[1:] else [a[0]]+uniquefy_list(a[1:]) if len(a)>1 else [a[0]]
``````

``````python
import pandas as pd

my_list = list(range(5)) + list(range(5))  # [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
pd.Series(my_list).drop_duplicates().tolist()
# Output:
# [0, 1, 2, 3, 4]
``````

``````[l[i] for i in range(len(l)) if l.index(l[i]) == i]
``````

``````>>> from iteration_utilities import unique_everseen
>>> lst = [1,1,1,2,3,2,2,2,1,3,4]

>>> list(unique_everseen(lst))
[1, 2, 3, 4]
``````

# 計時

``````%matplotlib notebook

from iteration_utilities import unique_everseen
from collections import OrderedDict
from more_itertools import unique_everseen as mi_unique_everseen

def f7(seq):
    """Return the items of *seq* without duplicates, in first-seen order.

    Items must be hashable because they are tracked in a set.
    """
    seen = set()
    # Bind the method once so the comprehension does not repeat the
    # attribute lookup for every element.
    seen_add = seen.add
    # set.add returns None, so for an unseen item the ``or`` evaluates to a
    # falsy value (after recording the item) and the item is kept.
    return [x for x in seq if not (x in seen or seen_add(x))]

def iteration_utilities_unique_everseen(seq):
    """Benchmark wrapper: materialise ``unique_everseen(seq)`` into a list.

    ``unique_everseen`` is the name imported from ``iteration_utilities``
    earlier in this snippet.
    """
    return list(unique_everseen(seq))

def more_itertools_unique_everseen(seq):
    """Benchmark wrapper: materialise ``mi_unique_everseen(seq)`` into a list.

    ``mi_unique_everseen`` is the alias under which this snippet imports
    ``more_itertools.unique_everseen``.
    """
    return list(mi_unique_everseen(seq))

def odict(seq):
    """Return the unique items of *seq* as a list, in first-seen order.

    Uses the keys of an ``OrderedDict`` built from *seq*, so the items must
    be hashable.
    """
    return list(OrderedDict.fromkeys(seq))

from simple_benchmark import benchmark

b = benchmark([f7, iteration_utilities_unique_everseen, more_itertools_unique_everseen, odict],
{2**i: list(range(2**i)) for i in range(1, 20)},
'list size (no duplicates)')
b.plot()
``````

``````import random

b = benchmark([f7, iteration_utilities_unique_everseen, more_itertools_unique_everseen, odict],
{2**i: [random.randint(0, 2**(i-1)) for _ in range(2**i)] for i in range(1, 20)},
'list size (lots of duplicates)')
b.plot()
``````

``````b = benchmark([f7, iteration_utilities_unique_everseen, more_itertools_unique_everseen, odict],
{2**i: [1]*(2**i) for i in range(1, 20)},
'list size (only duplicates)')
b.plot()
``````

``````>>> lst = [{1}, {1}, {2}, {1}, {3}]

>>> list(unique_everseen(lst))
[{1}, {2}, {3}]
``````

¹ 免責聲明：我是該軟件包的作者。

`.get(True)` XOR `.setdefault(False)`

``````# Explanation of d.get(x,True) != d.setdefault(x,False)
#
# x in d | d[x]  | A = d.get(x,True) | x in d | B = d.setdefault(x,False) | x in d | d[x]    | A xor B
# False  | None  | True          (1) | False  | False                 (2) | True   | False   | True
# True   | False | False         (3) | True   | False                 (4) | True   | False   | False
#
# Notes
# (1) x is not in the dictionary, so get(x,<default>) returns True but does __not__ add the value to the dictionary
# (2) x is not in the dictionary, so setdefault(x,<default>) adds the {x:False} and returns False
# (3) since x is in the dictionary, the <default> argument is ignored, and the value of the key is returned, which was
#     set to False in (2)
# (4) since the key is already in the dictionary, its value is returned directly and the argument is ignored
#
# A != B is how to do boolean XOR in Python
#
def sort_with_order(s):
    """Return the items of *s* without duplicates, in first-seen order.

    Despite the name, nothing is sorted: an item is kept only the first time
    it is encountered. Items must be hashable because they are dict keys.
    """
    d = dict()
    # For an unseen x: get() yields True and setdefault() stores and yields
    # False, so the two differ and x is kept. For a seen x both yield the
    # stored False, so x is dropped. get() must be evaluated before
    # setdefault(), which the left-to-right operand order guarantees.
    return [x for x in s if d.get(x, True) != d.setdefault(x, False)]
``````

覆寫 `__missing__`（受此答案啟發）

``````python
class Tracker(dict):
    # returns True if missing, otherwise sets the value to False
    # so next time d[key] is called, the value False will be returned
    # and __missing__ will not be called again
    def __missing__(self, key):
        self[key] = False
        return True

t = Tracker()
unique_with_order = [x for x in samples if t[x]]
``````

2.5 版本中的新增功能：如果 dict 的子類定義了 `__missing__()` 方法，則當鍵 key 不存在時，`d[key]` 操作會以該鍵作為參數調用此方法。此時 `d[key]` 操作會返回或引發 `__missing__(key)` 調用所返回或引發的任何內容。沒有其他操作或方法會調用 `__missing__()`。如果未定義 `__missing__()`，則引發 `KeyError`。`__missing__()` 必須是一個方法；它不能是實例變量。有關示例，請參閱 `collections.defaultdict`。

``````>>> list1 = [ 1,1,2,2,3,3 ]
>>> [ list1.pop(i) for i in range(len(list1))[::-1] if list1.count(list1[i]) > 1 ]
[1, 2, 3]
``````

``````python
def f7(seq):
    seen = set()
    seen_add = seen.add
    return [x for x in seq if not (x in seen or seen_add(x))]
``````

每個操作（插入、刪除和成員檢查）的複雜度均為 O(1)。

``````python
list1 = [0, 2, 4, 9]
for x in range(0, 7):
    list1.append(x)
``````

``````python
list1 = [0, 2, 4, 9]
for x in range(0, 7):
    if x not in list1:
        list1.append(x)
``````

``````    import pandas as pd
import numpy as np

uniquifier = lambda alist: pd.Series(alist).drop_duplicates().tolist()

def f7(seq):
    """Return the items of *seq* without duplicates, in first-seen order.

    Items must be hashable because they are tracked in a set.
    """
    seen = set()
    # Bound once up front; the original snippet used this name without
    # defining it, which raises NameError on the first element.
    seen_add = seen.add
    # set.add returns None, so an unseen item is recorded and then kept.
    return [x for x in seq if not (x in seen or seen_add(x))]

alist = np.random.randint(low=0, high=1000, size=10000).tolist()

print uniquifier(alist) == f7(alist)  # True
``````

``````    In [104]: %timeit f7(alist)
1000 loops, best of 3: 1.3 ms per loop
In [110]: %timeit uniquifier(alist)
100 loops, best of 3: 4.39 ms per loop
``````

`itertools` 的 recipes 中有一個函數同樣使用了 `seen` 集合這一技術，而且：

• 支持標準的 `key` 函數。
• 不使用不得體的 hack。
• 通過預先綁定 `seen.add` 來優化循環，而不是查找 N 次。（`f7` 也是這樣做的，但有些版本沒有。）
• 通過使用`ifilterfalse`優化循環，因此您只需循環遍歷Python中的獨特元素，而不是所有元素。 （當然，你仍然可以在`ifilterfalse`裡面遍歷所有的內容，但這是C語言，而且要快得多。）

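``````python
def unique(iterable):
    seen = set()
    seen_add = seen.add
    # Python 2 name; on Python 3 use itertools.filterfalse instead.
    for element in itertools.ifilterfalse(seen.__contains__, iterable):
        seen_add(element)
        yield element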
``````

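``````python
def unique(my_list):
    # NOTE: relies on the hidden '_[1]' local that old CPython 2 releases
    # created for the list being built by a comprehension; on interpreters
    # without that name this raises KeyError.
    return [x for x in my_list if x not in locals()['_[1]']]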
``````

``````l1 = [1, 2, 3, 4, 1, 2, 3, 4, 5]
l2 = [x for x in l1 if x not in locals()['_[1]']]
print l2
``````

``````[1, 2, 3, 4, 5]
``````

``````>>> l = [3, 4, 3, 6, 4, 1, 4, 8]

>>> l = [l[i] for i in range(len(l)) if i == l.index(l[i])]

>>> l = [3, 4, 6, 1, 8]
``````

對於已排序（_sorted_）的 `numpy` 數組，有一種比較有效的方法：

``````python
b = np.array([1,3,3, 8, 12, 12,12])
np.hstack([b[0], [x[0] for x in zip(b[1:], b[:-1]) if x[0]!=x[1]]])
``````

``````array([ 1,  3,  8, 12])
``````

``````l = list(set(l))
``````

...如果您的列表項不可哈希（unhashable），則不起作用。

``````l = reduce(lambda x, y: x if y in x else x + [y], l, [])
``````

``````python
def uniquify(s):
    if len(s) < 2:
        return s
    return uniquify(s[:-1]) + [s[-1]] * (s[-1] not in s[:-1])
``````

``````from itertools import groupby
[ key for key,_ in groupby(sortedList)]
``````

``````l = [1,2,2,3,3,...]
n = []
n.extend(ele for ele in l if ele not in set(n))
``````