Python list去重及找出,统计重复项

时间:2023-03-09 22:01:10
Python list去重及找出,统计重复项

http://bbs.chinaunix.net/thread-1680208-1-1.html

如何找出 python list 中有重复的项

http://www.cnblogs.com/feisky/archive/2012/12/06/2805251.html

比较容易记忆的是用内置的set
# De-duplicate with the built-in set. A set is unordered, so the order of
# the resulting list is not guaranteed to match the order in l1.
l1 = ['b','c','d','b','c','a','a']
l2 = list(set(l1))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)

还有一种据说速度更快的,没测试过两者的速度差别
# De-duplicate through dictionary keys: every distinct item becomes one key.
l1 = ['b','c','d','b','c','a','a']
# list(...) keeps l2 a real list on Python 3, where dict.keys() returns a
# view object instead of a list.
l2 = list(dict.fromkeys(l1))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)

这两种都有个缺点,去除重复元素后顺序变了(Python 3.7 及以后的 dict 会保持插入顺序,fromkeys 的写法不再有这个问题):
['a', 'c', 'b', 'd']

如果想要保持它们原来的顺序:

用list类的sort方法
# De-duplicate with a set, then restore the original order by sorting each
# item on the index of its first occurrence in l1.
l1 = ['b','c','d','b','c','a','a']
l2 = list(set(l1))
# l1.index scans l1 from the start for every item, so this is quadratic in
# the worst case; fine for short lists.
l2.sort(key=l1.index)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)
也可以这样写
# Same idea in one expression: sorted() returns a new list of the distinct
# items ordered by the index of their first occurrence in l1.
l1 = ['b','c','d','b','c','a','a']
l2 = sorted(set(l1),key=l1.index)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)

也可以用遍历
# Order-preserving de-duplication with an explicit loop: an item is appended
# only the first time it is seen.
l1 = ['b','c','d','b','c','a','a']
l2 = []
for i in l1:
    # Membership is tested against the result list, so items only need to
    # support equality comparison (they do not have to be hashable).
    if i not in l2:
        l2.append(i)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)
上面的代码也可以这样写
# One-line variant of the explicit loop.
l1 = ['b','c','d','b','c','a','a']
l2 = []
# NOTE: the comprehension is used only for its side effect on l2; the list
# of None values it builds is thrown away. Prefer the explicit for-loop in
# production code.
[l2.append(i) for i in l1 if i not in l2]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(l2)

这样就可以保证顺序不变了:
['b', 'c', 'd', 'a']

转自:http://blog.****.net/rainharder/article/details/5728443

 #!/usr/bin/env python
# coding: utf-8
"""Report which ``pos`` values occur more than once in a pos/pts dump file.

Each record line of the dump looks like ``###pos=350143600,pts=2676718###``.
"""
import os
import sys
import string
import operator
import re
import threading
import csv
from time import sleep, ctime
from collections import defaultdict
from collections import Counter

_POS_PREFIX = "###pos="
_PTS_SEPARATOR = ",pts="
_RECORD_SUFFIX = "###"


def _parse_record(line):
    """Return ``(pos, pts)`` as strings, or ``None`` if *line* is not a record."""
    # Strip the line terminator explicitly instead of assuming one exists:
    # the last line of a file may have no trailing newline.
    text = line.rstrip("\r\n")
    if not text.startswith(_POS_PREFIX):
        return None
    pos, separator, pts = text[len(_POS_PREFIX):].partition(_PTS_SEPARATOR)
    if not separator:
        # Starts like a record but has no ",pts=" part (e.g. a truncated line).
        return None
    if pts.endswith(_RECORD_SUFFIX):
        pts = pts[:-len(_RECORD_SUFFIX)]
    return pos, pts


def _index_by_value(values):
    """Map each distinct value to the list of indexes at which it occurs."""
    indexes = defaultdict(list)
    for index, value in enumerate(values):
        indexes[value].append(index)
    return indexes


def test_01(filename="./aa"):
    """Print every ``pos`` value that appears more than once in *filename*.

    Lines that do not start with ``###pos=`` or that lack a ``,pts=`` part
    are skipped.

    Output, in order:
      * one ``(pos, [record indexes])`` tuple per duplicated pos value,
      * ``poslen:<n>,dtslen<n>`` with the number of parsed pos / pts values,
      * ``<distinct pos values>,<duplicated pos values>``.

    Args:
        filename: path of the dump file to scan; defaults to ``./aa``.

    Returns:
        None.
    """
    poslist = []
    dtslist = []
    # The context manager closes the file even if reading raises.
    with open(filename) as f:
        for line in f:
            record = _parse_record(line)
            if record is None:
                continue
            poslist.append(record[0])
            dtslist.append(record[1])

    # The same scan works for the pts values: _index_by_value(dtslist).
    # collections.Counter(values) gives plain counts when the indexes are
    # not needed.
    d = _index_by_value(poslist)
    count = 0
    for value in d.items():
        if len(value[1]) > 1:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(value)
            count += 1
    print("poslen:" + str(len(poslist)) + ",dtslen" + str(len(dtslist)))
    print(str(len(d)) + "," + str(count))


# The name matches pytest's ``test_*`` pattern, but this is a script entry
# point that needs a dump file on disk, so opt out of test collection.
test_01.__test__ = False


if __name__ == "__main__":
    test_01()
    print("finish")

aa文件中的内容如:

###pos=1349796,pts=15015###
###pos=2337820,pts=27986###
###pos=2705098,pts=29988###
###pos=6660200,pts=54721###
###pos=8055314,pts=61061###
###pos=8871800,pts=65315###
###pos=9503420,pts=68401###
###pos=12855218,pts=88338###
###pos=14253082,pts=98765###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=15813764,pts=109192###
###pos=16056146,pts=110735###
###pos=16394580,pts=113988###
###pos=17011532,pts=119911###
###pos=17257542,pts=122372###
###pos=17417974,pts=124040###
###pos=17816976,pts=128169###
###pos=17993398,pts=129838###
###pos=18302190,pts=132215###
###pos=19166088,pts=139055###
###pos=19675276,pts=143059###
###pos=19994992,pts=146146###