# python - 在numpy中快速查找对称对

## pandas (4)

### `frozenset`

``````mask = pd.Series(map(frozenset, zip(df.c1, df.c2))).duplicated()``````

``````from itertools import product
import numpy as np  # needed by the NumPy-based snippets (np.sort, np.unique)
import pandas as pd

# Build every ordered pair (c1, c2) with both values in 0..9 (100 rows),
# then keep a random sample of 90 of those rows.
df = pd.DataFrame.from_records(product(range(10), range(10)))
df = df.sample(90)  # unseeded, so the exact rows differ from run to run
df.columns = ["c1", "c2"]
# Order the rows by (c1, c2) and renumber the index from 0.
df = df.sort_values(df.columns.tolist()).reset_index(drop=True)
#     c1  c2
# 0    0   0
# 1    0   1
# 2    0   2
# 3    0   3
# 4    0   4
# ..  ..  ..
# 85   9   4
# 86   9   5
# 87   9   7
# 88   9   8
# 89   9   9
#
# [90 rows x 2 columns]``````

``````a = np.sort(df.values)
_, ix = np.unique(a, return_index=True, axis=0)

print(df.iloc[ix, :])

    c1  c2
0    0   0
1    0   1
20   2   0
3    0   3
40   4   0
50   5   0
6    0   6
70   7   0
8    0   8
9    0   9
11   1   1
21   2   1
13   1   3
41   4   1
51   5   1
16   1   6
71   7   1
...``````

``````a= np.sort(df.to_numpy(), axis=1)
df.groupby([a[:,0], a[:,1]], as_index=False, sort=False).first()``````

``````a= np.sort(df.to_numpy(), axis=1)

(df.assign(one=a[:,0], two=a[:,1])   # one and two can be changed
.drop_duplicates(['one','two'])   # taken from above
.reindex(df.columns, axis=1)
)``````

``df[~pd.DataFrame(np.sort(df.values,1)).duplicated().values]``

``````s=pd.crosstab(df.c1,df.c2)