# python - 將索引列表轉換為2D numpy數組的最快方法

## arrays performance (4)

``````a = [
[1,2,4],
[0,2,3],
[1,3,4],
[0,2]]``````

``````output = array([
[0,1,1,0,1],
[1,0,1,1,0],
[0,1,0,1,1],
[1,0,1,0,0]])``````

``````output = np.zeros((4,5))
# Walk the index lists in `a` in lockstep with the rows of `output`.
for i, (x, y) in enumerate(zip(a, output)):
    # `y` is row i of `output`; set the listed columns to 1.
    y[x] = 1
    # Writes the row back onto itself; redundant when `output` is a NumPy
    # array (rows are views), kept as in the original approach.
    output[i] = y
print(output)``````

``````[[ 0.  1.  1.  0.  1.]
[ 1.  0.  1.  1.  0.]
[ 0.  1.  0.  1.  1.]
[ 1.  0.  1.  0.  0.]]``````

``````import numpy as np

def main():
    """Build a dense 0/1 matrix from per-row column indices and print it.

    Row ``r`` of the hard-coded index list ``a`` names the columns that are
    set to 1 in row ``r`` of the result. The (row, column) pairs are turned
    into offsets into a flat buffer so that one fancy-index assignment
    fills every entry at once; the buffer is then reshaped to 2D.
    """
    row_count = 4
    col_count = 5
    a = [[1, 2, 4], [0, 2, 3], [1, 3, 4], [0, 2]]

    # Convert each row's column indices to linear (flattened) indices:
    # linear = row_idx * col_count + col. The per-row arrays are gathered
    # in a Python list because np.append copies the array on every call.
    b = [
        np.array(row, dtype=np.int64) + row_idx * col_count
        for row_idx, row in enumerate(a)
    ]
    linear_idxs = np.hstack(b)

    # The steps above could be skipped if the input were already supplied
    # as linear indices.
    c = np.zeros(row_count * col_count)
    c[linear_idxs] = 1
    c = c.reshape(row_count, col_count)
    print(c)


if __name__ == "__main__":
    main()

#output
# [[0. 1. 1. 0. 1.]
#  [1. 0. 1. 1. 0.]
#  [0. 1. 0. 1. 1.]
#  [1. 0. 1. 0. 0.]]``````

``````ncol = 5
nrow = len(a)
out = np.zeros((nrow, ncol), int)
# Row index of every entry: row r is repeated len(a[r]) times.
rows = np.arange(nrow).repeat([*map(len, a)])
# An empty row makes np.concatenate promote the result to float64, which
# NumPy rejects as an index; cast back to an integer index dtype.
cols = np.concatenate(a).astype(np.intp, copy=False)
out[rows, cols] = 1
out
# array([[0, 1, 1, 0, 1],
#        [1, 0, 1, 1, 0],
#        [0, 1, 0, 1, 1],
#        [1, 0, 1, 0, 0]])``````

``````pp 21.717635259992676 ms
ts 37.10938713003998 ms
u9 37.32933565042913 ms``````

``````import itertools as it
import numpy as np

def make_data(n, m):
    """Generate random benchmark input: ``n`` lists of column indices below ``m``.

    A random ``n x m`` matrix is compared against one random threshold per
    row, so each row gets its own fill density. The column positions of the
    selected cells are then grouped by row.

    Args:
        n: Number of rows (index lists) to produce.
        m: Number of columns; every index is in ``range(m)``.

    Returns:
        A list of ``n`` lists of ints, each in ascending order. Rows may be
        empty.
    """
    if n == 0:
        # np.split always yields at least one piece, which would otherwise
        # come back as a single spurious empty row.
        return []
    selected = np.random.random((n, m)) < np.random.random((n, 1))
    row_ids, col_ids = np.where(selected)
    # np.where reports hits in row-major order, so row_ids is sorted and
    # searchsorted finds the offset at which each of rows 1..n-1 begins.
    row_starts = row_ids.searchsorted(np.arange(1, n))
    return [*map(np.ndarray.tolist, np.split(col_ids, row_starts))]

def pp():
    """Fill the 0/1 matrix with a single vectorised fancy-index assignment.

    Reads the module-level ``a`` (list of per-row column-index lists),
    ``nrow`` and ``ncol``.

    Returns:
        An ``(nrow, ncol)`` integer array with 1 at every listed position.
    """
    # Number of indices in each row; count=nrow lets fromiter preallocate.
    sz = np.fromiter(map(len, a), int, nrow)
    out = np.zeros((nrow, ncol), int)
    # Row coordinate of every entry: row r repeated sz[r] times.
    row_idx = np.arange(nrow).repeat(sz)
    # Column coordinates: all index lists flattened in row order.
    col_idx = np.fromiter(it.chain.from_iterable(a), int, sz.sum())
    out[row_idx, col_idx] = 1
    return out

def ts():
    """Fill the 0/1 matrix one row at a time with a Python loop.

    Reads the module-level ``a`` (list of per-row column-index lists),
    ``nrow`` and ``ncol``.

    Returns:
        An ``(nrow, ncol)`` integer array with 1 at every listed position.
    """
    out = np.zeros((nrow, ncol), int)
    for i, ix in enumerate(a):
        # out[i] is a view of row i, so assigning through it updates out.
        out[i][ix] = 1
    return out

def u9():
    """Fill the 0/1 matrix by zipping the index lists with the output rows.

    Reads the module-level ``a`` (list of per-row column-index lists),
    ``nrow`` and ``ncol``. This is the baseline variant in the benchmark.

    Returns:
        An ``(nrow, ncol)`` integer array with 1 at every listed position.
    """
    out = np.zeros((nrow, ncol), int)
    for i, (x, y) in enumerate(zip(a, out)):
        # y is a view of row i of out; set the listed columns.
        y[x] = 1
        # Writes the row back onto itself; redundant, but kept so the
        # benchmark keeps measuring this variant as it was written.
        out[i] = y
    return out

# Benchmark problem size: 1000 random index lists over 1000 columns.
nrow,ncol = 1000,1000
a = make_data(nrow,ncol)

from timeit import timeit
# Sanity check before timing: all three implementations must agree.
assert (pp()==ts()).all()
assert (pp()==u9()).all()

# timeit returns the total seconds for `number` calls; with number=100,
# multiplying by 10 converts that total to milliseconds per call.
print("pp", timeit(pp,number=100)*10, "ms")
print("ts", timeit(ts,number=100)*10, "ms")
print("u9", timeit(u9,number=100)*10, "ms")``````

``````output = np.zeros((4,5))
# For each row i, set the columns listed in a[i] to 1.
for i, ix in enumerate(a):
    # output[i] is a view of row i, so assigning through it updates output.
    output[i][ix] = 1

# output ->
#   array([[0., 1., 1., 0., 1.],
#          [1., 0., 1., 1., 0.],
#          [0., 1., 0., 1., 1.],
#          [1., 0., 1., 0., 0.]])``````