"""
This module calculates pairwise distances for data that is a numpy array of lists of strings

parameters:
* X : (numpy array(list(string))) a numpy array of lists of strings
"""

import numpy as np
from itertools import combinations

def calculate_pairwise_distance(X):
    """Build the symmetric matrix of pairwise distances between entries of X.

    parameters:
    * X : (numpy array) array whose first axis indexes the samples; every
      pair X[i], X[j] with i < j is passed to jaccard_seq

    return:
    (numpy array) : square float matrix of shape (n, n) with n = X.shape[0],
      zeros on the diagonal and entry [i, j] equal to entry [j, i]
    """
    # TODO : this function is very slow need improvement
    n_samples = X.shape[0]
    upper = np.zeros((n_samples, n_samples))

    # Visit each unordered pair exactly once (row < col), which fills only
    # the strict upper triangle of the matrix.
    for row in range(n_samples - 1):
        for col in range(row + 1, n_samples):
            upper[row, col] = jaccard_seq(X[row], X[col])

    # Mirror the upper triangle into the lower one; the diagonal is removed
    # once so that it is not counted twice by the sum with the transpose.
    diagonal = np.diag(np.diag(upper))
    return upper + upper.T - diagonal

"""
This function calculates the Jaccard distance score

parameters:
x : (numpy array) first string sequence
y : (numpy array) second string sequence

return:
(float) : Jaccard distance score (1 - Jaccard similarity)
"""

def jaccard_seq(x, y):
    """Return the Jaccard distance between two sequences of hashable items.

    Both sequences are reduced to sets first, so repeated items count once
    and the distance between a sequence and itself is always 0.

    parameters:
    x : (sequence) first string sequence
    y : (sequence) second string sequence

    return:
    (float) : Jaccard distance, 1 - |x & y| / |x | y|, in the range [0, 1];
              0.0 when both sequences are empty
    """
    set_x = set(x)
    set_y = set(y)
    num_intersect = len(set_x & set_y)

    # Inclusion-exclusion on the de-duplicated sizes. Using the raw sequence
    # lengths here would overstate the union whenever an item is repeated.
    num_union = len(set_x) + len(set_y) - num_intersect

    if num_union == 0:
        # Two empty sequences are identical; this also avoids a
        # ZeroDivisionError below.
        return 0.0

    return 1 - (num_intersect / num_union)