#!/usr/bin/python
# -*- coding: utf-8 -*-
# --------------------------------------------------
# File Name: exp_2a.py
# Location:
# Purpose:
# Creation Date: 29-10-2017
# Last Modified: Wed, Jan 31, 2018 9:18:26 PM
# Author(s): Mike Stout
# Copyright 2017 The Author(s) All Rights Reserved
# Credits:
# --------------------------------------------------
import sys
import numpy as np
#----------------------------------------------------------------
# https://github.com/aflc/editdistance
import editdistance
def ed(q,f):
k = 1. # Not Normalised
# k = float(len(q)+len(f)) # Normalised
return editdistance.eval(q,f) / k
#----------------------------------------------------------------
from mlpy import dtw_subsequence, dtw_std
# Encode Lower and upercase letters to same values ...
lc = 'abcdefghijklmnopqrstuvwxyz'
lc_lut = [ (a, i) for i,a in enumerate(lc) ]
uc_lut = [ (a.upper(), i) for a,i in lc_lut ]
lut = dict( lc_lut + uc_lut )
#def encode(c): return ord(c.lower())*200
def encChar(c):
try: val = lut[c.lower()]
except: val = ord(c) #* 2
return val
def isOK(c):
return c in "aeiouAEIOUst"
def encString(s):
#if s=='of': s='from'
s = filter(isOK, s) ## NB consider only vowels...
s_ = map(encChar, s)#[::-1]
#s_ = map(ord, s)
val = sum(s_)
#val = float( ''.join(map(str, s_)) + ".0")
if val==0: return 0.
else: return 1.0/val
def encode(s):
return np.array(map(encString, s))
#return np.array(map(float, map(ord, s)))
'''
def splits(n, f, xs):
return [ f+":\t" + xs[i:i+n] for i in range(0, len(xs), n)]
def fix(xs, i, path):
if i+1<len(path) and path[i]!=path[i+1]: return xs[path[i]]
else: return "_" # xs[path[i]]
def recover(xs, ys, xp, yp):
xl,yl,flip = len(xp),len(yp),False
k = xl
if xl<yl: xl,yl,Flip = yl,xl.True
res = []
xoff = 0, 0
for i in xrange(yl):
while xp[i+xoff]==yp[i+yoff]: res.append((xp[i+xoff],xp[i+yoff]))
while xp[i+xoff]!=yp[i+yoff]: xoff+=1
res = []
for i in xrange(k):
x = xs[i]
y = ys[i]
if xs[i]==ys[i]: = x,y # res.append((x,y))
if xs[i+1]==xs[i]:
for xy in recover(xs, ys[i+1:], xs, yp[i+1:]):
if ys[i+1]==ys[i]: return recover(xs, ys[i+1:], xs, yp[i+1:])
re
path_ = (path + path[:1]) # [::-1]
xs_ = (xs + xs[:1])
return ([ fix(xs_, i, path_) for i in xrange(len(path_))])[::-1]
'''
def rl_enc(input_string):
count = 1
prev = ''
lst = []
for item in input_string:
if item != prev:
if prev:
entry = (prev,count)
lst.append(entry)
#print lst
count = 1
prev = item
else:
count += 1
else:
entry = (item,count)
lst.append(entry)
return lst
def rl_dec(lst):
xs = []
for i,(item,n) in enumerate(lst):
if n==1: xs.append([item])
else: xs.append([item] + [str((i,n-1))])
return [item for sublist in xs for item in sublist]
'''
def rl_dec(lst):
q = ""
for character, count in lst:
q += character * count
return q
'''
def recover(xs):
return rl_dec( rl_enc(xs) )
def decode(xs,x):
return [ x[i] for i in xs]
def fixLen((i,(x,y))):
if x=='_': x = str(i) # *(len(y)) # -len(x))
if y=='_': y = str(i) # *(len(x)) # -len(y))
return x,y
def dtw(a,b):
#a = a[::-1]
#b = b[::-1]
x,y,flip = a,b, False
if len(a) > len(b): x,y,flip = b,a,True
xe = encode(x[::-1])
ye = encode(y[::-1])
#dist, cost, path = dtw_subsequence(xe, ye)
dist, cost, path = dtw_std(xe, ye, dist_only=False)
p0, p1 = path
xa = recover(decode(p0,x))
ya = recover(decode(p1,y))
xa,ya = zip(*map(fixLen, enumerate(zip(xa,ya))))
#xa = xa[::-1]
#ya = ya[::-1]
al = xa,ya
if flip: al = ya,xa
return dist, al
#----------------------------------------------------------------
from scipy.stats import entropy # == KL Divergence
def jsd(x,y):
P = np.array(map(float, encode(x)))
Q = np.array(map(float, encode(y)))
M = 0.5 * (P + Q)
return 0.5 * (entropy(P, M) + entropy(Q, M))
#----------------------------------------------------------------
from sklearn.metrics import normalized_mutual_info_score
def nmi(x,y):
x = encode(x)
y = encode(y)
return normalized_mutual_info_score(x,y)