Upload 2 files
Browse files- HaystackSearch.py +187 -0
- TMIDIX.py +139 -0
HaystackSearch.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Needle in a haystack search
|
| 3 |
+
|
| 4 |
+
Original source code is located here:
|
| 5 |
+
https://github.com/agapow/py-gsp/blob/master/gsp/motifsearch.py
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
A modifiable GSP algorithm.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
__version__ = '0.1'
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
### IMPORTS
|
| 16 |
+
|
| 17 |
+
### CONSTANTS & DEFINES
|
| 18 |
+
|
| 19 |
+
PP_INDENT = 3
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
### CODE ###
|
| 23 |
+
|
| 24 |
+
class GspSearch(object):
    """
    A generic GSP algorithm, allowing the individual parts to be overridden.

    This is set up so the object can be created once, but searched multiple
    times at different thresholds. In this generic form, we assume that the
    transactions are simply strings.
    """

    def __init__(self, raw_transactions):
        """
        C'tor, simply shaping the raw transactions into a useful form.
        """
        # Make sure filter_candidates() is usable even before search() ran.
        self.candidates = []
        self.process_transactions(raw_transactions)

    def process_transactions(self, raw_transactions):
        """
        Create the alphabet & (normalized) transactions.

        Sets ``self.transactions`` to a list of the given transactions and
        ``self.alpha`` to a list of the distinct symbols, in first-seen order.
        """
        self.transactions = []
        alpha = {}
        for r in raw_transactions:
            for c in r:
                alpha[c] = True
            self.transactions.append(r)
        # Store a real list rather than a live dict view of a local dict.
        self.alpha = list(alpha)

    def generate_init_candidates(self):
        """
        Make the initial set of candidates.

        Usually this would just be the alphabet.
        """
        return list(self.alpha)

    def generate_new_candidates(self, freq_pat):
        """
        Given existing patterns, generate a set of new patterns, one longer.

        Every ordered pair of patterns is offered to merge_candidates();
        successful merges are collected without duplicates, in the order
        they are first produced. An empty input yields an empty list.
        """
        if not freq_pat:
            # Nothing to extend; also avoids indexing freq_pat[0] below.
            return []

        old_cnt = len(freq_pat)
        old_len = len(freq_pat[0])
        print("Generating new candidates from %s %s-mers ..." % (old_cnt, old_len))

        new_candidates = []
        for c in freq_pat:
            for d in freq_pat:
                merged_candidate = self.merge_candidates(c, d)
                # List membership (not a set) so that subclasses may use
                # unhashable candidate types.
                if merged_candidate and (merged_candidate not in new_candidates):
                    new_candidates.append(merged_candidate)

        ## Postconditions & return:
        return new_candidates

    def merge_candidates(self, a, b):
        """
        Join two patterns when a's tail (all but its first element) equals
        b's head (all but its last element); otherwise return None.
        """
        if a[1:] == b[:-1]:
            return a + b[-1:]
        else:
            return None

    def filter_candidates(self, trans_min):
        """
        Return a list of (candidate, hits) pairs for the current candidates
        that occur in at least the given number of transactions.
        """
        filtered_candidates = []
        for c in self.candidates:
            curr_cand_hits = self.single_candidate_freq(c)
            if trans_min <= curr_cand_hits:
                filtered_candidates.append((c, curr_cand_hits))
        return filtered_candidates

    def single_candidate_freq(self, c):
        """
        Return the number of transactions in which the candidate is found.
        """
        hits = 0
        for t in self.transactions:
            if self.search_transaction(t, c):
                hits += 1
        return hits

    def search_transaction(self, t, c):
        """
        Does this candidate appear in this transaction?
        """
        return (t.find(c) != -1)

    def search(self, threshold):
        """
        Find the longest patterns present in at least the given fraction
        of transactions.

        ``threshold`` must satisfy 0.0 < threshold <= 1.0. Returns the list
        of (pattern, hits) pairs from the last round that still had any
        frequent pattern; an empty list when not even a single symbol is
        frequent enough.
        """
        ## Preparation:
        assert (0.0 < threshold) and (threshold <= 1.0)
        trans_cnt = len(self.transactions)
        trans_min = trans_cnt * threshold

        print("The number of transactions is: %s" % trans_cnt)
        print("The minimal support is: %s" % threshold)
        print("The minimal transaction support is: %s" % trans_min)

        ## Main:
        # generate initial candidates & do initial filter
        self.candidates = list(self.generate_init_candidates())
        print("There are %s initial candidates." % len(self.candidates))
        freq_patterns = []
        new_freq_patterns = self.filter_candidates(trans_min)
        print("The initial candidates have been filtered down to %s." % len(new_freq_patterns))

        # Keep growing patterns while the previous round left survivors.
        while new_freq_patterns:
            freq_patterns = new_freq_patterns

            # generate new candidates & filter
            self.candidates = self.generate_new_candidates([x[0] for x in freq_patterns])
            print("There are %s new candidates." % len(self.candidates))
            new_freq_patterns = self.filter_candidates(trans_min)
            print("The candidates have been filtered down to %s." % len(new_freq_patterns))

        return freq_patterns
|
| 140 |
+
|
| 141 |
+
### END ###
|
| 142 |
+
|
| 143 |
+
__version__ = '0.1'
|
| 144 |
+
|
| 145 |
+
### CONSTANTS & DEFINES
|
| 146 |
+
|
| 147 |
+
NULL_SYMBOL = 'X'
|
| 148 |
+
|
| 149 |
+
### CODE ###
|
| 150 |
+
|
| 151 |
+
def HaystackSearch(needle, haystack):
    """
    Return the index of the needle in the haystack

    Parameters:
        needle: the sequence to look for
        haystack: the sequence to look in

    Returns:
        the index of the start of the first match of needle, or -1 if it
        is not found. An empty needle matches at index 0.

    Both arguments must support len() and integer indexing, and the
    haystack elements must be hashable (they are used as dictionary keys).
    The scan follows the Boyer-Moore-Horspool scheme: each window is
    compared from its last element backwards, and on a mismatch the window
    is shifted according to the haystack element under the window's end.

    Example:

        >>> HaystackSearch([1, 2], [1, 1, 2])
        1
        >>> HaystackSearch((1, 2, 3), range(10))
        1
        >>> HaystackSearch('gh', 'abcdefghi')
        6
        >>> HaystackSearch([2, 3], [7, 8, 9])
        -1
    """
    haystack_len = len(haystack)
    needle_len = len(needle)

    # Shift table: for every needle element except the last, how far its
    # right-most occurrence is from the end of the needle.
    shift = {}
    for pos in range(needle_len - 1):
        shift[needle[pos]] = needle_len - pos - 1

    # 'end' is the haystack index aligned with the needle's last element.
    end = needle_len - 1
    while end < haystack_len:
        matched = True
        for back in range(needle_len):
            if haystack[end - back] != needle[-back - 1]:
                matched = False
                break

        if matched:
            return end - needle_len + 1

        # Elements absent from the table allow a full needle-length jump.
        end += shift.get(haystack[end], needle_len)

    return -1
|
TMIDIX.py
CHANGED
|
@@ -6895,6 +6895,145 @@ def binary_matrix_to_original_escore_notes(binary_matrix,
|
|
| 6895 |
|
| 6896 |
###################################################################################
|
| 6897 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6898 |
# This is the end of the TMIDI X Python module
|
| 6899 |
|
| 6900 |
###################################################################################
|
|
|
|
| 6895 |
|
| 6896 |
###################################################################################
|
| 6897 |
|
| 6898 |
+
def escore_notes_averages(escore_notes,
                          times_index=1,
                          durs_index=2,
                          chans_index=3,
                          ptcs_index=4,
                          vels_index=5,
                          average_drums=False,
                          score_is_delta=False,
                          return_ptcs_and_vels=False
                          ):

    """
    Compute average note statistics for a list of score notes.

    Parameters:
        escore_notes: list of indexable note records.
        times_index, durs_index, chans_index, ptcs_index, vels_index:
            positions of the time, duration, channel, pitch and velocity
            fields inside each record.
        average_drums: when False, records whose channel field equals 9
            are left out of every average.
        score_is_delta: when True the time field is used as given;
            otherwise the notes are first passed through
            delta_score_notes() (presumably converting start times to
            deltas - defined elsewhere in this module).
        return_ptcs_and_vels: also return average pitch and velocity.

    Returns:
        [avg_time, avg_duration] or, with return_ptcs_and_vels,
        [avg_time, avg_duration, avg_pitch, avg_velocity].
        Zero time values are excluded from the time average. An average
        over an empty selection is reported as 0.0 instead of raising
        ZeroDivisionError.
    """

    def _mean(values):
        # Guard against empty selections (empty score, drums-only score,
        # or a score whose time values are all zero).
        return sum(values) / len(values) if values else 0.0

    def _selected(e):
        # The channel field is only inspected when drums are excluded.
        return average_drums or e[chans_index] != 9

    timed_notes = escore_notes if score_is_delta else delta_score_notes(escore_notes)

    times = [e[times_index] for e in timed_notes if e[times_index] != 0 and _selected(e)]
    durs = [e[durs_index] for e in escore_notes if _selected(e)]

    averages = [_mean(times), _mean(durs)]

    if return_ptcs_and_vels:
        ptcs = [e[ptcs_index] for e in escore_notes if _selected(e)]
        vels = [e[vels_index] for e in escore_notes if _selected(e)]
        averages += [_mean(ptcs), _mean(vels)]

    return averages
|
| 6939 |
+
|
| 6940 |
+
###################################################################################
|
| 6941 |
+
|
| 6942 |
+
def adjust_escore_notes_timings(escore_notes,
                                adj_k=1,
                                times_index=1,
                                durs_index=2,
                                score_is_delta=False,
                                return_delta_scpre=False
                                ):

    """
    Scale the time and duration fields of every note by adj_k.

    The input is never modified in this function: a delta score is
    deep-copied, anything else goes through delta_score_notes() (defined
    elsewhere in this module) before scaling.

    Non-zero time values and all durations are multiplied by adj_k,
    rounded, and floored at 1; zero time values are kept as zero.

    With return_delta_scpre (sic - spelling kept for compatibility) the
    scaled delta score is returned directly; otherwise it is passed
    through delta_score_to_abs_score() first.
    """

    scaled_notes = copy.deepcopy(escore_notes) if score_is_delta else delta_score_notes(escore_notes)

    for note in scaled_notes:

        delta_time = note[times_index]

        # Zero deltas are left untouched.
        if delta_time != 0:
            note[times_index] = max(1, round(delta_time * adj_k))

        note[durs_index] = max(1, round(note[durs_index] * adj_k))

    return scaled_notes if return_delta_scpre else delta_score_to_abs_score(scaled_notes)
|
| 6967 |
+
|
| 6968 |
+
###################################################################################
|
| 6969 |
+
|
| 6970 |
+
def escore_notes_delta_times(escore_notes,
                             times_index=1
                             ):

    """
    Return the time field of every note after the score has been passed
    through delta_score_notes() (defined elsewhere in this module).

    times_index is the position of the time field inside each record.
    """

    delta_times = []

    for note in delta_score_notes(escore_notes):
        delta_times.append(note[times_index])

    return delta_times
|
| 6977 |
+
|
| 6978 |
+
###################################################################################
|
| 6979 |
+
|
| 6980 |
+
def escore_notes_durations(escore_notes,
                           durs_index=1
                           ):

    """
    Return the field at durs_index of every note after the score has been
    passed through delta_score_notes() (defined elsewhere in this module).

    NOTE(review): the default durs_index is 1, while other functions in
    this module default durs_index to 2 and times_index to 1 - confirm
    whether the default here is intentional.
    """

    converted_notes = delta_score_notes(escore_notes)

    durations = list(map(lambda note: note[durs_index], converted_notes))

    return durations
|
| 6987 |
+
|
| 6988 |
+
###################################################################################
|
| 6989 |
+
|
| 6990 |
+
def ordered_lists_match_ratio(src_list, trg_list):

    """
    Return the fraction of positions at which the two lists hold equal
    elements.

    Elements are paired position by position with zip(), so only the
    first min(len(src_list), len(trg_list)) positions are compared.
    When there is nothing to compare (either list is empty) the ratio is
    reported as 0.0 instead of raising ZeroDivisionError.
    """

    pairs = list(zip(src_list, trg_list))

    if not pairs:
        return 0.0

    return sum(a == b for a, b in pairs) / len(pairs)
|
| 6995 |
+
|
| 6996 |
+
###################################################################################
|
| 6997 |
+
|
| 6998 |
+
def lists_intersections(src_list, trg_list):
    """
    Return a list of the distinct elements present in both inputs.

    Both inputs are converted to sets, so their elements must be hashable
    and the order of the result is not defined.
    """
    src_items = set(src_list)
    trg_items = set(trg_list)
    common_items = src_items & trg_items
    return list(common_items)
|
| 7000 |
+
|
| 7001 |
+
###################################################################################
|
| 7002 |
+
|
| 7003 |
+
def transpose_escore_notes(escore_notes,
                           transpose_value=0,
                           channel_index=3,
                           pitches_index=4
                           ):

    """
    Return a deep copy of the notes with pitches shifted by transpose_value.

    Records whose channel field equals 9 are copied unchanged. Shifted
    pitches are limited to the range 1..127. The input list is not
    modified.
    """

    shifted_notes = copy.deepcopy(escore_notes)

    for note in shifted_notes:

        if note[channel_index] == 9:
            continue

        raised = note[pitches_index] + transpose_value

        # Clamp to the upper bound first, then to the lower bound.
        capped = raised if raised < 127 else 127
        note[pitches_index] = capped if capped > 1 else 1

    return shifted_notes
|
| 7016 |
+
|
| 7017 |
+
###################################################################################
|
| 7018 |
+
|
| 7019 |
+
def transpose_escore_notes_to_pitch(escore_notes,
                                    target_pitch_value=60,
                                    channel_index=3,
                                    pitches_index=4
                                    ):

    """
    Return a deep copy of the notes transposed so that their average
    pitch lands on target_pitch_value.

    The average pitch comes from escore_notes_averages() (default
    settings, so records on channel 9 are excluded from it). The shift is
    the difference between the rounded target and the rounded average.
    Records whose channel field equals 9 are copied unchanged; shifted
    pitches are limited to the range 1..127. The input list is not
    modified.
    """

    tr_escore_notes = copy.deepcopy(escore_notes)

    # Forward the caller's field positions so the average is computed
    # from the same channel/pitch columns that are transposed below.
    average_pitch = escore_notes_averages(escore_notes,
                                          chans_index=channel_index,
                                          ptcs_index=pitches_index,
                                          return_ptcs_and_vels=True
                                          )[2]

    transpose_delta = int(round(target_pitch_value)) - int(round(average_pitch))

    for e in tr_escore_notes:
        if e[channel_index] != 9:
            e[pitches_index] = max(1, min(127, e[pitches_index] + transpose_delta))

    return tr_escore_notes
|
| 7034 |
+
|
| 7035 |
+
###################################################################################
|
| 7036 |
+
|
| 7037 |
# This is the end of the TMIDI X Python module
|
| 7038 |
|
| 7039 |
###################################################################################
|