"""Various functions useful for operating on sets of genes represented as
signed integers."""

try:
    import sets  # Python 2 only; retained for code that still references it
except ImportError:  # the module was removed in Python 3
    sets = None


def genelist_equivalency(list1, list2):
    """returns True if list1 is, in genetic terms, equivalent to list2

    Two gene lists are treated as equivalent when they are equal, or when
    list2 equals list1 read in reverse order with every sign flipped
    (i.e. the same genes read from the opposite strand).

    list1 -- first list of signed integers
    list2 -- second list of signed integers

    returns True or False"""
    # Reverse the order and negate every gene to get the other strand.
    opposite_strand = [-gene for gene in reversed(list1)]
    return list1 == list2 or opposite_strand == list2


def discrim_list(list, discriminators):
    """splits a list of integers into runs of consecutive genes, keeping the
    genes named by the discriminators apart from the others

    e.g. if [1,2,3,4,5] is passed as a list, with a discriminator of [-3],
    then the returned list is [[1,2],[3],[4,5]] (ordered by original list)

    Note: the sign of a discriminator is ignored, and consecutiveness is
    evaluated separately on the discriminated and the non-discriminated
    genes (after they have been pulled apart).

    list -- list of integers to break apart
    discriminators -- list of discriminators

    returns a separated list based on list, organized by contiguousness
    and discriminators"""
    filtered = []
    discrim = []
    for gene in list:
        if gene in discriminators or -gene in discriminators:
            discrim.append(gene)
        else:
            filtered.append(gene)
    groups = consec_list(filtered)
    groups.extend(consec_list(discrim))
    # Restore the original ordering: each group is placed according to the
    # position of its first gene in the input list.
    groups.sort(key=lambda group: list.index(group[0]))
    return groups


def adv_discrim_list(list, listofdisc):
    """much like discrim_list, but instead of taking a list of signed
    integers in its second argument, it takes a list of lists of signed
    integers

    Each discriminator group is located in list by its first gene (either
    sign) and the len(group) genes starting there are kept together as one
    group; the remaining genes are grouped by consecutiveness, e.g.

        list=[1,2,3,4,5], listofdisc=[[2,3]]  ->  [[1],[2,3],[4,5]]
        list=[5,3,4,2,1], listofdisc=[[3],[1]] -> [[5],[3],[4],[2],[1]]

    (note order is retained).  This function assumes that the orders in
    list and listofdisc agree: only the first gene of each group is looked
    up, so e.g. list=[1,2,3,4,5], listofdisc=[[4,3],[1,2]] is not valid,
    because [4,3] is out of the original order of 3,4.

    Groups that are empty, or whose first gene cannot be found, are ignored.

    list -- list of integers used as base
    listofdisc -- list of list of integers to discriminate against

    returns a list of lists of genes"""
    # A comprehension is used for the copy because the parameter name
    # shadows the builtin list().
    searchlist = [gene for gene in list]
    for group in listofdisc:
        if not group:
            continue
        head = group[0]
        if head in searchlist:
            ix = searchlist.index(head)
        elif -head in searchlist:
            ix = searchlist.index(-head)
        else:
            # Not present (or already absorbed into an earlier group).
            continue
        # Replace the matched stretch with a single nested list so that
        # consec_list() treats it as one indivisible group.
        searchlist[ix:ix + len(group)] = [searchlist[ix:ix + len(group)]]
    return consec_list(searchlist)


def num_short(list):
    """turns a list of integers into a list of collapsed strings
    representing ranges

    A single gene becomes "n"; a run of consecutive genes becomes
    "(first last)", e.g. [1,2,3,5,7,8] -> ['(1 3)', '5', '(7 8)']

    list -- list to prettify

    returns a list of strings"""
    pretty_list = []
    curpos = 0
    while curpos < len(list):
        nxtpos = find_consecutives(curpos, list)
        first = list[curpos]
        last = list[nxtpos - 1]
        if nxtpos - curpos <= 1:
            pretty_list.append(str(first))
        else:
            pretty_list.append("(" + str(first) + " " + str(last) + ")")
        curpos = nxtpos
    return pretty_list


def consec_list(list):
    """turns a list of integers (genes) into a list of lists, collected by
    consecutiveness

    e.g. [1,2,3,5,7,6] -> [[1,2,3],[5],[7,6]]

    Elements that are themselves lists (as produced by adv_discrim_list)
    are passed through unchanged as their own group.

    list -- list of genes, optionally containing nested lists of genes

    returns a list of lists"""
    newlist = []
    curpos = 0
    while curpos < len(list):
        nxtpos = find_consecutives(curpos, list)
        run = list[curpos:nxtpos]
        try:
            # Probe whether the run holds an already-grouped nested list;
            # plain integers are not subscriptable and raise TypeError.
            run[0][0]
        except TypeError:
            newlist.append(run)
        else:
            # Nested group: add it as-is instead of wrapping it again.
            newlist.extend(run)
        curpos = nxtpos
    return newlist


def find_consecutives(pos, list):
    """returns the index (exclusive) of the range of numbers valid starting
    at pos.

    A range is a run in which every number differs from the previous one
    by exactly +1 (ascending) or exactly -1 (descending).  For example, in
    list [1,3,5,6,7,9], it would return, after each successive call on
    pos=0,2,5:
        1
        5
        6

    Elements that cannot be subtracted from their neighbour (e.g. nested
    lists) always end the run.

    pos -- position to start searching
    list -- list of genes to search

    returns the index one past the last element of the run"""
    endmk = pos + 1
    step = None  # direction of the run (+1 or -1), fixed by the first pair
    while endmk < len(list):
        try:
            diff = list[endmk] - list[endmk - 1]
        except TypeError:
            # Nested list or other non-numeric element: run stops here.
            return endmk
        if step is None:
            if diff not in (1, -1):
                return endmk
            step = diff
        elif diff != step:
            return endmk
        endmk += 1
    return endmk


def get_gene_intersect(list_a, list_b):
    """returns a list of the genes that are common to both arguments - can
    return an empty list if no genes shared

    Signs are ignored: genes are compared by absolute value and the result
    contains absolute values.  The order of the result is unspecified.

    list_a -- first list of genes to check
    list_b -- second list of genes to check

    returns a list of genes if there are genes common to both - []
    otherwise"""
    genes_a = set([abs(gene) for gene in list_a])
    genes_b = set([abs(gene) for gene in list_b])
    return list(genes_a & genes_b)


def make_rearrangement_chart(network):
    """creates a rearrangement chart

    The chart has one row per key of network[0].get_dictionary() and one
    column per element of network; a cell holds the element's
    get_grimmaction().get_action() when has_gene_active(key) is true for
    that element, and '' otherwise.

    network -- RearrangementNetwork to use in chart creation

    returns a list of rows (list of lists)"""
    gene_matrix = []
    for gene in network[0].get_dictionary().keys():
        row = []
        for node in network:
            if node.has_gene_active(gene):
                row.append(node.get_grimmaction().get_action())
            else:
                row.append('')
        gene_matrix.append(row)
    return gene_matrix


def print_rearrangement_chart(matrix, filename):
    """prints a rearrangement chart matrix to filename

    The chart is written to filename + '.txt' as comma-separated text: a
    header line with the column numbers (starting at 0) followed by one
    line per matrix row, each prefixed with its row number (starting at 1).
    Every field, including the last, is followed by a comma.

    matrix -- list of rows, as returned by make_rearrangement_chart
    filename -- output path without the '.txt' extension"""
    # An empty matrix has no columns; only the header line is written.
    width = len(matrix[0]) if matrix else 0
    # 'with' guarantees the file is closed even if a write fails.
    with open(filename + '.txt', 'w') as chartfile:
        header = [',']
        for colnum in range(width):
            header.append(str(colnum) + ',')
        header.append('\n')
        chartfile.write(''.join(header))
        for gencount, row in enumerate(matrix, 1):
            fields = [str(gencount) + ',']
            for cell in row:
                fields.append(str(cell) + ',')
            fields.append('\n')
            chartfile.write(''.join(fields))