Source code for twol.twbt

"""Module for detailed handling of basic transducers

Copyright 2015-2020, Kimmo Koskenniemi

This program is free software under the GNU GPL 3 or later
"""

import hfst_dev as hfst

import twol.cfg as cfg

def pairname(insym, outsym):
    """Build the label used for a pair of symbols.

    insym -- input symbol as a string
    outsym -- output symbol as a string

    An identity pair is written as the bare symbol, any other pair as
    ``insym:outsym``, e.g.

    >>> pairname('a', 'a')
    'a'
    >>> pairname('i', 'j')
    'i:j'
    """
    return insym if insym == outsym else insym + ":" + outsym
def equivpairs(bfst):
    """Group the transition symbol pairs of a transducer into classes.

    bfst -- a HfstIterableTransducer whose transition symbol pairs are
        analyzed

    Two pair symbols belong to the same class when they label exactly the
    same set of (source state, target state) transitions.  Nothing is
    printed by this function; the classes are returned to the caller.

    Returns a tuple ``(labelsym, pairsymbols_for_transition_sets)``:

    labelsym -- dict mapping every pair symbol to the representative of
        its class, i.e. the shortest member (the first one in sort order
        if several members share the shortest length)
    pairsymbols_for_transition_sets -- dict mapping each frozenset of
        (state, target) tuples to the list of pair symbols which occur on
        exactly those transitions
    """
    transitions_for_pairsymbol = {}  # {pairsym: set of (state, target), ..}
    for state in bfst.states():
        for arc in bfst.transitions(state):
            target = arc.get_target_state()
            pair_symbol = pairname(arc.get_input_symbol(),
                                   arc.get_output_symbol())
            transitions_for_pairsymbol.setdefault(
                pair_symbol, set()).add((state, target))

    pairsymbols_for_transition_sets = {}  # {transition set: [pairsym, ..]}
    for pair_symbol, transitions in transitions_for_pairsymbol.items():
        pairsymbols_for_transition_sets.setdefault(
            frozenset(transitions), []).append(pair_symbol)

    labelsym = {}  # {pairsym: pairsym representing it in pretty-printing}
    for members in pairsymbols_for_transition_sets.values():
        # Sort once; min() returns the first of several equally short
        # symbols, which is the alphabetically smallest one here.
        ordered = sorted(members)
        model = min(ordered, key=len)
        for sym in ordered:
            labelsym[sym] = model
    return (labelsym, pairsymbols_for_transition_sets)
def fst2dicfst(FST):
    """Return the transducer as a dict of per-state transition tables.

    FST -- the transducer, converted with hfst.HfstIterableTransducer

    The result maps each state to a tuple ``(is_final, transitions)``
    where ``transitions`` maps the pair name of each outgoing arc to the
    target state of that arc.  If a state has several arcs with the same
    pair name, the target of the last one is the one kept.
    """
    basic = hfst.HfstIterableTransducer(FST)
    table = {}
    for state in basic.states():
        outgoing = {
            pairname(arc.get_input_symbol(), arc.get_output_symbol()):
                arc.get_target_state()
            for arc in basic.transitions(state)
        }
        table[state] = (basic.is_final_state(state), outgoing)
    return table
def fst_to_fsa(FST, separator='^'):
    """Convert a transducer into an acceptor over joined pair symbols.

    FST -- the transducer to convert (it is first converted with
        hfst.HfstIterableTransducer)
    separator -- string placed between the input and the output symbol

    Every transition pair ``(in_sym, out_sym)`` is substituted by the
    identity pair ``(joint, joint)`` where
    ``joint = in_sym + separator + out_sym``.

    Returns the substituted machine as a hfst.HfstTransducer.
    """
    basic = hfst.HfstIterableTransducer(FST)
    # Deliberately not called ``dict`` so that the builtin stays usable.
    substitutions = {}
    for sym_pair in basic.get_transition_pairs():
        in_sym, out_sym = sym_pair
        joint_sym = in_sym + separator + out_sym
        substitutions[sym_pair] = (joint_sym, joint_sym)
    basic.substitute(substitutions)
    return hfst.HfstTransducer(basic)
def fsa_to_fst(FSA, separator='^'):
    """Split the joined symbols of an acceptor back into symbol pairs.

    FSA -- the acceptor, converted with hfst.HfstIterableTransducer
    separator -- the string at which each joined symbol is split

    For every transition pair the output symbol is split at ``separator``
    and the pair is substituted by the resulting ``(in_sym, out_sym)``.
    The split must give exactly two parts, otherwise the unpacking raises
    a ValueError.

    Returns the substituted machine as a hfst.HfstTransducer.
    The original note on this function says that hfst.fsa_to_fst does
    the same.
    """
    basic = hfst.HfstIterableTransducer(FSA)
    replacements = {}
    for sym_pair in basic.get_transition_pairs():
        _unused_insym, joined = sym_pair
        in_sym, out_sym = joined.split(separator)
        replacements[sym_pair] = (in_sym, out_sym)
    basic.substitute(replacements)
    return hfst.HfstTransducer(basic)
def ppfst(FST, print_equiv_classes=True, title=""):
    """Pretty-print a transducer state by state.

    FST -- the transducer to be pretty-printed
    print_equiv_classes -- if True, then print also the equivalence classes
    title -- an explicit additional title to be printed

    A heading is printed first: ``title`` if it is non-empty, otherwise
    the value of ``FST.get_name()``.  Then one line is printed per state:
    the state number, ``:`` for a final state or ``.`` for a non-final
    one, and for each target state the pair symbols leading to it
    followed by ``-> target ;``.  Of each class of equivalent pair
    symbols (as computed by equivpairs) only the representative is shown.

    If ``print_equiv_classes`` is true and some class has more than one
    member, the members of every such class are listed at the end under
    the heading "Classes of equivalent symbols:".
    """
    heading = title if title else FST.get_name()
    print("\n" + heading)

    basic = hfst.HfstIterableTransducer(FST)
    representative, classes = equivpairs(basic)

    for state in basic.states():
        # Collect the labels of the outgoing arcs grouped by target state.
        labels_by_target = {}
        for arc in basic.transitions(state):
            target = arc.get_target_state()
            label = pairname(arc.get_input_symbol(), arc.get_output_symbol())
            labels_by_target.setdefault(target, []).append(label)

        finality = ": " if basic.is_final_state(state) else ". "
        print(" ", state, finality, end="")
        for target, labels in labels_by_target.items():
            # Show a label only if it represents its own class.
            shown = [lab for lab in labels if lab == representative[lab]]
            print(" " + " ".join(shown) + " -> " + str(target), end=" ;")
        print()

    if print_equiv_classes and any(
            len(members) > 1 for members in classes.values()):
        print("Classes of equivalent symbols:")
        for members in classes.values():
            if len(members) > 1:
                print(" ", " ".join(sorted(members)))
    return
def ppdef(XRC, name, displayed_formula):
    """Compile a named expression and pretty-print the result.

    XRC -- object whose ``compile(name)`` method yields the transducer
        (presumably a hfst XreCompiler -- confirm against the callers)
    name -- the string passed to ``XRC.compile``; also used in the heading
    displayed_formula -- text shown after ``name = `` in the heading

    The compiled machine is passed through hfst.HfstIterableTransducer
    and back to hfst.HfstTransducer, named ``name = displayed_formula``
    and printed with ppfst including the equivalence classes.
    """
    compiled = XRC.compile(name)
    basic = hfst.HfstIterableTransducer(compiled)
    named = hfst.HfstTransducer(basic)
    named.set_name(name + " = " + displayed_formula)
    ppfst(named, True)
    return
def pp_paths(TR, heading, limit=30):
    """Print a heading followed by the paths of a transducer.

    TR -- the transducer whose paths are listed (handed on to paths)
    heading -- text printed before the paths
    limit -- maximum number of paths asked for

    If there are no paths, `` None`` is printed on the heading line;
    otherwise each path is printed on a line of its own below it.
    """
    lines = paths(TR, limit)
    print(heading, end="")
    if not lines:
        print(" None")
        return
    print()
    for line in lines:
        print(line)
def paths(TR, limit=30):
    """Return the paths of a transducer as strings of pair names.

    TR -- object whose ``extract_paths(output='raw', max_number=limit)``
        yields ``(weight, path)`` items, each path being a sequence of
        (input symbol, output symbol) pairs
    limit -- maximum number of paths asked for

    Returns a list with one string per path: the pair names of the path
    joined with single spaces.  The weights are ignored.
    """
    raw_paths = TR.extract_paths(output='raw', max_number=limit)
    # No local is called ``str`` any more, so the builtin is not shadowed.
    return [
        " ".join(pairname(insym, outsym) for insym, outsym in path)
        for _weight, path in raw_paths
    ]
def expanded_examples(TR, insyms, symbol_pair_set):
    """Widen the given input symbols to all their listed symbol pairs.

    TR -- the transducer to expand, converted with
        hfst.HfstIterableTransducer
    insyms -- the input symbols to be expanded
    symbol_pair_set -- iterable of (input symbol, output symbol) pairs;
        it is traversed once for every symbol in ``insyms``

    For each symbol in ``insyms``, every pair of ``symbol_pair_set``
    with that input symbol is substituted by the whole group of pairs
    sharing the input symbol.

    Returns the result as a hfst.HfstTransducer on which ``minimize()``
    has been called and whose name is set to
    "negative and positive together".
    """
    basic = hfst.HfstIterableTransducer(TR)
    for insym in insyms:
        same_input = [(ins, outs)
                      for ins, outs in symbol_pair_set
                      if ins == insym]
        # The replacement group is the same for every pair of this symbol.
        alternatives = tuple(same_input)
        for sympair in same_input:
            basic.substitute(sympair, alternatives)
    expanded = hfst.HfstTransducer(basic)
    expanded.set_name("negative and positive together")
    expanded.minimize()
    return expanded
if __name__ == "__main__":
    # This module only provides functions; running it directly just
    # prints a notice.
    print("twbt module is not meant to be used as a script")