Source code for twol.fs

"""fs.py: A wrapper module for basic finite-state operations

The HFST engine used for accomplishing the operations but all functions make copies of their arguments when it is necessary to avoid side-effects.

© Kimmo Koskenniemi, 2018. This is free code under the GPL 3 license."""

import hfst_dev as hfst
import grapheme
import twol.cfg as cfg

[docs]def expr(e): """Return an FST corresponding to a XFST regular expression""" res = hfst.regex(e) res.minimize() return res
[docs]def concat(f, g): """Return the concatenation of two FSTs""" res = f.copy() res.concatenate(g) res.minimize() return res
[docs]def star(f): """Return the Kleene star iteration of an FST""" res = f.copy() res.repeat_star() res.minimize() return res
[docs]def plus(f): """Return the Kleene plus iteration of an FST""" res = f.copy() res.repeat_plus() res.minimize() return res
[docs]def crossprod(f, g): """Return the cross-product of two FSAs""" res = f.copy() res.cross_product(g) res.minimize() return res
[docs]def compose(f, g): """Return the composition of two FSTs""" res = f.copy() res.compose(g) res.minimize() return res
[docs]def union(f, g): """Return the union of two FSTs""" res = f.copy() res.disjunct(g) res.minimize() return res
[docs]def intersect(f, g): """Return the intersection of two FSTs Both arguments are assumed to be length preserving mappings. """ res = f.copy() res.conjunct(g) res.minimize() return res
[docs]def upper(f): """Return the input projection of an FST""" res = f.copy() res.input_project() res.minimize() return res
[docs]def lower(f): """Return the output projection of an FST""" res = f.copy() res.output_project() res.minimize() return res
[docs]def symbol_to_fsa(sym): """Return a FSA which accepts the one letter string 'sym' The symbol 'sym' may be e.g. a composed Unicode grapheme, i.e. a string of two or more Unicode characters. """ bfsa = hfst.HfstIterableTransducer() string_pair_path = ((sym, sym)) bfsa.disjunct(string_pair_path, 0) fsa = hfst.fst(bfsa) return(fsa)
[docs]def symbol_pair_to_fst(insym, outsym): """"Return a FST which accepts one the pair string 'insym:outsym'""" bfst = hfst.HfstIterableTransducer() string_pair_path = ((insym, outsym)) bfsa.disjunct(string_pair_path, 0) fst = hfst.fst(bfst) return(fst)
[docs]def string_to_fsa(grapheme_string): """Return a FSA which accepts the sequence of graphemes in the string""" bfsa = hfst.HfstIterableTransducer() grapheme_list = list(grapheme.graphemes(grapheme_string)) string_pair_path = tuple(zip(grapheme_list, grapheme_list)) if cfg.verbosity >= 10: print(grapheme_list) print(string_pair_path) bfsa.disjunct(string_pair_path, 0) fsa = hfst.HfstTransducer(bfsa) fsa.minimize() fsa.set_name(grapheme_string) if cfg.verbosity >= 10: print(fsa) return(fsa)
if __name__ == "__main__": print("fs module is not meant to be used as a script")