Source code for twol.cfg
"""Global values and functions for twol modules.
These definitions and values are used by several twol-related
programs, e.g. `twol`, `multialign`, `table2words`,
`words2zerofilled` etc.
"""
__author__ = """© Kimmo Koskenniemi, 2018, 2022"""

import re

# The names below are module-level state shared between the twol-related
# programs that import this module.  They are plain globals; this module
# itself only gives them their initial values.

verbosity = 0
# NOTE(review): presumably the diagnostic output level, 0 meaning quiet --
# confirm against the programs that set it.

all_zero_weight = 1000.0
"""In multialign: the weight for a set {'Ø'}, which is normally
not preferred but is sometimes needed, e.g. when using Øs in
the input words"""

final = False
"""In multialign: whether deleting final phonemes is preferred"""

definitions = {}
# NOTE(review): starts out empty; what is stored here is not visible in
# this module -- confirm against the code that fills it.

error_message = ""
# NOTE(review): starts out empty; presumably set by whichever program
# detects an error -- confirm against callers.

input_symbol_set = set()
"""The set of input symbols (phonemes and morphophonemes) occurring in
the examples"""

output_symbol_set = set()
"""The set of output symbols (surface characters) occurring in the
examples"""

symbol_pair_set = set()
"""The set of all symbol pairs (e.g. ('{aä}', 'a')) in the examples."""

all_pairs_fst = None
"""An FST which accepts any one symbol pair in symbol_pair_set"""

pair_symbol_set = set()
"""The set of all normalized pair symbols (e.g. 'k', '{aä}:a')
occurring in the examples"""

examples_fst = None
"""Examples as a transducer that accepts them as symbol pair sequences"""

example_lst = []
"""List of example words as pair symbol strings"""

example_set = set()
"""Set of examples as space-separated strings of normalized pair symbols"""
def timestamp(sourcefile):
    """Return the last-modification date of a file as 'YYYY-MM-DD'.

    sourcefile -- path of the file whose modification time is read

    returns -- the modification date in local time, e.g. '2022-03-15'

    Raises OSError (from os.path.getmtime) if the file does not exist
    or is inaccessible.
    """
    # Imported locally, as in the original, so that merely importing this
    # module does not pull these in.
    import os.path
    import time

    mtime = os.path.getmtime(sourcefile)
    return time.strftime('%Y-%m-%d', time.localtime(mtime))
def pairsym2sympair(pairsym):
    """Split a pair symbol into the symbol pair it stands for.

    pairsym -- a pair symbol, e.g. 'k' or '{aä}:a' or 'k:k'

    returns -- a symbol pair, e.g. ('k', 'k') or ('{aä}', 'a') or ('k', 'k')

    A pair symbol which does not consist of exactly two colon-free parts
    joined by a single colon is taken to stand for an identity pair.
    """
    # Neither group may contain ':', so a full match means the string
    # holds exactly one colon separating the input and output symbols.
    parts = re.fullmatch(r"([^:]*):([^:]*)", pairsym)
    if parts is None:
        return pairsym, pairsym
    return parts.groups()
def sympair2pairsym(insym, outsym):
    """Build the normalized pair symbol for a symbol pair.

    insym -- a symbol in the input alphabet, e.g. 'k' or '{aä}'

    outsym -- a symbol in the output alphabet, e.g. 'k' or 'a'

    returns -- a normalized pair symbol, e.g. 'k' or '{aä}:a' (not 'k:k')
    """
    # An identity pair is abbreviated to the bare symbol; any other pair
    # is written as input and output joined by a colon.
    identical = insym == outsym
    return insym if identical else insym + ':' + outsym
if __name__ == "__main__":
    # This module only provides shared values and helper functions, so
    # running it directly just prints a notice.
    print("cfg module is not meant to be used as a script")