Source code for twol.discover

# discovery.py
# ============
"""A module for dicovering raw two-level rules from a set of carefully chosen examples

Examples, contexts and rules are treated in terms of strings without any
finite-state machinery or rule compilation.  Examples and contexts are
space separated sequences of pair-symbols. 

© Kimmo Koskenniemi, 2017-2023. Free software under the GPL 3 or later.
"""

import sys
import re
from collections import deque, defaultdict

import twol.cfg as cfg
from twol.cfg import pairsym2sympair, sympair2pairsym, pair_symbol_set
from twol.cfg import definitions, symbol_pair_set
from twol.cfg import input_symbol_set, output_symbol_set
import twol.twexamp as twexamp
from typing import List, Dict, Set, Tuple, DefaultDict, Deque

import twol.discopars as discopars

Context = Tuple[str, str]
ContextSet = Set[Context]
PairSym = str
SymPair = Tuple[str, str]
Result = Dict
ResultList = List[Result]

insym2pairsym_set: DefaultDict[str, set] = defaultdict(set)
# key: input symbol, value: set of pair symbols

outsym2pairsym_set: DefaultDict[str, set] = defaultdict(set)
# key: output symbol, value: set of pair symbols

positive_context_set = {}
negative_context_set = {}

#=============================================================
[docs]def relevant_contexts(pair_symbol: PairSym) -> None:
    """Select positive and negative contexts for a given pair-symbol
    
    :parameter pair_symbol:  The pairsym for which the contexts are selected.
    
    Sets a global variable ``positive_context_set[pair_symbol]`` to a set of those contexts in the examples in which the pair_symbol occurs
    
    Sets a global variable ``negative_context_set[pair_symbol]`` to a set of contexts where the input-symbol of the pair_symbol occurs with another output-symbol but so that there is no example in the example_set where the pair_symbol occurs in such a context.

    """
    input_symbol, output_symbol = pairsym2sympair(pair_symbol)
    positive_ctx_set: ContextSet = set()
    negative_ctx_set: ContextSet = set()
    pairsymlist = [re.sub(r"([}{])", r"\\\1", psym) # eg. {ao}:o --> \{ao\}:o
                   for psym
                   in insym2pairsym_set[input_symbol]]
    # print(f"in relevant_contexts: {pairsymlist = }") ####
    pattern = re.compile("|".join(pairsymlist))
    for example in cfg.example_set:
        for m in pattern.finditer(example):
            i1 = m.start()
            i2 = m.end()
            # print('"' + example[0:i1] +'"', '"' + example[i2:] + '"') ##
            left_context = ".#. " + example[0:i1-1]
            centre = example[i1:i2]
            if i2 >= len(example):
                right_context = ".#."
            else:
                right_context = example[i2+1:] + " .#."
            context = (left_context, right_context)
            # print(centre, context) ##
            if centre == pair_symbol:
                positive_ctx_set.add(context)
            else:
                negative_ctx_set.add(context)
    negative_ctx_set = negative_ctx_set - positive_ctx_set

    positive_context_set[pair_symbol] = positive_ctx_set
    negative_context_set[pair_symbol] = negative_ctx_set
    
[docs]def max_left_len(pos_context_set: set) -> int:
    maxlen = max([len(lc.split()) for lc, rc in pos_context_set])
    return maxlen

[docs]def max_right_len(pos_context_set: set) -> int:
    maxlen = max([len(rc.split()) for lc, rc in pos_context_set])
    return maxlen

#================================================================
[docs]def truncate_left(syms_to_remain: int,
                  context_set: ContextSet
                  ) -> ContextSet:
    """Truncate the left contexts

    :parameter syms_to_remain: A minimum number of pair symbols to remain in the left context.

    :parameter context_set:  A set of (positive) contexts to be truncated.

    :returns: A new context set where left contexts are truncated

"""
    if cfg.verbosity >= 25:
        print(f"entering truncate_left, {syms_to_remain =}") ####
        print(f"{context_set = }") ####
    new_context_set: ContextSet = set()
    for left_context, right_context in context_set:
        left_lst = left_context.split()
        start = max(0, len(left_lst) - syms_to_remain)
        new_lc = " ".join(left_lst[start:])
        new_context_set.add((new_lc, right_context))

    if cfg.verbosity >= 25:
        print(f"leaving truncate_left, {syms_to_remain =}") ####
        print(f"{new_context_set = }") ####
    return new_context_set

#=================================================================
[docs]def truncate_right(syms_to_remain: int,
                   context_set: ContextSet
                   ) -> ContextSet:
    """Truncate the right contexts

    :parameter syms_to_remain: A minimum number of pair symbols to remain in the right context.

    :parameter context_set:  A set of (positive) contexts to be truncated.

    :returns: A new context set where right contexts are truncated

"""
    if cfg.verbosity >= 25:
        print(f"entering truncate_right, {syms_to_remain =}") ####
        print(f"{context_set = }") ####
    new_context_set = set()
    for left_context, right_context in context_set:
        right_lst = right_context.split()
        new_rc = " ".join(right_lst[0:syms_to_remain])
        # print(f"{new_rc = }") ####
        new_context_set.add((left_context, new_rc))

    if cfg.verbosity >= 25:
        print(f"leaving truncate_right, {syms_to_remain =}") ####
        print(f"{new_context_set = }") ####
    return new_context_set

#=============================================================
[docs]def reduce_set(set_name: str,
               pos_context_set: ContextSet,
               ) -> ContextSet:
    """
    Reduce contexts by substituting pair symbols with set names

    :parameter set_name: A name of a pairsym set in definitions.

    :parameter pos_context_set:  A set of contexts to which the reduction is to be applied.

    :returns: A new set of contexts where every occurrence of pairsyms in ``definitions[set_name]`` have been substituted with ``set_name``.

"""
    symbol_set = definitions[set_name]
    # print(f"{symbol_set = }") ####
    new_pos_context_set: ContextSet = set()
    for left_context, right_context in pos_context_set:
        new_left_ctx = [set_name
                        if (pairsym in symbol_set or
                            (definitions.get(pairsym,
                                             pair_symbol_set) <=
                             symbol_set)
                            ) else pairsym
                        for pairsym in left_context.split()]
        new_rght_ctx = [set_name
                        if (pairsym in symbol_set or
                            (definitions.get(pairsym,
                                             pair_symbol_set) <=
                             symbol_set)
                            ) else pairsym
                        for pairsym in right_context.split()]
        new_pos_context_set.add((" ".join(new_left_ctx),
                                 " ".join(new_rght_ctx)))
    if cfg.verbosity >= 20:
        print(f"{new_pos_context_set = }") ####
    return new_pos_context_set
    

#==============================================================
[docs]def surface_subset(pos_context_set: ContextSet,
                   subset_name: str
                   ) -> ContextSet:
    """
    Reduce the contexts by substituting insym:outsym pairs with :outsym

    :parameter pos_context_set:  The set of contexts to be reduced

    :parameter subset_name:  A defined subset whose sympairs are considered for reduction.

    :returns:  A new context set where all occurences of pairsyms (``insym:outsym``) in ``definitions[subset_name]`` are replaced with (``:outsym``), e.g. ``{tds}:s`` have been reduced into e.g. ``:s``.

"""
    pairsym2outsym_map = {}
    for ctx_pair in pos_context_set:
        for ctx in ctx_pair:
            sym_lst = ctx.split()
            # print(f"{sym_lst = }") ####
            for pair_sym in sym_lst:
                outsym = pairsym2sympair(pair_sym)[1]
                # print(f"{pair_sym = }, {outsym = }") ####
                if outsym == ".#.":
                    pairsym2outsym_map[pair_sym] = outsym
                else:
                    pairsym2outsym_map[pair_sym] = ":" + outsym
    # print(f"{pairsym2outsym_map = }") ####
    new_pos_context_set: ContextSet = set()
    subset = definitions[subset_name]
    for left_context, right_context in pos_context_set:
        new_left_ctx = [pairsym2outsym_map.get(pairsym, pairsym)
                        if pairsym in subset else pairsym
                        for pairsym in left_context.split()]
        new_rght_ctx = [pairsym2outsym_map.get(pairsym, pairsym)
                        if pairsym in subset else pairsym
                        for pairsym in right_context.split()]
        new_pos_context_set.add((" ".join(new_left_ctx),
                                 " ".join(new_rght_ctx)))
    if cfg.verbosity >= 20:
        print(f"{new_pos_context_set = }") ####
    return new_pos_context_set

#==============================================================
[docs]def mphon_subset(pos_context_set: ContextSet,
                 subset_name: str
                 )-> ContextSet:
    """Reduces a set of contexts by replacing e.g. {ij}:i with {ij}:

    :param pos_context_set:  A set of positive context which might be truncated and already reduced.

    :param subset_name:  Only pairs which are in definitions[subset_name] are reduced.

    :returns: A modified context set where pair symbols (insym:outsym) belonging to the given subset have been reduced into (insym:).

"""
    pairsym2insym_map = {}
    for ctx_pair in pos_context_set:
        for ctx in ctx_pair:
            sym_lst = ctx.split()
            # print(f"{sym_lst = }") ####
            for pair_sym in sym_lst:
                insym = pairsym2sympair(pair_sym)[0]
                # print(f"{pair_sym = }, {insym = }") ####
                if insym == ".#.":
                    pairsym2insym_map[pair_sym] = insym
                else:
                    pairsym2insym_map[pair_sym] =  insym + ":"
    # print(f"{pairsym2outsym_map = }") ####
    new_pos_context_set: ContextSet = set()
    subset = definitions[subset_name]
    for left_context, right_context in pos_context_set:
        new_left_ctx = [pairsym2insym_map.get(pairsym, pairsym)
                        if pairsym in subset else pairsym
                        for pairsym in left_context.split()]
        new_rght_ctx = [pairsym2insym_map.get(pairsym, pairsym)
                        if pairsym in subset else pairsym
                        for pairsym in right_context.split()]
        new_pos_context_set.add((" ".join(new_left_ctx),
                                 " ".join(new_rght_ctx)))
    if cfg.verbosity >= 20:
        print(f"{new_pos_context_set = }") ####
    return new_pos_context_set

[docs]def print_context_set(msg: str, context_set: ContextSet) -> None:
    print(msg)
    rule_lst = [f"    {lc} _ {rc}" for lc, rc in context_set]
    print(" ,\n".join(rule_lst) + " ;")

#====================================================================

[docs]def overlap(set_lst: list[str],
            pairsym_lst: list[str]) -> bool:
    """Tests whether list of set names covers the list of pairsyms

    :param set_lst:  List of pair symbols or names of defined sets

    :param pairsym_lst:  List of pair symbols.  If shorter than set_lst, the match fails.

    :returns: True if each pairsym in the latter list is included in a respective set name (or pairsym) in the former list

    """
    #print("in overlap: set_lst = {}".format(" ".join(set_lst))) #####
    #print("in overlap: pairsym_lst = {}".format(" ".join(pairsym_lst))) #####
    if len(set_lst) > len(pairsym_lst):
        pairsym_lst = pairsym_lst + ["XYZ"]*(len(set_lst) - len(pairsym_lst))
    for s, p in zip(set_lst, pairsym_lst):
        if s in definitions:
            if p not in definitions[s]:
                break
        else:
            if p != s:
                break
    else:
        #print(f"in overlap: return True") #####
        return True
    #print(f"in overlap: return False") #####
    return False

[docs]def pos_neg_is_disjoint(pos_ctx_set: ContextSet,
                        other_ctx_set: ContextSet) -> bool:
    """
    Tests whether a pos context set is disjoint from a negative one

    :parameter pos_ctx_set:  A set of left and right context pairs where the contexts are represented as space-separated strings of pair symbols or set names.  

    :parameter other_ctx_set:  A context set to which the pos context is compared.  The contexts are space-separated strings of pair symbols.

    :returns:  True if the context sets are logically disjoint.

"""
    #print_context_set(f"in pos_neg_is_disjoint: {pair_symbol = }",
    #                  pos_ctx_set) #####
    for pos_left_str, pos_rght_str in pos_ctx_set:
        pos_left_lst = list(reversed(pos_left_str.split()))
        #print(f"in pos_neg_is_disjoint: {pos_left_lst = }") #####
        pos_rght_lst = pos_rght_str.split()
        #print(f"in pos_neg_is_disjoint: {pos_rght_lst = }") #####
        for neg_left_str, neg_rght_str in other_ctx_set:
            neg_left_lst = list(reversed(neg_left_str.split()))
            neg_rght_lst = neg_rght_str.split()
            if (overlap(pos_left_lst, neg_left_lst)
                and
                overlap(pos_rght_lst, neg_rght_lst)):
                #print(f"{pos_left_lst} _ {pos_rght_lst}") #####
                #print(f"{neg_left_lst} _ {neg_rght_lst}\n") #####
                #print("in pos_neg_is_disjoint: return False") #####
                return(False)
    #print("in pos_neg_is_disjoint: return True\n") #####
    return True
    
[docs]def pos_neg_is_subset(pos_ctx_set: ContextSet,
                      ctx_set: ContextSet) -> bool: # *** not needed any more
    """
    Tests whether the first context set is logically subset of the second

    :parameter pos_ctx_set:  A positive context set which has gone through reductions such as truncation or replacements.

    :parameter ctx_set:  An intact negative context set which has not been reduced.

    :returns: True if all context in the first set match some context in the second set.  

"""
    #print_context_set("in pos_neg_is_subset, neg set", ctx_set) #####
    for pos_left_str, pos_rght_str in pos_ctx_set:
        pos_left_lst = list(reversed(pos_left_str.split()))
        #print(f"in pos_neg_is_subset: {pos_left_lst = }") #####
        pos_rght_lst = pos_rght_str.split()
        #print(f"in pos_neg_is_subset: {pos_rght_lst = }") #####
        for neg_left_str, neg_rght_str in ctx_set:
            neg_left_lst = list(reversed(neg_left_str.split()))
            #print(f"in pos_neg_is_subset: {neg_left_lst = }") #####
            neg_rght_lst = neg_rght_str.split()
            #print(f"in pos_neg_is_subset: {neg_rght_lst = }") #####
            if (overlap(pos_left_lst, neg_left_lst)
                and
                overlap(pos_rght_lst, neg_rght_lst)):
                break
        else:
            #print("in pos_neg_is_subset: return False\n") #####
            return False
    #print("in pos_neg_is_subset: return True\n") #####
    return True
    

#====================================================================
[docs]def search_reductions(agenda: Deque,
                      pair_symbol: PairSym,
                      pos_context_set: ContextSet,
                      ) -> ContextSet:
    """Tests and executes context reductions according to a recipe
    
    :parameter agenda:  Initially a recipe. Consumed and updated during the process.

    :parameter pair_symbol:  The pairsym for which a rule is deduced.

    :parameter pos_context_set:  set of contexts, i.e. pairs (left_context, right_context) where the contexts are space-separated strings of pairsyms.  The context are reduced during the process.

    """
    if cfg.verbosity >= 10:
        print(f"in search_reductions: {agenda = }") ####

    if not agenda:                  # No more reductions to do.
        return pos_context_set

    task = agenda.popleft()             # Next step to be tested.
    if type(task) is str:
        op = task
    elif type(task) is dict:
        op = task["op"]
    else:
        exit(1)
    if cfg.verbosity >= 10:
        print(f"in search_reductions: {op = }, {task = }")

    # -- Start with truncatig all and then truncating less and less --
    if op == "truncate":
        side = task["side"]
        target_len = task.get("minimum", 0)
        if side == "left":
            max_len = max_left_len(pos_context_set)
        else:
            max_len = max_right_len(pos_context_set)
        if target_len <= max_len:           # possible to truncate
            if side == "left":
                new_pos_ctx_set = truncate_left(target_len,
                                                pos_context_set)
            else:
                new_pos_ctx_set = truncate_right(target_len,
                                                   pos_context_set)
            good = pos_neg_is_disjoint(new_pos_ctx_set,
                                       negative_context_set[pair_symbol])
        else:
            good = False
        #print_context_set("in search_reductions, new_pos_ctx_set:",
        #                  new_pos_ctx_set) ####
        #print_context_set("in search_reductions, negative_context_set:",
        #                  negative_context_set[pair_symbol]) ####
        if (not good) and (target_len < max_len): # Possible to go on trunc
            new_task = {"op": op, "side": side, "minimum": target_len+1}
            # print(f"in search_reductions, failing but pushing {new_task = }") ####
            agenda.appendleft(new_task)     # Next attempt pushed into agenda

        if good:
            # print("succeeding and no further tasks created from this") ####
            return search_reductions(agenda, pair_symbol, new_pos_ctx_set)
        else:
            return search_reductions(agenda, pair_symbol, pos_context_set)

    elif (op == "surface"):
        subset_name = task.get("set", "None")
        new_pos_ctx_set = surface_subset(pos_context_set,
                                         subset_name)
        # print_context_set(f"from surface_subset {subset_name}", new_pos_ctx_set) ######
        if pos_neg_is_disjoint(new_pos_ctx_set,
                               negative_context_set[pair_symbol]): # ***
            return search_reductions(agenda, pair_symbol,
                                     new_pos_ctx_set)
        else:
            return search_reductions(agenda, pair_symbol,
                                     pos_context_set)
    elif (op == "mphon"):
        subset_name = task.get("set", "None")
        new_pos_ctx_set = mphon_subset(pos_context_set,
                                       subset_name)
        if pos_neg_is_disjoint(new_pos_ctx_set,
                               negative_context_set[pair_symbol]):
            return search_reductions(agenda, pair_symbol,
                                     new_pos_ctx_set)
        else:
            return search_reductions(agenda, pair_symbol,
                                     pos_context_set)
    elif (op in definitions):
        # print(f"in search_reductions, entering reduce_set({op},...)") ####
        new_pos_ctx_set = reduce_set(op, pos_context_set)
        #print_context_set(f"from reduce_set {op}", new_pos_ctx_set) ######
        if pos_neg_is_disjoint(new_pos_ctx_set,
                               negative_context_set[pair_symbol]):
            return search_reductions(agenda, pair_symbol,
                                     new_pos_ctx_set)
        else:
            return search_reductions(agenda, pair_symbol,
                                     pos_context_set)
    else:
        print(f"in search_reductions, exiting with invalid task: {task = }")
        exit(1)
    return set() ## just to make mypy happy


[docs]def context_to_output_str(pairsym_str: str) -> str:
    """Converts a pair symbol string into its surface string"""
    pairsym_lst = pairsym_str.split()
    sympair_lst = [pairsym2sympair(psym) for psym in pairsym_lst]
    outsym_lst = [outsym for insym, outsym in sympair_lst]
    return "".join(outsym_lst)

[docs]def context_set_penalty(context_set: ContextSet):
    width = max_left_len(context_set) + max_right_len(context_set)
    height = len(context_set)
    sym_set = set()
    for ctx in context_set:
        left_str, right_str = ctx
        sym_set |= set(left_str.split())
        sym_set |= set(right_str.split())
    depth = len(sym_set)
    penalty = width * height * depth
    return penalty

[docs]def print_rule(result: Result,
               operator: str) -> None:
    """Prints one rule"""
    if cfg.verbosity >= 2:
        print("\n! recipe:")
        for task in result["recipe"]:
            print(f"!\t\t{task}")
    pair_symbol = result["pairsym"]
    context_set = result["posctx"]
    weight = result["weight"]
    print(f"{pair_symbol} {operator}\t\t! {weight}")
    rule_lst = [f"    {lc} _ {rc}" for lc, rc in context_set]
    print(" ,\n".join(rule_lst) + " ;")

#====================================================================
[docs]def process_results_into_rules(pairsym_lst: List[PairSym],
                               result_lst_lst: List[List[Result]]):
    # process rule candidates for each recipe
    cand_rule_lst_dict = defaultdict(list)
    for result_lst in result_lst_lst:
        assert len(pairsym_lst) == len (result_lst)
        recipe = result_lst[0]["recipe"]
        srt_res_lst = sorted(result_lst, key=lambda x: x["weight"])
        for result in srt_res_lst:
            pair_sym = result["pairsym"]
            weight = result["weight"]
            pos_ctx_set = result["posctx"]
            exclusive = True
            for other_result in srt_res_lst:
                if other_result == result:
                    continue
                other_pair_sym = other_result["pairsym"]
                other_pos_ctx_set = negative_context_set[other_pair_sym]
                if not pos_neg_is_disjoint(pos_ctx_set,
                                           other_pos_ctx_set):
                    exclusive = False
                    break
            if exclusive:
                cand_rule_lst_dict[pair_sym].append((weight, recipe,
                                                     "<=>", pos_ctx_set))
                if cfg.verbosity >= 4:
                    print_rule(result, "<=>")
            else:
                cand_rule_lst_dict[pair_sym].append((weight+1000, recipe,
                                            "=>", pos_ctx_set))
                if cfg.verbosity >= 4:
                    print_rule(result, "=>")

    if cfg.verbosity >= 4:
        print("!\n! selected rules:\n!")
    res_lst = []
    for pairsym in pairsym_lst:
        cand_rule_lst = sorted(cand_rule_lst_dict[pairsym],
                               key=lambda x: x[0])
        #print(f"{cand_rule_lst = }") #####
        for (weight, recipe, rule_op, pos_ctx_set) in cand_rule_lst[:1]:
            res = {"pairsym": pairsym,
                   "posctx": pos_ctx_set,
                   "recipe": recipe,
                   "weight": weight,
                   "ruleop": rule_op}
            res_lst.append(res)
 
    #print(f"{res_lst = }") #####

    res_lst.sort(key=lambda x: x["weight"])
    #print(f"{res_lst = }") #####

    exclusive = True
    for res in res_lst:
        rule_op = res["ruleop"]
        if (not exclusive) or res != res_lst[-1]:
            print_rule(res, rule_op)
        if rule_op != "<=>":
            exclusive = False
        
#====================================================================
[docs]def main():
    
    version = cfg.timestamp(__file__)

    global outsym2pairsym_set
    import argparse
    import json
    
    arpar = argparse.ArgumentParser(
        "twol-discov",
        description=f"Deduces two-level rules out of"\
        " a file of examples.  The file must consist of"\
        " lines of space-separated pair string.  Such a file"\
        " can be produced e.g. by twol-raw2renamed program."\
        " Version: {version}")
    arpar.add_argument(
        "examples",
        help="Example pair strings file.",
        default="test.pstr")
    arpar.add_argument(
        "-s", "--symbol",
        help="Input symbol for which rules are produced.",
        default="")
    arpar.add_argument(
        "-r", "--recipes",
        help="Initial list of recipes for the context reductions.",
        default="")
    arpar.add_argument(
        "-g", "--grammar",
        help="EBNF grammar which defines the syntax of the definitions.",
        default="discovdef.ebnf")
    arpar.add_argument(
        "-d", "--definitions",
        help="definitions of pair symbol sets",
        default="setdefs.twol")
    arpar.add_argument(
        "-m", "--max-examples",
        help="Maximun number of examples per morphophoneme to be printed"\
        " as comments.  Default is 20 for each pair symbol.",
        type=int, default=20)

    arpar.add_argument(
        "-v", "--verbosity",
        help="Level of  diagnostic output, default is 1. Set to"\
        " 0 to omit the printing of relevant examples for the rules",
        type=int, default=1)
    args = arpar.parse_args()

    cfg.verbosity = args.verbosity

    # -- read in all examples --
    twexamp.read_examples(filename_lst=[args.examples], build_fsts=False)
    if cfg.verbosity >= 10:
        print("--- all examples read in ---")
    
    parser = discopars.init(args.grammar)
    discopars.parse_defs(parser, args.definitions)
    if cfg.verbosity >= 10:
        for nm, cs in definitions.items():
            s_str = " ".join(sorted(list(cs)))
            print(f"{nm}: {s_str}\n")

    if args.recipes:
        recipe_f = open(args.recipes, 'r')
        task_lst_lst = list(json.load(recipe_f))
        # print(f"{task_lst_lst = }") ####
    else:
        task_lst_lst = [[{"op": "truncate", "side": "left"},
	                 {"op": "truncate", "side": "right"}]]

    for insym, outsym in symbol_pair_set:
        pair_symbol = sympair2pairsym(insym, outsym)
        insym2pairsym_set[insym].add(pair_symbol)
        outsym2pairsym_set[outsym].add(pair_symbol)
    for insym, symset in insym2pairsym_set.items():
        definitions[insym + ":"] = symset
    for outsym, symset in outsym2pairsym_set.items():
        definitions[":" + outsym] = symset
    #print(f"in main: {definitions =}") ####

    # -- expand a plain input symbol into a list of symbol pairs --
    if args.symbol in input_symbol_set:
        input_symbol_lst = [args.symbol]
    elif not args.symbol:
        input_symbol_lst = sorted(list({x for x in input_symbol_set
                                        if not len(x) < 3 }))
    else:
        print(f"Symbol {args.symbol!r} does not occur in the examples")
        lst = list(input_symbol_set)
        print("The following input symbols would be valid:\n",
              " ".join(sorted(lst)))
        exit("")

    for input_symbol in input_symbol_lst:
        pairsym_lst: List[PairSym]  = []
        for pairsym in insym2pairsym_set[input_symbol]:
            pairsym_lst.append(pairsym)
        pairsym_lst.sort()
        if cfg.verbosity >= 10:
            print(f"{pairsym_lst = }")

        if len(pairsym_lst) <= 2:
            continue
        for pairsym in pairsym_lst:
            relevant_contexts(pairsym)

        result_lst_lst = []
        # try each recipe in the task_lst_lst
        for task_lst in task_lst_lst:
            result_lst: ResultList = []
            agenda: Deque = deque(task_lst.copy())
            # -- collect the minimal contexts for each sym pair --
            for pair_symbol in pairsym_lst:
                result_pos_ctx_set: ContextSet = search_reductions(
                    agenda.copy(),
                    pair_symbol,
                    positive_context_set[pair_symbol].copy())
                # print(f"{pos_contexts = }\n{neg_contexts}") ####
                pos_pena = context_set_penalty(result_pos_ctx_set)
                result_lst.append({"pairsym": pair_symbol,
                                   "posctx": result_pos_ctx_set,
                                   "weight": pos_pena,
                                   "recipe": task_lst})

            for result in result_lst:
                if cfg.verbosity >= 5 :
                    print_rule(result, "=>")
                result_lst_lst.append(result_lst)
        
        process_results_into_rules(pairsym_lst, result_lst_lst)
    
        if cfg.verbosity >= 1:
            for pair_symbol in pairsym_lst:
                insym, outsym = pairsym2sympair(pair_symbol)
                pos_ctx_lst = list(positive_context_set[pair_symbol])
                srt_ctx_lst = sorted(pos_ctx_lst,
                                     key=lambda x: x[1])
                step = len(srt_ctx_lst) // args.max_examples
                if step == 0: step = 1
                for lc, rc in srt_ctx_lst[::step]:
                    l_str = context_to_output_str(lc[3:])
                    r_str = context_to_output_str(rc[:-3])
                    print(f"!{l_str:>29}<{outsym}>{r_str}")
        print("\n!-------------------------------------------------")

if __name__ == "__main__":
    main()
Source code for twol.discover

twol

Navigation

Related Topics