Source code for eta.util.tt.parse

"""Methods for parsing choice trees and word features from LISP definitions."""

import glob
import importlib
from transduction.tt import isa

from eta.util.general import remove_duplicates
from eta.util.sexpr import read_lisp



[docs]
def init_node(pattern):
  """Create a fresh choice tree node holding the given pattern.

  Parameters
  ----------
  pattern : object
    The pattern (or other output) to store in the node.

  Returns
  -------
  node : dict
    A dict with keys ``'pattern'``, ``'directive'`` (initially None),
    ``'latency'`` and ``'count'`` (both initially 0), and ``'child'`` and
    ``'next'`` (both initially empty dicts).
  """
  node = dict(pattern=pattern, directive=None, latency=0, count=0)
  # Each node gets its own empty containers for the links to other nodes.
  node['child'] = {}
  node['next'] = {}
  return node




[docs]
def readrules(packet):
  """Create a choice tree from a packet of pattern and template rules.

  Parameters
  ----------
  packet : list
    A list of form ``[depth, pattern, optional-pair, depth, pattern, optional-pair, ...]``,
    where:

      - ``depth`` is 1 for top-level rules, 2 for direct children, etc.
        (either an int or a string of digits),
      - ``pattern`` is a decomposition pattern or other output,
      - ``optional-pair`` is present iff ``pattern`` is a reassembly pattern or other output,
        and consists of a ``(latency, directive)`` tuple, where latency is an integer >= 0
        specifying how long to wait to use a rule again, and directive is a symbol such
        as ``:out``, ``:subtree``, ``:gist``, etc. specifying how the output should be used.

  Returns
  -------
  root : dict
    The root of the choice tree (a nested dict structure) created from the packet,
    or an empty dict if the packet has fewer than two elements.

  Raises
  ------
  IndexError
    If a depth number is the last element of the packet (no pattern follows it).
  """
  if len(packet) < 2:
    return {}
  root = init_node(packet[1])
  # Stack of (depth, node) pairs; the top is the most recently added rule
  stack = [(1, root)]
  # Cursor into the packet, starting past the 1st depth number and pattern.
  # Walking by index avoids re-copying the rest of the packet at every step.
  pos = 2
  end = len(packet)

  # Loop until full rule tree is built
  while pos < end:
    n = packet[pos]
    pos += 1

    # If n is a (positive) number, it is the depth of a new rule
    if (isinstance(n, int) or (isinstance(n, str) and n.isdigit())) and int(n) > 0:
      n = int(n)
      node = init_node(packet[pos])
      # Advance past the current pattern
      pos += 1
      top_depth = stack[-1][0]

      # New rule at greater depth?
      if n > top_depth:
        # Let 'child' of previous rule point to new rule
        stack[-1][1]['child'] = node

      # New rule at same or lower depth?
      else:
        # Pop a number of stack elements equal to the depth differential
        # (none if the new rule is at the same depth as the previous one)
        for _ in range(top_depth - n):
          stack.pop()
        # Resulting top element is taken to be the previous rule at this depth:
        # pop it and set its 'next' pointer to the new rule
        stack.pop()[1]['next'] = node

      # In every case the new rule becomes the top of the stack
      stack.append((n, node))

    # If n is a [latency, directive] pair rather than depth number,
    # set the latency and directive of the rule at the top of the stack
    else:
      stack[-1][1]['latency'] = int(n[0])
      stack[-1][1]['directive'] = n[1]

  return root

  # END readrules



[docs]
def attachfeat(feat_xx, feats):
  """Record a feature for a list of words in a word feature dict (modified in-place).

  Parameters
  ----------
  feat_xx : list[str]
    A list of form ``[feat, x1, x2, ..., xk]``,
    where:

      - ``feat`` is a string, regarded as a feature.
      - ``x1``, ``x2``, ... are the words that should receive ``feat`` as a feature.

  feats : dict
    A dict mapping words to lists of features. Words for which
    ``isa(word, feat, feats)`` already holds are left untouched; for every
    other word, ``feat`` is appended to its feature list (creating the list
    if the word has no entry yet).
  """
  feat = feat_xx[0]
  words = feat_xx[1:]
  for word in words:
    # Nothing to do if the word already counts as having this feature
    if isa(word, feat, feats):
      continue
    feats.setdefault(word, []).append(feat)




[docs]
def merge_feats(feats1, feats2):
  """Merge the feature dict ``feats2`` into ``feats1`` (modifying ``feats1`` in-place).

  Parameters
  ----------
  feats1 : dict
    A dict mapping words to feature lists; receives the merged entries.
  feats2 : dict
    A dict mapping words to feature lists; left unmodified.

  Returns
  -------
  feats1 : dict
    The same dict object passed as ``feats1``. For words present in both
    dicts, the two feature lists are concatenated and passed through
    ``remove_duplicates``; words only present in ``feats2`` get a copy of
    their feature list.
  """
  for x, f in feats2.items():
    if x in feats1:
      feats1[x] = remove_duplicates(feats1[x]+f)
    else:
      # Store a copy rather than the list itself, so that later in-place
      # appends to the merged dict (e.g., by attachfeat) cannot alter feats2
      feats1[x] = list(f)
  return feats1




[docs]
def merge_trees(trees1, trees2):
  """Merge the choice tree dict ``trees2`` into ``trees1`` and return ``trees1``.

  ``trees1`` is modified in-place; for names present in both dicts, the
  entry from ``trees2`` replaces the one in ``trees1``.
  """
  trees1.update(trees2.items())
  return trees1




[docs]
def merge_preds(preds1, preds2):
  """Merge the predicate dict ``preds2`` into ``preds1`` and return ``preds1``.

  ``preds1`` is modified in-place; for names present in both dicts, the
  entry from ``preds2`` replaces the one in ``preds1``.
  """
  preds1.update(preds2.items())
  return preds1




[docs]
def from_lisp_file(fname):
  """Parse the choice trees and word features declared in a LISP file.

  The file is read with ``read_lisp``, and each top-level declaration is
  dispatched on its first element:

    - ``readrules`` declarations are turned into a choice tree, stored under
      the declared name with any surrounding ``'`` and ``*`` characters removed;
    - ``attachfeat`` declarations add a single feature list to the features;
    - ``mapc``/``mapcar`` declarations applied to ``attachfeat`` add each
      feature list in their argument to the features.

  Any other declaration is ignored.

  Parameters
  ----------
  fname : str
    The filename to read.

  Returns
  -------
  trees : dict
    A dict containing all choice trees, keyed on their root names.
  feats : dict
    A dict mapping words to feature lists.
  """
  trees, feats = {}, {}
  for decl in read_lisp(fname):
    head = decl[0]
    if head == 'readrules':
      root_name = decl[1].strip("'").strip('*')
      trees[root_name] = readrules(decl[2])
    elif head == 'attachfeat':
      attachfeat(decl[1], feats)
    elif head in ('mapc', 'mapcar') and decl[1].strip("'").strip('*') == 'attachfeat':
      for entry in decl[2]:
        attachfeat(entry, feats)
  return trees, feats




[docs]
def read_preds_file(fname):
  """Import a Python file of predicate definitions and collect its public callables.

  The module name is derived from ``fname`` by keeping the part before the
  first ``.py`` and replacing both ``/`` and ``\\`` with ``.``; the module is
  then imported with ``importlib.import_module``.

  Parameters
  ----------
  fname : str
    The filename to read.

  Returns
  -------
  preds : dict
    A dict mapping predicate names to functions. Every callable attribute of
    the module whose name does not start with an underscore is included, with
    underscores in the name replaced by hyphens.
  """
  module_path = fname.split('.py')[0]
  for sep in ('/', '\\'):
    module_path = module_path.replace(sep, '.')
  module = importlib.import_module(module_path)

  preds = {}
  for attr in dir(module):
    obj = getattr(module, attr)
    if callable(obj) and not attr.startswith('_'):
      preds[attr.replace('_', '-')] = obj
  return preds




[docs]
def from_lisp_dirs(dirs):
  """Recursively read choice trees, word features, and predicates from a directory or list of directories.

  For each directory (in the order given), every ``*.lisp`` file found
  recursively is parsed with ``from_lisp_file``, and every ``preds.py`` file
  found recursively is loaded with ``read_preds_file``. The results are merged
  into single dicts; when the same tree or predicate name occurs more than
  once, the one read last wins. Files within a directory are processed in
  sorted path order so that the outcome does not depend on the filesystem.

  Parameters
  ----------
  dirs : str or list[str]
    The directory or directories to read.

  Returns
  -------
  trees : dict
    A dict containing all choice trees, keyed on their root names.
  feats : dict
    A dict mapping words to feature lists.
  preds : dict
    A dict mapping predicate names to functions.
  """
  trees = {}
  feats = {}
  preds = {}
  if isinstance(dirs, str):
    dirs = [dirs]
  for dirname in dirs:
    # glob returns paths in filesystem-dependent order; since later files
    # override earlier ones on name clashes, sort to keep the result reproducible
    fnames = sorted(glob.glob(dirname + '/**/*.lisp', recursive=True))
    for fname in fnames:
      trees_new, feats_new = from_lisp_file(fname)
      trees = merge_trees(trees, trees_new)
      feats = merge_feats(feats, feats_new)
    pred_fnames = sorted(glob.glob(dirname + '/**/preds.py', recursive=True))
    for pred_fname in pred_fnames:
      preds_new = read_preds_file(pred_fname)
      preds = merge_preds(preds, preds_new)
  return trees, feats, preds