Source code for eta.util.tt.preds

"""Definitions for custom evaluable predicates used in TT patterns.

Definitions for custom evaluable predicates used in TT pattern matching and template filling, based
on the original LISP definitions:
https://github.com/bkane2/eta/blob/master/core/tt/tt-match-predicates.lisp

Some simplifications were made here, limiting the extensibility and (to a small extent) efficiency
of custom predicates. First, due to Python lacking a macro system, all predicates must be defined
here, rather than in domain-specific rule files. Second, predicate functions are invoked based on
name strings, obtained through manipulation of the corresponding variable strings, rather than through
the implicit-pred and *-variant hash tables.

Ultimately, this should be reworked to allow custom domain-specific definitions of TT predicates, in
order to avoid the redundant loading of lexical data into the core code.
"""

import eta.util.general as gute
import eta.util.ulf.lex as lex

NEG = ['neg', 'deny', 'disagree', 'no', 'nah', 'nope', 'not', 'never', 'zero', 'hardly',
             'little', 'barely', 'scarcely', 'incorrect', 'inaccurate', 'untrue', 'disagreed', 'doubt']

NEG_MOD = ['really', 'quite', 'very', 'exactly', 'entirely', 'all', 'completely']

AFFIRM_ADV = ['certainly', 'certain', 'absolutely', 'really', 'quite', 'completely', 'exactly',
                    'entirely', 'sure', 'definitely', 'correct', 'accurate', 'true']

MODAL = ['can', 'will', 'shall', 'could', 'would', 'should', 'might', 'may', 'ought']

SENTENCE_CONJUNCTIONS = ['but' ,'and', 'however', 'because', 'since', 'although', 'as', 'if']


[docs]
def comma(x):
  """Match a comma."""
  return x == ','



[docs]
def zero(x):
  """Match zero."""
  return x == 0 or x == '0'



[docs]
def non_neg(x):
  """Match any word except for negative polarity items."""
  return x not in NEG



[docs]
def non_neg_mod(x):
  """Matches any word except for negative polarity items or negative modifiers."""
  return x not in NEG and x not in NEG_MOD



[docs]
def affirm_adv(x):
  """Matches any affirmatory modifier."""
  return x in AFFIRM_ADV



[docs]
def modal(x):
  """Matches any modal word."""
  return x in MODAL



[docs]
def lex_ulf(cat, word):
  """ULF lexicalizer predicate."""
  return lex.to_ulf(cat, word)



[docs]
def quote_to_list(s):
  """Predicate to create a word list from a quoted expression."""
  if not isinstance(s, str):
    return s
  return s.strip('"').split()



[docs]
def split_sentences(wordlist):
  """Predicate to split a word list into multiple word lists for each sentence."""
  if not all([isinstance(w, str) for w in wordlist]):
    return wordlist
  
  result = []
  cur = []
  for word in wordlist:
    if word in ['.', '?', '!']:
      cur.append(word)
      result.append(cur)
      cur = []
    else:
      cur.append(word)
  if cur:
    result.append(cur)
  cur = []
  # Loop through each word with lookahead to split sentences conjoined by a comma+connective
  result1 = result.copy()
  for words in result:
    for word, lookahead in zip(words, words[1:]+[None]):
      if word in [','] and lookahead in SENTENCE_CONJUNCTIONS:
        cur.append('.')
        result1.append(cur)
        cur = []
      else:
        cur.append(word)
    if cur:
      result1.append(cur)
    cur = []
  return gute.remove_duplicates(result1, order=True)



[docs]
def prefix_each(prefix, lst):
  """Cons `prefix` to each list in `lst`."""
  return [gute.cons(prefix, l) for l in lst]