Source code for eta.discourse

"""Tools for storing and processing discourse in Eta dialogues."""

import eta.util.file as file
from eta.constants import EMOTIONS_LIST
from eta.util.general import cons, replaceall

# Lexical lookup tables, loaded once at import time from JSON resource files.
# NOTE(review): in_module=True presumably resolves the path relative to the
# eta package rather than the working directory -- confirm in eta.util.file.

# Looked up word-by-word in decompress(); a word found here is replaced by its value.
CONTRACTIONS = file.load_json('resources/lexical/contractions.json', in_module=True)
# Looked up in compress() for a word immediately followed by 'not'; the pair is replaced by the value.
NEGPAIRS = file.load_json('resources/lexical/negpairs.json', in_module=True)
# Looked up word-by-word in swap_duals() (after presubst()); a word found here is replaced by its value.
DUALS = file.load_json('resources/lexical/duals.json', in_module=True)



[docs]
class Utterance:
  """A single utterance: the speaking agent, the words spoken, and an affect.

  Parameters
  ----------
  agent : str
    The agent making the utterance.
  words : str
    The content of the utterance.
  affect : str, default=EMOTIONS_LIST[0]
    The affect of the utterance. A value that is not contained in
    ``EMOTIONS_LIST`` is not rejected; it is replaced by ``EMOTIONS_LIST[0]``.

  Attributes
  ----------
  agent : str
  words : str
  affect : str
    Always one of the entries of ``EMOTIONS_LIST``.
  """

  def __init__(self, agent, words, affect=EMOTIONS_LIST[0]):
    self.agent, self.words = agent, words
    # Unsupported affects silently fall back to the first supported emotion.
    self.affect = affect if affect in EMOTIONS_LIST else EMOTIONS_LIST[0]




[docs]
class DialogueTurn:
  """Represents a dialogue turn by an agent, which contains the utterance as well as any associated dialogue information.

  Parameters
  ----------
  utterance : Utterance
    The utterance of this turn. Its ``agent`` attribute is read at construction time.
  gists : list[str], optional
    A list of gist clauses capturing the meaning of this turn.
  semantics : list[s-expr], optional
    A list of semantic interpretations of this turn.
  pragmatics : list[s-expr], optional
    A list of pragmatic inferences drawn from this turn.
  obligations : list[s-expr], optional
    A list of obligations created by this turn.
  ep : str, optional
    The episode that this turn corresponds to.

  Each list argument that is omitted (or passed as ``None``) is replaced by a
  fresh empty list owned by this turn. A list that is passed in is stored by
  reference, not copied.

  Attributes
  ----------
  agent : str
    The agent of this turn (copied from the utterance for convenience).
  utterance : Utterance
  gists : list[str]
  semantics : list[s-expr]
  pragmatics : list[s-expr]
  obligations : list[s-expr]
  ep : str or None
  """

  def __init__(self, utterance, gists=None, semantics=None, pragmatics=None, obligations=None, ep=None):
    self.agent = utterance.agent
    self.utterance = utterance
    # Defaults are None rather than [] because a literal [] default is created
    # once at definition time and would be shared (and mutated) across all turns.
    self.gists = [] if gists is None else gists
    self.semantics = [] if semantics is None else semantics
    self.pragmatics = [] if pragmatics is None else pragmatics
    self.obligations = [] if obligations is None else obligations
    self.ep = ep




[docs]
def get_prior_turn(turns, agent=None):
  """Return the most recent turn, optionally restricted to a single agent.

  Parameters
  ----------
  turns : list[DialogueTurn]
    The dialogue turns of the conversation, in chronological order.
  agent : str, optional
    If given (and truthy), only turns whose ``agent`` equals this value are
    considered; otherwise the last turn by any agent is returned.

  Returns
  -------
  DialogueTurn or None
    The latest matching turn, or None if there is no such turn.
  """
  if not agent:
    return turns[-1] if turns else None

  # Scan chronologically and remember the latest turn made by the agent.
  latest = None
  for turn in turns:
    if turn.agent == agent:
      latest = turn
  return latest

	


[docs]
def get_prior_words(turns, agent=None):
  """Return the words of the prior turn, or a generic greeting if there is none.

  Parameters
  ----------
  turns : list[DialogueTurn]
    The dialogue turns of the conversation, in chronological order.
  agent : str, optional
    The agent whose prior words should be found (by default, the prior turn
    by any agent is used).

  Returns
  -------
  str
    ``utterance.words`` of the turn found by ``get_prior_turn``, or the
    generic utterance ``'Hello.'`` when no such turn exists.
  """
  last_turn = get_prior_turn(turns, agent)
  return last_turn.utterance.words if last_turn else 'Hello.'

  


[docs]
def parse_utt_str(str):
  """Split an utterance string into its affect and its words.

  Parameters
  ----------
  str : str
    A string representing an utterance, potentially containing an emotion tag
    such as ``[happy]`` or ``[SAD]`` (lower-case or fully upper-case form of
    an entry of ``EMOTIONS_LIST``).

  Returns
  -------
  affect : str or None
    The emotion whose tag was found in the string, or None if no tag was
    found. If tags of several emotions occur, the one that comes last in
    ``EMOTIONS_LIST`` is returned.
  words : str
    The utterance with every recognized tag removed and surrounding
    whitespace stripped (the unchanged input if no tag was found).
  """
  affect = None
  words = str
  for emotion in EMOTIONS_LIST:
    tags = (f'[{emotion}]', f'[{emotion.upper()}]')
    # Tag presence is tested against the original input, not the partially cleaned text.
    if not any(tag in str for tag in tags):
      continue
    for tag in tags:
      words = words.replace(tag, '')
    words = words.strip()
    affect = emotion
  return affect, words




[docs]
def decompress(str):
  """Expand contractions into full phrases (e.g. 'don't' or 'dont' by 'do not').

  Parameters
  ----------
  str : str
    The text to process. It is split on whitespace and each word is looked up
    in ``CONTRACTIONS``. (The parameter name shadows the builtin; it is kept
    for compatibility with existing callers.)

  Returns
  -------
  str
    The words joined by single spaces, with every word found in
    ``CONTRACTIONS`` replaced by its mapped value.
  """
  # Iterative instead of one recursive call per word, so long inputs cannot
  # exceed Python's recursion limit. The result is assembled back-to-front so
  # that ``cons`` is invoked with the same arguments, in the same order, as
  # the recursive formulation would invoke it.
  expanded = []
  for word in reversed(str.split()):
    replacement = CONTRACTIONS[word] if word in CONTRACTIONS else word
    expanded = cons(replacement, expanded)
  return ' '.join(expanded)




[docs]
def compress(str):
  """Replace auxiliary-NOT combinations by -N'T contractions.

  Parameters
  ----------
  str : str
    The text to process. It is split on whitespace and scanned left to right.
    (The parameter name shadows the builtin; it is kept for compatibility
    with existing callers.)

  Returns
  -------
  str
    The words joined by single spaces, where each word found in ``NEGPAIRS``
    that is immediately followed by ``'not'`` has been replaced, together with
    that ``'not'``, by its mapped value.
  """
  words = str.split()

  # Pass 1 (left to right): decide greedily which pairs are contracted. The
  # scan stops once fewer than two words remain, since a pair needs two words.
  heads = []
  i = 0
  while i + 1 < len(words):
    if words[i + 1] == 'not' and words[i] in NEGPAIRS:
      heads.append(NEGPAIRS[words[i]])
      i += 2
    else:
      heads.append(words[i])
      i += 1

  # Pass 2 (right to left): assemble the result with ``cons`` in the same
  # order a recursive formulation would, but without one stack frame per word,
  # so long inputs cannot exceed Python's recursion limit.
  compressed = words[i:]  # zero or one trailing word that cannot start a pair
  for head in reversed(heads):
    compressed = cons(head, compressed)
  return ' '.join(compressed)




[docs]
def presubst(str):
  """Prepare a string for calling the swap_duals function to avoid ungrammatical substitutions.

  For example, in swapping dual pronouns, we want to avoid outputs such as "why do you say i are stupid",
  while still correctly producing "why do you say your brothers are stupid".

  This function marks words with numeric suffixes, working on space-delimited words:

  * "you are" / "are you" become "you1 are2" / "are2 you1", and "you were" / "were you" become
    "you1 were2" / "were2 you1";
  * "i was" / "was i" become "i was2" / "was2 i";
  * "you" becomes "you2" when it is followed by a standalone punctuation token, or when it follows "to";
  * any remaining "you" becomes "you2" unless it is the first or second word of the string, the first
    or second word after a punctuation token, or directly preceded by one of the blocker words
    (conjunctions such as "and", "or", "but", "that", "because", "if", "when", "then", "why", ...,
    or subordinating verbs such as "think", "believe", "know", ...); in those cases it stays "you".

  This is in preparation for replacement of "you2" by "me" (rather than "i") when swap_duals is applied.

  Parameters
  ----------
  str : str
    The text to prepare. Patterns are anchored on single spaces, so words are expected to be
    separated by single spaces.

  Returns
  -------
  str
    The marked-up text, with leading and trailing whitespace stripped.
  """
  # Punctuation tokens; a "you" directly before one of these is treated as an object ("you2").
  re_punct = ['?','!',',','.',':',';']
  # Words after which a following "you" is left as "you" (not turned into "you2").
  re_blocker = ['and', 'or', 'but', 'that', 'because', 'if', 'so', 'when', 'then', 'why',
			  				'think', 'see', 'guess', 'believe', 'hope', 'do', 'can', 'would', 'should',
								'than', 'know', 'i', 'you', '-', '--']
  # Pad with spaces so that the space-anchored patterns below also match the first and last word.
  str = ' '+str+' '
  # First pass. Each entry is (pattern, replacement, flag).
  # NOTE(review): the flag presumably tells replaceall whether the pattern is a regex (True) or a
  # literal string (False), and entries are presumably applied in order -- confirm in eta.util.general.
  # NOTE(review): '|'.join(re_punct) is placed inside [...], so if this is a regex character class
  # the '|' separators become matchable characters themselves -- confirm whether that is intended.
  str = replaceall(str, [
		(" you are ", " you1 are2 ", False),
		(" are you ", " are2 you1 ", False),
		(" i was ", " i was2 ", False),
		(" was i ", " was2 i ", False),
		(" you were ", " you1 were2 ", False),
		(" were you ", " were2 you1 ", False),
		(fr" you ([{'|'.join(re_punct)}]) ", r" you2 \1 ", True),
		(" to you ", " to you2 ", False),
	])
  # Mark every remaining standalone "you" with a temporary "you0" tag ...
  str = str.replace(' you ', ' you0 ')
  # ... then restore plain "you" where it is the first or second word of the string, the first or
  # second word after punctuation, or directly preceded by a blocker word.
  # NOTE(review): the blocker alternation is not anchored at a word start, so it looks like it would
  # also match a blocker that is merely the ending of a longer word (e.g. "taxi you0" via "i") -- confirm.
  str = replaceall(str, [
		(r"^ you0 ", r" you ", True),
		(r"^ ([\S]+) you0 ", r" \1 you ", True),
		(fr"([{'|'.join(re_punct)}]) you0 ", r"\1 you ", True),
		(fr"([{'|'.join(re_punct)}]) ([\S]+) you0 ", r"\1 \2 you ", True),
		(fr"({'|'.join(re_blocker)}) you0 ", r"\1 you ", True)
  ])
  # Whatever is still tagged "you0" becomes "you2"; finally drop the padding added above.
  return str.replace(' you0 ', ' you2 ').strip()




[docs]
def swap_duals(str):
  """Swap first-person pronouns (I, me, ...) with second-person pronouns (you, ...), and vice-versa.

  Parameters
  ----------
  str : str
    The text to process. It is first passed through ``presubst`` and the
    result is split on whitespace. (The parameter name shadows the builtin;
    it is kept for compatibility with existing callers.)

  Returns
  -------
  str
    The words joined by single spaces, with every word found in ``DUALS``
    replaced by its mapped value.
  """
  prepared = presubst(str)
  # Iterative instead of one recursive call per word, so long inputs cannot
  # exceed Python's recursion limit. The result is assembled back-to-front so
  # that ``cons`` is invoked with the same arguments, in the same order, as
  # the recursive formulation would invoke it.
  swapped = []
  for word in reversed(prepared.split()):
    replacement = DUALS[word] if word in DUALS else word
    swapped = cons(replacement, swapped)
  return ' '.join(swapped)