"""Utilities for handling S-expression lists.
Contains functions for parsing and manipulating S-expressions in Python, which are
represented as recursively nested lists, with strings as "symbols".
Some of this code is borrowed from the following repository:
https://github.com/bitbanger/schemas/blob/master/pyschemas/sexpr.py
"""
from eta.util.general import flatten, replaceall, symbolp, atom, escaped_symbol_p, isquote, standardize
import eta.util.file as file
[docs]
def balanced_substr(s):
    """Return the shortest prefix of ``s`` whose parentheses are balanced.

    Scanning starts at index 1 with a nesting depth of 1, i.e. the first
    character is treated as an already-open parenthesis and is not inspected.

    Parameters
    ----------
    s : str
        The string to scan.

    Returns
    -------
    str or None
        The prefix of ``s`` ending at the parenthesis that brings the depth back
        to zero, or None if the depth never returns to zero.
    """
    depth = 1
    # `end` is the exclusive slice bound for the character currently inspected.
    for end, ch in enumerate(s[1:], start=2):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        if depth == 0:
            return s[:end]
    return None
[docs]
def clean_s_expr(s_expr):
    """Clean a string representation of an S-expression by removing newlines and standardizing whitespace.

    The steps, in order:

    1. Newlines and tabs are deleted (not replaced by a space).
    2. ``replaceall`` is applied with three patterns targeting ``|...|`` escaped
       symbols: a space directly after the opening pipe, a space directly before
       the closing pipe, and up to four space-separated words inside the pipes,
       which are rejoined with underscores.
       NOTE(review): presumably the ``True`` flag tells ``replaceall`` to treat the
       pattern as a regular expression -- confirm against ``replaceall``.
    3. Runs of underscores are collapsed to a single underscore (anywhere in the
       string, not only inside pipes).
    4. Runs of spaces are collapsed to a single space.

    Parameters
    ----------
    s_expr : str
        The raw S-expression string.

    Returns
    -------
    str
        The cleaned S-expression string.
    """
    s_expr = s_expr.replace('\n', '')
    s_expr = s_expr.replace('\t', '')
    s_expr = replaceall(s_expr,
        [(r"\| ([a-zA-Z0-9-_.']+)\|", r'|\1|', True),
         (r"\|([a-zA-Z0-9-_.']+) \|", r'|\1|', True),
         # For handling proper names with multiple words (up to 4)
         (r"\|([a-zA-Z0-9-_.']+)[ ]?([a-zA-Z0-9-_.']+)?[ ]?([a-zA-Z0-9-_.']+)?[ ]?([a-zA-Z0-9-_.']+)?\|",
          r'|\1_\2_\3_\4|', True)])
    # The multi-word pattern leaves one underscore per unmatched optional group;
    # squeeze them back down to single separators.
    while '__' in s_expr:
        s_expr = s_expr.replace('__', '_')
    # Collapse runs of spaces. The test must look for a *double* space: testing for
    # (and replacing) a single space with itself would never terminate.
    while '  ' in s_expr:
        s_expr = s_expr.replace('  ', ' ')
    return s_expr
[docs]
def standardize_symbols(s_expr):
    """Standardize the symbols within an S-expression by mapping to lowercase, unless enclosed in escape symbols.

    Handling per element:

    * a non-symbol is treated as a list and processed element by element;
    * an escaped symbol (``before|escaped|after``) keeps the case of the part
      between the pipes, lowercases the text around it, drops the pipes, and turns
      underscores in the escaped part into spaces;
    * a quote symbol is returned unchanged;
    * any other symbol is lowercased.

    If the escaped part contains a ``.``, the text after the *last* dot is moved
    outside the escaped part as a lowercased ``.suffix`` -- unless that text is
    empty or begins with an underscore, in which case the escaped part is left
    intact.

    Parameters
    ----------
    s_expr : s-expr
        An S-expression in recursively nested list form.

    Returns
    -------
    s-expr
        A new S-expression with standardized symbols.
    """
    def standardize_escaped(e):
        parts = e.split('|')
        before = parts[0].lower()
        escaped = parts[1]
        after = parts[2].lower()
        if '.' in escaped:
            # Split on the last dot only, so names that themselves contain dots
            # (e.g. "U.S.") no longer raise ValueError from tuple unpacking.
            word, suffix = escaped.rsplit('.', 1)
            if suffix and suffix[0] != '_':
                escaped = word
                after = '.' + suffix.strip('_').lower() + after
        # Underscores inside the pipes become spaces again.
        escaped = escaped.strip('_').replace('_', ' ')
        return before + escaped + after

    def standardize_rec(e):
        if not symbolp(e):
            return [standardize_rec(x) for x in e]
        if escaped_symbol_p(e):
            return standardize_escaped(e)
        if isquote(e):
            return e
        return e.lower()

    return standardize_rec(s_expr)
[docs]
def convert_quotes(s_expr):
    """Convert any quoted word lists (i.e., single ' symbol followed by a list of symbols) to a single quoted string.

    Within each list, a ``'`` element immediately followed by a non-empty list
    consisting solely of strings is replaced by one string: the words joined with
    spaces, passed through ``standardize``, and wrapped in double quotes. Every
    other ``'`` element in a list of two or more elements is dropped; all remaining
    elements are kept and processed recursively.

    Parameters
    ----------
    s_expr : s-expr
        An S-expression in recursively nested list form.

    Returns
    -------
    s-expr
        The S-expression with quoted word lists converted to quoted strings.
    """
    def is_word_list(x):
        return bool(x) and isinstance(x, list) and all(isinstance(w, str) for w in x)

    def convert_quotes_rec(e):
        if atom(e):
            return e
        if len(e) == 1:
            return [convert_quotes_rec(e[0])]
        converted = []
        # Pair every element with its predecessor (None for the first element).
        for prev, cur in zip([None]+e[:-1], e):
            if prev == "'" and is_word_list(cur):
                words = ' '.join(cur)
                converted.append('"'+standardize(words)+'"')
            elif cur != "'":
                converted.append(cur)
        return [convert_quotes_rec(item) for item in converted]

    return convert_quotes_rec(s_expr)
[docs]
def compress_quotes(s_expr):
    """Compress quoted expressions that were split between multiple words during parsing.

    Within each list, a string starting with ``"`` opens a quoted run. Following
    elements are collected until one of:

    * a string ending with ``"`` is reached -- the collected strings are joined
      with single spaces into one element;
    * a non-string element is reached -- the collected elements (including the
      non-string one) are kept as separate elements;
    * the list ends -- the collected elements are kept as separate elements
      (previously they were silently discarded).

    The resulting elements are then processed recursively.

    Parameters
    ----------
    s_expr : s-expr
        An S-expression in recursively nested list form.

    Returns
    -------
    s-expr
        The S-expression with split quoted strings merged back together.
    """
    def compress_quotes_rec(e):
        if atom(e):
            return e
        if len(e) == 1:
            return [compress_quotes_rec(e[0])]
        merged = []
        i = 0
        while i < len(e):
            # startswith/endswith rather than indexing, so an empty string cannot
            # raise IndexError.
            if not (isinstance(e[i], str) and e[i].startswith('"')):
                merged.append(e[i])
                i += 1
                continue
            acc = []
            closed = False
            while i < len(e):
                item = e[i]
                acc.append(item)
                i += 1
                if not isinstance(item, str):
                    # A nested list interrupts the quote; give up on merging.
                    break
                if item.endswith('"'):
                    closed = True
                    break
            if closed:
                merged.append(' '.join(acc))
            else:
                # Interrupted or unterminated quote: keep the tokens as they were
                # instead of losing them.
                merged.extend(acc)
        return [compress_quotes_rec(x) for x in merged]

    return compress_quotes_rec(s_expr)
[docs]
def parse_s_expr(s_expr):
    """Parse a string containing an S-expression (in LISP form) into a structured list.

    The string is first split into a nested list of raw tokens, which is then
    passed through ``compress_quotes``, ``standardize_symbols`` and
    ``convert_quotes``, in that order.

    Parameters
    ----------
    s_expr : str
        An S-expression in LISP form, e.g., ``(a (b c (d e)) '(f g h))``.

    Returns
    -------
    s-expr
        A structured S-expression, i.e., a recursively nested list structure with string "symbols" as atoms.
        e.g., ``['a', ['b', 'c', ['d', 'e']], "f g h"]``
    """
    def parse_s_expr_rec(text):
        text = clean_s_expr(text)
        if not text:
            return None
        # Anything not wrapped in parentheses is returned as a single atom.
        if not (text[0] == '(' and text[-1] == ')'):
            return text

        items = []
        token = []

        def flush():
            # Emit the token collected so far, if any.
            if token:
                items.append(''.join(token))
                token.clear()

        pos = 1
        while pos < len(text):
            ch = text[pos]
            if ch == '(':
                flush()
                # Hand the balanced sub-expression to a recursive call and skip past it.
                inner = balanced_substr(text[pos:])
                items.append(parse_s_expr_rec(inner))
                pos += len(inner)
                continue
            if ch == ' ':
                flush()
            elif ch != ')':
                token.append(ch)
            pos += 1
        flush()
        return items

    raw = parse_s_expr_rec(s_expr)
    return convert_quotes(standardize_symbols(compress_quotes(raw)))
[docs]
def list_to_s_expr(lst):
    """Convert an S-expression list structure to a string representing a LISP formatted S-expression.

    Lists (including instances of ``list`` subclasses) are rendered as their
    elements separated by single spaces and wrapped in parentheses; anything else
    is rendered with ``str``.

    Parameters
    ----------
    lst : s-expr
        An S-expression in recursively nested list form, e.g., ``['a', ['b', ['c', 'd']], 'e']``.

    Returns
    -------
    str
        A LISP formatted string representation of the S-expression, e.g., ``(a (b (c d)) e)``.
    """
    # isinstance rather than an exact type comparison, so list subclasses are
    # rendered as lists instead of leaking their Python repr into the output.
    if not isinstance(lst, list):
        return str(lst)
    return '(' + ' '.join(list_to_s_expr(item) for item in lst) + ')'
[docs]
def list_to_str(lst):
    """Convert an S-expression list structure to a flattened string containing each of the symbols.

    A value whose type is not exactly ``list`` is returned as ``str(lst)``.
    Otherwise the list is flattened with ``flatten`` and each resulting element is
    converted with ``str`` and joined with single spaces.

    Parameters
    ----------
    lst : s-expr
        An S-expression in recursively nested list form, e.g., ``['a', ['b', ['c', 'd']], 'e']``.

    Returns
    -------
    str
        A flattened string containing each of the symbols, e.g., ``"a b c d e"``.
    """
    if type(lst) is not list:
        return str(lst)
    return ' '.join(map(str, flatten(lst)))
[docs]
def clean_lisp(str):
    """Clean S-expressions from a LISP file by removing all commented lines and removing escape characters on symbols.

    Each line is processed as follows:

    1. Everything from the first unescaped ``;`` to the end of the line is dropped
       (escaped ``\\;`` sequences are protected by a temporary placeholder), and
       the remainder is stripped of surrounding whitespace.
    2. Lines that are empty after this step are discarded.
    3. Lines that do not start with ``(``, ``)`` or ``'`` get a single leading space.

    The surviving lines are joined with newlines and the escape sequences ``\\.``,
    ``\\,``, ``\\:``, ``\\;`` and ``\\'`` are replaced by the bare character through
    ``replaceall``.
    NOTE(review): presumably the ``False`` flag tells ``replaceall`` to do a plain
    (non-regex) replacement -- confirm against ``replaceall``.

    Parameters
    ----------
    str : str
        The raw contents of a LISP file. (The name shadows the builtin; it is kept
        for backward compatibility with keyword callers.)

    Returns
    -------
    str
        The cleaned file contents.
    """
    placeholder = '[TEMP]'
    cleaned = []
    for line in str.split('\n'):
        # Hide escaped semicolons so they are not mistaken for comment starts.
        code = line.replace('\\;', placeholder).split(';')[0].strip()
        if not code:
            continue
        code = code.replace(placeholder, '\\;')
        if code[0] not in ('(', ')', "'"):
            code = ' ' + code
        cleaned.append(code)
    # Backslashes are written explicitly ('\\.') because '\.' and friends are invalid
    # escape sequences in a normal string literal; the runtime values are unchanged.
    return replaceall('\n'.join(cleaned), [
        ('\\.', '.', False),
        ('\\,', ',', False),
        ('\\:', ':', False),
        ('\\;', ';', False),
        ("\\'", "'", False)
    ])
[docs]
def read_lisp(fname):
    """Read a list of S-expressions from a LISP file.

    The file contents are cleaned with ``clean_lisp``, wrapped in one outer pair
    of parentheses, and parsed with ``parse_s_expr``.

    Parameters
    ----------
    fname : str
        The LISP file to read from.

    Returns
    -------
    list[s-expr]
        A list of S-expressions, in recursively nested list form.
    """
    raw = file.read_file(fname)
    # Wrap everything in one outer list so the whole file parses as a single expression.
    wrapped = '(' + clean_lisp(raw) + ')'
    return parse_s_expr(wrapped)
[docs]
def write_lisp(fname, sexpr):
    """Write an S-expression to a LISP file.

    The S-expression is serialized with ``list_to_s_expr`` and the resulting
    string is handed to ``file.write_file``.

    Parameters
    ----------
    fname : str
        The LISP file to write to.
    sexpr : s-expr
        An S-expression, in recursively nested list form, to write to the file in LISP format.
    """
    serialized = list_to_s_expr(sexpr)
    file.write_file(fname, serialized)