################################################################################
# translator.py
#
# An abstract class and subclasses for mapping strings such as file paths to
# other information. It is implemented in several ways including Python
# dictionaries and regular expression/substitution pairs.
################################################################################
import re
try:
from ._version import __version__
except ImportError: # pragma: no cover
__version__ = 'Version unspecified' # pragma: no cover
[docs]
class Translator(object):
"""Abstract class to define translators from a set of strings (such as file
paths) to associated information.
All subclasses implement the following methods:
- Translator.all(strings, strings_first=False)
- Translator.first(strings, strings_first=False)
Input is a list of strings. The Translator object tests each string and
determines if it passes a test. If it does pass the test, then one or more
"translated" strings are returned.
Translator.all() returns every translated string. Translator.first() returns
the first translated string.
Parameters:
strings: A string or list of strings to translate.
strings_first: If True, every test is applied to the first string, then
every test is applied to the second string, etc. If False, the first
test is applied to every string, then the second test is applied to
every string, etc. This can affect the order of the translated
strings returned by all(), or the single translated string that is
returned by first().
"""
[docs]
def __add__(self, other):
"""Add two translators together using the + operator.
Parameters:
other: A Translator object to append.
Returns:
A new Translator combining self and other.
"""
return self.append(other)
[docs]
def __iadd__(self, other):
"""Add two translators together using the += operator.
Parameters:
other: A Translator object to append.
Returns:
A new Translator combining self and other.
"""
return self.append(other)
################################################################################
################################################################################
[docs]
class TranslatorBySequence(Translator):
"""Translator defined by a sequence of other translators."""
TAG = 'SEQUENCE'
[docs]
def __init__(self, args):
"""Initialize a TranslatorBySequence.
Parameters:
args: A sequence of Translator objects to apply in order.
"""
for arg in args:
assert isinstance(arg, Translator)
self.sequence = args
[docs]
def all(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning every unique
result in priority order.
Parameters:
strings: A string or list of strings to translate.
strings_first: If True, try each string in order with all translators.
If False, try each translator in order with all strings.
Returns:
A list of all unique translated results in priority order.
"""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Initialize the set of results
results = []
# Two options for priority...
if strings_first: # Try each string in order
for string in strings:
for translator in self.sequence:
partial_results = translator.all(string)
if partial_results:
for item in partial_results:
if item not in results:
results.append(item)
else: # Try each translator in order
for translator in self.sequence:
partial_results = translator.all(strings)
if partial_results:
for item in partial_results:
if item not in results:
results.append(item)
return results
[docs]
def first(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning the first
result.
Parameters:
strings: A string or list of strings to translate.
strings_first: If True, try each string in order with all translators.
If False, try each translator in order with all strings.
Returns:
The first translated result, or None if no translation is found.
"""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Two options for priority...
if strings_first: # Try each string in order
for string in strings:
for translator in self.sequence:
result = translator.first(string)
if result is not None:
return result
else: # Try each string in order
for translator in self.sequence:
result = translator.first(strings)
if result is not None:
return result
return None
[docs]
def keys(self):
"""Return all of the keys.
Returns:
A list of keys from all translators in the sequence.
"""
key_list = []
for translator in self.sequence:
key_list.append(translator.keys())
return key_list
[docs]
def values(self):
"""Return all of the values in the same order as keys().
Returns:
A list of values from all translators in the sequence.
"""
value_list = []
for translator in self.sequence:
value_list.append(translator.values())
return value_list
[docs]
def prepend(self, translator):
"""Return a new translator with the given translator in front of this
one."""
if translator.TAG == 'NULL':
return translator
# If arg is also a sequence, merge
if translator.TAG == self.TAG:
return TranslatorBySequence(translator.sequence + self.sequence)
# If arg matches class of first in sequence, merge
if translator.TAG == self.sequence[0].TAG:
new_translator = self.sequence[0].prepend(translator)
if new_translator.TAG == translator.TAG:
return TranslatorBySequence(
[new_translator] + self.sequence[1:]
)
return TranslatorBySequence([translator, self])
[docs]
def append(self, translator):
"""Return a new translator with the given translator after this one.
"""
if translator.TAG == 'NULL':
return translator
# If arg is also a sequence, merge
if translator.TAG == 'SEQUENCE':
return TranslatorBySequence(self.sequence + translator.sequence)
# If arg matches class of last in sequence, merge
if translator.TAG == self.sequence[-1].TAG:
new_translator = self.sequence[-1].append(translator)
if new_translator.TAG == translator.TAG:
return TranslatorBySequence(
self.sequence[:-1] + [new_translator]
)
return TranslatorBySequence([self, translator])
################################################################################
################################################################################
[docs]
class TranslatorByDict(Translator):
"""Translator defined by a standard dictionary. Fast but inflexible.
If the value is string containing "\1", that substring is replaced by the
key.
Parameters:
arg: A dictionary mapping keys to values. Values can be strings (with
optional "\1" replacement), lists, or tuples.
path_translator: Optional Translator object that translates input strings
into the keys used in the dictionary.
"""
TAG = 'DICT'
[docs]
def __init__(self, arg, path_translator=None):
"""Initialize a TranslatorByDict.
Parameters:
arg: A dictionary mapping keys to values.
path_translator: Optional Translator to translate input strings to keys.
"""
assert isinstance(arg, dict)
self.dict = arg
self.path_translator = path_translator
[docs]
def all(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning every unique
result in priority order.
Parameters:
strings: A string or list of strings to translate.
strings_first: Ignored for this subclass.
Returns:
A list of all unique translated results in priority order.
"""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Convert the strings to dictionary keys
if self.path_translator is None:
keys = strings
else:
keys = self.path_translator.all(strings)
# Initialize the set of results
results = []
# Test keys in order
for key in keys:
if key in self.dict:
result = self.dict[key]
expanded = TranslatorByDict.expand(result, key)
for result in expanded:
if result not in results:
results.append(result)
return results
[docs]
def first(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning the first
result.
Parameters:
strings: A string or list of strings to translate.
strings_first: Ignored for this subclass.
Returns:
The first translated result, or None if no translation is found.
"""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Convert the strings to dictionary keys, preserving order
if self.path_translator is None:
keys = strings
else:
keys = self.path_translator.all(strings)
# Test keys in order
for key in keys:
if key in self.dict:
result = self.dict[key]
expanded = TranslatorByDict.expand(result, key)
return expanded[0]
return None
[docs]
@staticmethod
def expand(results, key):
"""Expand result values by replacing "\1" with the key.
Parameters:
results: A value or list of values from the dictionary.
key: The dictionary key to use for "\1" replacement.
Returns:
A list of expanded results.
"""
if not isinstance(results, list):
results = [results]
expanded = []
for result in results:
if isinstance(result, str):
result = result.replace(r'\1', key)
elif isinstance(result, tuple):
items = []
for item in result:
if isinstance(item, str):
item = item.replace(r'\1', key)
items.append(item)
result = tuple(items)
expanded.append(result)
return expanded
[docs]
def keys(self):
"""Return all of the keys (in a vaguely sensible order).
Returns:
A sorted list of dictionary keys.
"""
keylist = list(self.dict.keys())
keylist.sort()
return keylist
[docs]
def values(self):
"""Return all of the values in the same order as keys()."""
keylist = self.keys()
return [self.dict[k] for k in keylist]
[docs]
def prepend(self, translator):
"""Return a new translator with the given translator in front of this
one."""
if translator.TAG == 'NULL':
return translator
if translator.TAG == 'SEQUENCE':
return translator.append(self)
return TranslatorBySequence([translator, self])
[docs]
def append(self, translator):
"""Add a new translator after this one."""
if translator.TAG == 'NULL':
return translator
if translator.TAG == 'SEQUENCE':
return translator.prepend(self)
return TranslatorBySequence([self, translator])
################################################################################
################################################################################
[docs]
class TranslatorByRegex(Translator):
"""Translator defined by a list of tuples defining regular expressions.
Each element in the list must be a tuple:
(regular expression string, flags, value)
or a tuple:
(compiled regex, value)
Upon evaluation, if the regular expression matches a given string, using the
given set of regular expression flags, then the replacement patterns are
applied to a value and the modified value is returned.
Parameters:
tuples: A list of tuples. Each tuple can be:
- (regex_string, flags, replacement_value) for 3-tuple
- (regex_string, replacement_value) for 2-tuple
- (compiled_regex, replacement_value) for 2-tuple with compiled regex
The replacement value can be a string, a list of strings, or a tuple of
strings. Strings can contain "#UPPER#", "#LOWER#", "#MIXED#" directives for
case control, and dictionary expressions like "{'a': 'b'}['a']" for inline
evaluation.
"""
TAG = 'REGEX'
[docs]
def __init__(self, tuples):
"""Initialize a TranslatorByRegex.
Parameters:
tuples: A list of tuples defining regex patterns and replacements.
"""
# Compile regular expressions (if not already compiled)
compiled_tuples = []
for items in tuples:
if len(items) == 2:
if isinstance(items[0], str):
items = (re.compile('^' + items[0] + '$'), items[1])
compiled_tuples.append(items)
else:
regex = re.compile('^' + items[0] + '$', flags=items[1])
compiled_tuples.append((regex, items[2]))
self.tuples = compiled_tuples
[docs]
def all(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning every unique
value in priority order.
Parameters:
strings: A string or list of strings to translate.
strings_first: If True, try each string in order with all regex patterns.
If False, try each regex pattern in order with all strings.
Returns:
A list of all unique translated results in priority order.
"""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Initialize the list of results
results = []
# Two options for priority...
if strings_first: # Try each string in order
for string in strings:
for (regex, replacement) in self.tuples:
expanded = TranslatorByRegex.expand(
regex, string, replacement
)
for item in expanded:
if item not in results:
results.append(item)
else: # Try each regex in order
for (regex, replacement) in self.tuples:
for string in strings:
expanded = TranslatorByRegex.expand(
regex, string, replacement
)
for item in expanded:
if item not in results:
results.append(item)
return results
[docs]
def first(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning the first
result. Return None if no translation is found."""
# Convert an individual string to a list
if isinstance(strings, str):
strings = [strings]
# Two options for priority...
if strings_first: # Try each string in order
for string in strings:
for (regex, replacement) in self.tuples:
expanded = TranslatorByRegex.expand(
regex, string, replacement
)
if expanded:
return expanded[0]
else: # Try each regex in order
for (regex, replacement) in self.tuples:
for string in strings:
expanded = TranslatorByRegex.expand(
regex, string, replacement
)
if expanded:
return expanded[0]
return None
[docs]
@staticmethod
def expand(regex, string, replacements):
"""Handle substitutions in the cases where the replacement is a list, a
string, or a tuple containing strings.
Parameters:
regex: A compiled regular expression pattern.
string: The string to match against the regex.
replacements: A string, list of strings, tuple of strings, or other
value to use as replacement.
Returns:
A list of expanded replacement results, or empty list if no match.
"""
def _fix_case(string):
# Change text following "#UPPER#" to upper case
# Change text following "#LOWER#" to lower case
# Stop changing case of text following "#MIXED#"
parts = string.split('#')
newparts = []
change = 'MIXED'
literal_hash = False
for part in parts:
if part in ('LOWER', 'UPPER', 'MIXED'):
change = part
literal_hash = False
else:
if change == 'UPPER':
part = part.upper()
elif change == 'LOWER':
part = part.lower()
if literal_hash:
newparts.append('#')
newparts.append(part)
literal_hash = True
return ''.join(newparts)
def _evaluate_dict(string):
# Evaluate an in-line dictionary expression
dicts = re.findall(r'{.*?}\[.*?\]', string)
for d in dicts:
value = eval(d)
string = string.replace(d, value)
return string
matchobj = regex.match(string)
if matchobj is None:
return []
if not isinstance(replacements, list):
replacements = [replacements]
results = []
for replacement in replacements:
# If replacement is a string, apply substitution
if isinstance(replacement, str):
result = matchobj.expand(replacement)
result = _fix_case(result)
result = _evaluate_dict(result)
results.append(result)
# Deal with a tuple
elif isinstance(replacement, tuple):
items = []
for item in replacement:
if isinstance(item, str):
result = matchobj.expand(item)
result = _fix_case(result)
result = _evaluate_dict(result)
items.append(matchobj.expand(result))
else:
items.append(item)
results.append(tuple(items))
# Anything else is unchanged
else:
results.append(replacement)
return results
[docs]
def keys(self):
"""Return all of the keys."""
return [t[0] for t in self.tuples]
[docs]
def values(self):
"""Return all of the values in the same order as keys()."""
return [t[1] for t in self.tuples]
[docs]
def prepend(self, translator):
"""Return a new translator with the given translator in front of this
one."""
if translator.TAG == 'NULL':
return translator
if translator.TAG == self.TAG:
return TranslatorByRegex(translator.tuples + self.tuples)
if translator.TAG == 'SEQUENCE':
return translator.append(self)
return TranslatorBySequence([translator, self])
[docs]
def append(self, translator):
"""Add a new translator after this one."""
if translator.TAG == 'NULL':
return translator
if translator.TAG == self.TAG:
return TranslatorByRegex(self.tuples + translator.tuples)
if translator.TAG == 'SEQUENCE':
return translator.prepend(self)
return TranslatorBySequence([self, translator])
################################################################################
################################################################################
[docs]
class NullTranslator(Translator):
"""Translator that returns nothing."""
TAG = 'NULL'
[docs]
def __init__(self):
pass
[docs]
def all(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning every unique
result."""
return []
[docs]
def first(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning the first
result. Return None if no translation is found."""
return None
[docs]
def keys(self):
"""Return all of the keys."""
return []
[docs]
def values(self):
"""Return all of the values in the same order as keys()."""
return []
[docs]
def prepend(self, translator):
"""Return a new translator with the given translator in front of this
one."""
return translator
[docs]
def append(self, translator):
"""Return a new translator with the given translator after this one.
"""
return translator
################################################################################
################################################################################
[docs]
class SelfTranslator(Translator):
"""Translator that returns itself."""
TAG = 'SELF'
[docs]
def __init__(self):
pass
[docs]
def all(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning every unique
result."""
return strings
[docs]
def first(self, strings, strings_first=False):
"""Apply a translator to one or more strings, returning the first
result. Return None if no translation is found."""
return strings[0]
[docs]
def keys(self):
"""Return all of the keys."""
return []
[docs]
def values(self):
"""Return all of the values in the same order as keys()."""
return []
[docs]
def prepend(self, translator):
"""Return a new translator with the given translator in front of this
one."""
if translator.TAG == self.TAG:
return self
if translator.TAG == 'NULL':
return self
if translator.TAG == 'SEQUENCE':
return translator.append(self)
return TranslatorBySequence([translator, self])
[docs]
def append(self, translator):
"""Return a new translator with the given translator after this one.
"""
if translator.TAG == self.TAG:
return self
if translator.TAG == 'NULL':
return self
if translator.TAG == 'SEQUENCE':
return translator.prepend(self)
return TranslatorBySequence([self, translator])
################################################################################
################################################################################