How to resolve the algorithm Sort an outline at every level step by step in the Python programming language
How to resolve the algorithm Sort an outline at every level step by step in the Python programming language
Table of Contents
Problem Statement
Write and test a function over an indented plain text outline which either:
Your code should detect and warn of at least two types of inconsistent indentation:
Your code should be able to detect and handle both tab-indented, and space-indented (e.g. 4 space, 2 space etc) outlines, without being given any advance warning of the indent characters used, or the size of the indent units. You should also be able to specify different types of sort, for example, as a minimum, both ascending and descending lexical sorts. Your sort should not alter the type or size of the indentation units used in the input outline.
(For an application of Indent Respectful Sort, see the Sublime Text package of that name. The Python source text [1] is available for inspection on Github).
Tests
The output sequence of an ascending lexical sort of each level should be: The output sequence of a descending lexical sort of each level should be:
Let's start with the solution:
Step by Step solution about How to resolve the algorithm Sort an outline at every level step by step in the Python programming language
Python code to sort an outline at every level
from itertools import chain, product, takewhile, tee
from functools import cmp_to_key, reduce
# Sorted outline at every level
def sortedOutline(cmp):
def go(outlineText):
indentTuples = indentTextPairs(
outlineText.splitlines()
)
return bindLR(
minimumIndent(enumerate(indentTuples))
)(lambda unitIndent: Right(
outlineFromForest(
unitIndent,
nest(foldTree(
lambda x: lambda xs: Node(x)(
sorted(xs, key=cmp_to_key(cmp))
)
)(Node('')(
forestFromIndentLevels(
indentLevelsFromLines(
unitIndent
)(indentTuples)
)
)))
)
))
return go
# Testing the function with different comparators and outlines
def main():
ascending = comparing(root)
descending = flip(ascending)
spacedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
tabbedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
confusedOutline = '''
alpha
epsilon
iota
theta
zeta
beta
delta
gamma
kappa
lambda
mu'''
raggedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
def displaySort(kcmp):
'''Sort function output with labeled comparator
for a set of four labeled outlines.
'''
k, cmp = kcmp
return [
tested(cmp, k, label)(
outline
) for (label, outline) in [
('4-space indented', spacedOutline),
('tab indented', tabbedOutline),
('Unknown 1', confusedOutline),
('Unknown 2', raggedOutline)
]
]
def tested(cmp, cmpName, outlineName):
'''Print either message or result.
'''
def go(outline):
print('\n' + outlineName, cmpName + ':')
either(print)(print)(
sortedOutline(cmp)(outline)
)
return go
# Tests applied to two comparators:
ap([
displaySort
])([
("(A -> Z)", ascending),
("(Z -> A)", descending)
])
# Helper functions
# Convert a list of strings into a list of tuples of (indent, text)
def indentTextPairs(xs):
def indentAndText(s):
pfx = list(takewhile(lambda c: c.isspace(), s))
return (pfx, s[len(pfx):])
return [indentAndText(x) for x in xs]
# Convert a list of (indent, text) tuples into a list of trees
def forestFromIndentLevels(tuples):
def go(xs):
if xs:
intIndent, v = xs[0]
firstTreeLines, rest = span(
lambda x: intIndent < x[0]
)(xs[1:])
return [Node(v)(go(firstTreeLines))] + go(rest)
else:
return []
return go(tuples)
# Convert a list of strings into a list of tuples of
# (indent level, string)
def indentLevelsFromLines(indentUnit):
def go(xs):
w = len(indentUnit)
return [
(len(x[0]) // w, x[1])
for x in xs
]
return go
# Find the minimum indent unit
def minimumIndent(indexedPrefixes):
(xs, ts) = tee(indexedPrefixes)
(ys, zs) = tee(ts)
def mindentLR(charSet):
if list(charSet):
def w(x):
return len(x[1][0])
unit = min(filter(w, ys), key=w)[1][0]
unitWidth = len(unit)
def widthCheck(a, ix):
'''Is there a line number at which
an anomalous indent width is seen?
'''
wx = len(ix[1][0])
return a if (a or 0 == wx) else (
ix[0] if 0 != wx % unitWidth else a
)
oddLine = reduce(widthCheck, zs, None)
return Left(
'Inconsistent indentation width at line ' + (
str(1 + oddLine)
)
) if oddLine else Right(''.join(unit))
else:
return Right('')
def tabSpaceCheck(a, ics):
'''Is there a line number at which a
variant indent character is used?
'''
charSet = a[0].union(set(ics[1][0]))
return a if a[1] else (
charSet, ics[0] if 1 < len(charSet) else None
)
indentCharSet, mbAnomalyLine = reduce(
tabSpaceCheck, xs, (set([]), None)
)
return bindLR(
Left(
'Mixed indent characters found in line ' + str(
1 + mbAnomalyLine
)
) if mbAnomalyLine else Right(list(indentCharSet))
)(mindentLR)
# Convert a forest of trees into an indented string
def outlineFromForest(tabString, forest):
def go(indent):
def serial(node):
return [indent + root(node)] + list(
concatMap(
go(tabString + indent)
)(nest(node))
)
return serial
return '\n'.join(
concatMap(go(''))(forest)
)
# Generic functions
# Left constructor for Either type
def Left(x):
return {'type': 'Either', 'Right': None, 'Left': x}
# Right constructor for Either type
def Right(x):
return {'type': 'Either', 'Left': None, 'Right': x}
# Node constructor for Tree type
def Node(v):
return lambda xs: {'type': 'Tree', 'root': v, 'nest': xs}
# Apply a list of functions to a list of values
def ap(fs):
def go(xs):
return [
f(x) for (f, x)
in product(fs, xs)
]
return go
# Monad injection operator
def bindLR(m):
def go(mf):
return (
mf(m.get('Right')) if None is m.get('Left') else m
)
return go
# Create an ordering function based on a property accessor
def comparing(f):
def go(x, y):
fx = f(x)
fy = f(y)
return -1 if fx < fy else (1 if fx > fy else 0)
return go
# Concatenate a list over which a function has been mapped
def concatMap(f):
def go(xs):
return chain.from_iterable(map(f, xs))
return go
# Apply a function to either a Left or a Right value
def either(fl):
return lambda fr: lambda e: fl(e['Left']) if (
None is e['Right']
) else fr(e['Right'])
# Reverse the arguments of a binary function
def flip(f):
return lambda a, b: f(b, a)
# Catamorphism on trees. A summary value defined by a depth-first fold
def foldTree(f):
def go(node):
return f(root(node))([
go(x) for x in nest(node)
])
return go
# Accessor function for children of tree node
def nest(t):
return t.get('nest')
# Accessor function for data of tree node
def root(t):
return t.get('root')
# Find the longest prefix of a list that contains only elements satisfying a predicate
def span(p):
def match(ab):
b = ab[1]
return not b or not p(b[0])
def f(ab):
a, b = ab
return a + [b[0]], b[1:]
def go(xs):
return until(match)(f)(([], xs))
return go
# Repeatedly apply a
<div id="sourcecode"/>
## Source code in the python programming language
{% raw %}
```python
'''Sort an outline at every level'''
from itertools import chain, product, takewhile, tee
from functools import cmp_to_key, reduce
# ------------- OUTLINE SORTED AT EVERY LEVEL --------------
# sortedOutline :: (Tree String -> Tree String -> Ordering)
# -> String
# -> Either String String
def sortedOutline(cmp):
'''Either a message reporting inconsistent
indentation, or an outline sorted at every
level by the supplied comparator function.
'''
def go(outlineText):
indentTuples = indentTextPairs(
outlineText.splitlines()
)
return bindLR(
minimumIndent(enumerate(indentTuples))
)(lambda unitIndent: Right(
outlineFromForest(
unitIndent,
nest(foldTree(
lambda x: lambda xs: Node(x)(
sorted(xs, key=cmp_to_key(cmp))
)
)(Node('')(
forestFromIndentLevels(
indentLevelsFromLines(
unitIndent
)(indentTuples)
)
)))
)
))
return go
# -------------------------- TEST --------------------------
# main :: IO ()
def main():
'''Ascending and descending sorts attempted on
space-indented and tab-indented outlines, both
well-formed and ill-formed.
'''
ascending = comparing(root)
descending = flip(ascending)
spacedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
tabbedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
confusedOutline = '''
alpha
epsilon
iota
theta
zeta
beta
delta
gamma
kappa
lambda
mu'''
raggedOutline = '''
zeta
beta
gamma
lambda
kappa
mu
delta
alpha
theta
iota
epsilon'''
def displaySort(kcmp):
'''Sort function output with labelled comparator
for a set of four labelled outlines.
'''
k, cmp = kcmp
return [
tested(cmp, k, label)(
outline
) for (label, outline) in [
('4-space indented', spacedOutline),
('tab indented', tabbedOutline),
('Unknown 1', confusedOutline),
('Unknown 2', raggedOutline)
]
]
def tested(cmp, cmpName, outlineName):
'''Print either message or result.
'''
def go(outline):
print('\n' + outlineName, cmpName + ':')
either(print)(print)(
sortedOutline(cmp)(outline)
)
return go
# Tests applied to two comparators:
ap([
displaySort
])([
("(A -> Z)", ascending),
("(Z -> A)", descending)
])
# ------------- OUTLINE PARSING AND RENDERING --------------
# forestFromIndentLevels :: [(Int, a)] -> [Tree a]
def forestFromIndentLevels(tuples):
'''A list of trees derived from a list of values paired
with integers giving their levels of indentation.
'''
def go(xs):
if xs:
intIndent, v = xs[0]
firstTreeLines, rest = span(
lambda x: intIndent < x[0]
)(xs[1:])
return [Node(v)(go(firstTreeLines))] + go(rest)
else:
return []
return go(tuples)
# indentLevelsFromLines :: String -> [(String, String)]
# -> [(Int, String)]
def indentLevelsFromLines(indentUnit):
'''Each input line stripped of leading
white space, and tupled with a preceding integer
giving its level of indentation from 0 upwards.
'''
def go(xs):
w = len(indentUnit)
return [
(len(x[0]) // w, x[1])
for x in xs
]
return go
# indentTextPairs :: [String] -> (String, String)
def indentTextPairs(xs):
'''A list of (indent, bodyText) pairs.'''
def indentAndText(s):
pfx = list(takewhile(lambda c: c.isspace(), s))
return (pfx, s[len(pfx):])
return [indentAndText(x) for x in xs]
# outlineFromForest :: String -> [Tree String] -> String
def outlineFromForest(tabString, forest):
'''An indented outline serialisation of forest,
using tabString as the unit of indentation.
'''
def go(indent):
def serial(node):
return [indent + root(node)] + list(
concatMap(
go(tabString + indent)
)(nest(node))
)
return serial
return '\n'.join(
concatMap(go(''))(forest)
)
# --------------- MINIMUM INDENT, OR ANOMALY ---------------
# minimumIndent :: [(Int, [Char])]
# -> Either String String
def minimumIndent(indexedPrefixes):
'''Either a message, if indentation characters are
mixed, or indentation widths are inconsistent,
or the smallest consistent non-empty indentation.
'''
(xs, ts) = tee(indexedPrefixes)
(ys, zs) = tee(ts)
def mindentLR(charSet):
if list(charSet):
def w(x):
return len(x[1][0])
unit = min(filter(w, ys), key=w)[1][0]
unitWidth = len(unit)
def widthCheck(a, ix):
'''Is there a line number at which
an anomalous indent width is seen?
'''
wx = len(ix[1][0])
return a if (a or 0 == wx) else (
ix[0] if 0 != wx % unitWidth else a
)
oddLine = reduce(widthCheck, zs, None)
return Left(
'Inconsistent indentation width at line ' + (
str(1 + oddLine)
)
) if oddLine else Right(''.join(unit))
else:
return Right('')
def tabSpaceCheck(a, ics):
'''Is there a line number at which a
variant indent character is used?
'''
charSet = a[0].union(set(ics[1][0]))
return a if a[1] else (
charSet, ics[0] if 1 < len(charSet) else None
)
indentCharSet, mbAnomalyLine = reduce(
tabSpaceCheck, xs, (set([]), None)
)
return bindLR(
Left(
'Mixed indent characters found in line ' + str(
1 + mbAnomalyLine
)
) if mbAnomalyLine else Right(list(indentCharSet))
)(mindentLR)
# ------------------------ GENERIC -------------------------
# Left :: a -> Either a b
def Left(x):
'''Constructor for an empty Either (option type) value
with an associated string.
'''
return {'type': 'Either', 'Right': None, 'Left': x}
# Right :: b -> Either a b
def Right(x):
'''Constructor for a populated Either (option type) value'''
return {'type': 'Either', 'Left': None, 'Right': x}
# Node :: a -> [Tree a] -> Tree a
def Node(v):
'''Constructor for a Tree node which connects a
value of some kind to a list of zero or
more child trees.
'''
return lambda xs: {'type': 'Tree', 'root': v, 'nest': xs}
# ap (<*>) :: [(a -> b)] -> [a] -> [b]
def ap(fs):
'''The application of each of a list of functions,
to each of a list of values.
'''
def go(xs):
return [
f(x) for (f, x)
in product(fs, xs)
]
return go
# bindLR (>>=) :: Either a -> (a -> Either b) -> Either b
def bindLR(m):
'''Either monad injection operator.
Two computations sequentially composed,
with any value produced by the first
passed as an argument to the second.
'''
def go(mf):
return (
mf(m.get('Right')) if None is m.get('Left') else m
)
return go
# comparing :: (a -> b) -> (a -> a -> Ordering)
def comparing(f):
'''An ordering function based on
a property accessor f.
'''
def go(x, y):
fx = f(x)
fy = f(y)
return -1 if fx < fy else (1 if fx > fy else 0)
return go
# concatMap :: (a -> [b]) -> [a] -> [b]
def concatMap(f):
'''A concatenated list over which a function has been mapped.
The list monad can be derived by using a function f which
wraps its output in a list,
(using an empty list to represent computational failure).
'''
def go(xs):
return chain.from_iterable(map(f, xs))
return go
# either :: (a -> c) -> (b -> c) -> Either a b -> c
def either(fl):
'''The application of fl to e if e is a Left value,
or the application of fr to e if e is a Right value.
'''
return lambda fr: lambda e: fl(e['Left']) if (
None is e['Right']
) else fr(e['Right'])
# flip :: (a -> b -> c) -> b -> a -> c
def flip(f):
'''The binary function f with its
arguments reversed.
'''
return lambda a, b: f(b, a)
# foldTree :: (a -> [b] -> b) -> Tree a -> b
def foldTree(f):
'''The catamorphism on trees. A summary
value defined by a depth-first fold.
'''
def go(node):
return f(root(node))([
go(x) for x in nest(node)
])
return go
# nest :: Tree a -> [Tree a]
def nest(t):
'''Accessor function for children of tree node.'''
return t.get('nest')
# root :: Tree a -> a
def root(t):
'''Accessor function for data of tree node.'''
return t.get('root')
# span :: (a -> Bool) -> [a] -> ([a], [a])
def span(p):
'''The longest (possibly empty) prefix of xs
that contains only elements satisfying p,
tupled with the remainder of xs.
span p xs is equivalent to
(takeWhile p xs, dropWhile p xs).
'''
def match(ab):
b = ab[1]
return not b or not p(b[0])
def f(ab):
a, b = ab
return a + [b[0]], b[1:]
def go(xs):
return until(match)(f)(([], xs))
return go
# until :: (a -> Bool) -> (a -> a) -> a -> a
def until(p):
'''The result of repeatedly applying f until p holds.
The initial seed value is x.
'''
def go(f):
def g(x):
v = x
while not p(v):
v = f(v)
return v
return g
return go
# MAIN ---
if __name__ == '__main__':
main()
{% endraw %}
You may also check:How to resolve the algorithm Range expansion step by step in the Wren programming language
You may also check:How to resolve the algorithm Assertions step by step in the Lua programming language
You may also check:How to resolve the algorithm Shoelace formula for polygonal area step by step in the REXX programming language
You may also check:How to resolve the algorithm Number names step by step in the PL/I programming language
You may also check:How to resolve the algorithm Loop over multiple arrays simultaneously step by step in the Harbour programming language