blob: 4d618865ac4f3f084481479ce186a508e75017a6 [file] [log] [blame]
Andrew Geisslerd159c7f2021-09-02 21:05:58 -05001# This file is part of Hypothesis, which may be found at
2# https://github.com/HypothesisWorks/hypothesis/
3#
4# Most of this work is copyright (C) 2013-2021 David R. MacIver
5# (david@drmaciver.com), but it contains contributions by others. See
6# CONTRIBUTING.rst for a full list of people who may hold copyright, and
7# consult the git log if you need to determine who owns an individual
8# contribution.
9#
10# This Source Code Form is subject to the terms of the Mozilla Public License,
11# v. 2.0. If a copy of the MPL was not distributed with this file, You can
12# obtain one at https://mozilla.org/MPL/2.0/.
13#
14# END HEADER
15#
16# SPDX-License-Identifier: MPL-2.0
17
18"""This example demonstrates testing a run length encoding scheme. That is, we
19take a sequence and represent it by a shorter sequence where each 'run' of
20consecutive equal elements is represented as a single element plus a count. So
21e.g.
22
23[1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]]
24
25This demonstrates the useful decode(encode(x)) == x invariant that is often
26a fruitful source of testing with Hypothesis.
27
28It also has an example of testing invariants in response to changes in the
29underlying data.
30"""
31
32from hypothesis import assume, given, strategies as st
33
34
def run_length_encode(seq):
    """Encode a sequence as a new run-length encoded sequence.

    Each maximal run of consecutive equal elements becomes one
    ``[element, count]`` pair, so ``[1, 1, 1, 1, 2, 1]`` encodes to
    ``[[1, 4], [2, 1], [1, 1]]``. Falsy input (e.g. an empty list) encodes
    to ``[]``.

    Every run in the result has a count of at least one. Elements are
    compared with ``==``, so a value that is not equal to itself (such as
    ``float("nan")``) always starts a new run of length one.
    """
    if not seq:
        return []
    # Start with no runs at all rather than a placeholder run of length zero:
    # a placeholder would be left behind in the output whenever the first
    # element does not compare equal to itself.
    result = []
    for s in seq:
        if (
            # The very first element has no previous run to extend.
            result
            # If you uncomment this line this branch will be skipped and we'll
            # always append a new run of length 1. Note which tests fail.
            # and False
            and s == result[-1][0]
            # Try uncommenting this line and see what problems occur:
            # and result[-1][-1] < 2
        ):
            result[-1][1] += 1
        else:
            result.append([s, 1])
    return result
56
57
def run_length_decode(seq):
    """Rebuild the original sequence from its run-length encoding.

    ``seq`` is an iterable of ``(element, count)`` pairs; each element is
    repeated ``count`` times, in order, in the returned list.
    """
    decoded = []
    for element, count in seq:
        decoded.extend(element for _ in range(count))
    return decoded
66
67
# Draw elements from a small integer range so that duplicates (and therefore
# runs) turn up often; with a wide range we would almost never see a run.
Lists = st.lists(st.integers(min_value=0, max_value=10))
71
72
@given(Lists)
def test_decodes_to_starting_sequence(ls):
    """Round-tripping a sequence through encode and then decode must give
    back exactly the sequence we started with.

    Anything else means the encoding is broken.
    """
    encoded = run_length_encode(ls)
    decoded = run_length_decode(encoded)
    assert decoded == ls
81
82
@given(Lists, st.data())
def test_duplicating_an_element_does_not_increase_length(ls, data):
    """An encoder that just returned its input would pass the round-trip
    test, so we also need to check that runs really are being compressed.

    Here we deliberately create or lengthen a run and assert that the
    encoding has the same number of entries as before, because the extra
    element should be absorbed into an existing run.
    """
    # assume() discards empty lists so that there is a valid index to draw.
    # flatmap would also work, but this is simpler and tends to perform
    # better.
    assume(ls)
    last_index = len(ls) - 1
    i = data.draw(st.integers(0, last_index))
    # Placing a copy of ls[i] directly beside the original guarantees the two
    # end up in the same run of the encoded result.
    ls2 = ls[:i] + [ls[i]] + ls[i:]
    extended_runs = run_length_encode(ls2)
    original_runs = run_length_encode(ls)
    assert len(extended_runs) == len(original_runs)