| # This file is part of Hypothesis, which may be found at |
| # https://github.com/HypothesisWorks/hypothesis/ |
| # |
| # Most of this work is copyright (C) 2013-2021 David R. MacIver |
| # (david@drmaciver.com), but it contains contributions by others. See |
| # CONTRIBUTING.rst for a full list of people who may hold copyright, and |
| # consult the git log if you need to determine who owns an individual |
| # contribution. |
| # |
| # This Source Code Form is subject to the terms of the Mozilla Public License, |
| # v. 2.0. If a copy of the MPL was not distributed with this file, You can |
| # obtain one at https://mozilla.org/MPL/2.0/. |
| # |
| # END HEADER |
| # |
| # SPDX-License-Identifier: MPL-2.0 |
| |
| """This example demonstrates testing a run length encoding scheme. That is, we |
| take a sequence and represent it by a shorter sequence where each 'run' of |
| consecutive equal elements is represented as a single element plus a count. So |
| e.g. |
| |
| [1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]] |
| |
| This demonstrates the useful decode(encode(x)) == x invariant that is often |
| a fruitful source of testing with Hypothesis. |
| |
| It also has an example of testing invariants in response to changes in the |
| underlying data. |
| """ |
| |
| from hypothesis import assume, given, strategies as st |
| |
| |
| def run_length_encode(seq): |
| """Encode a sequence as a new run-length encoded sequence.""" |
| if not seq: |
| return [] |
| # By starting off the count at zero we simplify the iteration logic |
| # slightly. |
| result = [[seq[0], 0]] |
| for s in seq: |
| if ( |
| # If you uncomment this line this branch will be skipped and we'll |
| # always append a new run of length 1. Note which tests fail. |
| # False and |
| s |
| == result[-1][0] |
| # Try uncommenting this line and see what problems occur: |
| # and result[-1][-1] < 2 |
| ): |
| result[-1][1] += 1 |
| else: |
| result.append([s, 1]) |
| return result |
| |
| |
| def run_length_decode(seq): |
| """Take a previously encoded sequence and reconstruct the original from |
| it.""" |
| result = [] |
| for s, i in seq: |
| for _ in range(i): |
| result.append(s) |
| return result |
| |
| |
| # We use lists of a type that should have a relatively high duplication rate, |
| # otherwise we'd almost never get any runs. |
| Lists = st.lists(st.integers(0, 10)) |
| |
| |
| @given(Lists) |
| def test_decodes_to_starting_sequence(ls): |
| """If we encode a sequence and then decode the result, we should get the |
| original sequence back. |
| |
| Otherwise we've done something very wrong. |
| """ |
| assert run_length_decode(run_length_encode(ls)) == ls |
| |
| |
| @given(Lists, st.data()) |
| def test_duplicating_an_element_does_not_increase_length(ls, data): |
| """The previous test could be passed by simply returning the input sequence |
| so we need something that tests the compression property of our encoding. |
| |
| In this test we deliberately introduce or extend a run and assert |
| that this does not increase the length of our encoding, because they |
| should be part of the same run in the final result. |
| """ |
| # We use assume to get a valid index into the list. We could also have used |
| # e.g. flatmap, but this is relatively straightforward and will tend to |
| # perform better. |
| assume(ls) |
| i = data.draw(st.integers(0, len(ls) - 1)) |
| ls2 = list(ls) |
| # duplicating the value at i right next to it guarantees they are part of |
| # the same run in the resulting compression. |
| ls2.insert(i, ls2[i]) |
| assert len(run_length_encode(ls2)) == len(run_length_encode(ls)) |