Andrew Geissler | d159c7f | 2021-09-02 21:05:58 -0500 | [diff] [blame^] | 1 | # This file is part of Hypothesis, which may be found at |
| 2 | # https://github.com/HypothesisWorks/hypothesis/ |
| 3 | # |
| 4 | # Most of this work is copyright (C) 2013-2021 David R. MacIver |
| 5 | # (david@drmaciver.com), but it contains contributions by others. See |
| 6 | # CONTRIBUTING.rst for a full list of people who may hold copyright, and |
| 7 | # consult the git log if you need to determine who owns an individual |
| 8 | # contribution. |
| 9 | # |
| 10 | # This Source Code Form is subject to the terms of the Mozilla Public License, |
| 11 | # v. 2.0. If a copy of the MPL was not distributed with this file, You can |
| 12 | # obtain one at https://mozilla.org/MPL/2.0/. |
| 13 | # |
| 14 | # END HEADER |
| 15 | # |
| 16 | # SPDX-License-Identifier: MPL-2.0 |
| 17 | |
| 18 | """This example demonstrates testing a run length encoding scheme. That is, we |
| 19 | take a sequence and represent it by a shorter sequence where each 'run' of |
| 20 | consecutive equal elements is represented as a single element plus a count. So |
| 21 | e.g. |
| 22 | |
| 23 | [1, 1, 1, 1, 2, 1] is represented as [[1, 4], [2, 1], [1, 1]] |
| 24 | |
| 25 | This demonstrates the useful decode(encode(x)) == x invariant that is often |
| 26 | a fruitful source of testing with Hypothesis. |
| 27 | |
| 28 | It also has an example of testing invariants in response to changes in the |
| 29 | underlying data. |
| 30 | """ |
| 31 | |
| 32 | from hypothesis import assume, given, strategies as st |
| 33 | |
| 34 | |
| 35 | def run_length_encode(seq): |
| 36 | """Encode a sequence as a new run-length encoded sequence.""" |
| 37 | if not seq: |
| 38 | return [] |
| 39 | # By starting off the count at zero we simplify the iteration logic |
| 40 | # slightly. |
| 41 | result = [[seq[0], 0]] |
| 42 | for s in seq: |
| 43 | if ( |
| 44 | # If you uncomment this line this branch will be skipped and we'll |
| 45 | # always append a new run of length 1. Note which tests fail. |
| 46 | # False and |
| 47 | s |
| 48 | == result[-1][0] |
| 49 | # Try uncommenting this line and see what problems occur: |
| 50 | # and result[-1][-1] < 2 |
| 51 | ): |
| 52 | result[-1][1] += 1 |
| 53 | else: |
| 54 | result.append([s, 1]) |
| 55 | return result |
| 56 | |
| 57 | |
| 58 | def run_length_decode(seq): |
| 59 | """Take a previously encoded sequence and reconstruct the original from |
| 60 | it.""" |
| 61 | result = [] |
| 62 | for s, i in seq: |
| 63 | for _ in range(i): |
| 64 | result.append(s) |
| 65 | return result |
| 66 | |
| 67 | |
| 68 | # We use lists of a type that should have a relatively high duplication rate, |
| 69 | # otherwise we'd almost never get any runs. |
| 70 | Lists = st.lists(st.integers(0, 10)) |
| 71 | |
| 72 | |
| 73 | @given(Lists) |
| 74 | def test_decodes_to_starting_sequence(ls): |
| 75 | """If we encode a sequence and then decode the result, we should get the |
| 76 | original sequence back. |
| 77 | |
| 78 | Otherwise we've done something very wrong. |
| 79 | """ |
| 80 | assert run_length_decode(run_length_encode(ls)) == ls |
| 81 | |
| 82 | |
| 83 | @given(Lists, st.data()) |
| 84 | def test_duplicating_an_element_does_not_increase_length(ls, data): |
| 85 | """The previous test could be passed by simply returning the input sequence |
| 86 | so we need something that tests the compression property of our encoding. |
| 87 | |
| 88 | In this test we deliberately introduce or extend a run and assert |
| 89 | that this does not increase the length of our encoding, because they |
| 90 | should be part of the same run in the final result. |
| 91 | """ |
| 92 | # We use assume to get a valid index into the list. We could also have used |
| 93 | # e.g. flatmap, but this is relatively straightforward and will tend to |
| 94 | # perform better. |
| 95 | assume(ls) |
| 96 | i = data.draw(st.integers(0, len(ls) - 1)) |
| 97 | ls2 = list(ls) |
| 98 | # duplicating the value at i right next to it guarantees they are part of |
| 99 | # the same run in the resulting compression. |
| 100 | ls2.insert(i, ls2[i]) |
| 101 | assert len(run_length_encode(ls2)) == len(run_length_encode(ls)) |