Andrew Geissler | d159c7f | 2021-09-02 21:05:58 -0500 | [diff] [blame] | 1 | # This file is part of Hypothesis, which may be found at |
| 2 | # https://github.com/HypothesisWorks/hypothesis/ |
| 3 | # |
| 4 | # Most of this work is copyright (C) 2013-2021 David R. MacIver |
| 5 | # (david@drmaciver.com), but it contains contributions by others. See |
| 6 | # CONTRIBUTING.rst for a full list of people who may hold copyright, and |
| 7 | # consult the git log if you need to determine who owns an individual |
| 8 | # contribution. |
| 9 | # |
| 10 | # This Source Code Form is subject to the terms of the Mozilla Public License, |
| 11 | # v. 2.0. If a copy of the MPL was not distributed with this file, You can |
| 12 | # obtain one at https://mozilla.org/MPL/2.0/. |
| 13 | # |
| 14 | # END HEADER |
| 15 | # |
| 16 | # SPDX-License-Identifier: MPL-2.0 |
| 17 | |
| 18 | """This file demonstrates testing a binary search. |
| 19 | |
| 20 | It's a useful example because the result of the binary search is so clearly |
| 21 | determined by the invariants it must satisfy, so we can simply test for those |
| 22 | invariants. |
| 23 | |
| 24 | It also demonstrates the useful testing technique of testing how the answer |
| 25 | should change (or not) in response to movements in the underlying data. |
| 26 | """ |
| 27 | |
| 28 | from hypothesis import given, strategies as st |
| 29 | |
| 30 | |
| 31 | def binary_search(ls, v): |
| 32 | """Take a list ls and a value v such that ls is sorted and v is comparable |
| 33 | with the elements of ls. |
| 34 | |
| 35 | Return an index i such that 0 <= i <= len(v) with the properties: |
| 36 | |
| 37 | 1. ls.insert(i, v) is sorted |
| 38 | 2. ls.insert(j, v) is not sorted for j < i |
| 39 | """ |
| 40 | # Without this check we will get an index error on the next line when the |
| 41 | # list is empty. |
| 42 | if not ls: |
| 43 | return 0 |
| 44 | |
| 45 | # Without this check we will miss the case where the insertion point should |
| 46 | # be zero: The invariant we maintain in the next section is that lo is |
| 47 | # always strictly lower than the insertion point. |
| 48 | if v <= ls[0]: |
| 49 | return 0 |
| 50 | |
| 51 | # Invariant: There is no insertion point i with i <= lo |
| 52 | lo = 0 |
| 53 | |
| 54 | # Invariant: There is an insertion point i with i <= hi |
| 55 | hi = len(ls) |
| 56 | while lo + 1 < hi: |
| 57 | mid = (lo + hi) // 2 |
| 58 | if v > ls[mid]: |
| 59 | # Inserting v anywhere below mid would result in an unsorted list |
| 60 | # because it's > the value at mid. Therefore mid is a valid new lo |
| 61 | lo = mid |
| 62 | # Uncommenting the following lines will cause this to return a valid |
| 63 | # insertion point which is not always minimal. |
| 64 | # elif v == ls[mid]: |
| 65 | # return mid |
| 66 | else: |
| 67 | # Either v == ls[mid] in which case mid is a valid insertion point |
| 68 | # or v < ls[mid], in which case all valid insertion points must be |
| 69 | # < hi. Either way, mid is a valid new hi. |
| 70 | hi = mid |
| 71 | assert lo + 1 == hi |
| 72 | # We now know that there is a valid insertion point <= hi and there is no |
| 73 | # valid insertion point < hi because hi - 1 is lo. Therefore hi is the |
| 74 | # answer we were seeking |
| 75 | return hi |
| 76 | |
| 77 | |
| 78 | def is_sorted(ls): |
| 79 | """Is this list sorted?""" |
| 80 | for i in range(len(ls) - 1): |
| 81 | if ls[i] > ls[i + 1]: |
| 82 | return False |
| 83 | return True |
| 84 | |
| 85 | |
| 86 | Values = st.integers() |
| 87 | |
| 88 | # We generate arbitrary lists and turn this into generating sorting lists |
| 89 | # by just sorting them. |
| 90 | SortedLists = st.lists(Values).map(sorted) |
| 91 | |
| 92 | # We could also do it this way, but that would be a bad idea: |
| 93 | # SortedLists = st.lists(Values).filter(is_sorted) |
| 94 | # The problem is that Hypothesis will only generate long sorted lists with very |
| 95 | # low probability, so we are much better off post-processing values into the |
| 96 | # form we want than filtering them out. |
| 97 | |
| 98 | |
| 99 | @given(ls=SortedLists, v=Values) |
| 100 | def test_insert_is_sorted(ls, v): |
| 101 | """We test the first invariant: binary_search should return an index such |
| 102 | that inserting the value provided at that index would result in a sorted |
| 103 | set.""" |
| 104 | ls.insert(binary_search(ls, v), v) |
| 105 | assert is_sorted(ls) |
| 106 | |
| 107 | |
| 108 | @given(ls=SortedLists, v=Values) |
| 109 | def test_is_minimal(ls, v): |
| 110 | """We test the second invariant: binary_search should return an index such |
| 111 | that no smaller index is a valid insertion point for v.""" |
| 112 | for i in range(binary_search(ls, v)): |
| 113 | ls2 = list(ls) |
| 114 | ls2.insert(i, v) |
| 115 | assert not is_sorted(ls2) |
| 116 | |
| 117 | |
| 118 | @given(ls=SortedLists, v=Values) |
| 119 | def test_inserts_into_same_place_twice(ls, v): |
| 120 | """In this we test a *consequence* of the second invariant: When we insert |
| 121 | a value into a list twice, the insertion point should be the same both |
| 122 | times. This is because we know that v is > the previous element and == the |
| 123 | next element. |
| 124 | |
| 125 | In theory if the former passes, this should always pass. In practice, |
| 126 | failures are detected by this test with much higher probability because it |
| 127 | deliberately puts the data into a shape that is likely to trigger a |
| 128 | failure. |
| 129 | |
| 130 | This is an instance of a good general category of test: Testing how the |
| 131 | function moves in responses to changes in the underlying data. |
| 132 | """ |
| 133 | i = binary_search(ls, v) |
| 134 | ls.insert(i, v) |
| 135 | assert binary_search(ls, v) == i |