#! /usr/bin/env python3
#
# Copyright (C) 2018-2019 Garmin Ltd.
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import multiprocessing
import os
import socket
import sys
import tempfile
import threading
import unittest

from . import create_server, create_client
Brad Bishop19323692019-04-05 15:28:33 -040018
def _run_server(server, idx):
    """Redirect this process's output to a per-server log and serve forever.

    Used as the ``multiprocessing.Process`` target for each test server.

    Args:
        server: Server object exposing ``serve_forever()``.
        idx: Integer index of the server; selects the log file name.
    """
    # Uncomment to capture debug logging from the server process:
    # logging.basicConfig(level=logging.DEBUG, filename='bbhashserv.log', filemode='w',
    #                     format='%(levelname)s %(filename)s:%(lineno)d %(message)s')
    log_file = open('bbhashserv-%d.log' % idx, 'w')
    # stdout and stderr deliberately share one file object.
    sys.stdout = log_file
    sys.stderr = log_file
    server.serve_forever()
Brad Bishopa34c0302019-09-23 22:34:48 -040025
Andrew Geissler09209ee2020-12-13 08:44:15 -060026
class HashEquivalenceTestSetup(object):
    """Mixin providing server/client fixtures for hash equivalence tests.

    The class it is mixed into must supply ``get_server_addr(server_idx)``
    as well as the ``unittest.TestCase`` API used here (``addCleanup``,
    ``skipTest`` and ``assertEqual``).
    """

    # Method name passed to the client for every hash query.
    METHOD = 'TestMethod'

    # Number of servers started so far; bumped by start_server().
    server_index = 0

    def start_server(self, dbpath=None, upstream=None, read_only=False):
        """Start a server in a child process and connect a client to it.

        Args:
            dbpath: Database path; when None a fresh per-server file inside
                the temporary directory is used.
            upstream: Forwarded to ``create_server`` as ``upstream``.
            read_only: Forwarded to ``create_server`` as ``read_only``.

        Returns:
            A ``(client, server)`` tuple. Both are torn down automatically
            through registered cleanups.
        """
        self.server_index += 1
        index = self.server_index

        if dbpath is None:
            dbpath = os.path.join(self.temp_dir.name, "db%d.sqlite" % index)

        server = create_server(
            self.get_server_addr(index),
            dbpath,
            upstream=upstream,
            read_only=read_only,
        )
        # Remember the path so tests can start another server on the same DB.
        server.dbpath = dbpath

        def stop_process(proc):
            proc.terminate()
            proc.join()

        server.thread = multiprocessing.Process(target=_run_server, args=(server, index))
        server.thread.start()
        self.addCleanup(stop_process, server.thread)

        client = create_client(server.address)
        self.addCleanup(client.close)

        return (client, server)

    def setUp(self):
        """Create the temporary directory and the default server/client pair."""
        if sys.version_info < (3, 5, 0):
            self.skipTest('Python 3.5 or later required')

        self.temp_dir = tempfile.TemporaryDirectory(prefix='bb-hashserv')
        self.addCleanup(self.temp_dir.cleanup)

        client, server = self.start_server()
        self.client = client
        self.server = server

    def assertClientGetHash(self, client, taskhash, unihash):
        """Assert that ``client`` reports ``unihash`` for ``taskhash``."""
        self.assertEqual(client.get_unihash(self.METHOD, taskhash), unihash)
Brad Bishop19323692019-04-05 15:28:33 -040071
Andrew Geissler09209ee2020-12-13 08:44:15 -060072
class HashEquivalenceCommonTests(object):
    """Transport-independent hash equivalence server tests.

    Intended to be mixed into a ``unittest.TestCase`` together with a setup
    class that provides ``self.client``, ``self.server``, ``self.METHOD``,
    ``self.start_server()`` and ``self.assertClientGetHash()``.
    """

    def test_create_hash(self):
        """A unihash can be reported for a previously unknown taskhash."""
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        self.assertClientGetHash(self.client, taskhash, None)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_create_equivalent(self):
        """A second task with the same outhash is assigned the same unihash."""
        taskhash = '53b8dce672cb6d0c73170be43f540460bfc347b4'
        outhash = '5a9cb1649625f0bf41fc7791b635cd9c2d7118c7f021ba87dcd03f72b67ce7a8'
        unihash = 'f37918cc02eb5a520b1aff86faacbc0a38124646'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Report a different task with the same outhash. The returned unihash
        # should match the first task
        taskhash2 = '3bf6f1e89d26205aec90da04854fbdbf73afe6b4'
        unihash2 = 'af36b199320e611fbb16f1f277d3ee1d619ca58b'
        result = self.client.report_unihash(taskhash2, self.METHOD, outhash, unihash2)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

    def test_duplicate_taskhash(self):
        """Repeated reports of one taskhash keep the first reported unihash.

        Later reports use different outhash and unihash values; lookups must
        still return the unihash from the first report.
        """
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        self.assertClientGetHash(self.client, taskhash, unihash)

        outhash3 = '77623a549b5b1a31e3732dfa8fe61d7ce5d44b3370f253c5360e136b852967b4'
        unihash3 = '9217a7d6398518e5dc002ed58f2cbbbc78696603'
        self.client.report_unihash(taskhash, self.METHOD, outhash3, unihash3)

        self.assertClientGetHash(self.client, taskhash, unihash)

    def test_huge_message(self):
        """A report several times larger than ``max_chunk`` round-trips intact."""
        taskhash = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        self.assertClientGetHash(self.client, taskhash, None)

        # Payload sized at four times the client's chunk limit.
        siginfo = "0" * (self.client.max_chunk * 4)

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash, {
            'outhash_siginfo': siginfo
        })
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        result = self.client.get_taskhash(self.METHOD, taskhash, True)
        self.assertEqual(result['taskhash'], taskhash)
        self.assertEqual(result['unihash'], unihash)
        self.assertEqual(result['method'], self.METHOD)
        self.assertEqual(result['outhash'], outhash)
        self.assertEqual(result['outhash_siginfo'], siginfo)

    def test_stress(self):
        """Query 1000 reported hashes concurrently from 100 client threads."""
        # The hashes are the same for the reporter and every worker, so
        # compute them once up front.
        taskhashes = [
            hashlib.sha256(str(i).encode('utf-8')).hexdigest()
            for i in range(1000)
        ]

        def query_server(failures):
            # An exception raised in a worker thread does not propagate to
            # the thread that join()s it, so record it explicitly; otherwise
            # a broken worker would let the test pass without checking
            # anything.
            try:
                client = create_client(self.server.address)
                try:
                    for taskhash in taskhashes:
                        result = client.get_unihash(self.METHOD, taskhash)
                        if result != taskhash:
                            failures.append("taskhash mismatch: %s != %s" % (result, taskhash))
                finally:
                    client.close()
            except Exception as exc:
                failures.append("query thread raised: %r" % (exc,))

        # Report hashes
        for taskhash in taskhashes:
            self.client.report_unihash(taskhash, self.METHOD, taskhash, taskhash)

        failures = []
        threads = [threading.Thread(target=query_server, args=(failures,)) for _ in range(100)]

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        self.assertFalse(failures)

    def test_upstream_server(self):
        """Hashes are proxied from, and backfilled from, an upstream server."""
        # Tests upstream server support. This is done by creating two servers
        # that share a database file. The downstream server has its upstream
        # set to the test server, whereas the side server doesn't. This allows
        # verification that the hash requests are being proxied to the upstream
        # server by verifying that they appear on the downstream client, but not
        # the side client. It also verifies that the results are pulled into
        # the downstream database by checking that the downstream and side servers
        # match after the downstream is done waiting for all backfill tasks
        (down_client, down_server) = self.start_server(upstream=self.server.address)
        (side_client, side_server) = self.start_server(dbpath=down_server.dbpath)

        def check_hash(taskhash, unihash, old_sidehash):
            # check upstream server
            self.assertClientGetHash(self.client, taskhash, unihash)

            # Hash should *not* be present on the side server
            self.assertClientGetHash(side_client, taskhash, old_sidehash)

            # Hash should be present on the downstream server, since it
            # will defer to the upstream server. This will trigger
            # the backfill in the downstream server
            self.assertClientGetHash(down_client, taskhash, unihash)

            # After waiting for the downstream client to finish backfilling the
            # task from the upstream server, it should appear in the side server
            # since the database is populated
            down_client.backfill_wait()
            self.assertClientGetHash(side_client, taskhash, unihash)

        # Basic report
        taskhash = '8aa96fcffb5831b3c2c0cb75f0431e3f8b20554a'
        outhash = 'afe240a439959ce86f5e322f8c208e1fedefea9e813f2140c81af866cc9edf7e'
        unihash = '218e57509998197d570e2c98512d0105985dffc9'
        self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)

        check_hash(taskhash, unihash, None)

        # Duplicated taskhash with multiple output hashes and unihashes.
        # All servers should agree with the originally reported hash
        outhash2 = '0904a7fe3dc712d9fd8a74a616ddca2a825a8ee97adf0bd3fc86082c7639914d'
        unihash2 = 'ae9a7d252735f0dafcdb10e2e02561ca3a47314c'
        self.client.report_unihash(taskhash, self.METHOD, outhash2, unihash2)

        check_hash(taskhash, unihash, unihash)

        # Report an equivalent task. The sideload will originally report
        # no unihash until backfilled
        taskhash3 = "044c2ec8aaf480685a00ff6ff49e6162e6ad34e1"
        unihash3 = "def64766090d28f627e816454ed46894bb3aab36"
        self.client.report_unihash(taskhash3, self.METHOD, outhash, unihash3)

        check_hash(taskhash3, unihash, None)

        # Test that reporting a unihash in the downstream client isn't
        # propagating to the upstream server
        taskhash4 = "e3da00593d6a7fb435c7e2114976c59c5fd6d561"
        outhash4 = "1cf8713e645f491eb9c959d20b5cae1c47133a292626dda9b10709857cbe688a"
        unihash4 = "3b5d3d83f07f259e9086fcb422c855286e18a57d"
        down_client.report_unihash(taskhash4, self.METHOD, outhash4, unihash4)
        down_client.backfill_wait()

        self.assertClientGetHash(down_client, taskhash4, unihash4)
        self.assertClientGetHash(side_client, taskhash4, unihash4)
        self.assertClientGetHash(self.client, taskhash4, None)

        # Test that reporting a unihash in the downstream is able to find a
        # match which was previously reported to the upstream server
        taskhash5 = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash5 = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash5 = 'f46d3fbb439bd9b921095da657a4de906510d2cd'
        self.client.report_unihash(taskhash5, self.METHOD, outhash5, unihash5)

        taskhash6 = '35788efcb8dfb0a02659d81cf2bfd695fb30fafa'
        unihash6 = 'f46d3fbb439bd9b921095da657a4de906510d2ce'
        result = down_client.report_unihash(taskhash6, self.METHOD, outhash5, unihash6)
        self.assertEqual(result['unihash'], unihash5, 'Server failed to copy unihash from upstream')

    def test_ro_server(self):
        """A read-only server answers queries but rejects reports."""
        (ro_client, ro_server) = self.start_server(dbpath=self.server.dbpath, read_only=True)

        # Report a hash via the read-write server
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        result = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(result['unihash'], unihash, 'Server returned bad unihash')

        # Check the hash via the read-only server
        self.assertClientGetHash(ro_client, taskhash, unihash)

        # Ensure that reporting via the read-only server fails
        taskhash2 = 'c665584ee6817aa99edfc77a44dd853828279370'
        outhash2 = '3c979c3db45c569f51ab7626a4651074be3a9d11a84b1db076f5b14f7d39db44'
        unihash2 = '90e9bc1d1f094c51824adca7f8ea79a048d68824'

        with self.assertRaises(ConnectionError):
            ro_client.report_unihash(taskhash2, self.METHOD, outhash2, unihash2)

        # Ensure that the database was not modified
        self.assertClientGetHash(self.client, taskhash2, None)
284
Brad Bishop19323692019-04-05 15:28:33 -0400285
class TestHashEquivalenceUnixServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Runs the common tests against servers on Unix domain sockets."""

    def get_server_addr(self, server_idx):
        """Return a ``unix://`` address for a socket in the temp directory."""
        sock_path = os.path.join(self.temp_dir.name, 'sock%d' % server_idx)
        return "unix://" + sock_path
Brad Bishopa34c0302019-09-23 22:34:48 -0400289
290
class TestHashEquivalenceUnixServerLongPath(HashEquivalenceTestSetup, unittest.TestCase):
    """Checks a Unix socket server whose socket sits under a very long path."""

    # Nested directory names that make the socket path long.
    DEEP_DIRECTORY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ccccccccccccccccccccccccccccccccccccccccccc"

    def get_server_addr(self, server_idx):
        """Create the deep directory if needed and return a socket address in it."""
        deep_dir = os.path.join(self.temp_dir.name, self.DEEP_DIRECTORY)
        os.makedirs(deep_dir, exist_ok=True)
        return "unix://" + os.path.join(deep_dir, 'sock%d' % server_idx)

    def test_long_sock_path(self):
        """A hash can be reported over the long-path socket."""
        taskhash = '35788efcb8dfb0a02659d81cf2bfd695fb30faf9'
        outhash = '2765d4a5884be49b28601445c2760c5f21e7e5c0ee2b7e3fce98fd7e5970796f'
        unihash = 'f46d3fbb439bd9b921095da657a4de906510d2cd'

        # Nothing has been reported yet, so the lookup must come back empty.
        self.assertClientGetHash(self.client, taskhash, None)

        reply = self.client.report_unihash(taskhash, self.METHOD, outhash, unihash)
        self.assertEqual(reply['unihash'], unihash, 'Server returned bad unihash')
308
309
class TestHashEquivalenceTCPServer(HashEquivalenceTestSetup, HashEquivalenceCommonTests, unittest.TestCase):
    """Runs the common tests against servers listening on TCP."""

    def get_server_addr(self, server_idx):
        """Return ``<localhost IP>:0`` as the address for a new server."""
        # Some hosts make the asyncio module misbehave when IPv6 is not
        # enabled. With IPv6 enabled, using "localhost" directly should be
        # safe, but in the general case resolving the IP address explicitly
        # is more reliable.
        localhost_ip = socket.gethostbyname("localhost")
        return localhost_ip + ":0"