blob: 68f64f983b26d7466acaa1516daf1d47db69d6bd [file] [log] [blame]
# Copyright (C) 2019 Garmin Ltd.
#
# SPDX-License-Identifier: GPL-2.0-only
#
from datetime import datetime, timedelta
import asyncio
import logging
import math
import time
import os
import base64
import hashlib
from . import create_async_client
import bb.asyncrpc
# Module-level logger; Server hands it to the bb.asyncrpc.AsyncServer base
# class.
logger = logging.getLogger("hashserv.server")
# Permission names used by the permissions() decorator and by
# ServerClient.user_has_permissions().
# This permission only exists to match nothing
NONE_PERM = "@none"
READ_PERM = "@read"
REPORT_PERM = "@report"
DB_ADMIN_PERM = "@db-admin"
USER_ADMIN_PERM = "@user-admin"
# Holding this permission satisfies every permission check.
ALL_PERM = "@all"
# Every permission name that may be assigned to a user. NONE_PERM is
# deliberately absent: get_perm_arg() strips it before validating against
# this set.
ALL_PERMISSIONS = {
READ_PERM,
REPORT_PERM,
DB_ADMIN_PERM,
USER_ADMIN_PERM,
ALL_PERM,
}
# Default value of the Server "anon_perms" argument, i.e. what an
# unauthenticated client may do unless the server is configured otherwise.
DEFAULT_ANON_PERMS = (
READ_PERM,
REPORT_PERM,
DB_ADMIN_PERM,
)
# hashlib algorithm name used when hashing newly issued tokens.
TOKEN_ALGORITHM = "sha256"
# 48 bytes of random data will result in 64 characters when base64
# encoded. This number also ensures that the base64 encoding won't have any
# trailing '=' characters.
TOKEN_SIZE = 48
# Number of random bytes in a salt; new_salt() hex-encodes them.
SALT_SIZE = 8
class Measurement:
    """
    Times a single interval and adds the elapsed time to a sample.

    Can be driven manually with start()/end() or used as a context manager.
    The target ``sample`` only needs an ``add(elapsed)`` method.
    """

    def __init__(self, sample):
        self.sample = sample

    def start(self):
        """Record the starting timestamp of the interval."""
        self.start_time = time.perf_counter()

    def end(self):
        """Add the time elapsed since start() to the sample."""
        elapsed = time.perf_counter() - self.start_time
        self.sample.add(elapsed)

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, *args, **kwargs):
        # The interval is recorded whether or not the body raised.
        self.end()
class Sample:
    """
    Accumulates one or more measurements into a single statistics entry.

    Elapsed times passed to add() are summed; end() reports the sum to the
    owning stats object (via its ``add`` method) and clears the accumulator.
    Used as a context manager, end() is called on exit.
    """

    def __init__(self, stats):
        self.stats = stats
        self.num_samples = 0
        self.elapsed = 0

    def measure(self):
        """Return a new Measurement that reports into this sample."""
        return Measurement(self)

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.end()

    def add(self, elapsed):
        """Accumulate one measured interval."""
        self.elapsed += elapsed
        self.num_samples += 1

    def end(self):
        """Flush the accumulated time to the stats object, if any was added."""
        if not self.num_samples:
            # Nothing was measured, so do not record an empty entry.
            return
        self.stats.add(self.elapsed)
        self.num_samples = 0
        self.elapsed = 0
class Stats:
    """
    Running statistics over a stream of elapsed times.

    Tracks the count, total, maximum, and a running mean (``m``) and sum of
    squared differences (``s``) from which the sample standard deviation is
    derived without storing the individual values.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard all recorded values."""
        self.num = 0
        self.total_time = 0
        self.max_time = 0
        self.m = 0
        self.s = 0
        self.current_elapsed = None

    def add(self, elapsed):
        """Record one elapsed time."""
        self.num += 1
        if self.num > 1:
            # Incremental update of the running mean and the sum of squared
            # differences from the mean.
            previous_mean = self.m
            self.m = previous_mean + (elapsed - previous_mean) / self.num
            self.s = self.s + (elapsed - previous_mean) * (elapsed - self.m)
        else:
            # First value: it is the mean, and there is no spread yet.
            self.m = elapsed
            self.s = 0

        self.total_time += elapsed
        self.max_time = max(self.max_time, elapsed)

    def start_sample(self):
        """Return a new Sample that reports into these statistics."""
        return Sample(self)

    @property
    def average(self):
        """Mean of the recorded values, or 0 when nothing was recorded."""
        return self.total_time / self.num if self.num != 0 else 0

    @property
    def stdev(self):
        """Sample standard deviation, or 0 with fewer than two values."""
        if self.num > 1:
            return math.sqrt(self.s / (self.num - 1))
        return 0

    def todict(self):
        """Return the reportable statistics as a plain dictionary."""
        return {
            "num": self.num,
            "total_time": self.total_time,
            "max_time": self.max_time,
            "average": self.average,
            "stdev": self.stdev,
        }
# Single lock shared by every token request; see new_token().
token_refresh_semaphore = asyncio.Lock()


async def new_token():
    """
    Generate a new random token, base64 encoded with '.' and '_' as the two
    non-alphanumeric characters.
    """
    # Prevent malicious users from using this API to deduce the entropy
    # pool on the server and thus be able to guess a token. *All* token
    # refresh requests lock the same global semaphore and then sleep for a
    # short time. This effectively rate limits the total number of requests
    # that can be made across all clients to 10/second, which should be
    # enough since you have to be an authenticated user to make the request
    # in the first place
    async with token_refresh_semaphore:
        await asyncio.sleep(0.1)
        random_bytes = os.getrandom(TOKEN_SIZE, os.GRND_NONBLOCK)

    encoded = base64.b64encode(random_bytes, altchars=b"._")
    return encoded.decode("utf-8")
def new_salt():
    """Return SALT_SIZE random bytes as a hexadecimal string."""
    random_bytes = os.getrandom(SALT_SIZE, os.GRND_NONBLOCK)
    return random_bytes.hex()
def hash_token(algo, salt, token):
    """
    Hash a token with the given salt.

    The salt and then the token are fed (UTF-8 encoded) to the hashlib
    algorithm named ``algo``. The result is the string
    "<algo>:<salt>:<hex digest>", which carries everything needed to verify
    a token later.
    """
    hasher = hashlib.new(algo)
    for part in (salt, token):
        hasher.update(part.encode("utf-8"))
    return f"{algo}:{salt}:{hasher.hexdigest()}"
def permissions(*permissions, allow_anon=True, allow_self_service=False):
    """
    Function decorator that can be used to decorate an RPC function call and
    check that the current user's permissions match the required permissions.

    If allow_anon is True, the user will also be allowed to make the RPC call
    if the anonymous user permissions match the permissions.

    If allow_self_service is True, and the "username" property in the request
    is the currently logged in user, or not specified, the user will also be
    allowed to make the request. This allows users to access normally
    privileged API, as long as they are only modifying their own user
    properties (e.g. users can be allowed to reset their own token without
    @user-admin permissions, but not the token for any other user).

    The decorated function must be an async method taking (self, request).
    When the check fails, the denial is logged and bb.asyncrpc.InvokeError is
    raised instead of calling the function.
    """
    # Imported here so the decorator does not depend on a module-level import.
    import functools

    def wrapper(func):
        # Preserve the handler's name and docstring on the wrapper.
        @functools.wraps(func)
        async def wrap(self, request):
            if allow_self_service and self.user is not None:
                username = request.get("username", self.user.username)
                if username == self.user.username:
                    # Pin the request to the logged in user so the handler
                    # always finds a "username" key.
                    request["username"] = self.user.username
                    return await func(self, request)

            if not self.user_has_permissions(*permissions, allow_anon=allow_anon):
                if not self.user:
                    username = "Anonymous user"
                    user_perms = self.server.anon_perms
                else:
                    username = self.user.username
                    user_perms = self.user.permissions

                self.logger.info(
                    "User %s with permissions %r denied from calling %s. Missing permissions(s) %r",
                    username,
                    ", ".join(user_perms),
                    func.__name__,
                    ", ".join(permissions),
                )
                raise bb.asyncrpc.InvokeError(
                    f"{username} is not allowed to access permissions(s) {', '.join(permissions)}"
                )

            return await func(self, request)

        return wrap

    return wrapper
class ServerClient(bb.asyncrpc.AsyncServerConnection):
def __init__(self, socket, server):
    """
    Set up a client connection and register its RPC handlers.

    Read-only handlers (plus "report" and the authentication calls) are
    always registered; handlers that modify the database or user accounts
    are only registered when the server is not read-only.
    """
    super().__init__(socket, "OEHASHEQUIV", server.logger)
    self.server = server
    self.max_chunk = bb.asyncrpc.DEFAULT_MAX_CHUNK
    # No user is attached to the connection until "auth" or "become-user"
    # succeeds.
    self.user = None

    always_available = {
        "get": self.handle_get,
        "get-outhash": self.handle_get_outhash,
        "get-stream": self.handle_get_stream,
        "exists-stream": self.handle_exists_stream,
        "get-stats": self.handle_get_stats,
        "get-db-usage": self.handle_get_db_usage,
        "get-db-query-columns": self.handle_get_db_query_columns,
        # Not always read-only, but internally checks if the server is
        # read-only
        "report": self.handle_report,
        "auth": self.handle_auth,
        "get-user": self.handle_get_user,
        "get-all-users": self.handle_get_all_users,
        "become-user": self.handle_become_user,
    }
    self.handlers.update(always_available)

    if self.server.read_only:
        return

    writable_only = {
        "report-equiv": self.handle_equivreport,
        "reset-stats": self.handle_reset_stats,
        "backfill-wait": self.handle_backfill_wait,
        "remove": self.handle_remove,
        "gc-mark": self.handle_gc_mark,
        "gc-sweep": self.handle_gc_sweep,
        "gc-status": self.handle_gc_status,
        "clean-unused": self.handle_clean_unused,
        "refresh-token": self.handle_refresh_token,
        "set-user-perms": self.handle_set_perms,
        "new-user": self.handle_new_user,
        "delete-user": self.handle_delete_user,
    }
    self.handlers.update(writable_only)
def raise_no_user_error(self, username):
    """Raise the InvokeError reported when ``username`` does not exist."""
    message = f"No user named '{username}' exists"
    raise bb.asyncrpc.InvokeError(message)
def user_has_permissions(self, *permissions, allow_anon=True):
    """
    Return True if the connection is allowed all of ``permissions``.

    A set of granted permissions satisfies the request when it contains
    ALL_PERM or every requested permission. The anonymous permissions are
    considered first (only when allow_anon is True), then those of the
    logged in user, if any.
    """
    required = set(permissions)

    def grants(granted):
        return ALL_PERM in granted or not (required - granted)

    if allow_anon and grants(self.server.anon_perms):
        return True

    return self.user is not None and grants(self.user.permissions)
def validate_proto_version(self):
    """Accept protocol versions newer than 1.0, up to and including 1.1."""
    return (1, 0) < self.proto_version <= (1, 1)
async def process_requests(self):
    """
    Serve requests with a database connection (and, if the server has an
    upstream configured, an upstream client) held open for the duration.
    """
    async with self.server.db_engine.connect(self.logger) as connection:
        self.db = connection

        upstream_address = self.server.upstream
        self.upstream_client = (
            None
            if upstream_address is None
            else await create_async_client(upstream_address)
        )

        try:
            await super().process_requests()
        finally:
            # Always release the upstream connection, even on error.
            if self.upstream_client is not None:
                await self.upstream_client.close()
async def dispatch_message(self, msg):
    """
    Call the handler for the first registered command found in ``msg``.

    Stream commands are called directly because the stream loop records its
    own per-line statistics; every other command is timed as one request
    sample. Raises bb.asyncrpc.ClientError if no command matches.
    """
    for command, handler in self.handlers.items():
        if command not in msg:
            continue

        self.logger.debug("Handling %s" % command)
        if "stream" in command:
            return await handler(msg[command])

        with self.server.request_stats.start_sample() as self.request_sample, self.request_sample.measure():
            return await handler(msg[command])

    raise bb.asyncrpc.ClientError("Unrecognized command %r" % msg)
@permissions(READ_PERM)
async def handle_get(self, request):
    """
    RPC "get": look up the unihash for the request's "method" and
    "taskhash". The optional "all" flag asks for the full record.
    """
    return await self.get_unihash(
        request["method"],
        request["taskhash"],
        request.get("all", False),
    )
async def get_unihash(self, method, taskhash, fetch_all=False):
    """
    Look up the unihash record for a (method, taskhash) pair.

    The local database is consulted first. On a miss, and if an upstream
    client is configured, the upstream server is queried and any answer is
    stored locally before being returned.

    With fetch_all the full record is looked up and an upstream answer is
    stored through update_unified(); otherwise only the taskhash to unihash
    mapping is looked up and stored.

    Returns a dictionary of the row's columns, or None if neither the local
    database nor the upstream server knows the taskhash.
    """
    d = None

    if fetch_all:
        row = await self.db.get_unihash_by_taskhash_full(method, taskhash)
        if row is not None:
            d = {k: row[k] for k in row.keys()}
        elif self.upstream_client is not None:
            d = await self.upstream_client.get_taskhash(method, taskhash, True)
            # update_unified() ignores a None answer
            await self.update_unified(d)
    else:
        row = await self.db.get_equivalent(method, taskhash)

        if row is not None:
            d = {k: row[k] for k in row.keys()}
        elif self.upstream_client is not None:
            d = await self.upstream_client.get_taskhash(method, taskhash)
            # The upstream server may not know the taskhash either; only
            # cache an actual answer instead of failing on None.
            if d is not None:
                await self.db.insert_unihash(
                    d["method"], d["taskhash"], d["unihash"]
                )

    return d
@permissions(READ_PERM)
async def handle_get_outhash(self, request):
    """
    RPC "get-outhash": look up the record for the request's "method",
    "outhash" and "taskhash". "with_unihash" defaults to True.
    """
    return await self.get_outhash(
        request["method"],
        request["outhash"],
        request["taskhash"],
        request.get("with_unihash", True),
    )
async def get_outhash(self, method, outhash, taskhash, with_unihash=True):
    """
    Look up the record for an output hash.

    The local database is queried first (using the unihash-joined lookup
    when with_unihash is True). On a miss, and if an upstream client is
    configured, the upstream server is asked and its answer is stored
    locally through update_unified().

    Returns a dictionary of the row's columns, or None if nothing is found.
    """
    if with_unihash:
        row = await self.db.get_unihash_by_outhash(method, outhash)
    else:
        row = await self.db.get_outhash(method, outhash)

    if row is not None:
        return {column: row[column] for column in row.keys()}

    if self.upstream_client is None:
        return None

    upstream_record = await self.upstream_client.get_outhash(
        method, outhash, taskhash
    )
    await self.update_unified(upstream_record)
    return upstream_record
async def update_unified(self, data):
    """
    Store a full record in the local database: first its taskhash to
    unihash mapping, then the outhash row. A None record is ignored.
    """
    if data is not None:
        await self.db.insert_unihash(
            data["method"], data["taskhash"], data["unihash"]
        )
        await self.db.insert_outhash(data)
async def _stream_handler(self, handler):
    """
    Run a streaming session on the connection.

    After acknowledging with an "ok" message, lines are read from the
    socket one at a time; each is passed to the ``handler`` coroutine and
    the string it returns is sent back. The session ends on an empty read
    or an "END" line, after which a final "ok" is sent.

    Returns self.NO_RESPONSE, since all replies have already been written
    to the socket.
    """
    await self.socket.send_message("ok")

    while True:
        l = await self.socket.recv()
        if not l:
            break

        try:
            # This inner loop is very sensitive and must be as fast as
            # possible (which is why the request sample is handled manually
            # instead of using 'with', and also why logging statements are
            # commented out.
            self.request_sample = self.server.request_stats.start_sample()
            request_measure = self.request_sample.measure()
            request_measure.start()

            if l == "END":
                break

            msg = await handler(l)
            await self.socket.send(msg)
        finally:
            # Runs for every line, including the terminating "END"
            request_measure.end()
            self.request_sample.end()

    await self.socket.send("ok")
    return self.NO_RESPONSE
@permissions(READ_PERM)
async def handle_get_stream(self, request):
    """
    RPC "get-stream": answer a stream of "<method> <taskhash>" lines with
    the matching unihash, or an empty string when it is unknown.
    """

    async def lookup(line):
        # No logging in here; this runs once per streamed line.
        method, taskhash = line.split()

        row = await self.db.get_equivalent(method, taskhash)
        if row is not None:
            return row["unihash"]

        if self.upstream_client is None:
            return ""

        unihash = await self.upstream_client.get_unihash(method, taskhash)
        if not unihash:
            return ""

        # Queue the taskhash so the backfill worker stores it locally.
        await self.server.backfill_queue.put((method, taskhash))
        return unihash

    return await self._stream_handler(lookup)
@permissions(READ_PERM)
async def handle_exists_stream(self, request):
    """
    RPC "exists-stream": answer each streamed unihash with "true" if it is
    known locally or upstream, otherwise "false".
    """

    async def exists(unihash):
        found = await self.db.unihash_exists(unihash)
        if not found and self.upstream_client is not None:
            found = await self.upstream_client.unihash_exists(unihash)
        return "true" if found else "false"

    return await self._stream_handler(exists)
async def report_readonly(self, data):
    """
    Answer a report without recording it.

    If a record for the reported outhash can be found, its unihash is
    returned; otherwise the unihash supplied by the client is echoed back.
    """
    method, outhash, taskhash = data["method"], data["outhash"], data["taskhash"]

    known = await self.get_outhash(method, outhash, taskhash)
    unihash = known["unihash"] if known else data["unihash"]

    return {
        "taskhash": taskhash,
        "method": method,
        "unihash": unihash,
    }
# Since this can be called either read only or to report, the check to
# report is made inside the function
@permissions(READ_PERM)
async def handle_report(self, data):
    """
    RPC "report": record a task's output hash and return the unihash the
    client should use for its taskhash.

    On a read-only server, or when the caller lacks REPORT_PERM, nothing is
    recorded and the request is answered by report_readonly().
    """
    may_record = not self.server.read_only and self.user_has_permissions(
        REPORT_PERM
    )
    if not may_record:
        return await self.report_readonly(data)

    method = data["method"]
    outhash = data["outhash"]
    taskhash = data["taskhash"]

    outhash_data = {
        "method": method,
        "outhash": outhash,
        "taskhash": taskhash,
        "created": datetime.now(),
    }

    optional_keys = ("owner", "PN", "PV", "PR", "task", "outhash_siginfo")
    outhash_data.update(
        (key, data[key]) for key in optional_keys if key in data
    )

    # An authenticated user always owns what they report
    if self.user:
        outhash_data["owner"] = self.user.username

    # Insert the new entry, unless it already exists
    if await self.db.insert_outhash(outhash_data):
        # If this row is new, check if it is equivalent to another
        # output hash
        row = await self.db.get_equivalent_for_outhash(method, outhash, taskhash)

        if row is not None:
            # A matching output hash was found. Set our taskhash to the
            # same unihash since they are equivalent
            unihash = row["unihash"]
        else:
            # No matching output hash was found. This is probably the
            # first outhash to be added.
            unihash = data["unihash"]

            # Query upstream to see if it has a unihash we can use
            if self.upstream_client is not None:
                upstream_data = await self.upstream_client.get_outhash(
                    method, outhash, taskhash
                )
                if upstream_data is not None:
                    unihash = upstream_data["unihash"]

        await self.db.insert_unihash(method, taskhash, unihash)

    # Report whatever unihash is now on record for the taskhash, falling
    # back to the client's own when there is none.
    unihash_data = await self.get_unihash(method, taskhash)
    unihash = (
        unihash_data["unihash"] if unihash_data is not None else data["unihash"]
    )

    return {
        "taskhash": taskhash,
        "method": method,
        "unihash": unihash,
    }
@permissions(READ_PERM, REPORT_PERM)
async def handle_equivreport(self, data):
    """
    RPC "report-equiv": record that the request's taskhash maps to its
    unihash, and return the mapping that is actually in effect afterwards.
    """
    method = data["method"]
    taskhash = data["taskhash"]
    reported_unihash = data["unihash"]

    await self.db.insert_unihash(method, taskhash, reported_unihash)

    # Fetch the unihash that will be reported for the taskhash. If the
    # unihash matches, it means this row was inserted (or the mapping
    # was already valid)
    row = await self.db.get_equivalent(method, taskhash)

    if row["unihash"] == reported_unihash:
        self.logger.info(
            "Adding taskhash equivalence for %s with unihash %s",
            taskhash,
            row["unihash"],
        )

    return {
        "taskhash": row["taskhash"],
        "method": row["method"],
        "unihash": row["unihash"],
    }
@permissions(READ_PERM)
async def handle_get_stats(self, request):
    """RPC "get-stats": return the server's request statistics."""
    request_stats = self.server.request_stats.todict()
    return {"requests": request_stats}
@permissions(DB_ADMIN_PERM)
async def handle_reset_stats(self, request):
    """
    RPC "reset-stats": clear the server's request statistics and return
    the values they held just before the reset.
    """
    stats = self.server.request_stats
    snapshot = {"requests": stats.todict()}
    stats.reset()
    return snapshot
@permissions(READ_PERM)
async def handle_backfill_wait(self, request):
    """
    RPC "backfill-wait": wait until the backfill queue has been fully
    processed.

    Returns {"tasks": N}, where N is the number of queued items at the time
    of the call. When the server has no backfill queue there is nothing to
    wait for, and 0 is reported.
    """
    queue = self.server.backfill_queue
    if queue is None:
        # No backfill queue exists on this server; calling qsize() or
        # join() on None would raise AttributeError.
        return {"tasks": 0}

    d = {
        "tasks": queue.qsize(),
    }
    await queue.join()
    return d
@permissions(DB_ADMIN_PERM)
async def handle_remove(self, request):
    """
    RPC "remove": delete database entries matching the "where" condition
    and return how many were removed. Raises TypeError if the condition is
    not a dictionary.
    """
    condition = request["where"]
    if isinstance(condition, dict):
        removed = await self.db.remove(condition)
        return {"count": removed}

    raise TypeError("Bad condition type %s" % type(condition))
@permissions(DB_ADMIN_PERM)
async def handle_gc_mark(self, request):
    """
    RPC "gc-mark": apply garbage collection mark "mark" to the entries
    matching the "where" condition and return the count reported by the
    database. Raises TypeError if "where" is not a dictionary or "mark" is
    not a string.
    """
    condition, mark = request["where"], request["mark"]

    # Validate the condition first, then the mark
    for value, expected_type, label in (
        (condition, dict, "condition"),
        (mark, str, "mark"),
    ):
        if not isinstance(value, expected_type):
            raise TypeError("Bad %s type %s" % (label, type(value)))

    marked = await self.db.gc_mark(mark, condition)
    return {"count": marked}
@permissions(DB_ADMIN_PERM)
async def handle_gc_sweep(self, request):
    """
    RPC "gc-sweep": run the garbage collection sweep and return the count
    reported by the database.

    The caller must name the mark currently recorded in the database;
    otherwise the sweep is refused with an InvokeError. Raises TypeError if
    "mark" is not a string.
    """
    mark = request["mark"]
    if not isinstance(mark, str):
        raise TypeError("Bad mark type %s" % type(mark))

    current_mark = await self.db.get_current_gc_mark()
    mark_is_current = bool(current_mark) and mark == current_mark
    if not mark_is_current:
        raise bb.asyncrpc.InvokeError(
            f"'{mark}' is not the current mark. Refusing to sweep"
        )

    return {"count": await self.db.gc_sweep()}
@permissions(DB_ADMIN_PERM)
async def handle_gc_status(self, request):
    """
    RPC "gc-status": return the database's garbage collection status as
    "keep", "remove" and "mark".
    """
    status = await self.db.gc_status()
    keep_rows, remove_rows, current_mark = status
    return {"keep": keep_rows, "remove": remove_rows, "mark": current_mark}
@permissions(DB_ADMIN_PERM)
async def handle_clean_unused(self, request):
    """
    RPC "clean-unused": ask the database to clean unused entries, passing
    the cutoff time that lies "max_age_seconds" in the past. Returns the
    count reported by the database.
    """
    max_age = request["max_age_seconds"]
    # The cutoff lies max_age seconds in the *past*. Negating max_age here
    # would put it in the future and make every entry look old enough.
    oldest = datetime.now() - timedelta(seconds=max_age)
    return {"count": await self.db.clean_unused(oldest)}
@permissions(DB_ADMIN_PERM)
async def handle_get_db_usage(self, request):
    """RPC "get-db-usage": return the database's usage information."""
    usage = await self.db.get_usage()
    return {"usage": usage}
@permissions(DB_ADMIN_PERM)
async def handle_get_db_query_columns(self, request):
    """RPC "get-db-query-columns": return the database's query columns."""
    columns = await self.db.get_query_columns()
    return {"columns": columns}
# The authentication API is always allowed
async def handle_auth(self, request):
    """
    RPC "auth": authenticate the connection as "username" using "token".

    The stored token has the form "<algo>:<salt>:<digest>". The supplied
    token is hashed with the stored algorithm and salt and compared against
    it. On success the connection's user is set and the user's name and
    permissions are returned.

    Every failure (unknown user, missing or malformed stored token, wrong
    token) is delayed by one second and reported with the same InvokeError.
    """
    # Imported here so this method does not depend on a module-level import.
    import hmac

    username = str(request["username"])
    token = str(request["token"])

    async def fail_auth():
        # Rate limit bad login attempts
        await asyncio.sleep(1)
        raise bb.asyncrpc.InvokeError(f"Unable to authenticate as {username}")

    user, db_token = await self.db.lookup_user_token(username)

    if not user or not db_token:
        await fail_auth()

    try:
        algo, salt, _ = db_token.split(":")
    except ValueError:
        await fail_auth()

    # Compare in constant time so the comparison does not reveal how many
    # leading characters of the hash matched. Bytes are compared because
    # compare_digest() only accepts ASCII-only str arguments.
    candidate = hash_token(algo, salt, token)
    if not hmac.compare_digest(
        candidate.encode("utf-8"), db_token.encode("utf-8")
    ):
        await fail_auth()

    self.user = user

    self.logger.info("Authenticated as %s", username)

    return {
        "result": True,
        "username": self.user.username,
        "permissions": sorted(list(self.user.permissions)),
    }
@permissions(USER_ADMIN_PERM, allow_self_service=True, allow_anon=False)
async def handle_refresh_token(self, request):
    """
    RPC "refresh-token": issue a new token for "username".

    Only the salted hash of the token is stored; the plain token is
    returned to the caller. Raises InvokeError if the user does not exist.
    """
    username = str(request["username"])

    token = await new_token()
    hashed_token = hash_token(TOKEN_ALGORITHM, new_salt(), token)

    if not await self.db.set_user_token(username, hashed_token):
        self.raise_no_user_error(username)

    return {"username": username, "token": token}
def get_perm_arg(self, arg):
    """
    Validate a permissions argument received from a client.

    ``arg`` must be a list. NONE_PERM is dropped and duplicates are
    removed; any name not in ALL_PERMISSIONS is rejected with an
    InvokeError. Returns the remaining permissions as a sorted list.
    """
    if not isinstance(arg, list):
        raise bb.asyncrpc.InvokeError("Unexpected type for permissions")

    requested = set(arg)
    # NONE_PERM is accepted but never stored
    requested.discard(NONE_PERM)

    unknown_perms = requested - ALL_PERMISSIONS
    if unknown_perms:
        raise bb.asyncrpc.InvokeError(
            "Unknown permissions %s" % ", ".join(sorted(unknown_perms))
        )

    return sorted(requested)
def return_perms(self, permissions):
    """
    Return the sorted permission list to report to a client. A user holding
    ALL_PERM is reported as holding every permission in ALL_PERMISSIONS.
    """
    effective = ALL_PERMISSIONS if ALL_PERM in permissions else permissions
    return sorted(effective)
@permissions(USER_ADMIN_PERM, allow_anon=False)
async def handle_set_perms(self, request):
    """
    RPC "set-user-perms": replace the permissions of "username" with the
    validated "permissions" list. Raises InvokeError if the user does not
    exist.
    """
    username = str(request["username"])
    new_perms = self.get_perm_arg(request["permissions"])

    updated = await self.db.set_user_perms(username, new_perms)
    if not updated:
        self.raise_no_user_error(username)

    return {
        "username": username,
        "permissions": self.return_perms(new_perms),
    }
@permissions(USER_ADMIN_PERM, allow_self_service=True, allow_anon=False)
async def handle_get_user(self, request):
    """
    RPC "get-user": return the name and permissions of "username", or None
    if no such user exists.
    """
    user = await self.db.lookup_user(str(request["username"]))
    if user is not None:
        return {
            "username": user.username,
            "permissions": self.return_perms(user.permissions),
        }
    return None
@permissions(USER_ADMIN_PERM, allow_anon=False)
async def handle_get_all_users(self, request):
    """RPC "get-all-users": return every user's name and permissions."""

    def describe(user):
        return {
            "username": user.username,
            "permissions": self.return_perms(user.permissions),
        }

    all_users = await self.db.get_all_users()
    return {"users": [describe(user) for user in all_users]}
@permissions(USER_ADMIN_PERM, allow_anon=False)
async def handle_new_user(self, request):
    """
    RPC "new-user": create user "username" with the validated
    "permissions" list and a freshly generated token.

    Only the salted hash of the token is stored; the plain token is
    returned to the caller. Raises InvokeError if the user cannot be
    created.
    """
    username = str(request["username"])
    new_perms = self.get_perm_arg(request["permissions"])

    token = await new_token()
    hashed_token = hash_token(TOKEN_ALGORITHM, new_salt(), token)

    if not await self.db.new_user(username, new_perms, hashed_token):
        raise bb.asyncrpc.InvokeError(f"Cannot create new user '{username}'")

    return {
        "username": username,
        "permissions": self.return_perms(new_perms),
        "token": token,
    }
@permissions(USER_ADMIN_PERM, allow_self_service=True, allow_anon=False)
async def handle_delete_user(self, request):
    """
    RPC "delete-user": delete user "username". Raises InvokeError if the
    user does not exist.
    """
    username = str(request["username"])

    deleted = await self.db.delete_user(username)
    if not deleted:
        self.raise_no_user_error(username)

    return {"username": username}
@permissions(USER_ADMIN_PERM, allow_anon=False)
async def handle_become_user(self, request):
    """
    RPC "become-user": switch the connection's user to "username" without
    that user's token. Raises InvokeError if the user does not exist.
    """
    username = str(request["username"])

    target = await self.db.lookup_user(username)
    if target is None:
        raise bb.asyncrpc.InvokeError(f"User {username} doesn't exist")

    self.user = target
    self.logger.info("Became user %s", username)

    return {
        "username": target.username,
        "permissions": self.return_perms(target.permissions),
    }
class Server(bb.asyncrpc.AsyncServer):
    """
    Hash equivalence server.

    Holds the state shared by all client connections: the database engine,
    the request statistics, the anonymous permissions, and (when an
    upstream server is configured) the queue of taskhashes to backfill.
    """

    def __init__(
        self,
        db_engine,
        upstream=None,
        read_only=False,
        anon_perms=DEFAULT_ANON_PERMS,
        admin_username=None,
        admin_password=None,
    ):
        """
        Raises bb.asyncrpc.ServerError if the server is both read-only and
        given an upstream, or if anon_perms contains a permission that
        anonymous users may not hold.
        """
        if upstream and read_only:
            raise bb.asyncrpc.ServerError(
                "Read-only hashserv cannot pull from an upstream server"
            )

        # Anonymous users may never be granted user administration or
        # "@all".
        anon_allowed = {NONE_PERM, READ_PERM, REPORT_PERM, DB_ADMIN_PERM}
        disallowed_perms = set(anon_perms) - anon_allowed
        if disallowed_perms:
            raise bb.asyncrpc.ServerError(
                f"Permission(s) {' '.join(disallowed_perms)} are not allowed for anonymous users"
            )

        super().__init__(logger)

        self.request_stats = Stats()
        self.db_engine = db_engine
        self.upstream = upstream
        self.read_only = read_only
        # Created in start(), and only when an upstream is configured
        self.backfill_queue = None
        self.anon_perms = set(anon_perms)
        self.admin_username = admin_username
        self.admin_password = admin_password

        self.logger.info(
            "Anonymous user permissions are: %s", ", ".join(self.anon_perms)
        )

    def accept_client(self, socket):
        """Return the connection handler for a newly accepted socket."""
        return ServerClient(socket, self)

    async def create_admin_user(self):
        """
        Ensure the configured admin user exists with ALL_PERM and the
        configured password as its token, creating or updating it as
        needed.
        """
        admin_permissions = (ALL_PERM,)
        name = self.admin_username

        async with self.db_engine.connect(self.logger) as db:
            created = await db.new_user(
                name,
                admin_permissions,
                hash_token(TOKEN_ALGORITHM, new_salt(), self.admin_password),
            )
            if created:
                self.logger.info("Created admin user '%s'", name)
                return

            # The user already exists: reset its permissions and token
            await db.set_user_perms(name, admin_permissions)
            await db.set_user_token(
                name,
                hash_token(TOKEN_ALGORITHM, new_salt(), self.admin_password),
            )
            self.logger.info("Admin user '%s' updated", name)

    async def backfill_worker_task(self):
        """
        Process the backfill queue until a None item is received.

        Each queued (method, taskhash) pair is looked up on the upstream
        server and, if found, stored in the local database.
        """
        async with await create_async_client(
            self.upstream
        ) as client, self.db_engine.connect(self.logger) as db:
            while True:
                item = await self.backfill_queue.get()
                if item is None:
                    # Shutdown sentinel queued by stop()
                    self.backfill_queue.task_done()
                    return

                method, taskhash = item
                found = await client.get_taskhash(method, taskhash)
                if found is not None:
                    await db.insert_unihash(
                        found["method"], found["taskhash"], found["unihash"]
                    )
                self.backfill_queue.task_done()

    def start(self):
        """
        Start the server and return its tasks.

        Adds the backfill worker when an upstream is configured, creates
        the database, and sets up the admin user if one was given.
        """
        tasks = super().start()

        if self.upstream:
            self.backfill_queue = asyncio.Queue()
            tasks += [self.backfill_worker_task()]

        run = self.loop.run_until_complete
        run(self.db_engine.create())
        if self.admin_username:
            run(self.create_admin_user())

        return tasks

    async def stop(self):
        """Tell the backfill worker (if any) to exit, then stop the server."""
        queue = self.backfill_queue
        if queue is not None:
            await queue.put(None)
        await super().stop()