Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 1 | # Copyright (C) 2013 Intel Corporation |
| 2 | # |
| 3 | # Released under the MIT license (see COPYING.MIT) |
| 4 | |
| 5 | # This module provides a class for starting qemu images using runqemu. |
| 6 | # It's used by testimage.bbclass. |
| 7 | |
| 8 | import subprocess |
| 9 | import os |
| 10 | import time |
| 11 | import signal |
| 12 | import re |
| 13 | import socket |
| 14 | import select |
| 15 | import errno |
| 16 | import threading |
| 17 | from oeqa.utils.dump import HostDumper |
| 18 | |
| 19 | import logging |
| 20 | logger = logging.getLogger("BitBake.QemuRunner") |
| 21 | |
| 22 | class QemuRunner: |
| 23 | |
| 24 | def __init__(self, machine, rootfs, display, tmpdir, deploy_dir_image, logfile, boottime, dump_dir, dump_host_cmds): |
| 25 | |
| 26 | # Popen object for runqemu |
| 27 | self.runqemu = None |
| 28 | # pid of the qemu process that runqemu will start |
| 29 | self.qemupid = None |
| 30 | # target ip - from the command line |
| 31 | self.ip = None |
| 32 | # host ip - where qemu is running |
| 33 | self.server_ip = None |
| 34 | |
| 35 | self.machine = machine |
| 36 | self.rootfs = rootfs |
| 37 | self.display = display |
| 38 | self.tmpdir = tmpdir |
| 39 | self.deploy_dir_image = deploy_dir_image |
| 40 | self.logfile = logfile |
| 41 | self.boottime = boottime |
| 42 | self.logged = False |
| 43 | self.thread = None |
| 44 | |
| 45 | self.runqemutime = 60 |
| 46 | self.host_dumper = HostDumper(dump_host_cmds, dump_dir) |
| 47 | |
| 48 | def create_socket(self): |
| 49 | try: |
| 50 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) |
| 51 | sock.setblocking(0) |
| 52 | sock.bind(("127.0.0.1",0)) |
| 53 | sock.listen(2) |
| 54 | port = sock.getsockname()[1] |
| 55 | logger.info("Created listening socket for qemu serial console on: 127.0.0.1:%s" % port) |
| 56 | return (sock, port) |
| 57 | |
| 58 | except socket.error: |
| 59 | sock.close() |
| 60 | raise |
| 61 | |
| 62 | def log(self, msg): |
| 63 | if self.logfile: |
| 64 | with open(self.logfile, "a") as f: |
| 65 | f.write("%s" % msg) |
| 66 | |
| 67 | def getOutput(self, o): |
| 68 | import fcntl |
| 69 | fl = fcntl.fcntl(o, fcntl.F_GETFL) |
| 70 | fcntl.fcntl(o, fcntl.F_SETFL, fl | os.O_NONBLOCK) |
| 71 | return os.read(o.fileno(), 1000000) |
| 72 | |
| 73 | |
| 74 | def handleSIGCHLD(self, signum, frame): |
| 75 | if self.runqemu and self.runqemu.poll(): |
| 76 | if self.runqemu.returncode: |
| 77 | logger.info('runqemu exited with code %d' % self.runqemu.returncode) |
| 78 | logger.info("Output from runqemu:\n%s" % self.getOutput(self.runqemu.stdout)) |
| 79 | self.stop() |
| 80 | self._dump_host() |
| 81 | raise SystemExit |
| 82 | |
| 83 | def start(self, qemuparams = None): |
| 84 | if self.display: |
| 85 | os.environ["DISPLAY"] = self.display |
| 86 | else: |
| 87 | logger.error("To start qemu I need a X desktop, please set DISPLAY correctly (e.g. DISPLAY=:1)") |
| 88 | return False |
| 89 | if not os.path.exists(self.rootfs): |
| 90 | logger.error("Invalid rootfs %s" % self.rootfs) |
| 91 | return False |
| 92 | if not os.path.exists(self.tmpdir): |
| 93 | logger.error("Invalid TMPDIR path %s" % self.tmpdir) |
| 94 | return False |
| 95 | else: |
| 96 | os.environ["OE_TMPDIR"] = self.tmpdir |
| 97 | if not os.path.exists(self.deploy_dir_image): |
| 98 | logger.error("Invalid DEPLOY_DIR_IMAGE path %s" % self.deploy_dir_image) |
| 99 | return False |
| 100 | else: |
| 101 | os.environ["DEPLOY_DIR_IMAGE"] = self.deploy_dir_image |
| 102 | |
| 103 | try: |
| 104 | threadsock, threadport = self.create_socket() |
| 105 | self.server_socket, self.serverport = self.create_socket() |
| 106 | except socket.error, msg: |
| 107 | logger.error("Failed to create listening socket: %s" % msg[1]) |
| 108 | return False |
| 109 | |
| 110 | # Set this flag so that Qemu doesn't do any grabs as SDL grabs interact |
| 111 | # badly with screensavers. |
| 112 | os.environ["QEMU_DONT_GRAB"] = "1" |
| 113 | self.qemuparams = 'bootparams="console=tty1 console=ttyS0,115200n8" qemuparams="-serial tcp:127.0.0.1:{}"'.format(threadport) |
| 114 | if qemuparams: |
| 115 | self.qemuparams = self.qemuparams[:-1] + " " + qemuparams + " " + '\"' |
| 116 | |
| 117 | self.origchldhandler = signal.getsignal(signal.SIGCHLD) |
| 118 | signal.signal(signal.SIGCHLD, self.handleSIGCHLD) |
| 119 | |
| 120 | launch_cmd = 'runqemu tcpserial=%s %s %s %s' % (self.serverport, self.machine, self.rootfs, self.qemuparams) |
| 121 | # FIXME: We pass in stdin=subprocess.PIPE here to work around stty |
| 122 | # blocking at the end of the runqemu script when using this within |
| 123 | # oe-selftest (this makes stty error out immediately). There ought |
| 124 | # to be a proper fix but this will suffice for now. |
| 125 | self.runqemu = subprocess.Popen(launch_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE, preexec_fn=os.setpgrp) |
| 126 | output = self.runqemu.stdout |
| 127 | |
| 128 | # |
| 129 | # We need the preexec_fn above so that all runqemu processes can easily be killed |
| 130 | # (by killing their process group). This presents a problem if this controlling |
| 131 | # process itself is killed however since those processes don't notice the death |
| 132 | # of the parent and merrily continue on. |
| 133 | # |
| 134 | # Rather than hack runqemu to deal with this, we add something here instead. |
| 135 | # Basically we fork off another process which holds an open pipe to the parent |
| 136 | # and also is setpgrp. If/when the pipe sees EOF from the parent dieing, it kills |
| 137 | # the process group. This is like pctrl's PDEATHSIG but for a process group |
| 138 | # rather than a single process. |
| 139 | # |
| 140 | r, w = os.pipe() |
| 141 | self.monitorpid = os.fork() |
| 142 | if self.monitorpid: |
| 143 | os.close(r) |
| 144 | self.monitorpipe = os.fdopen(w, "w") |
| 145 | else: |
| 146 | # child process |
| 147 | os.setpgrp() |
| 148 | os.close(w) |
| 149 | r = os.fdopen(r) |
| 150 | x = r.read() |
| 151 | os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) |
| 152 | sys.exit(0) |
| 153 | |
| 154 | logger.info("runqemu started, pid is %s" % self.runqemu.pid) |
| 155 | logger.info("waiting at most %s seconds for qemu pid" % self.runqemutime) |
| 156 | endtime = time.time() + self.runqemutime |
| 157 | while not self.is_alive() and time.time() < endtime: |
| 158 | if self.runqemu.poll(): |
| 159 | if self.runqemu.returncode: |
| 160 | # No point waiting any longer |
| 161 | logger.info('runqemu exited with code %d' % self.runqemu.returncode) |
| 162 | self._dump_host() |
| 163 | self.stop() |
| 164 | logger.info("Output from runqemu:\n%s" % self.getOutput(output)) |
| 165 | return False |
| 166 | time.sleep(1) |
| 167 | |
| 168 | if self.is_alive(): |
| 169 | logger.info("qemu started - qemu procces pid is %s" % self.qemupid) |
| 170 | cmdline = '' |
| 171 | with open('/proc/%s/cmdline' % self.qemupid) as p: |
| 172 | cmdline = p.read() |
| 173 | try: |
| 174 | ips = re.findall("((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1]) |
| 175 | if not ips or len(ips) != 3: |
| 176 | raise ValueError |
| 177 | else: |
| 178 | self.ip = ips[0] |
| 179 | self.server_ip = ips[1] |
| 180 | except IndexError, ValueError: |
| 181 | logger.info("Couldn't get ip from qemu process arguments! Here is the qemu command line used:\n%s\nand output from runqemu:\n%s" % (cmdline, self.getOutput(output))) |
| 182 | self._dump_host() |
| 183 | self.stop() |
| 184 | return False |
| 185 | logger.info("qemu cmdline used:\n{}".format(cmdline)) |
| 186 | logger.info("Target IP: %s" % self.ip) |
| 187 | logger.info("Server IP: %s" % self.server_ip) |
| 188 | |
| 189 | logger.info("Starting logging thread") |
| 190 | self.thread = LoggingThread(self.log, threadsock, logger) |
| 191 | self.thread.start() |
| 192 | if not self.thread.connection_established.wait(self.boottime): |
| 193 | logger.error("Didn't receive a console connection from qemu. " |
| 194 | "Here is the qemu command line used:\n%s\nand " |
| 195 | "output from runqemu:\n%s" % (cmdline, |
| 196 | self.getOutput(output))) |
| 197 | self.stop_thread() |
| 198 | return False |
| 199 | |
| 200 | logger.info("Waiting at most %d seconds for login banner" % self.boottime) |
| 201 | endtime = time.time() + self.boottime |
| 202 | socklist = [self.server_socket] |
| 203 | reachedlogin = False |
| 204 | stopread = False |
| 205 | qemusock = None |
| 206 | bootlog = '' |
| 207 | while time.time() < endtime and not stopread: |
| 208 | sread, swrite, serror = select.select(socklist, [], [], 5) |
| 209 | for sock in sread: |
| 210 | if sock is self.server_socket: |
| 211 | qemusock, addr = self.server_socket.accept() |
| 212 | qemusock.setblocking(0) |
| 213 | socklist.append(qemusock) |
| 214 | socklist.remove(self.server_socket) |
| 215 | logger.info("Connection from %s:%s" % addr) |
| 216 | else: |
| 217 | data = sock.recv(1024) |
| 218 | if data: |
| 219 | bootlog += data |
| 220 | if re.search(".* login:", bootlog): |
| 221 | self.server_socket = qemusock |
| 222 | stopread = True |
| 223 | reachedlogin = True |
| 224 | logger.info("Reached login banner") |
| 225 | else: |
| 226 | socklist.remove(sock) |
| 227 | sock.close() |
| 228 | stopread = True |
| 229 | |
| 230 | if not reachedlogin: |
| 231 | logger.info("Target didn't reached login boot in %d seconds" % self.boottime) |
| 232 | lines = "\n".join(bootlog.splitlines()[-25:]) |
| 233 | logger.info("Last 25 lines of text:\n%s" % lines) |
| 234 | logger.info("Check full boot log: %s" % self.logfile) |
| 235 | self._dump_host() |
| 236 | self.stop() |
| 237 | return False |
| 238 | |
| 239 | # If we are not able to login the tests can continue |
| 240 | try: |
| 241 | (status, output) = self.run_serial("root\n", raw=True) |
| 242 | if re.search("root@[a-zA-Z0-9\-]+:~#", output): |
| 243 | self.logged = True |
| 244 | logger.info("Logged as root in serial console") |
| 245 | else: |
| 246 | logger.info("Couldn't login into serial console" |
| 247 | " as root using blank password") |
| 248 | except: |
| 249 | logger.info("Serial console failed while trying to login") |
| 250 | |
| 251 | else: |
| 252 | logger.info("Qemu pid didn't appeared in %s seconds" % self.runqemutime) |
| 253 | self._dump_host() |
| 254 | self.stop() |
| 255 | logger.info("Output from runqemu:\n%s" % self.getOutput(output)) |
| 256 | return False |
| 257 | |
| 258 | return self.is_alive() |
| 259 | |
| 260 | def stop(self): |
| 261 | self.stop_thread() |
| 262 | if self.runqemu: |
| 263 | signal.signal(signal.SIGCHLD, self.origchldhandler) |
| 264 | os.kill(self.monitorpid, signal.SIGKILL) |
| 265 | logger.info("Sending SIGTERM to runqemu") |
| 266 | try: |
| 267 | os.killpg(os.getpgid(self.runqemu.pid), signal.SIGTERM) |
| 268 | except OSError as e: |
| 269 | if e.errno != errno.ESRCH: |
| 270 | raise |
| 271 | endtime = time.time() + self.runqemutime |
| 272 | while self.runqemu.poll() is None and time.time() < endtime: |
| 273 | time.sleep(1) |
| 274 | if self.runqemu.poll() is None: |
| 275 | logger.info("Sending SIGKILL to runqemu") |
| 276 | os.killpg(os.getpgid(self.runqemu.pid), signal.SIGKILL) |
| 277 | self.runqemu = None |
| 278 | if hasattr(self, 'server_socket') and self.server_socket: |
| 279 | self.server_socket.close() |
| 280 | self.server_socket = None |
| 281 | self.qemupid = None |
| 282 | self.ip = None |
| 283 | signal.signal(signal.SIGCHLD, self.origchldhandler) |
| 284 | |
| 285 | def stop_thread(self): |
| 286 | if self.thread and self.thread.is_alive(): |
| 287 | self.thread.stop() |
| 288 | self.thread.join() |
| 289 | |
| 290 | def restart(self, qemuparams = None): |
| 291 | logger.info("Restarting qemu process") |
| 292 | if self.runqemu.poll() is None: |
| 293 | self.stop() |
| 294 | if self.start(qemuparams): |
| 295 | return True |
| 296 | return False |
| 297 | |
| 298 | def is_alive(self): |
| 299 | if not self.runqemu: |
| 300 | return False |
| 301 | qemu_child = self.find_child(str(self.runqemu.pid)) |
| 302 | if qemu_child: |
| 303 | self.qemupid = qemu_child[0] |
| 304 | if os.path.exists("/proc/" + str(self.qemupid)): |
| 305 | return True |
| 306 | return False |
| 307 | |
| 308 | def find_child(self,parent_pid): |
| 309 | # |
| 310 | # Walk the process tree from the process specified looking for a qemu-system. Return its [pid'cmd] |
| 311 | # |
| 312 | ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command'], stdout=subprocess.PIPE).communicate()[0] |
| 313 | processes = ps.split('\n') |
| 314 | nfields = len(processes[0].split()) - 1 |
| 315 | pids = {} |
| 316 | commands = {} |
| 317 | for row in processes[1:]: |
| 318 | data = row.split(None, nfields) |
| 319 | if len(data) != 3: |
| 320 | continue |
| 321 | if data[1] not in pids: |
| 322 | pids[data[1]] = [] |
| 323 | |
| 324 | pids[data[1]].append(data[0]) |
| 325 | commands[data[0]] = data[2] |
| 326 | |
| 327 | if parent_pid not in pids: |
| 328 | return [] |
| 329 | |
| 330 | parents = [] |
| 331 | newparents = pids[parent_pid] |
| 332 | while newparents: |
| 333 | next = [] |
| 334 | for p in newparents: |
| 335 | if p in pids: |
| 336 | for n in pids[p]: |
| 337 | if n not in parents and n not in next: |
| 338 | next.append(n) |
| 339 | if p not in parents: |
| 340 | parents.append(p) |
| 341 | newparents = next |
| 342 | #print "Children matching %s:" % str(parents) |
| 343 | for p in parents: |
| 344 | # Need to be careful here since runqemu-internal runs "ldd qemu-system-xxxx" |
| 345 | # Also, old versions of ldd (2.11) run "LD_XXXX qemu-system-xxxx" |
| 346 | basecmd = commands[p].split()[0] |
| 347 | basecmd = os.path.basename(basecmd) |
| 348 | if "qemu-system" in basecmd and "-serial tcp" in commands[p]: |
| 349 | return [int(p),commands[p]] |
| 350 | |
| 351 | def run_serial(self, command, raw=False): |
| 352 | # We assume target system have echo to get command status |
| 353 | if not raw: |
| 354 | command = "%s; echo $?\n" % command |
| 355 | self.server_socket.sendall(command) |
| 356 | data = '' |
| 357 | status = 0 |
| 358 | stopread = False |
| 359 | endtime = time.time()+5 |
| 360 | while time.time()<endtime and not stopread: |
| 361 | sread, _, _ = select.select([self.server_socket],[],[],5) |
| 362 | for sock in sread: |
| 363 | answer = sock.recv(1024) |
| 364 | if answer: |
| 365 | data += answer |
| 366 | # Search the prompt to stop |
| 367 | if re.search("[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#", data): |
| 368 | stopread = True |
| 369 | break |
| 370 | else: |
| 371 | raise Exception("No data on serial console socket") |
| 372 | if data: |
| 373 | if raw: |
| 374 | status = 1 |
| 375 | else: |
| 376 | # Remove first line (command line) and last line (prompt) |
| 377 | data = data[data.find('$?\r\n')+4:data.rfind('\r\n')] |
| 378 | index = data.rfind('\r\n') |
| 379 | if index == -1: |
| 380 | status_cmd = data |
| 381 | data = "" |
| 382 | else: |
| 383 | status_cmd = data[index+2:] |
| 384 | data = data[:index] |
| 385 | if (status_cmd == "0"): |
| 386 | status = 1 |
| 387 | return (status, str(data)) |
| 388 | |
| 389 | |
| 390 | def _dump_host(self): |
| 391 | self.host_dumper.create_dir("qemu") |
| 392 | logger.warn("Qemu ended unexpectedly, dump data from host" |
| 393 | " is in %s" % self.host_dumper.dump_dir) |
| 394 | self.host_dumper.dump_host() |
| 395 | |
| 396 | # This class is for reading data from a socket and passing it to logfunc |
| 397 | # to be processed. It's completely event driven and has a straightforward |
| 398 | # event loop. The mechanism for stopping the thread is a simple pipe which |
| 399 | # will wake up the poll and allow for tearing everything down. |
| 400 | class LoggingThread(threading.Thread): |
| 401 | def __init__(self, logfunc, sock, logger): |
| 402 | self.connection_established = threading.Event() |
| 403 | self.serversock = sock |
| 404 | self.logfunc = logfunc |
| 405 | self.logger = logger |
| 406 | self.readsock = None |
| 407 | self.running = False |
| 408 | |
| 409 | self.errorevents = select.POLLERR | select.POLLHUP | select.POLLNVAL |
| 410 | self.readevents = select.POLLIN | select.POLLPRI |
| 411 | |
| 412 | threading.Thread.__init__(self, target=self.threadtarget) |
| 413 | |
| 414 | def threadtarget(self): |
| 415 | try: |
| 416 | self.eventloop() |
| 417 | finally: |
| 418 | self.teardown() |
| 419 | |
| 420 | def run(self): |
| 421 | self.logger.info("Starting logging thread") |
| 422 | self.readpipe, self.writepipe = os.pipe() |
| 423 | threading.Thread.run(self) |
| 424 | |
| 425 | def stop(self): |
| 426 | self.logger.info("Stopping logging thread") |
| 427 | if self.running: |
| 428 | os.write(self.writepipe, "stop") |
| 429 | |
| 430 | def teardown(self): |
| 431 | self.logger.info("Tearing down logging thread") |
| 432 | self.close_socket(self.serversock) |
| 433 | |
| 434 | if self.readsock is not None: |
| 435 | self.close_socket(self.readsock) |
| 436 | |
| 437 | self.close_ignore_error(self.readpipe) |
| 438 | self.close_ignore_error(self.writepipe) |
| 439 | self.running = False |
| 440 | |
| 441 | def eventloop(self): |
| 442 | poll = select.poll() |
| 443 | eventmask = self.errorevents | self.readevents |
| 444 | poll.register(self.serversock.fileno()) |
| 445 | poll.register(self.readpipe, eventmask) |
| 446 | |
| 447 | breakout = False |
| 448 | self.running = True |
| 449 | self.logger.info("Starting thread event loop") |
| 450 | while not breakout: |
| 451 | events = poll.poll() |
| 452 | for event in events: |
| 453 | # An error occurred, bail out |
| 454 | if event[1] & self.errorevents: |
| 455 | raise Exception(self.stringify_event(event[1])) |
| 456 | |
| 457 | # Event to stop the thread |
| 458 | if self.readpipe == event[0]: |
| 459 | self.logger.info("Stop event received") |
| 460 | breakout = True |
| 461 | break |
| 462 | |
| 463 | # A connection request was received |
| 464 | elif self.serversock.fileno() == event[0]: |
| 465 | self.logger.info("Connection request received") |
| 466 | self.readsock, _ = self.serversock.accept() |
| 467 | self.readsock.setblocking(0) |
| 468 | poll.unregister(self.serversock.fileno()) |
| 469 | poll.register(self.readsock.fileno()) |
| 470 | |
| 471 | self.logger.info("Setting connection established event") |
| 472 | self.connection_established.set() |
| 473 | |
| 474 | # Actual data to be logged |
| 475 | elif self.readsock.fileno() == event[0]: |
| 476 | data = self.recv(1024) |
| 477 | self.logfunc(data) |
| 478 | |
| 479 | # Since the socket is non-blocking make sure to honor EAGAIN |
| 480 | # and EWOULDBLOCK. |
| 481 | def recv(self, count): |
| 482 | try: |
| 483 | data = self.readsock.recv(count) |
| 484 | except socket.error as e: |
| 485 | if e.errno == errno.EAGAIN or e.errno == errno.EWOULDBLOCK: |
| 486 | return '' |
| 487 | else: |
| 488 | raise |
| 489 | |
| 490 | if data is None: |
| 491 | raise Exception("No data on read ready socket") |
| 492 | elif not data: |
| 493 | # This actually means an orderly shutdown |
| 494 | # happened. But for this code it counts as an |
| 495 | # error since the connection shouldn't go away |
| 496 | # until qemu exits. |
| 497 | raise Exception("Console connection closed unexpectedly") |
| 498 | |
| 499 | return data |
| 500 | |
| 501 | def stringify_event(self, event): |
| 502 | val = '' |
| 503 | if select.POLLERR == event: |
| 504 | val = 'POLLER' |
| 505 | elif select.POLLHUP == event: |
| 506 | val = 'POLLHUP' |
| 507 | elif select.POLLNVAL == event: |
| 508 | val = 'POLLNVAL' |
| 509 | return val |
| 510 | |
| 511 | def close_socket(self, sock): |
| 512 | sock.shutdown(socket.SHUT_RDWR) |
| 513 | sock.close() |
| 514 | |
| 515 | def close_ignore_error(self, fd): |
| 516 | try: |
| 517 | os.close(fd) |
| 518 | except OSError: |
| 519 | pass |