Multithread rootfs_size

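I dug this out of the Disney vault on my machine; it is something I did
to make this script run quite a bit faster.  It probably needs some
cleanup, and I'm sure it suffers from my usual "I stopped when it got
the job done" habit, but with any luck it might be useful to someone.

Each candidate removal now runs in a multiprocessing.Pool worker on its
own copy of the unpacked tree; -t/--threads sets the worker count
(default: CPU count).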

Signed-off-by: Ed Tanous <edtanous@google.com>
Change-Id: Ic4d9b5e2f363d487b1bbdf443aec2c28dc068039
diff --git a/rootfs_size/rootfs_size.py b/rootfs_size/rootfs_size.py
index cfacb02..3e21b78 100755
--- a/rootfs_size/rootfs_size.py
+++ b/rootfs_size/rootfs_size.py
@@ -5,6 +5,9 @@
 import os
 from os.path import join, getsize
 import argparse
+from multiprocessing import Pool, cpu_count
+import shutil
+import sys
 
 # Set command line arguments
 parser = argparse.ArgumentParser(
@@ -21,55 +24,71 @@
                     "/intel-platforms-wolfpass.squashfs-xz",
                     help="Squashfs file.")
 
+parser.add_argument("-t", "--threads",
+                    dest="threads",
+                    default=int(cpu_count()),
+                    type=int,
+                    help="Number of threads to use (defaults to cpu count)")
+
 args = parser.parse_args()
 
 # files below this size wont be attempted
 FILE_SIZE_LIMIT = 0
 
-SQUASHFS = args.BUILD_DIR + args.SQUASHFS_FILE
+SQUASHFS = args.SQUASHFS_FILE
+if not os.path.isabs(args.SQUASHFS_FILE):
+    SQUASHFS = args.BUILD_DIR + args.SQUASHFS_FILE
 
 original_size = getsize(SQUASHFS)
 print("squashfs size: {}".format(original_size))
 
 results = []
 
-with tempfile.TemporaryDirectory() as tempremovedfile:
-    with tempfile.TemporaryDirectory() as tempsquashfsdir:
-        print("writing to " + tempsquashfsdir)
-        command = ["unsquashfs", "-d",
-                   os.path.join(tempsquashfsdir, "squashfs-root"),  SQUASHFS]
-        print(" ".join(command))
-        subprocess.check_call(command)
-        squashfsdir = tempsquashfsdir + "/squashfs-root"
 
-        files_to_test = []
-        for root, dirs, files in os.walk(squashfsdir):
-            for name in files + dirs:
-                filepath = os.path.join(root, name)
-                if not os.path.islink(filepath):
-                    # ensure files/dirs can be renamed
-                    os.chmod(filepath, 0o711)
-                    if getsize(filepath) > FILE_SIZE_LIMIT:
-                        files_to_test.append(filepath)
+def get_unsquash_results(filepath):
+    with tempfile.TemporaryDirectory() as newsquashfsroot:
+        input_path = os.path.join(newsquashfsroot, "input")
+        shutil.copytree(squashfsdir, input_path, symlinks=True,
+                        ignore_dangling_symlinks=True)
+        file_to_remove = os.path.join(input_path, filepath)
+        try:
+            os.remove(file_to_remove)
+        except IsADirectoryError:
+            shutil.rmtree(file_to_remove)
+        subprocess.check_output(
+            ["mksquashfs", input_path,
+             newsquashfsroot + "/test", "-comp", "xz", '-processors', '1'])
 
-        print("{} files to attempt removing".format(len(files_to_test)))
+        return ((filepath.replace(squashfsdir, ""),
+                 original_size -
+                 getsize(newsquashfsroot + "/test")))
 
-        for filepath in files_to_test:
-            name = os.path.basename(filepath)
-            newname = os.path.join(tempremovedfile, name)
-            os.rename(filepath, newname)
-            with tempfile.TemporaryDirectory() as newsquashfsroot:
-                subprocess.check_output(
-                    ["mksquashfs", tempsquashfsdir,
-                     newsquashfsroot + "/test", "-comp", "xz"])
 
-                results.append((filepath.replace(squashfsdir, ""),
-                                original_size -
-                                getsize(newsquashfsroot + "/test")))
+with tempfile.TemporaryDirectory() as tempsquashfsdir:
+    print("writing to " + tempsquashfsdir)
+    squashfsdir = os.path.join(tempsquashfsdir, "squashfs-root")
+    #squashfsdir = os.path.join("/tmp", "squashfs-root")
+    command = ["unsquashfs", "-d", squashfsdir, SQUASHFS]
+    print(" ".join(command))
+    subprocess.check_call(command)
 
-            os.rename(newname, filepath)
+    files_to_test = []
+    for root, dirs, files in os.walk(squashfsdir):
+        for name in files + dirs:
+            filepath = os.path.join(root, name)
+            if not os.path.islink(filepath):
+                if getsize(filepath) > FILE_SIZE_LIMIT:
+                    files_to_test.append(
+                        os.path.relpath(filepath, squashfsdir))
 
-            print("{:>6} of {}".format(len(results), len(files_to_test)))
+    print("{} files to attempt removing".format(len(files_to_test)))
+
+    print("Using {} threads".format(args.threads))
+    with Pool(args.threads) as p:
+        for i, res in enumerate(p.imap_unordered(get_unsquash_results, files_to_test)):
+            results.append(res)
+            sys.stderr.write('\rdone {:.1f}%'.format(
+                100 * (i/len(files_to_test))))
 
 results.sort(key=lambda x: x[1], reverse=True)