blob: 46bf199c6eefb77bf69d31bf5e6ba4fcf049c1af [file] [log] [blame]
#! /usr/bin/python3
import argparse
import os
import gzip
import hashlib
from subprocess import Popen, PIPE
from collections import defaultdict
import re
# Directory containing this script (symlinks resolved).
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, routes whose URL embeds the content hash are given a long-lived
# Cache-Control header.
ENABLE_CACHING = True

# Maps a file extension to the Content-Type header value sent for it.
# Extensions missing from this table are served with an empty Content-Type.
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must not be labelled text/html: browsers that honour
    # "X-Content-Type-Options: nosniff" refuse to execute them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
# C++ source template for one static-asset route. It is filled in with
# str.format(), so literal C++ braces are doubled ("{{" / "}}").
# Placeholders:
#   relative_path_sha1    - URL the route is registered under
#   CACHE_FOREVER_HEADER  - optional statement that adds a Cache-Control
#                           header, or an empty string
#   sha1                  - value sent as the ETag and compared with the
#                           request's If-None-Match header (match -> 304)
#   content_encoding      - C++ expression passed as the Content-Encoding value
#   content_type          - value of the Content-Type header
#   relative_path_escaped - name of the staticassets member written as the body
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
([](const crow::request& req, crow::response& res) {{
{CACHE_FOREVER_HEADER}
res.add_header("ETag", "{sha1}");
if (req.headers.count("If-None-Match") == 1) {{
if (req.get_header_value("If-None-Match") == "{sha1}") {{
res.code = 304;
res.end();
return;
}}
}}
res.code = 200;
// TODO, if you have a browser from the dark ages that doesn't support gzip,
// unzip it before sending based on Accept-Encoding header
res.add_header("Content-Encoding", {content_encoding});
res.add_header("Content-Type", "{content_type}");
res.write(staticassets::{relative_path_escaped});
res.end();
}});
"""
def twos_comp(val, bits):
    """Reinterpret the unsigned integer *val* as a *bits*-wide two's complement value.

    Values whose top bit is set (e.g. 128-255 for 8 bits) map to the matching
    negative number; all other values are returned unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # Sign bit set: shift the value down into the negative range.
        return val - (1 << bits)
    return val
def get_relative_path(full_filepath):
    """Split *full_filepath* at its first ``static`` directory component.

    Returns a tuple ``(relative_path, relative_path_escaped)``:
      * relative_path is ``"static/"`` followed by everything after the
        ``static`` component;
      * relative_path_escaped is that same tail (without the ``static/``
        prefix) with every ``/``, ``.`` and ``-`` replaced by ``_``.

    Raises ValueError when the path has no ``static`` component.
    """
    components = full_filepath.split(os.path.sep)
    tail_start = components.index("static") + 1
    below_static = os.path.sep.join(components[tail_start:])
    # One pass over the string instead of three chained replace() calls.
    escaped = below_static.translate(str.maketrans("/.-", "___"))
    return "static/" + below_static, escaped
def get_sha1_path_from_relative(relative_path, sha1):
    """Return *relative_path* with the first 10 characters of *sha1* added to its name.

    The digest prefix is inserted before the file extension, separated by a
    dash. An empty *sha1* leaves the path untouched.
    """
    if sha1 == "":
        return relative_path
    stem, suffix = os.path.splitext(relative_path)
    return "{}-{}{}".format(stem, sha1[:10], suffix)
def filter_html(sha1_list, file_content):
    """Rewrite ``src``/``href`` attributes in an HTML document to hashed asset names.

    *sha1_list* maps a relative asset path to its digest; *file_content* is
    the document as bytes. Every quoted ``src=`` or ``href=`` value equal to
    a known path is replaced with that path's hashed form, and each path that
    was replaced is reported on stdout. Returns the rewritten document as
    bytes.
    """
    html = file_content.decode()
    for original_name, digest in sha1_list.items():
        hashed_name = get_sha1_path_from_relative(original_name, digest)
        # Group 1 is the attribute prefix including the opening quote and
        # group 4 the closing quote; both are re-emitted unchanged.
        pattern = "((src|href)=[\"'])(" + re.escape(original_name) + ")([\"'])"
        rewritten = re.sub(pattern, "\\1" + hashed_name + "\\4", html)
        if rewritten == html:
            continue
        print(" Replaced {}".format(original_name))
        print(" With {}".format(hashed_name))
        html = rewritten
    return html.encode()
def embed_angular_templates(sha1_list, dependency_ordered_file_list, content_dict, file_content):
    """Inline the Angular partial templates into an HTML page.

    Every file in *dependency_ordered_file_list* whose relative path matches
    ``partial-.*\\.html`` is read from disk and wrapped in a
    ``<script type="text/ng-template">`` element whose id is the file's
    hashed path. The generated elements are inserted immediately before the
    first ``<script`` tag of the page.

    Args:
        sha1_list: maps relative path -> digest; must contain every partial.
        dependency_ordered_file_list: absolute paths of all asset files.
        content_dict: only its keys are used, and only to print them.
        file_content: the HTML page as bytes.

    Returns:
        The page, with the templates embedded, as bytes.

    Raises:
        Exception: if the page contains no ``<script`` tag.
    """
    string_content = file_content.decode()
    index = string_content.find("<script")
    if index == -1:
        raise Exception("Couldn't find first script tag in html?")

    template_blocks = []
    for full_filepath in dependency_ordered_file_list:
        relative_path, _ = get_relative_path(full_filepath)
        if not re.search("partial-.*\\.html", relative_path):
            continue
        sha1_path = get_sha1_path_from_relative(relative_path, sha1_list[relative_path])
        # Close the handle deterministically, and read as UTF-8 because the
        # combined page is encoded as UTF-8 on return.
        with open(full_filepath, 'r', encoding="utf-8") as template_file:
            template_body = template_file.read()
        template_blocks.append(
            "<script type=\"text/ng-template\" id=\"" + sha1_path + "\">\n" +
            template_body +
            "</script>\n"
        )

    # Diagnostic output: list the files recorded in content_dict so far.
    for key in content_dict:
        print(key)

    preload_string = "".join(template_blocks)
    string_content = string_content[:index] + preload_string + string_content[index:]
    return string_content.encode()
def filter_js(sha1_list, file_content):
    """Replace asset paths inside a JS or CSS file with their hashed names.

    *sha1_list* maps a relative asset path to its digest; *file_content* is
    the file as bytes. Every literal occurrence of a known path is replaced
    with that path's hashed form, and each path that was replaced is reported
    on stdout. Returns the rewritten file as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        # Plain substring replacement: the path is data, not a pattern. Using
        # it as a regular expression would let "." match any character and
        # would interpret backslashes in the replacement text.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Store the hex SHA-1 digest of *file_content* in *sha1_list* under *relative_path*.

    *sha1_list* is modified in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
def get_dependencies(dependency_list, full_filepath):
    """Return the direct and transitive dependencies of *full_filepath*.

    *dependency_list* maps each file to the list of files it references.
    The result starts with the file's direct dependencies, followed by the
    dependencies of each of those in turn. It may contain duplicates; the
    caller is expected to de-duplicate.

    A dependency that is already being expanded further up the call chain
    (a cycle, including a file that references itself) is still listed but
    is not expanded again, so cyclic graphs terminate instead of recursing
    without bound.

    Raises KeyError if a referenced file has no entry in *dependency_list*.

    NOTE(review): direct dependencies are listed before their own
    dependencies - confirm this is the order the caller needs.
    """
    def _collect(path, active):
        # `active` holds the files currently being expanded on this call chain.
        direct = dependency_list[path]
        collected = list(direct)
        active.add(path)
        for dependency in direct:
            if dependency in active:
                continue
            collected.extend(_collect(dependency, active))
        active.discard(path)
        return collected

    return _collect(full_filepath, set())
def remove_duplicates_preserve_order(seq):
    """Return a list of the items of *seq* with repeats dropped.

    The first occurrence of each item is kept, in its original position
    relative to the other kept items. Items must be hashable.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
def main():
    """Generate the C++ header/source pair that embeds the static web assets.

    Command line:
      -i/--input   one or more asset files; every path must contain a
                   ``static`` directory component.
      -o/--output  path of the generated source file; the header is written
                   next to it with the extension replaced by ``.hpp``.
      -d/--debug   embed the assets as-is instead of gzip-compressing them.

    Steps: find which text assets reference which other assets, order the
    files so that referenced files come first, rewrite references to hashed
    names, optionally gzip, then emit the header (declarations plus one
    route per asset) and the source file (the asset bytes).
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; let argparse report a missing one instead of
    # failing later on a None value.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]
    gzip_content = not args.debug

    sha1_list = {}
    content_dict = {}
    depends_on = {}

    # Relative paths are searched for as literal text, so escape each one
    # once up front rather than once per (file, candidate) pair.
    escaped_names = [
        (candidate, re.escape(get_relative_path(candidate)[0]))
        for candidate in file_list
    ]
    text_file_types = ['.css', '.js', '.html']
    for full_filepath in file_list:
        relative_path, _ = get_relative_path(full_filepath)
        ext = os.path.splitext(relative_path)[1]
        depends_on[full_filepath] = []
        if ext not in text_file_types:
            continue
        # Text assets are decoded as UTF-8 when they are rewritten later, so
        # read them the same way here.
        with open(full_filepath, 'r', encoding="utf-8") as input_file:
            file_content = input_file.read()
        for full_replacename, escaped_name in escaped_names:
            if full_replacename == full_filepath:
                # A file that mentions its own path does not depend on itself.
                continue
            if ext == ".html":
                pattern = "((src|href)=[\"'])(" + escaped_name + ")([\"'])"
            else:
                pattern = r"(\.\./)*" + escaped_name
            if re.search(pattern, file_content):
                depends_on[full_filepath].append(full_replacename)

    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)
    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    total_payload_size = 0
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} raw size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
            if relative_path.endswith("index.html"):
                file_content = embed_angular_templates(
                    sha1_list, dependency_ordered_file_list, content_dict,
                    file_content)
        elif extension == ".js" or extension == ".css":
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            file_content = gzip.compress(file_content)

        # The digest covers the bytes that are actually served.
        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content
        total_payload_size += len(file_content)

    # Swap only the extension; a plain str.replace("cpp", "hpp") would also
    # rewrite any "cpp" appearing in a directory or base name.
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"
    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         "static const std::string gzip_string = \"gzip\";\n"
                         "static const std::string none_string = \"none\";\n"
                         )

        hpp_output.write("struct staticassets {\n")
        for full_filepath in dependency_ordered_file_list:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        content_encoding = 'gzip_string' if gzip_content else 'none_string'
        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }
            # if we have a valid sha1, and we have a unique path to the
            # resource it can be safely cached forever
            if ENABLE_CACHING and sha1 != "" and relative_path != relative_path_sha1:
                environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        hpp_output.write("}\n}\n}")

    cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"
    with open(args.output, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in dependency_ordered_file_list:
            file_content = content_dict[full_filepath]
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                cpp_end_buffer.format(
                    relative_path=relative_path,
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
            print("{:<40} took {:>6} KB".format(
                relative_path_escaped, len(array_binary_text) // 1024))
        cpp_output.write("}\n}\n")

    print("Total static file size: {}KB".format(total_payload_size // 1024))
if __name__ == "__main__":
main()