Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 1 | #! /usr/bin/python3 |
| 2 | |
| 3 | import argparse |
| 4 | import os |
| 5 | import gzip |
| 6 | import hashlib |
| 7 | from subprocess import Popen, PIPE |
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame^] | 8 | import re |
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 9 | |
# Absolute directory that contains this script (symlinks resolved).
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, every asset that has a content hash gets the Cache-Control/ETag
# handling (CPP_MIDDLE_CACHING_HANDLER) inserted into its generated route.
ENABLE_CACHING = True
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 13 | |
# Maps a file extension (including the leading dot) to the Content-Type header
# value emitted by the generated route. Extensions missing from this table are
# served with an empty Content-Type and reported on stdout by main().
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # JavaScript must not be labelled as HTML: browsers that honour
    # "X-Content-Type-Options: nosniff" refuse to execute such scripts.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 22 | |
# Written once at the very top of the generated C++ file.
CPP_BEGIN_BUFFER = """
#include <webassets.hpp>

"""

# Opens the C++ function that registers one route per asset; the function is
# closed by CPP_END_BUFFER.
ROUTE_DECLARATION = """

void crow::webassets::request_routes(BmcAppType& app){
"""

# str.format template (fields: sha1) for the caching part of a route handler.
# The generated code sets Cache-Control and ETag headers and answers 304 when
# the request's If-None-Match header equals the asset's sha1.
# max-age=31556926 seconds is roughly one year.
# Literal C++ braces are doubled so that str.format leaves them in place.
CPP_MIDDLE_CACHING_HANDLER = """
res.add_header("Cache-Control", "public, max-age=31556926");
res.add_header("ETag", "{sha1}");
if (req.headers.count("If-None-Match") == 1) {{
if (req.get_header_value("If-None-Match") == "{sha1}"){{
res.code = 304;
res.end();
return;
}}
}}
"""


# str.format template for one route handler. Fields used:
#   relative_path_sha1          URL the route is registered under
#   CPP_MIDDLE_CACHING_HANDLER  pre-rendered caching snippet, or "" for none
#   content_encoding            value of the Content-Encoding header
#   content_type                value of the Content-Type header
#   relative_path_escaped       name of the C++ string holding the asset bytes
CPP_MIDDLE_BUFFER = """
CROW_ROUTE(app, "{relative_path_sha1}")([](const crow::request& req, crow::response& res) {{
{CPP_MIDDLE_CACHING_HANDLER}
res.code = 200;
// TODO, if you have a browser from the dark ages that doesn't support gzip,
// unzip it before sending based on Accept-Encoding header
res.add_header("Content-Encoding", "{content_encoding}");
res.add_header("Content-Type", "{content_type}");

res.write({relative_path_escaped});

res.end();
}});
"""
| 60 | |
| 61 | |
def twos_comp(val, bits):
    """Reinterpret the unsigned integer val as a bits-wide two's complement value.

    Values whose top (sign) bit is set are mapped to their negative
    counterpart, e.g. for 8 bits 128..255 become -128..-1. All other values
    are returned unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # Sign bit set: shift the value down by the full range of the type.
        return val - (1 << bits)
    return val
| 67 | |
# Closes the function opened by ROUTE_DECLARATION.
CPP_END_BUFFER = """
}
"""

# str.format template declaring one asset as a C++ std::string that is
# brace-initialised from a comma separated list of byte values.
# Fields used: relative_path_escaped (the C++ identifier) and file_bytes.
CPP_END_BUFFER2 = """const static std::string {relative_path_escaped}{{{file_bytes}}};
"""
| 74 | |
def get_relative_path(full_filepath):
    """Derive the served URL and the C++ identifier for an asset file.

    The part of full_filepath that follows the first path component named
    "static" is used as the asset's relative path.

    Args:
        full_filepath: Path of the asset, using the platform's path separator.

    Returns:
        A tuple (relative_path, relative_path_escaped):
        relative_path is the URL, always "/static/" followed by the relative
        path joined with forward slashes;
        relative_path_escaped is the relative path with every character that
        is not an ASCII letter, digit or underscore replaced by "_", so that
        it can be used inside a C++ identifier.

    Raises:
        ValueError: If no component of full_filepath is named "static".
    """
    pathsplit = full_filepath.split(os.path.sep)
    try:
        static_index = pathsplit.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not located under a 'static' directory".format(full_filepath)
        ) from None

    # URLs always use "/", independent of the platform's path separator.
    relative_path = "/".join(pathsplit[static_index + 1:])

    # Escape everything that is not valid in a C++ identifier, not only the
    # '/', '.' and '-' characters.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", relative_path)

    return "/static/" + relative_path, relative_path_escaped
| 85 | |
def get_sha1_path_from_relative(relative_path, sha1):
    """Return relative_path with a short content hash inserted before its extension.

    The first 10 characters of sha1 are appended to the path stem, separated
    by "-". When sha1 is the empty string the path is returned unchanged.
    """
    if sha1 == "":
        return relative_path

    stem, suffix = os.path.splitext(relative_path)
    return "{}-{}{}".format(stem, sha1[:10], suffix)
| 92 | |
def filter_html(sha1_list, file_content):
    """Rewrite asset references in an HTML document to their hashed names.

    For every entry of sha1_list, each src="..." or href="..." attribute
    (single or double quoted) whose value equals the entry's path without
    leading slashes is replaced by the path returned from
    get_sha1_path_from_relative.

    Args:
        sha1_list: Mapping of asset URL path to its sha1 hex digest.
        file_content: The HTML document as bytes.

    Returns:
        The rewritten document, encoded back to bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        key = key.lstrip("/")
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"
        # Use a function as the replacement so that backslashes or
        # group-reference-like sequences inside the file name are inserted
        # literally instead of being parsed as a re.sub template.
        string_content = re.sub(
            pattern,
            lambda match, name=replace_name: match.group(1) + name + match.group(4),
            string_content,
        )
    return string_content.encode()
| 101 | |
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame^] | 102 | |
def _hash_assets(file_list, excluded_types=(".html", ".woff")):
    """Return {url_path: sha1 hex digest} for every file not in excluded_types."""
    sha1_list = {}
    for full_filepath in file_list:
        if os.path.splitext(full_filepath)[1] in excluded_types:
            continue
        with open(full_filepath, 'rb') as input_file:
            sha_text = hashlib.sha1(input_file.read()).hexdigest()
        relative_path, _ = get_relative_path(full_filepath)
        sha1_list[relative_path] = sha_text
    return sha1_list


def _write_asset_arrays(cpp_output, file_list, sha1_list, debug):
    """Write one C++ string definition holding the bytes of each asset."""
    for full_filepath in file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, relative_path_escaped = get_relative_path(full_filepath)

        print("Including {:<40} size {:>7}".format(relative_path, len(file_content)))

        if relative_path.endswith(".html"):
            # Point the page at the hashed names of the assets it references.
            print("Fixing {}".format(relative_path))
            file_content = filter_html(sha1_list, file_content)

        if not debug:
            file_content = gzip.compress(file_content)

        # compute the 2s complement. If you don't, you get narrowing warnings
        # from gcc/clang
        array_binary_text = ', '.join(str(twos_comp(x, 8)) for x in file_content)

        cpp_output.write(
            CPP_END_BUFFER2.format(
                relative_path=relative_path,
                file_bytes=array_binary_text,
                relative_path_escaped=relative_path_escaped
            )
        )


def _write_routes(cpp_output, file_list, sha1_list, debug):
    """Write one route handler for each asset."""
    # Assets are gzip-compressed unless running in debug mode, and the header
    # must say so, otherwise clients receive compressed bytes they never decode.
    # NOTE(review): 'none' is not a registered content-coding; consider leaving
    # the header out of the template entirely for debug builds.
    content_encoding = 'none' if debug else 'gzip'

    for full_filepath in file_list:
        relative_path, relative_path_escaped = get_relative_path(full_filepath)
        sha1 = sha1_list.get(relative_path, '')

        content_type = CONTENT_TYPES.get(os.path.splitext(relative_path)[1], "")
        if not content_type:
            print("unknown content type for {}".format(relative_path))

        # handle the default routes
        if relative_path == "/static/index.html":
            relative_path = "/"

        relative_path_sha1 = get_sha1_path_from_relative(relative_path, sha1)

        environment = {
            'relative_path': relative_path,
            'relative_path_escaped': relative_path_escaped,
            'relative_path_sha1': relative_path_sha1,
            'sha1': sha1,
            'sha1_short': sha1[:20],
            'content_type': content_type,
            'ENABLE_CACHING': str(ENABLE_CACHING).lower(),
            'content_encoding': content_encoding,
        }
        # Only assets that were hashed can be cached by their hash.
        if ENABLE_CACHING and sha1 != "":
            environment["CPP_MIDDLE_CACHING_HANDLER"] = CPP_MIDDLE_CACHING_HANDLER.format(
                **environment
            )
        else:
            environment["CPP_MIDDLE_CACHING_HANDLER"] = ""

        cpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))


def main():
    """Generate a C++ source file that embeds and serves the given static assets.

    Command line:
        -i/--input   one or more asset files, each located below a directory
                     named "static"
        -o/--output  path of the C++ file to write
        -d/--debug   skip content hashing and gzip compression

    The output consists of one byte-array string per asset followed by a
    function that registers one route per asset.
    """

    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them there is nothing to generate.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]

    # TODO(ed) most html and woff cacheable
    sha1_list = {} if args.debug else _hash_assets(file_list)

    with open(args.output, 'w') as cpp_output:
        cpp_output.write(CPP_BEGIN_BUFFER)
        _write_asset_arrays(cpp_output, file_list, sha1_list, args.debug)

        cpp_output.write(ROUTE_DECLARATION)
        _write_routes(cpp_output, file_list, sha1_list, args.debug)

        cpp_output.write(CPP_END_BUFFER)
| 204 | |
| 205 | |
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 206 | |
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()