Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 1 | #! /usr/bin/python3 |
| 2 | |
| 3 | import argparse |
| 4 | import os |
| 5 | import gzip |
| 6 | import hashlib |
| 7 | from subprocess import Popen, PIPE |
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame] | 8 | import re |
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 9 | |
# Directory that contains this script (symlinks resolved).
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, main() attaches a far-future Cache-Control header to every asset
# that is served from a content-hashed URL.
ENABLE_CACHING = True

# TODO(ed) this needs to be better
# Maps a file extension to the Content-Type header value emitted for it.
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must be announced with a JavaScript media type; labelling them
    # text/html makes browsers that enforce strict MIME checking reject them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 22 | |
# Written first to the generated C++ file: pulls in the webassets header.
CPP_BEGIN_BUFFER = """
#include <webassets.hpp>

"""

# Opens the generated request_routes() definition. main() writes it after the
# embedded file data; the function body is closed by CPP_END_BUFFER.
ROUTE_DECLARATION = """

void crow::webassets::request_routes(BmcAppType& app){
"""

# Substituted for {CACHE_FOREVER_HEADER} in CPP_MIDDLE_BUFFER when an asset is
# served from a content-hashed URL. 31556926 seconds is roughly one year.
CACHE_FOREVER_HEADER = """
    res.add_header("Cache-Control", "public, max-age=31556926");
"""

# str.format() template for one route handler; main() formats it once per
# asset. Doubled braces ({{ and }}) are literal C++ braces. The handler answers
# 304 when the request's If-None-Match header equals the asset's SHA-1,
# otherwise it sends the embedded bytes with status 200.
CPP_MIDDLE_BUFFER = """
    CROW_ROUTE(app, "{relative_path_sha1}")([](const crow::request& req, crow::response& res) {{
        {CACHE_FOREVER_HEADER}

        res.add_header("ETag", "{sha1}");
        if (req.headers.count("If-None-Match") == 1) {{
            if (req.get_header_value("If-None-Match") == "{sha1}"){{
                res.code = 304;
                res.end();
                return;
            }}
        }}

        res.code = 200;
        // TODO, if you have a browser from the dark ages that doesn't support gzip,
        // unzip it before sending based on Accept-Encoding header
        res.add_header("Content-Encoding", "{content_encoding}");
        res.add_header("Content-Type", "{content_type}");

        res.write({relative_path_escaped});

        res.end();
    }});
"""
| 61 | |
| 62 | |
def twos_comp(val, bits):
    """Reinterpret ``val`` as a ``bits``-wide two's-complement integer.

    A value whose top bit (bit ``bits - 1``) is set is mapped to the
    corresponding negative number; any other value is returned unchanged.
    For 8 bits, 128..255 become -128..-1.
    """
    sign_bit = 1 << (bits - 1)
    if val & sign_bit:
        return val - (1 << bits)
    return val
| 68 | |
# Written last to the generated C++ file: closes the request_routes() body
# opened by ROUTE_DECLARATION.
CPP_END_BUFFER = """
}
"""

# str.format() template that embeds one file as a std::string. The triple
# brace is a literal C++ '{' followed by the {file_bytes} field, i.e. a
# brace-initializer list holding the comma-separated byte values.
CPP_END_BUFFER2 = """const static std::string {relative_path_escaped}{{{file_bytes}}};
"""
| 75 | |
def get_relative_path(full_filepath):
    """Derive the served path and the C++ variable name for an asset file.

    Args:
        full_filepath: Filesystem path that contains a ``static`` directory
            component; everything after the first such component identifies
            the asset.

    Returns:
        A ``(relative_path, relative_path_escaped)`` tuple.
        ``relative_path`` is ``"static/"`` plus the asset path, always joined
        with forward slashes. ``relative_path_escaped`` is the asset path
        with every character that is not an ASCII letter, digit or
        underscore replaced by ``_``.

    Raises:
        ValueError: if no path component is exactly ``static``.
    """
    path_parts = full_filepath.split(os.path.sep)
    asset_parts = path_parts[path_parts.index("static") + 1:]

    # The relative path ends up in URLs, so it must use '/' regardless of the
    # separator of the machine running the build.
    asset_path = "/".join(asset_parts)

    # Escaping only '/', '.' and '-' left spaces, '+', '@', ... in the name,
    # which is not a valid C++ identifier; replace everything unsafe.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", asset_path)

    return "static/" + asset_path, relative_path_escaped
| 87 | |
def get_sha1_path_from_relative(relative_path, sha1):
    """Insert a short content hash into ``relative_path``.

    The first ten characters of ``sha1`` are placed, preceded by ``-``,
    between the path stem and its extension. An empty ``sha1`` leaves the
    path unchanged.
    """
    if sha1 == "":
        return relative_path

    stem, suffix = os.path.splitext(relative_path)
    return "{}-{}{}".format(stem, sha1[:10], suffix)
| 94 | |
def filter_html(sha1_list, file_content):
    """Point ``src``/``href`` attributes at the content-hashed asset names.

    Args:
        sha1_list: Mapping of asset relative path to its SHA-1 hex digest.
        file_content: The HTML document as bytes.

    Returns:
        The rewritten document as bytes. Only attribute values that are
        exactly a known asset path (in single or double quotes) are changed.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # A callable replacement inserts the new name literally. Building a
        # template such as "\\1" + name would misread a name containing a
        # backslash, or starting with a digit, as an escape/group reference.
        def substitute(match, new_name=replace_name):
            return match.group(1) + new_name + match.group(4)

        string_content_new = re.sub(pattern, substitute, string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
| 106 | |
def filter_js(sha1_list, file_content):
    """Replace asset paths inside a script with their content-hashed names.

    Args:
        sha1_list: Mapping of asset relative path to its SHA-1 hex digest.
        file_content: The script source as bytes.

    Returns:
        The rewritten script as bytes. Every literal occurrence of a known
        asset path is replaced.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)

        # Plain substring replacement: the path is data, not a pattern.
        # Treating it as a regex let '.' match any character and let the
        # replacement be interpreted as a template.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
| 119 | |
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Record the SHA-1 of ``file_content`` under ``relative_path``.

    The digest is stored in ``sha1_list`` as a lowercase hexadecimal string;
    nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
| 127 | |
# Extensions in the order their files should be processed; earlier entries
# sort first (after files with an unlisted extension).
FILE_PRECIDENCE = ['.woff', '.png', '.css', '.js', '.html']


def sort_order(full_filepath):
    """Return the sort key of a file based on its extension.

    Listed extensions yield their 1-based position in ``FILE_PRECIDENCE``;
    any other extension (or none) yields 0.
    """
    extension = os.path.splitext(full_filepath)[1]
    try:
        return FILE_PRECIDENCE.index(extension) + 1
    except ValueError:
        return 0
| 136 | |
| 137 | |
def get_dependencies(dependency_list, full_filepath, _ancestors=None):
    """Return the direct and transitive dependencies of ``full_filepath``.

    Args:
        dependency_list: Mapping of file path to the list of paths it
            references directly.
        full_filepath: The file whose dependencies are wanted.
        _ancestors: Internal; the files currently being expanded further up
            the recursion. Callers should not pass it.

    Returns:
        A list holding the direct dependencies first, followed by the
        dependencies of each of them in the same order. Duplicates are kept.
        A reference back to a file that is already being expanded (including
        a file that references itself) is dropped, so cyclic graphs
        terminate instead of recursing without bound.

    Raises:
        KeyError: if a file has no entry in ``dependency_list``.
    """
    ancestors = (_ancestors or frozenset()) | {full_filepath}

    # Skip edges that lead back into the chain being expanded.
    direct = [dep for dep in dependency_list[full_filepath]
              if dep not in ancestors]

    result = list(direct)
    for dependency in direct:
        result.extend(get_dependencies(dependency_list, dependency, ancestors))
    return result
| 147 | |
def remove_duplicates_preserve_order(seq):
    """Return the items of ``seq`` as a list, keeping first occurrences only.

    The relative order of the surviving items is the order in which they
    first appear in ``seq``.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame] | 152 | |
def _gzip_reproducible(data):
    """Gzip ``data`` with the header timestamp fixed to zero.

    The default header carries the current time, which would change the
    compressed bytes, their SHA-1 and therefore every hashed URL on each
    build even when the asset itself did not change.
    """
    import io

    buffer = io.BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb', mtime=0) as gzip_file:
        gzip_file.write(data)
    return buffer.getvalue()


def main():
    """Generate a C++ source file that embeds and serves static web assets.

    Command line:
        -i/--input   one or more asset files (each path must contain a
                     ``static`` directory component)
        -o/--output  path of the C++ file to write
        -d/--debug   embed the files uncompressed

    Files are emitted so that every file comes after the files it references,
    which lets references be rewritten to content-hashed names whose hashes
    are already known.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the script cannot do anything.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = sorted((os.path.realpath(f) for f in args.input),
                       key=sort_order)

    sha1_list = {}

    # Regex templates that recognise a reference to another asset; the asset
    # path is inserted escaped so it is matched literally.
    reference_patterns = {
        ".html": "((src|href)=[\"'])({})([\"'])",
        ".js": "([\"'])({})([\"'])",
    }
    relative_names = [(path, get_relative_path(path)[0]) for path in file_list]

    # Pass 1: find out which input files reference which other input files.
    depends_on = {}
    for full_filepath in file_list:
        depends_on[full_filepath] = []
        ext = os.path.splitext(full_filepath)[1]
        template = reference_patterns.get(ext)
        if template is None:
            continue
        # Decode as UTF-8, matching what filter_html/filter_js do later.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()
        for full_replacename, relative_replacename in relative_names:
            pattern = template.format(re.escape(relative_replacename))
            if re.search(pattern, file_content):
                depends_on[full_filepath].append(full_replacename)

    # Dependencies go before the files that use them.
    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)

    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    with open(args.output, 'w') as cpp_output:
        cpp_output.write(CPP_BEGIN_BUFFER)

        # Pass 2: embed every file as a std::string and record its hash.
        for full_filepath in dependency_ordered_file_list:
            with open(full_filepath, 'rb') as input_file:
                file_content = input_file.read()
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)

            print("Including {:<40} size {:>7}".format(
                relative_path, len(file_content)))

            if relative_path.endswith(".html"):
                file_content = filter_html(sha1_list, file_content)
            elif relative_path.endswith(".js"):
                file_content = filter_js(sha1_list, file_content)

            if not args.debug:
                file_content = _gzip_reproducible(file_content)

            compute_sha1_and_update_dict(sha1_list, file_content, relative_path)

            # Emit signed byte values; unsigned ones above 127 trigger
            # narrowing warnings from gcc/clang.
            array_binary_text = ', '.join(
                str(twos_comp(x, 8)) for x in file_content)

            cpp_output.write(
                CPP_END_BUFFER2.format(
                    relative_path=relative_path,
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )

        cpp_output.write(ROUTE_DECLARATION)

        # Pass 3: emit one route handler per embedded file.
        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            # TODO(ed), handle woff files better. They are referenced in CSS,
            # which at this point isn't scrubbed with a find and replace
            # algorithm
            elif relative_path.endswith(".woff"):
                relative_path_sha1 = relative_path
            else:
                relative_path_sha1 = "/" + get_sha1_path_from_relative(
                    relative_path, sha1)

            content_encoding = 'none' if args.debug else 'gzip'

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                'CACHE_FOREVER_HEADER': "",
            }
            # if we have a valid sha1, and we have a unique path to the
            # resource it can be safely cached forever
            if (ENABLE_CACHING and sha1 != ""
                    and relative_path != relative_path_sha1):
                environment["CACHE_FOREVER_HEADER"] = CACHE_FOREVER_HEADER

            cpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        cpp_output.write(CPP_END_BUFFER)
| 283 | |
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 284 | |
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()