Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 1 | #! /usr/bin/python3 |
| 2 | |
| 3 | import argparse |
| 4 | import os |
| 5 | import gzip |
| 6 | import hashlib |
| 7 | from subprocess import Popen, PIPE |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 8 | from collections import defaultdict |
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame] | 9 | import re |
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 10 | |
# Directory that contains this script, with symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, routes whose URL embeds a content hash are emitted with a
# long-lived Cache-Control header.
ENABLE_CACHING = True

# TODO(ed) this needs to be better
# Maps a file extension to the Content-Type header value emitted for it.
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must not be labelled as HTML: clients that enforce
    # "X-Content-Type-Options: nosniff" refuse to execute them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
Ed Tanous | 904063f | 2017-03-02 16:48:24 -0800 | [diff] [blame] | 23 | |
# C++ source template for one route handler; it is rendered once per asset
# with str.format(). Doubled braces ({{ and }}) are literal braces in the
# generated code. Placeholders consumed by the template:
#   relative_path_sha1    - URL the route is registered under
#   CACHE_FOREVER_HEADER  - optional statement adding a Cache-Control header
#   sha1                  - value sent as ETag and compared to If-None-Match
#   content_encoding      - value of the Content-Encoding header
#   content_type          - value of the Content-Type header
#   relative_path_escaped - name of the staticassets member holding the body
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
([](const crow::request& req, crow::response& res) {{
{CACHE_FOREVER_HEADER}
res.add_header("ETag", "{sha1}");
if (req.headers.count("If-None-Match") == 1) {{
if (req.get_header_value("If-None-Match") == "{sha1}") {{
res.code = 304;
res.end();
return;
}}
}}

res.code = 200;
// TODO, if you have a browser from the dark ages that doesn't support gzip,
// unzip it before sending based on Accept-Encoding header
res.add_header("Content-Encoding", "{content_encoding}");
res.add_header("Content-Type", "{content_type}");

res.write(staticassets::{relative_path_escaped});

res.end();
}});
"""
| 47 | |
| 48 | |
def twos_comp(val, bits):
    """Read val as a two's complement number that is bits wide.

    A value whose top bit is set is mapped to its negative counterpart;
    any other value is returned unchanged.
    """
    sign_bit = 1 << (bits - 1)
    if val & sign_bit:
        # Top bit set: wrap around into the negative range.
        return val - (1 << bits)
    return val
| 54 | |
def get_relative_path(full_filepath):
    """Derive the served path and the C++ identifier of a static asset.

    The part of full_filepath below its first "static" path component is
    used for both results.

    Returns a tuple (relative_path, relative_path_escaped):
      relative_path          "static/" followed by the remaining components
                             joined with "/".
      relative_path_escaped  the remaining components joined with "_", with
                             every character that is not valid in a C++
                             identifier replaced by "_".

    Raises ValueError if full_filepath has no "static" component.
    """
    path_parts = full_filepath.split(os.path.sep)
    try:
        static_index = path_parts.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not located below a 'static' directory".format(
                full_filepath)) from None
    below_static = path_parts[static_index + 1:]

    # Anything outside [0-9A-Za-z_] (dots, dashes, spaces, '@', ...) would
    # make the generated C++ fail to compile, so map all of it to "_".
    relative_path_escaped = re.sub(
        r"[^0-9A-Za-z_]", "_", "_".join(below_static))
    # An identifier may not start with a digit.
    if relative_path_escaped[:1].isdigit():
        relative_path_escaped = "_" + relative_path_escaped

    # The served path is a URL path, so it always uses forward slashes,
    # whatever separator the build host uses.
    relative_path = "static/" + "/".join(below_static)

    return relative_path, relative_path_escaped
| 66 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 67 | |
def get_sha1_path_from_relative(relative_path, sha1):
    """Return relative_path with a short hash inserted before its extension.

    The first ten characters of sha1 are appended to the file stem after a
    "-". When sha1 is the empty string, relative_path is returned unchanged.
    """
    if sha1 == "":
        return relative_path

    stem, suffix = os.path.splitext(relative_path)
    return "".join([stem, "-", sha1[:10], suffix])
| 74 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 75 | |
def filter_html(sha1_list, file_content):
    """Rewrite src/href attributes in an HTML document to hashed asset names.

    sha1_list maps an asset path to its hash. Every src="..." or href="..."
    attribute (single or double quoted) whose value is exactly one of those
    paths has the value replaced by the name that
    get_sha1_path_from_relative() yields for it. Each path that caused a
    change is reported on stdout.

    file_content is bytes; the rewritten document is returned as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # Use a callable replacement so the new name is inserted literally.
        # A template string would treat backslashes in the name as escapes,
        # and a name starting with a digit would fuse with "\1" into a
        # different group reference.
        string_content_new = re.sub(
            pattern,
            lambda match, name=replace_name:
                match.group(1) + name + match.group(4),
            string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
| 88 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 89 | |
def filter_js(sha1_list, file_content):
    """Rewrite asset paths inside a script or stylesheet to hashed names.

    sha1_list maps an asset path to its hash. Every literal occurrence of
    such a path in file_content is replaced by the name that
    get_sha1_path_from_relative() yields for it. Each path that caused a
    change is reported on stdout.

    file_content is bytes; the rewritten text is returned as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)

        # Plain substring replacement: the path is text, not a pattern.
        # Treating it as a regular expression lets "." match any character
        # and lets backslashes in the new name be read as escapes.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
| 102 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 103 | |
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Store the SHA-1 hex digest of file_content under relative_path.

    sha1_list is modified in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
| 109 | |
Ed Tanous | b4d29f4 | 2017-03-24 16:39:25 -0700 | [diff] [blame] | 110 | |
def get_dependencies(dependency_list, full_filepath):
    """Return every direct and transitive dependency of full_filepath.

    dependency_list maps a file to the list of files it refers to.

    The result is ordered so that each file comes after all of its own
    dependencies (deepest first). A caller that processes the list front to
    back has therefore handled a dependency before any file that refers to
    it. Each dependency is listed once, and full_filepath itself is never
    part of the result.

    Raises KeyError if a visited file has no entry in dependency_list.
    """
    ordered = []
    # Files already expanded or currently being expanded. Seeding it with
    # the start file makes self references and reference cycles terminate
    # instead of recursing without bound.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list[path]:
            if dependency in visited:
                continue
            visited.add(dependency)
            visit(dependency)
            # Appended after its own dependencies: post-order.
            ordered.append(dependency)

    visit(full_filepath)
    return ordered
| 120 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 121 | |
def remove_duplicates_preserve_order(seq):
    """Return a list of the items of seq, keeping only first occurrences.

    The relative order of the retained items is the order in which they
    first appear in seq.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
Ed Tanous | 1ccd57c | 2017-03-21 13:15:58 -0700 | [diff] [blame] | 126 | |
Ed Tanous | b4a7bfa | 2017-04-04 17:23:00 -0700 | [diff] [blame] | 127 | |
def _find_dependencies(file_list):
    """Map every input file to the other input files its text refers to."""
    text_file_types = ['.css', '.js', '.html']
    depends_on = {}
    for full_filepath in file_list:
        relative_path, _ = get_relative_path(full_filepath)
        ext = os.path.splitext(relative_path)[1]
        depends_on[full_filepath] = []
        if ext not in text_file_types:
            continue

        # The content is decoded as UTF-8 when it is rewritten later, so
        # read it as UTF-8 here too instead of using the locale encoding.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()

        for full_replacename in file_list:
            # A file that mentions its own path cannot embed its own hash,
            # and a self dependency would make the ordering step loop.
            if full_replacename == full_filepath:
                continue
            relative_replacename, _ = get_relative_path(full_replacename)
            # The path is literal text; escape it so "." and friends are
            # not read as regular expression operators.
            escaped_name = re.escape(relative_replacename)
            if ext == ".html":
                pattern = "((src|href)=[\"'])(" + escaped_name + ")([\"'])"
            else:
                # .js and .css: a quoted path, optionally prefixed by "../"
                # segments and optionally followed by a query string.
                pattern = ("([\"'](\\.\\./)*)(" + escaped_name +
                           ")([\"'?])")
            if re.search(pattern, file_content):
                depends_on[full_filepath].append(full_replacename)
    return depends_on


def _gzip_reproducibly(data):
    """Gzip data with a zeroed header timestamp.

    gzip.compress() stamps the current time into its output, which would
    change the content hash (and with it the ETag and the hashed URL) on
    every build even when the input is unchanged.
    """
    import io

    buffer = io.BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb', mtime=0) as gz_file:
        gz_file.write(data)
    return buffer.getvalue()


def _write_header(hpp_path, ordered_files, sha1_list, gzip_content):
    """Write the header declaring the asset strings and the route setup."""
    content_encoding = 'gzip' if gzip_content else 'none'

    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         )

        # One static string member per asset.
        hpp_output.write("struct staticassets {\n")
        for full_filepath in ordered_files:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(
                    relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        # One route handler per asset.
        for full_filepath in ordered_files:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            if ENABLE_CACHING:
                # if we have a valid sha1, and we have a unique path to the
                # resource it can be safely cached forever
                if sha1 != "" and relative_path != relative_path_sha1:
                    environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        # Closes request_routes and both namespaces.
        hpp_output.write("}\n}\n}")


def _write_source(cpp_path, ordered_files, content_dict):
    """Write the source file defining every asset string byte by byte."""
    cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"

    with open(cpp_path, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in ordered_files:
            file_content = content_dict[full_filepath]
            _, relative_path_escaped = get_relative_path(full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                cpp_end_buffer.format(
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
        cpp_output.write("}\n}\n")


def main():
    """Generate the C++ header and source that embed the static web assets.

    Command line:
      -i/--input   one or more asset files located below a "static" directory
      -o/--output  path of the generated source file; the header is written
                   next to it with the extension replaced by ".hpp"
      -d/--debug   embed the assets uncompressed instead of gzipped

    Text assets (.html, .js, .css) have references to other input files
    rewritten to names that carry a content hash. Every asset is then
    embedded as a byte string and given a route handler.
    """

    parser = argparse.ArgumentParser()
    # Both arguments are required: without them there is nothing to do.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]

    sha1_list = {}
    content_dict = {}

    gzip_content = not args.debug

    depends_on = _find_dependencies(file_list)

    # Put the files get_dependencies() reports for a file ahead of that
    # file, so their hashes are already known when the file is rewritten.
    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)

    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    total_payload_size = 0
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
        elif extension == ".js" or extension == ".css":
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            file_content = _gzip_reproducibly(file_content)

        # The hash covers the bytes that are actually served.
        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content

        total_payload_size += len(file_content)

    # Swap only the extension. A plain "cpp" -> "hpp" text replacement would
    # also rewrite directory names that happen to contain "cpp".
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"

    _write_header(hpp_path, dependency_ordered_file_list, sha1_list,
                  gzip_content)
    _write_source(args.output, dependency_ordered_file_list, content_dict)

    print("Total static file size: {}KB".format(total_payload_size // 1024))


if __name__ == "__main__":
    main()