blob: 46bf199c6eefb77bf69d31bf5e6ba4fcf049c1af [file] [log] [blame]
Ed Tanous904063f2017-03-02 16:48:24 -08001#! /usr/bin/python3
2
3import argparse
4import os
5import gzip
6import hashlib
7from subprocess import Popen, PIPE
Ed Tanousb4a7bfa2017-04-04 17:23:00 -07008from collections import defaultdict
Ed Tanous1ccd57c2017-03-21 13:15:58 -07009import re
Ed Tanous904063f2017-03-02 16:48:24 -080010
# Directory that contains this script.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, main() adds a long-lived Cache-Control header to every route
# whose URL embeds the content hash of the asset it serves.
ENABLE_CACHING = True

# Maps a file extension to the Content-Type header value emitted for it.
# Extensions missing from this table are served with an empty Content-Type
# (main() prints a warning for them).
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts used to be labelled text/html; text/javascript is the
    # registered media type for JavaScript.
    '.js': "text/javascript;charset=UTF-8",
    # charset is only meaningful for textual media types, not binary images.
    '.png': "image/png",
    '.woff': "application/x-font-woff",
}
Ed Tanous904063f2017-03-02 16:48:24 -080023
# C++ source template for one route handler; main() renders it once per
# embedded asset with str.format(), so literal C++ braces are doubled
# ({{ and }}) and single-brace names are format fields:
#   relative_path_sha1    URL the route is registered under
#   CACHE_FOREVER_HEADER  either "" or a complete Cache-Control statement
#   sha1                  content hash, sent as the ETag and compared with
#                         the request's If-None-Match to answer 304
#   content_encoding      inserted unquoted: main() passes the name of a
#                         C++ string constant, not a string literal
#   content_type          value of the Content-Type header
#   relative_path_escaped name of the staticassets member holding the bytes
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
 ([](const crow::request& req, crow::response& res) {{
 {CACHE_FOREVER_HEADER}
 res.add_header("ETag", "{sha1}");
 if (req.headers.count("If-None-Match") == 1) {{
 if (req.get_header_value("If-None-Match") == "{sha1}") {{
 res.code = 304;
 res.end();
 return;
 }}
 }}

 res.code = 200;
 // TODO, if you have a browser from the dark ages that doesn't support gzip,
 // unzip it before sending based on Accept-Encoding header
 res.add_header("Content-Encoding", {content_encoding});
 res.add_header("Content-Type", "{content_type}");

 res.write(staticassets::{relative_path_escaped});

 res.end();
 }});
"""
47
48
def twos_comp(val, bits):
    """Interpret val as a bits-wide two's complement number.

    A value whose top (sign) bit is set is mapped to the corresponding
    negative integer; any other value is returned unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # e.g. for 8 bits, 128..255 wrap around to -128..-1
        return val - (1 << bits)
    return val
54
def get_relative_path(full_filepath):
    """Split an asset path into its served path and a C++-safe identifier.

    Everything after the first path component named "static" is taken as
    the asset's location.

    Returns a (relative_path, relative_path_escaped) tuple:
      relative_path          "static/" followed by the location, always
                             joined with "/" regardless of the platform's
                             path separator.
      relative_path_escaped  the location with every character other than
                             ASCII letters, digits and "_" replaced by "_".

    Raises ValueError if full_filepath has no "static" component.
    """
    pathsplit = full_filepath.split(os.path.sep)
    below_static = "/".join(pathsplit[pathsplit.index("static") + 1:])

    # Escape everything that cannot appear in an identifier, not just the
    # '/', '.' and '-' handled previously (spaces, '+', '@', ... would
    # otherwise end up verbatim in the generated C++ member name).
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", below_static)

    return "static/" + below_static, relative_path_escaped
66
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070067
def get_sha1_path_from_relative(relative_path, sha1):
    """Return relative_path with a short content hash spliced into its name.

    The first 10 characters of sha1 are inserted, preceded by "-", directly
    in front of the file extension.  An empty sha1 returns the path as is.
    """
    if sha1 == "":
        return relative_path
    stem, suffix = os.path.splitext(relative_path)
    return "".join([stem, "-", sha1[:10], suffix])
74
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070075
def filter_html(sha1_list, file_content):
    """Point src/href attributes at the hash-suffixed names of known assets.

    sha1_list     dict mapping an asset's relative path to its SHA-1 hex
                  digest.
    file_content  the HTML document as bytes.

    For every path in sha1_list, each src="..." or href="..." attribute
    (single or double quoted) whose value is exactly that path is rewritten
    to the name produced by get_sha1_path_from_relative().  Every path that
    caused a change is reported on stdout.

    Returns the rewritten document as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # Use a function instead of a "\\1<name>\\4" template: in a template a
        # name starting with a digit would be read as part of the group
        # number and backslashes in the name would be treated as escapes.
        def substitute(match, new_name=replace_name):
            return match.group(1) + new_name + match.group(4)

        string_content_new = re.sub(pattern, substitute, string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
88
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070089
def embed_angular_templates(sha1_list, dependency_ordered_file_list, content_dict, file_content):
    """Inline Angular partial templates into an HTML page.

    Each file in dependency_ordered_file_list whose relative path contains
    "partial-<anything>.html" is read from disk and wrapped in a
    <script type="text/ng-template"> element whose id is the file's
    hash-suffixed path.  The elements are inserted, in list order, directly
    in front of the first "<script" found in the page.

    sha1_list                     dict of relative path -> SHA-1 hex digest;
                                  must hold an entry for every partial
                                  (KeyError otherwise).
    dependency_ordered_file_list  full paths of the candidate files.
    content_dict                  only iterated to print its keys.
    file_content                  the page as bytes.

    Returns the page with the templates embedded, as bytes.
    Raises Exception if the page contains no "<script" tag.
    """
    string_content = file_content.decode()
    index = string_content.find("<script")
    if index == -1:
        raise Exception("Couldn't find first script tag in html?")

    preload_parts = []
    for full_filepath in dependency_ordered_file_list:
        relative_path, _ = get_relative_path(full_filepath)
        if not re.search("partial-.*\\.html", relative_path):
            continue
        sha1_path = get_sha1_path_from_relative(relative_path, sha1_list[relative_path])

        # Close the file deterministically, and read it as UTF-8 to match the
        # decode()/encode() defaults applied to the page itself.
        with open(full_filepath, 'r', encoding="utf-8") as template_file:
            template_text = template_file.read()

        preload_parts.append(
            "<script type=\"text/ng-template\" id=\"" + sha1_path + "\">\n" +
            template_text +
            "</script>\n"
        )
    preload_string = "".join(preload_parts)

    # NOTE(review): looks like leftover debug output; kept because it is part
    # of the script's current stdout.
    for key in content_dict:
        print(key)
    string_content = string_content[:index] + preload_string + string_content[index:]
    return string_content.encode()
111
def filter_js(sha1_list, file_content):
    """Rewrite references to known assets inside JavaScript or CSS text.

    sha1_list     dict mapping an asset's relative path to its SHA-1 hex
                  digest.
    file_content  the source text as bytes.

    Every literal occurrence of a path from sha1_list is replaced by the
    name produced by get_sha1_path_from_relative().  Every path that caused
    a change is reported on stdout.

    Returns the rewritten text as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        # Plain substring replacement: the path is not a regular expression
        # (its "." must not match arbitrary characters) and the new name is
        # not a replacement template.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
122
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700123
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Store the SHA-1 hex digest of file_content under relative_path.

    sha1_list is modified in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
129
Ed Tanousb4d29f42017-03-24 16:39:25 -0700130
def get_dependencies(dependency_list, full_filepath, _ancestors=frozenset()):
    """Return the direct and transitive dependencies of full_filepath.

    dependency_list  dict mapping each file to the list of files it depends
                     on directly; must contain every file that is reached
                     (KeyError otherwise).
    full_filepath    the file whose dependencies are wanted.
    _ancestors       internal: files on the current recursion path.

    The result lists the direct dependencies first, followed by the
    dependencies of each of them in turn.  It may contain duplicates.

    A dependency that is already being expanded further up the recursion is
    listed but not expanded again, so cyclic or self-referential graphs
    terminate instead of recursing without bound.
    """
    ancestors = _ancestors | {full_filepath}
    my_dependencies = dependency_list[full_filepath]

    result = list(my_dependencies)
    for dependency in my_dependencies:
        if dependency in ancestors:
            # part of a cycle: already being expanded by a caller
            continue
        result.extend(get_dependencies(dependency_list, dependency, ancestors))
    return result
140
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700141
def remove_duplicates_preserve_order(seq):
    """Return the items of seq as a list, keeping only first occurrences.

    The relative order of the surviving items is unchanged.  Items must be
    hashable.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
Ed Tanous1ccd57c2017-03-21 13:15:58 -0700146
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700147
def main():
    """Embed static web assets into generated C++ sources.

    Command line:
      -i/--input   one or more asset files; each path must contain a
                   "static" directory component.
      -o/--output  path of the generated .cpp file; the header is written
                   next to it with the extension replaced by ".hpp".
      -d/--debug   embed the assets uncompressed instead of gzipped.

    Steps:
      1. Scan every .css/.js/.html input for references to the other inputs
         to build a dependency map.
      2. Order the files so that the dependencies found for a file precede
         it.
      3. Rewrite references to already-hashed assets, optionally gzip, and
         record each file's SHA-1 and final bytes.
      4. Write the header (asset declarations plus one Crow route per asset)
         and the .cpp file (asset byte arrays).
    """

    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the script used to fail later
    # with an unrelated error on None.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]

    sha1_list = {}
    content_dict = {}
    depends_on = {}

    gzip_content = not args.debug

    # ---- 1. dependency discovery -------------------------------------
    text_file_types = ['.css', '.js', '.html']
    for full_filepath in file_list:
        relative_path, relative_path_escaped = get_relative_path(full_filepath)
        ext = os.path.splitext(relative_path)[1]
        depends_on[full_filepath] = []
        if ext not in text_file_types:
            continue

        # The filters decode these files as UTF-8, so scan them as UTF-8 too.
        with open(full_filepath, 'r', encoding="utf-8") as input_file:
            file_content = input_file.read()

        for full_replacename in file_list:
            if full_replacename == full_filepath:
                # A file mentioning its own path is not a dependency.
                continue
            relative_replacename, _ = get_relative_path(full_replacename)
            # The path is matched literally: escape it so that "." and other
            # metacharacters in file names are not treated as regex syntax.
            escaped_name = re.escape(relative_replacename)
            if ext == ".html":
                match = re.search(
                    "((src|href)=[\"'])(" + escaped_name + ")([\"'])", file_content)
            else:  # ".js" or ".css"
                match = re.search(r"(\.\./)*" + escaped_name, file_content)
            if match:
                depends_on[full_filepath].append(full_replacename)

    # ---- 2. dependency ordering --------------------------------------
    dependency_ordered_file_list = []
    for full_filepath in file_list:
        deps = get_dependencies(depends_on, full_filepath)
        dependency_ordered_file_list.extend(deps)
        dependency_ordered_file_list.append(full_filepath)

    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    # ---- 3. filter, compress and hash --------------------------------
    total_payload_size = 0
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, relative_path_escaped = get_relative_path(
            full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} raw size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html" or relative_path == "/":
            new_file_content = filter_html(sha1_list, file_content)
            if relative_path.endswith("index.html"):
                new_file_content = embed_angular_templates(
                    sha1_list, dependency_ordered_file_list, content_dict,
                    new_file_content)
        elif extension == ".js" or extension == ".css":
            new_file_content = filter_js(sha1_list, file_content)
        else:
            new_file_content = file_content

        file_content = new_file_content

        if gzip_content:
            # NOTE(review): the gzip header presumably carries the current
            # time, which would make the hash below differ between builds;
            # consider gzip.compress(..., mtime=0) once Python >= 3.8 can be
            # assumed - confirm against the gzip documentation.
            file_content = gzip.compress(file_content)

        # The hash covers the bytes that are actually served.
        compute_sha1_and_update_dict(
            sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content

        total_payload_size += len(file_content)

    # ---- 4a. header: declarations and routes -------------------------
    # Swap only the extension; str.replace("cpp", "hpp") also rewrote any
    # "cpp" appearing in a directory name and left non-.cpp outputs
    # unchanged, so the header and source shared one path.
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"
    content_encoding = 'gzip_string' if gzip_content else 'none_string'

    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         "static const std::string gzip_string = \"gzip\";\n"
                         "static const std::string none_string = \"none\";\n"
                         )

        hpp_output.write("struct staticassets {\n")
        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            if ENABLE_CACHING:
                # if we have a valid sha1, and we have a unique path to the resource
                # it can be safely cached forever
                if sha1 != "" and relative_path != relative_path_sha1:
                    environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            content = CPP_MIDDLE_BUFFER.format(**environment)
            hpp_output.write(content)

        hpp_output.write("}\n}\n}")

    # ---- 4b. source: asset byte arrays -------------------------------
    cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"
    with open(args.output, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in dependency_ordered_file_list:
            file_content = content_dict[full_filepath]
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                cpp_end_buffer.format(
                    relative_path=relative_path,
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
            print("{:<40} took {:>6} KB".format(relative_path_escaped, len(array_binary_text) // 1024))
        cpp_output.write("}\n}\n")

    print("Total static file size: {}KB".format(total_payload_size // 1024))


if __name__ == "__main__":
    main()