#! /usr/bin/python3

import argparse
import gzip
import hashlib
import os
import re
from collections import defaultdict
from subprocess import Popen, PIPE

# Directory that contains this script.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, assets served from a hash-named route get a long-lived
# Cache-Control header in the generated C++ handler.
ENABLE_CACHING = True

# TODO(ed) this needs to be better
# Maps a file extension to the Content-Type header value emitted for it.
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # JavaScript must not be served as text/html; text/javascript is the
    # registered media type for scripts.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}

# C++ source for one Crow route handler; main() formats it once per asset.
# Placeholders: relative_path_sha1 (route URL), CACHE_FOREVER_HEADER (an
# optional extra C++ statement, may be empty), sha1 (ETag value),
# content_encoding, content_type and relative_path_escaped (name of the
# staticassets member holding the bytes). Literal C++ braces are doubled
# because the text is passed through str.format().
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
 ([](const crow::request& req, crow::response& res) {{
 {CACHE_FOREVER_HEADER}
 res.add_header("ETag", "{sha1}");
 if (req.headers.count("If-None-Match") == 1) {{
 if (req.get_header_value("If-None-Match") == "{sha1}") {{
 res.code = 304;
 res.end();
 return;
 }}
 }}

 res.code = 200;
 // TODO, if you have a browser from the dark ages that doesn't support gzip,
 // unzip it before sending based on Accept-Encoding header
 res.add_header("Content-Encoding", "{content_encoding}");
 res.add_header("Content-Type", "{content_type}");

 res.write(staticassets::{relative_path_escaped});

 res.end();
 }});
"""


def twos_comp(val, bits):
    """Return ``val`` reinterpreted as a ``bits``-wide two's complement integer.

    Args:
        val: Non-negative integer holding the raw bit pattern (e.g. 0-255
            when ``bits`` is 8).
        bits: Width of the pattern in bits.

    Returns:
        ``val - 2**bits`` when the sign bit (bit ``bits - 1``) is set,
        otherwise ``val`` unchanged.
    """
    sign_bit = 1 << (bits - 1)
    if val & sign_bit:
        # Sign bit set, e.g. 128-255 for 8 bits: map onto the negative range.
        return val - (1 << bits)
    return val


def get_relative_path(full_filepath):
    """Derive the served path and the C++ identifier for an asset file.

    Everything after the first path component named ``static`` is taken as
    the asset's location.

    Args:
        full_filepath: Filesystem path containing a ``static`` directory
            component.

    Returns:
        A ``(relative_path, relative_path_escaped)`` tuple.
        ``relative_path`` is ``"static/"`` followed by the asset location,
        always using forward slashes. ``relative_path_escaped`` is the asset
        location with every character that is not valid in a C++ identifier
        replaced by ``_``, prefixed with ``_`` if it would otherwise start
        with a digit.

    Raises:
        ValueError: If no path component is named ``static``.
    """
    components = full_filepath.split(os.path.sep)
    try:
        static_index = components.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not located under a 'static' directory".format(
                full_filepath)) from None

    # Always join with '/': the result is used as a URL, not a local path.
    asset_path = "/".join(components[static_index + 1:])

    # The escaped form becomes a C++ member name, so anything outside
    # [0-9A-Za-z_] (not only '/', '.' and '-') has to be replaced.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", asset_path)
    if relative_path_escaped[:1].isdigit():
        relative_path_escaped = "_" + relative_path_escaped

    return "static/" + asset_path, relative_path_escaped


def get_sha1_path_from_relative(relative_path, sha1):
    """Insert a short content hash into a path, just before its extension.

    Args:
        relative_path: Asset path such as ``static/js/app.js``.
        sha1: Hex digest of the asset content, or an empty value when no
            hash is known.

    Returns:
        ``relative_path`` with ``-<first 10 characters of sha1>`` inserted
        before the extension, or ``relative_path`` unchanged when ``sha1``
        is empty.
    """
    if not sha1:
        return relative_path
    stem, extension = os.path.splitext(relative_path)
    return stem + "-" + sha1[:10] + extension


def filter_html(sha1_list, file_content):
    """Rewrite ``src``/``href`` attributes to point at hash-named assets.

    Args:
        sha1_list: Dict mapping an asset's relative path to its hex digest.
        file_content: HTML document as UTF-8 encoded bytes.

    Returns:
        The document as UTF-8 bytes, where each quoted ``src=`` or ``href=``
        value exactly equal to a key of ``sha1_list`` has been replaced by
        the corresponding hash-named path.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"
        # A callable replacement inserts replace_name literally; as a
        # template string its backslashes or leading digits would be read
        # as escapes / group references.
        string_content_new = re.sub(
            pattern,
            lambda match, name=replace_name: match.group(1) + name + match.group(4),
            string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()


def embed_angular_templates(sha1_list, dependency_ordered_file_list, content_dict, file_content):
    """Inline partial HTML templates ahead of the first ``<script`` tag.

    Each file in ``dependency_ordered_file_list`` whose relative path matches
    ``partial-.*\\.html`` is read from disk and wrapped in a
    ``<script type="text/ng-template">`` element whose ``id`` is the
    template's hash-named path.

    Args:
        sha1_list: Dict mapping relative asset paths to hex digests; it must
            already contain an entry for every partial template.
        dependency_ordered_file_list: Full paths of all assets.
        content_dict: Dict keyed by full file path; only its keys are
            printed here.
        file_content: The HTML document as UTF-8 encoded bytes.

    Returns:
        The document as UTF-8 bytes with the template elements inserted
        immediately before the first ``<script`` occurrence.

    Raises:
        Exception: If the document contains no ``<script`` tag.
        KeyError: If a partial template has no entry in ``sha1_list``.
    """
    string_content = file_content.decode()
    index = string_content.find("<script")
    if index == -1:
        raise Exception("Couldn't find first script tag in html?")

    preload_parts = []
    for full_filepath in dependency_ordered_file_list:
        relative_path, _ = get_relative_path(full_filepath)
        if not re.search("partial-.*\\.html", relative_path):
            continue
        sha1_path = get_sha1_path_from_relative(relative_path, sha1_list[relative_path])

        print("full_filepath" + full_filepath)
        # Read as UTF-8 (the document itself is decoded/encoded as UTF-8)
        # and close the handle deterministically.
        with open(full_filepath, 'r', encoding="utf-8") as template_file:
            template_text = template_file.read()
        preload_parts.append(
            "<script type=\"text/ng-template\" id=\"" + sha1_path + "\">\n" +
            template_text +
            "</script>\n"
        )

    for key in content_dict:
        print(key)

    preload_string = "".join(preload_parts)
    string_content = string_content[:index] + preload_string + string_content[index:]
    return string_content.encode()


def filter_js(sha1_list, file_content):
    """Replace asset paths in a script or stylesheet with hash-named paths.

    Args:
        sha1_list: Dict mapping an asset's relative path to its hex digest.
        file_content: File content as UTF-8 encoded bytes.

    Returns:
        The content as UTF-8 bytes with every literal occurrence of a key of
        ``sha1_list`` replaced by the corresponding hash-named path.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        # Paths are plain text, not patterns: a literal replace keeps '.'
        # from matching arbitrary characters and keeps the replacement from
        # being interpreted as a regex template.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()


def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Record the SHA-1 hex digest of ``file_content`` under ``relative_path``.

    Args:
        sha1_list: Dict to update in place.
        file_content: Bytes to hash.
        relative_path: Key under which the digest is stored.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()


def get_dependencies(dependency_list, full_filepath):
    """Return every direct and transitive dependency of ``full_filepath``.

    The result is ordered so that a file always appears after the files it
    depends on; processing the list front to back therefore handles each
    dependency before its dependents. Each file is listed once, the
    starting file is never included, and cyclic or self references are
    followed only once instead of recursing forever.

    Args:
        dependency_list: Dict mapping a file path to the list of paths it
            directly depends on. Paths missing from the dict are treated as
            having no dependencies.
        full_filepath: Path whose dependencies are wanted.

    Returns:
        List of dependency paths in dependency-first order.
    """
    ordered = []
    # Seeding with the start file keeps it out of its own dependency list.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list.get(path, ()):
            if dependency in visited:
                continue
            visited.add(dependency)
            # Emit the dependency's own requirements before the dependency.
            visit(dependency)
            ordered.append(dependency)

    visit(full_filepath)
    return ordered


def remove_duplicates_preserve_order(seq):
    """Return the items of ``seq`` with later duplicates dropped.

    Args:
        seq: Iterable of hashable items.

    Returns:
        A new list holding the first occurrence of each item, in the order
        those first occurrences appear in ``seq``.
    """
    seen = set()
    unique = []
    for item in seq:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def _scan_dependencies(file_list):
    """Map each input file to the other input files its text refers to."""
    text_file_types = ('.css', '.js', '.html')
    relative_names = [(path, get_relative_path(path)[0]) for path in file_list]

    depends_on = {}
    for full_filepath, relative_path in relative_names:
        dependencies = depends_on[full_filepath] = []
        ext = os.path.splitext(relative_path)[1]
        if ext not in text_file_types:
            continue

        # The filters decode these files as UTF-8, so read them the same way.
        with open(full_filepath, 'r', encoding="utf-8") as input_file:
            file_content = input_file.read()

        for full_replacename, relative_replacename in relative_names:
            if full_replacename == full_filepath:
                # A file that mentions its own path does not depend on itself.
                continue
            if ext == ".html":
                # Escape the path so '.' and friends match literally.
                referenced = re.search(
                    "((src|href)=[\"'])(" + re.escape(relative_replacename) + ")([\"'])",
                    file_content) is not None
            else:
                # .js / .css: any literal mention of the path counts.
                referenced = relative_replacename in file_content
            if referenced:
                dependencies.append(full_replacename)
    return depends_on


def _process_assets(dependency_ordered_file_list, gzip_content):
    """Filter, optionally gzip and hash each asset; return (sha1_list, content_dict)."""
    sha1_list = {}
    content_dict = {}
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
            if relative_path.endswith("index.html"):
                file_content = embed_angular_templates(
                    sha1_list, dependency_ordered_file_list, content_dict, file_content)
        elif extension in (".js", ".css"):
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            file_content = gzip.compress(file_content)

        # The hash covers the bytes that are actually served.
        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content
    return sha1_list, content_dict


def _write_header(hpp_path, dependency_ordered_file_list, sha1_list, gzip_content):
    """Write the C++ header declaring the asset members and the Crow routes."""
    # NOTE(review): 'none' is not a registered HTTP content coding; the
    # template always emits the header, so the value is kept as it was.
    content_encoding = 'gzip' if gzip_content else 'none'

    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         )

        hpp_output.write("struct staticassets {\n")
        for full_filepath in dependency_ordered_file_list:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            if ENABLE_CACHING:
                # if we have a valid sha1, and we have a unique path to the resource
                # it can be safely cached forever
                if sha1 != "" and relative_path != relative_path_sha1:
                    environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        hpp_output.write("}\n}\n}")


def _write_source(cpp_path, dependency_ordered_file_list, content_dict):
    """Write the C++ source file defining each asset's byte content."""
    with open(cpp_path, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in dependency_ordered_file_list:
            file_content = content_dict[full_filepath]
            _, relative_path_escaped = get_relative_path(full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n".format(
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
        cpp_output.write("}\n}\n")


def main():
    """Generate C++ sources that embed the given static web assets.

    Command line:
        -i/--input   one or more asset files located under a ``static``
                     directory (required)
        -o/--output  path of the generated ``.cpp`` file (required); the
                     header is written next to it with a ``.hpp`` extension
        -d/--debug   embed the assets uncompressed instead of gzip-compressed

    Assets are processed dependencies-first, so that references inside a
    file can be rewritten to the hash-named paths of files handled earlier.
    """
    parser = argparse.ArgumentParser()
    # Both options are needed; without required=True a missing one surfaced
    # later as a TypeError/AttributeError on None.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]
    gzip_content = not args.debug

    depends_on = _scan_dependencies(file_list)

    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)
    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    sha1_list, content_dict = _process_assets(
        dependency_ordered_file_list, gzip_content)
    total_payload_size = sum(len(content) for content in content_dict.values())

    # Swap only the extension; replacing every "cpp" substring would also
    # rewrite directory names and leave non-.cpp outputs unchanged.
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"
    _write_header(hpp_path, dependency_ordered_file_list, sha1_list, gzip_content)
    _write_source(args.output, dependency_ordered_file_list, content_dict)

    print("Total static file size: {}KB".format(total_payload_size // 1024))


if __name__ == "__main__":
    main()