blob: ee04e229fca64630b346c1e0ab656ed98e4813fe [file] [log] [blame]
Ed Tanous904063f2017-03-02 16:48:24 -08001#! /usr/bin/python3
2
3import argparse
4import os
5import gzip
6import hashlib
7from subprocess import Popen, PIPE
Ed Tanousb4a7bfa2017-04-04 17:23:00 -07008from collections import defaultdict
Ed Tanous1ccd57c2017-03-21 13:15:58 -07009import re
Ed Tanous904063f2017-03-02 16:48:24 -080010
# Directory containing this script, with symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, main() adds a long-lived Cache-Control header to every route
# whose URL differs from the plain asset path (i.e. embeds the content hash).
ENABLE_CACHING = True
Ed Tanous904063f2017-03-02 16:48:24 -080014
# Maps a file extension to the Content-Type header value emitted for it.
# Extensions missing from this table are served with an empty content type
# (main() prints a warning for them).
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # JavaScript must not be labelled text/html: browsers that enforce
    # strict MIME checking refuse to execute such scripts.
    '.js': "text/javascript;charset=UTF-8",
    # image/png is binary and defines no charset parameter.
    '.png': "image/png",
    '.woff': "application/x-font-woff",
}
Ed Tanous904063f2017-03-02 16:48:24 -080023
# C++ route-handler template, rendered once per asset with str.format().
# Doubled braces are literal C++ braces; single-brace fields are the
# placeholders filled from the `environment` dict built in main().
CPP_MIDDLE_BUFFER = (
    ' CROW_ROUTE(app, "{relative_path_sha1}")\n'
    ' ([](const crow::request& req, crow::response& res) {{\n'
    ' {CACHE_FOREVER_HEADER}\n'
    ' std::string sha1("{sha1}");\n'
    ' res.add_header(etag_string, sha1);\n'
    '\n'
    ' if (req.get_header_value(if_none_match_string) == sha1) {{\n'
    ' res.code = 304;\n'
    ' }} else {{\n'
    ' res.code = 200;\n'
    " // TODO, if you have a browser from the dark ages that doesn't support gzip,\n"
    ' // unzip it before sending based on Accept-Encoding header\n'
    ' res.add_header(content_encoding_string, {content_encoding});\n'
    ' res.add_header(content_type_string, "{content_type}");\n'
    '\n'
    ' res.write(staticassets::{relative_path_escaped});\n'
    ' }}\n'
    ' res.end();\n'
    ' }});\n'
    '\n'
)
45
# Fixed preamble of the generated header: include guard, Crow includes, the
# opening of namespaces crow::webassets (closed again by main()), and the
# shared header-name / header-value string constants used by every route.
HPP_START_BUFFER = """\
#pragma once

#include <string>

#include <crow/app.h>
#include <crow/http_request.h>
#include <crow/http_response.h>

#include <crow/routing.h>

namespace crow {
namespace webassets {
static const std::string gzip_string = "gzip";
static const std::string none_string = "none";
static const std::string if_none_match_string = "If-None-Match";
static const std::string content_encoding_string = "Content-Encoding";
static const std::string content_type_string = "Content-Type";
static const std::string etag_string = "ETag";
"""
65
Ed Tanous904063f2017-03-02 16:48:24 -080066
def twos_comp(val, bits):
    """Interpret ``val`` as a two's complement number that is ``bits`` wide.

    Values whose top (sign) bit is set are mapped to the corresponding
    negative integer; all other values are returned unchanged.
    """
    sign_bit = 1 << (bits - 1)
    if val & sign_bit:
        # Sign bit set (e.g. 128-255 for 8 bits): wrap into the negative range.
        return val - (sign_bit << 1)
    return val
72
def get_relative_path(full_filepath):
    """Derive the served path and the C++ identifier for an asset file.

    Args:
        full_filepath: Filesystem path containing a ``static`` directory
            component; everything after the first such component is the
            asset's path.

    Returns:
        A ``(relative_path, relative_path_escaped)`` tuple.
        ``relative_path`` is ``"static/"`` followed by the asset path, always
        using forward slashes. ``relative_path_escaped`` is the asset path
        with every character that is not an ASCII letter, digit or
        underscore replaced by ``_``.

    Raises:
        ValueError: If the path has no ``static`` component.
    """
    path_parts = full_filepath.split(os.path.sep)
    try:
        static_index = path_parts.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not located under a 'static' directory".format(
                full_filepath)) from None

    # The result is used as a URL, so join with "/" regardless of the
    # platform's path separator.
    asset_path = "/".join(path_parts[static_index + 1:])

    # Replace everything that cannot appear in an identifier, not just
    # '/', '.' and '-', so names containing spaces, '@', '+', ... still
    # yield a usable member name.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", asset_path)

    return "static/" + asset_path, relative_path_escaped
84
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070085
def get_sha1_path_from_relative(relative_path, sha1):
    """Return ``relative_path`` with a short content hash before its extension.

    The first 10 characters of ``sha1`` are inserted as ``-<hash>`` between
    the path stem and the extension. An empty ``sha1`` leaves the path
    untouched.
    """
    if sha1 == "":
        return relative_path
    stem, suffix = os.path.splitext(relative_path)
    return "{}-{}{}".format(stem, sha1[:10], suffix)
92
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070093
def filter_html(sha1_list, file_content):
    """Rewrite ``src``/``href`` attributes to point at hashed asset paths.

    Args:
        sha1_list: Mapping of relative asset path -> sha1 hex digest.
        file_content: The HTML document as bytes.

    Returns:
        The rewritten document as bytes. Only attribute values that equal a
        key of ``sha1_list`` exactly (in single or double quotes) change.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # Use a function rather than a "\\1...\\4" template so that
        # backslashes or digits in the file name are inserted literally
        # instead of being parsed as escapes / group references.
        def substitute(match, name=replace_name):
            return match.group(1) + name + match.group(4)

        string_content_new = re.sub(pattern, substitute, string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
106
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700107
def embed_angular_templates(sha1_list, dependency_ordered_file_list, content_dict, file_content):
    """Inline the ``partial-*.html`` files as ng-template script tags.

    Every file in ``dependency_ordered_file_list`` whose relative path matches
    ``partial-.*\\.html`` is read from disk and wrapped in a
    ``<script type="text/ng-template">`` tag whose id is the file's hashed
    path. The tags are inserted immediately before the first ``<script``
    occurrence of the document.

    Args:
        sha1_list: Mapping of relative asset path -> sha1 hex digest; must
            contain an entry for every matching partial.
        dependency_ordered_file_list: Full paths of all assets.
        content_dict: Mapping whose keys are printed as diagnostic output.
        file_content: The HTML document as bytes.

    Returns:
        The document with the templates embedded, as bytes.

    Raises:
        Exception: If the document contains no ``<script`` tag.
        KeyError: If a matching partial has no entry in ``sha1_list``.
    """
    string_content = file_content.decode()
    index = string_content.find("<script")
    if index == -1:
        raise Exception("Couldn't find first script tag in html?")

    template_tags = []
    for full_filepath in dependency_ordered_file_list:
        relative_path, _ = get_relative_path(full_filepath)
        if not re.search(r"partial-.*\.html", relative_path):
            continue
        sha1_path = get_sha1_path_from_relative(relative_path, sha1_list[relative_path])

        # Close the file deterministically and read it as UTF-8, the same
        # encoding the surrounding document is decoded/encoded with.
        with open(full_filepath, 'r', encoding="utf-8") as template_file:
            template_body = template_file.read()

        template_tags.append(
            "<script type=\"text/ng-template\" id=\"" + sha1_path + "\">\n" +
            template_body +
            "</script>\n"
        )

    # Diagnostic output: one line per key of content_dict.
    for key in content_dict:
        print(key)

    preload_string = "".join(template_tags)
    string_content = string_content[:index] + preload_string + string_content[index:]
    return string_content.encode()
129
def filter_js(sha1_list, file_content):
    """Replace every occurrence of a known asset path with its hashed path.

    Args:
        sha1_list: Mapping of relative asset path -> sha1 hex digest.
        file_content: The script or stylesheet as bytes.

    Returns:
        The rewritten content as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        # Plain substring replacement: asset paths are literal text, so they
        # must not be interpreted as regular expressions (where '.' would
        # match any character) nor as regex replacement templates.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
140
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700141
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Store the sha1 hex digest of ``file_content`` under ``relative_path``.

    ``sha1_list`` is modified in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
147
Ed Tanousb4d29f42017-03-24 16:39:25 -0700148
def get_dependencies(dependency_list, full_filepath):
    """Return the transitive dependencies of ``full_filepath``.

    Args:
        dependency_list: Mapping of file path -> list of the file paths it
            references directly.
        full_filepath: The file whose dependencies are wanted.

    Returns:
        A list of every file reachable from ``full_filepath``, each listed
        once and never including ``full_filepath`` itself. A file always
        appears *after* the files it depends on, so processing the list in
        order handles leaves first. Files without an entry in
        ``dependency_list`` are treated as having no dependencies.
    """
    ordered = []
    # Seeding with the root makes self-references and reference cycles
    # terminate instead of recursing forever.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list.get(path, ()):
            if dependency in visited:
                continue
            visited.add(dependency)
            # Post-order: emit a dependency only after its own dependencies.
            visit(dependency)
            ordered.append(dependency)

    visit(full_filepath)
    return ordered
158
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700159
def remove_duplicates_preserve_order(seq):
    """Return the items of ``seq`` as a list, keeping only first occurrences."""
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
Ed Tanous1ccd57c2017-03-21 13:15:58 -0700164
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700165
def _gzip_deterministic(data):
    """Gzip ``data`` with a zeroed header timestamp.

    gzip.compress() stores the current time in the gzip header, which makes
    the compressed bytes (and therefore the sha1 taken over them) differ on
    every run even when the input is unchanged.
    """
    import io

    buffer = io.BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb', mtime=0) as gzip_file:
        gzip_file.write(data)
    return buffer.getvalue()


def main():
    """Generate the C++ sources that embed and serve the static web assets.

    Command line:
        -i/--input   one or more asset files located under a ``static``
                     directory (required)
        -o/--output  path of the .cpp file to write; the header is written
                     next to it with the extension replaced by ``.hpp``
                     (required)
        -d/--debug   embed the assets uncompressed instead of gzipped

    Steps:
        1. Scan text assets for references to the other input files.
        2. Order the files so that dependencies precede their dependents.
        3. Rewrite references to hashed paths, optionally gzip, and hash
           each file.
        4. Write the header (declarations and route handlers) and the .cpp
           file (the asset bytes).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]

    sha1_list = {}
    content_dict = {}
    depends_on = {}

    gzip_content = not args.debug

    # --- 1. dependency discovery -----------------------------------------
    # Relative names do not change, so compute them once rather than once
    # per (file, candidate) pair.
    relative_names = [(path, get_relative_path(path)[0]) for path in file_list]
    text_file_types = ('.css', '.js', '.html')

    for full_filepath in file_list:
        relative_path, _ = get_relative_path(full_filepath)
        ext = os.path.splitext(relative_path)[1]
        depends_on[full_filepath] = []
        if ext not in text_file_types:
            continue

        # Read as UTF-8, matching the bytes.decode() used when the content
        # is rewritten later.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()

        for full_replacename, relative_replacename in relative_names:
            if full_replacename == full_filepath:
                # A file mentioning its own path is not a dependency.
                continue
            if ext == ".html":
                # The asset name is literal text, so escape it before
                # embedding it in the pattern.
                referenced = re.search(
                    "((src|href)=[\"'])(" + re.escape(relative_replacename) + ")([\"'])",
                    file_content) is not None
            else:
                # .js / .css: any literal occurrence of the path counts
                # (optionally preceded by "../" segments, which does not
                # change whether a match exists).
                referenced = relative_replacename in file_content
            if referenced:
                depends_on[full_filepath].append(full_replacename)

    # --- 2. dependency ordering ------------------------------------------
    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)

    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    # --- 3. rewrite, compress and hash -----------------------------------
    total_payload_size = 0
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} raw size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
            if relative_path.endswith("index.html"):
                file_content = embed_angular_templates(
                    sha1_list, dependency_ordered_file_list, content_dict,
                    file_content)
        elif extension in (".js", ".css"):
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            file_content = _gzip_deterministic(file_content)

        # The hash is taken over the bytes that are actually served.
        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content

        total_payload_size += len(file_content)

    # --- 4a. header: declarations and route handlers ---------------------
    # Swap only the extension; str.replace("cpp", "hpp") would also rewrite
    # any directory or file-name part that happens to contain "cpp".
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"

    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write(HPP_START_BUFFER)

        hpp_output.write("struct staticassets {\n")
        for full_filepath in dependency_ordered_file_list:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            content_encoding = 'gzip_string' if gzip_content else 'none_string'

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            if ENABLE_CACHING:
                # if we have a valid sha1, and we have a unique path to the resource
                # it can be safely cached forever
                if sha1 != "" and relative_path != relative_path_sha1:
                    environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        # Closes request_routes() and the two namespaces opened by
        # HPP_START_BUFFER.
        hpp_output.write("}\n}\n}")

    # --- 4b. source file: the asset bytes --------------------------------
    with open(args.output, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"
        for full_filepath in dependency_ordered_file_list:
            file_content = content_dict[full_filepath]
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                cpp_end_buffer.format(
                    relative_path=relative_path,
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
            print("{:<40} took {:>6} KB".format(
                relative_path_escaped, len(array_binary_text) // 1024))
        cpp_output.write("}\n}\n")

    print("Total static file size: {}KB".format(total_payload_size // 1024))
331
# Run the generator only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()