#! /usr/bin/python3

import argparse
import gzip
import hashlib
import io
import os
import re
from collections import defaultdict
from subprocess import Popen, PIPE
Ed Tanous904063f2017-03-02 16:48:24 -080010
# Directory that contains this script.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, routes whose URL embeds a content hash are additionally given a
# long-lived Cache-Control header by main().
ENABLE_CACHING = True

# Maps a file extension to the Content-Type header value emitted for it.
# Extensions that are missing here are served with an empty Content-Type.
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must not be labelled text/html: browsers that enforce
    # "X-Content-Type-Options: nosniff" refuse to execute them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}

# C++ source of one crow route handler, rendered once per asset with
# str.format(). Literal C++ braces are doubled so that format() leaves them
# alone. Placeholders used: relative_path_sha1, CACHE_FOREVER_HEADER, sha1,
# content_encoding, content_type and relative_path_escaped.
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
 ([](const crow::request& req, crow::response& res) {{
 {CACHE_FOREVER_HEADER}
 res.add_header("ETag", "{sha1}");
 if (req.headers.count("If-None-Match") == 1) {{
 if (req.get_header_value("If-None-Match") == "{sha1}") {{
 res.code = 304;
 res.end();
 return;
 }}
 }}

 res.code = 200;
 // TODO, if you have a browser from the dark ages that doesn't support gzip,
 // unzip it before sending based on Accept-Encoding header
 res.add_header("Content-Encoding", "{content_encoding}");
 res.add_header("Content-Type", "{content_type}");

 res.write(staticassets::{relative_path_escaped});

 res.end();
 }});
"""
47
48
def twos_comp(val, bits):
    """Reinterpret the unsigned ``bits``-wide integer ``val`` as signed.

    When the top bit (bit ``bits - 1``) of ``val`` is set, ``2 ** bits`` is
    subtracted to obtain the negative value; otherwise ``val`` is returned
    unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        return val - (1 << bits)
    return val
54
def get_relative_path(full_filepath):
    """Derive the served path and the C++ identifier for an asset file.

    The path is split on ``os.path.sep`` and everything after the first
    component named ``static`` is kept.

    Returns a ``(relative_path, relative_path_escaped)`` tuple:

    * ``relative_path`` is ``"static/"`` followed by the kept components
      joined with ``/``, so it is usable as a URL on every platform.
    * ``relative_path_escaped`` is the kept part with every character that
      is not an ASCII letter, digit or underscore replaced by ``_``, and an
      extra leading ``_`` when it would otherwise start with a digit, so it
      is always usable as a C++ identifier.

    Raises ``ValueError`` when no path component is named ``static``.
    """
    components = full_filepath.split(os.path.sep)
    try:
        static_index = components.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not located below a 'static' directory".format(
                full_filepath)) from None

    below_static = "/".join(components[static_index + 1:])

    # '/', '.' and '-' are the common offenders, but any other character
    # (space, '@', '+', ...) would equally break the generated C++.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", below_static)
    if relative_path_escaped[:1].isdigit():
        relative_path_escaped = "_" + relative_path_escaped

    return "static/" + below_static, relative_path_escaped
66
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070067
def get_sha1_path_from_relative(relative_path, sha1):
    """Return ``relative_path`` with a short hash inserted before its extension.

    The first ten characters of ``sha1`` are appended to the file stem after
    a ``-``. When ``sha1`` is the empty string the path is returned as is.
    """
    if sha1 == "":
        return relative_path

    stem, suffix = os.path.splitext(relative_path)
    return "".join([stem, "-", sha1[:10], suffix])
74
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070075
def filter_html(sha1_list, file_content):
    """Point ``src``/``href`` attributes at the hashed names of known assets.

    ``sha1_list`` maps an asset's relative path to its SHA-1 hex digest and
    ``file_content`` is the HTML document as bytes. Every ``src="..."`` or
    ``href="..."`` attribute (single or double quoted) whose value equals a
    key of ``sha1_list`` is rewritten to the name produced by
    ``get_sha1_path_from_relative``. Each path that was replaced is printed.

    Returns the rewritten document as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # A callable replacement inserts replace_name literally. Building a
        # template such as "\\1" + replace_name would turn a name that starts
        # with a digit into a different group reference and would interpret
        # any backslash in the name as an escape.
        def substitute(match, replace_name=replace_name):
            return match.group(1) + replace_name + match.group(4)

        string_content_new = re.sub(pattern, substitute, string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
88
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070089
def filter_js(sha1_list, file_content):
    """Replace references to known assets with their hashed names.

    ``sha1_list`` maps an asset's relative path to its SHA-1 hex digest and
    ``file_content`` is the script or stylesheet as bytes. Every literal
    occurrence of a key of ``sha1_list`` is replaced by the name produced by
    ``get_sha1_path_from_relative``. Each path that was replaced is printed.

    Returns the rewritten content as bytes.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)

        # Plain substring replacement: the path is not a regular expression,
        # so "." must only match a dot and the new name must be inserted
        # verbatim.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
102
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700103
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Store the SHA-1 hex digest of ``file_content`` under ``relative_path``.

    ``sha1_list`` is modified in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
109
Ed Tanousb4d29f42017-03-24 16:39:25 -0700110
def get_dependencies(dependency_list, full_filepath):
    """Return every direct and indirect dependency of ``full_filepath``.

    ``dependency_list`` maps a file to the list of files it refers to. The
    result lists each dependency exactly once and always places a file after
    the files it depends on itself, so processing the result front to back
    handles the most deeply nested files first. ``full_filepath`` is never
    part of the result.

    Self references and circular references are tolerated: a file that has
    already been visited is not followed again.

    Raises ``KeyError`` when a visited file is missing from
    ``dependency_list``.
    """
    ordered = []
    # Seeding with the root keeps it out of the result and breaks cycles
    # that lead back to it.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list[path]:
            if dependency in visited:
                continue
            visited.add(dependency)
            # Post-order: emit what the dependency needs before the
            # dependency itself.
            visit(dependency)
            ordered.append(dependency)

    visit(full_filepath)
    return ordered
120
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700121
def remove_duplicates_preserve_order(seq):
    """Return the items of ``seq`` as a list without repeated values.

    Only the first occurrence of each value is kept and the relative order
    of the kept items is unchanged.
    """
    already_seen = set()
    unique_items = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
Ed Tanous1ccd57c2017-03-21 13:15:58 -0700126
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700127
def _gzip_bytes(data):
    """Gzip ``data`` with a zeroed header timestamp.

    A fixed timestamp makes the compressed bytes, and therefore the SHA-1
    derived from them, depend only on the input.
    """
    buffer = io.BytesIO()
    with gzip.GzipFile(fileobj=buffer, mode='wb', mtime=0) as gzip_file:
        gzip_file.write(data)
    return buffer.getvalue()


def _find_dependencies(file_list):
    """Map every file in ``file_list`` to the listed files it refers to.

    Only ``.css``, ``.js`` and ``.html`` files are scanned; every other file
    gets an empty dependency list.
    """
    text_file_types = ('.css', '.js', '.html')
    relative_names = {
        path: get_relative_path(path)[0] for path in file_list}

    depends_on = {}
    for full_filepath in file_list:
        depends_on[full_filepath] = []
        ext = os.path.splitext(relative_names[full_filepath])[1]
        if ext not in text_file_types:
            continue

        # The same files are later decoded as UTF-8, so read them as UTF-8
        # here rather than in the locale's encoding.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()

        for full_replacename in file_list:
            if full_replacename == full_filepath:
                # A file that mentions its own name does not depend on itself.
                continue

            # The name is matched literally, exactly as filter_html does.
            escaped_name = re.escape(relative_names[full_replacename])
            if ext == ".html":
                pattern = "((src|href)=[\"'])(" + escaped_name + ")([\"'])"
            else:
                pattern = ("([\"'](\\.\\./)*)(" + escaped_name +
                           ")([\"'\\?])")

            if re.search(pattern, file_content):
                depends_on[full_filepath].append(full_replacename)

    return depends_on


def _write_header(hpp_path, ordered_files, sha1_list, gzip_content):
    """Write the C++ header that declares the assets and registers routes."""
    # NOTE(review): "none" is emitted as Content-Encoding for uncompressed
    # builds; confirm that clients accept this value.
    content_encoding = 'gzip' if gzip_content else 'none'

    with open(hpp_path, 'w', encoding='utf-8') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         )

        hpp_output.write("struct staticassets {\n")
        for full_filepath in ordered_files:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(
                    relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        for full_filepath in ordered_files:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            # if we have a valid sha1, and we have a unique path to the
            # resource it can be safely cached forever
            if (ENABLE_CACHING and sha1 != ""
                    and relative_path != relative_path_sha1):
                environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        hpp_output.write("}\n}\n}")


def _write_source(cpp_path, ordered_files, content_dict):
    """Write the C++ source that defines each asset's bytes."""
    cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"

    with open(cpp_path, 'w', encoding='utf-8') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in ordered_files:
            _, relative_path_escaped = get_relative_path(full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(
                str(twos_comp(x, 8)) for x in content_dict[full_filepath])
            cpp_output.write(
                cpp_end_buffer.format(
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
        cpp_output.write("}\n}\n")


def main():
    """Embed static web assets into generated C++ source files.

    Command line:

    * ``-i/--input``: one or more asset files located below a ``static``
      directory (required).
    * ``-o/--output``: path of the generated ``.cpp`` file (required). The
      header is written next to it with the extension replaced by ``.hpp``.
    * ``-d/--debug``: embed the assets uncompressed instead of gzipped.

    Files are processed so that a file comes after the files it refers to.
    References inside HTML, JS and CSS are rewritten to hashed names, the
    result is optionally gzipped and hashed, and a route per asset is
    generated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]
    gzip_content = not args.debug

    depends_on = _find_dependencies(file_list)

    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)

    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    sha1_list = {}
    content_dict = {}
    total_payload_size = 0
    for full_filepath in dependency_ordered_file_list:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} size {:>7}".format(
            relative_path, len(file_content)))

        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
        elif extension in (".js", ".css"):
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            file_content = _gzip_bytes(file_content)

        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content
        total_payload_size += len(file_content)

    # Swap only the extension; replacing every "cpp" in the path would also
    # rewrite directory names that happen to contain it.
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"
    _write_header(hpp_path, dependency_ordered_file_list, sha1_list,
                  gzip_content)
    _write_source(args.output, dependency_ordered_file_list, content_dict)

    print("Total static file size: {}KB".format(total_payload_size // 1024))


if __name__ == "__main__":
    main()