blob: bb55694c5644c66989c961bf871ce67d3bfe791d [file] [log] [blame]
Ed Tanous904063f2017-03-02 16:48:24 -08001#! /usr/bin/python3
2
3import argparse
4import os
5import gzip
6import hashlib
7from subprocess import Popen, PIPE
Ed Tanousb4a7bfa2017-04-04 17:23:00 -07008from collections import defaultdict
Ed Tanous1ccd57c2017-03-21 13:15:58 -07009import re
Ed Tanous904063f2017-03-02 16:48:24 -080010
# Directory containing this script.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, assets that are served from a content-hashed URL get a
# long-lived Cache-Control header in the generated routes.
ENABLE_CACHING = True

# Maps a file extension to the Content-Type header emitted for it.
# Extensions missing from this table are served with an empty Content-Type
# (a warning is printed at generation time).
# TODO(ed) this needs to be better
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must not be labelled text/html: browsers honouring
    # "X-Content-Type-Options: nosniff" refuse to execute them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
Ed Tanous904063f2017-03-02 16:48:24 -080023
# C++ template for one Crow route, rendered once per asset with str.format().
# Literal C++ braces are doubled ("{{" / "}}") so that format() leaves them
# alone. Placeholders filled in by main():
#   {relative_path_sha1}    URL the route is registered under
#   {CACHE_FOREVER_HEADER}  optional Cache-Control statement (may be empty)
#   {sha1}                  hash used as the ETag and for If-None-Match checks
#   {content_encoding}      value of the Content-Encoding header
#   {content_type}          value of the Content-Type header
#   {relative_path_escaped} name of the staticassets member holding the bytes
CPP_MIDDLE_BUFFER = """ CROW_ROUTE(app, "{relative_path_sha1}")
 ([](const crow::request& req, crow::response& res) {{
 {CACHE_FOREVER_HEADER}
 res.add_header("ETag", "{sha1}");
 if (req.headers.count("If-None-Match") == 1) {{
 if (req.get_header_value("If-None-Match") == "{sha1}") {{
 res.code = 304;
 res.end();
 return;
 }}
 }}

 res.code = 200;
 // TODO, if you have a browser from the dark ages that doesn't support gzip,
 // unzip it before sending based on Accept-Encoding header
 res.add_header("Content-Encoding", "{content_encoding}");
 res.add_header("Content-Type", "{content_type}");

 res.write(staticassets::{relative_path_escaped});

 res.end();
 }});
"""
47
48
def twos_comp(val, bits):
    """Reinterpret the unsigned integer `val` as a `bits`-wide signed value.

    Values whose top bit (bit `bits - 1`) is set are mapped to the
    corresponding negative number; all other values are returned unchanged.
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # Top bit set, e.g. 128..255 for 8 bits: shift down by 2**bits.
        return val - (sign_mask << 1)
    return val
54
def get_relative_path(full_filepath):
    """Derive the served path and a C++-friendly name for an asset file.

    The path is cut after its first "static" component. Returns a tuple
    ``(relative_path, relative_path_escaped)`` where ``relative_path`` is
    "static/" followed by the remainder, and ``relative_path_escaped`` is the
    remainder with every '/', '.' and '-' replaced by '_'.

    Raises ValueError if no path component is exactly "static".
    """
    components = full_filepath.split(os.path.sep)
    first_after_static = components.index("static") + 1
    remainder = os.path.sep.join(components[first_after_static:])

    # One pass instead of three chained replace() calls.
    escaped = remainder.translate(str.maketrans("/.-", "___"))

    return "static/" + remainder, escaped
66
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070067
def get_sha1_path_from_relative(relative_path, sha1):
    """Return `relative_path` with a short hash inserted before its extension.

    The first 10 characters of `sha1` are appended to the path stem after a
    '-'. When `sha1` is the empty string the path is returned unchanged.
    """
    if sha1 == "":
        return relative_path

    stem, extension = os.path.splitext(relative_path)
    return "".join((stem, "-", sha1[:10], extension))
74
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070075
def filter_html(sha1_list, file_content):
    """Rewrite src/href references in HTML to their hashed asset names.

    Args:
        sha1_list: dict mapping an asset's relative path to its sha1 hex
            digest.
        file_content: the HTML document as bytes (decoded with the default
            codec of bytes.decode()).

    Returns:
        The document as bytes, with every quoted ``src=`` / ``href=`` value
        that exactly equals a known relative path replaced by the name
        produced by get_sha1_path_from_relative().
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)
        pattern = "((src|href)=[\"'])(" + re.escape(key) + ")([\"'])"

        # Use a replacement function rather than a template string: in a
        # template, backslashes or a leading digit in the file name would be
        # read as escapes / group references (e.g. "\\1" + "2.js" -> group 12).
        def substitute(match, new_name=replace_name):
            return match.group(1) + new_name + match.group(4)

        string_content_new = re.sub(pattern, substitute, string_content)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new

    return string_content.encode()
88
Ed Tanousb4a7bfa2017-04-04 17:23:00 -070089
def filter_js(sha1_list, file_content):
    """Rewrite asset paths inside JS/CSS text to their hashed names.

    Args:
        sha1_list: dict mapping an asset's relative path to its sha1 hex
            digest.
        file_content: the file as bytes (decoded with the default codec of
            bytes.decode()).

    Returns:
        The file as bytes, with every literal occurrence of a known relative
        path replaced by the name produced by get_sha1_path_from_relative().
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)

        # Plain substring replacement: the path is data, not a pattern.
        # Treating it as a regex lets "." match any character, and treating
        # the new name as a regex template misreads backslashes in it.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
102
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700103
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Record the sha1 hex digest of `file_content` under `relative_path`.

    `sha1_list` (a dict) is updated in place; nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
109
Ed Tanousb4d29f42017-03-24 16:39:25 -0700110
def get_dependencies(dependency_list, full_filepath):
    """Return every direct and transitive dependency of `full_filepath`.

    Args:
        dependency_list: dict mapping a file path to the list of file paths
            it directly depends on.
        full_filepath: the file whose dependencies are wanted.

    Returns:
        A list without duplicates in which each file appears after all of
        its own dependencies, so processing the list front to back handles
        a file only once everything it references has been handled.
        `full_filepath` itself is never included.

    Raises:
        KeyError: if a visited path has no entry in `dependency_list`.
    """
    ordered = []
    # Seeding with the root makes self-references and cycles back to the
    # root harmless instead of recursing forever.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list[path]:
            if dependency in visited:
                continue
            visited.add(dependency)
            # Post-order: emit a dependency's own dependencies before it.
            visit(dependency)
            ordered.append(dependency)

    visit(full_filepath)
    return ordered
120
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700121
def remove_duplicates_preserve_order(seq):
    """Return a list of the items of `seq` keeping only first occurrences.

    The relative order of the surviving items is unchanged. Items must be
    hashable.
    """
    unique = []
    already_seen = set()
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique.append(item)
    return unique
Ed Tanous1ccd57c2017-03-21 13:15:58 -0700126
Ed Tanousb4a7bfa2017-04-04 17:23:00 -0700127
def _find_dependencies(file_list):
    """Map each input file to the input files its text content references."""
    text_file_types = ('.css', '.js', '.html')
    # "{}" receives the regex-escaped relative path of the candidate file.
    html_reference = "((src|href)=[\"'])({})([\"'])"
    script_reference = "([\"'](\\.\\./)*)({})([\"'?])"

    depends_on = {}
    for full_filepath in file_list:
        relative_path, _ = get_relative_path(full_filepath)
        ext = os.path.splitext(relative_path)[1]
        depends_on[full_filepath] = []
        if ext not in text_file_types:
            continue

        # The filters decode these files as UTF-8 later on; read them the
        # same way here instead of relying on the platform default.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()

        template = html_reference if ext == ".html" else script_reference
        for full_replacename in file_list:
            relative_replacename, _ = get_relative_path(full_replacename)
            # Escape the path: it is literal text, and an unescaped "."
            # would match any character.
            pattern = template.format(re.escape(relative_replacename))
            if re.search(pattern, file_content):
                depends_on[full_filepath].append(full_replacename)
    return depends_on


def _hash_and_pack_files(ordered_files, gzip_content):
    """Read, rewrite, optionally gzip and hash every file, in the given order.

    Returns (sha1_list, content_dict): relative path -> sha1 hex digest, and
    full path -> final bytes to embed.
    """
    sha1_list = {}
    content_dict = {}
    for full_filepath in ordered_files:
        with open(full_filepath, 'rb') as input_file:
            file_content = input_file.read()
        relative_path, _ = get_relative_path(full_filepath)
        extension = os.path.splitext(relative_path)[1]

        print("Including {:<40} size {:>7}".format(
            relative_path, len(file_content)))

        # Only references to files already hashed (earlier in the order)
        # can be rewritten, which is why the order matters.
        if extension == ".html":
            file_content = filter_html(sha1_list, file_content)
        elif extension in (".js", ".css"):
            file_content = filter_js(sha1_list, file_content)

        if gzip_content:
            # NOTE(review): gzip.compress() presumably stamps the current
            # time into the gzip header, which would make the hash below
            # differ between runs - confirm whether that matters.
            file_content = gzip.compress(file_content)

        compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
        content_dict[full_filepath] = file_content
    return sha1_list, content_dict


def _write_header(hpp_path, ordered_files, sha1_list, gzip_content):
    """Write the .hpp file: asset declarations plus one route per asset."""
    with open(hpp_path, 'w') as hpp_output:
        hpp_output.write("#pragma once\n"
                         "\n"
                         "#include <string>\n"
                         "\n"
                         "#include <crow/app.h>\n"
                         "#include <crow/http_request.h>\n"
                         "#include <crow/http_response.h>\n"
                         "\n"
                         "#include <crow/routing.h>\n"
                         "\n"
                         "namespace crow {\n"
                         "namespace webassets {\n"
                         )

        hpp_output.write("struct staticassets {\n")
        for full_filepath in ordered_files:
            _, relative_path_escaped = get_relative_path(full_filepath)
            hpp_output.write(
                " static const std::string {};\n".format(relative_path_escaped))
        hpp_output.write("};\n\n")
        hpp_output.write("template <typename... Middlewares>\n")
        hpp_output.write("void request_routes(Crow<Middlewares...>& app) {\n")

        content_encoding = 'gzip' if gzip_content else 'none'

        for full_filepath in ordered_files:
            relative_path, relative_path_escaped = get_relative_path(
                full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(
                os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            else:
                relative_path_sha1 = "/" + \
                    get_sha1_path_from_relative(relative_path, sha1)

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                "CACHE_FOREVER_HEADER": ""
            }

            if ENABLE_CACHING:
                # if we have a valid sha1, and we have a unique path to the
                # resource it can be safely cached forever
                if sha1 != "" and relative_path != relative_path_sha1:
                    environment["CACHE_FOREVER_HEADER"] = "res.add_header(\"Cache-Control\", \"public, max-age=31556926\");\n"

            hpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        # Closes request_routes() and both namespaces.
        hpp_output.write("}\n}\n}")


def _write_source(cpp_path, ordered_files, content_dict):
    """Write the .cpp file defining every asset as a byte-initialised string."""
    cpp_end_buffer = "const std::string staticassets::{relative_path_escaped}{{{file_bytes}}};\n"
    with open(cpp_path, 'w') as cpp_output:
        cpp_output.write("#include <webassets.hpp>\n"
                         "namespace crow{\n"
                         "namespace webassets{\n")

        for full_filepath in ordered_files:
            file_content = content_dict[full_filepath]
            _, relative_path_escaped = get_relative_path(full_filepath)
            # compute the 2s complement for negative numbers.
            # If you don't, you get narrowing warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8))
                                          for x in file_content)
            cpp_output.write(
                cpp_end_buffer.format(
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )
        cpp_output.write("}\n}\n")


def main():
    """Generate C++ sources that embed static web assets as Crow routes.

    Command line:
        -i/--input   one or more asset files; each path must contain a
                     "static" directory component (required)
        -o/--output  path of the .cpp file to write; the header is written
                     next to it with the extension replaced by ".hpp"
                     (required)
        -d/--debug   embed the files uncompressed instead of gzip-compressed
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the script used to fail later
    # with an unrelated TypeError/AttributeError.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]
    gzip_content = not args.debug

    depends_on = _find_dependencies(file_list)

    # Put each file after the files it references so that their hashed names
    # are known by the time its content is rewritten.
    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)
    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    sha1_list, content_dict = _hash_and_pack_files(
        dependency_ordered_file_list, gzip_content)

    # Swap only the extension; str.replace("cpp", "hpp") would also rewrite
    # any "cpp" appearing in a directory or base name.
    hpp_path = os.path.splitext(args.output)[0] + ".hpp"

    _write_header(hpp_path, dependency_ordered_file_list,
                  sha1_list, gzip_content)
    _write_source(args.output, dependency_ordered_file_list, content_dict)
Ed Tanous904063f2017-03-02 16:48:24 -0800298
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()