blob: 82687870921fe3928b3aa6d79cd3879698d7514a [file] [log] [blame]
Ed Tanous904063f2017-03-02 16:48:24 -08001#! /usr/bin/python3
2
3import argparse
4import os
5import gzip
6import hashlib
7from subprocess import Popen, PIPE
Ed Tanous1ccd57c2017-03-21 13:15:58 -07008import re
Ed Tanous904063f2017-03-02 16:48:24 -08009
# Directory that contains this script, with symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# When True, main() adds CACHE_FOREVER_HEADER to routes whose URL embeds a
# content hash; set to False to never emit that header.
ENABLE_CACHING = True
Ed Tanous904063f2017-03-02 16:48:24 -080013
# TODO(ed) this needs to be better
# Maps a file extension (including the leading dot) to the Content-Type header
# value emitted for that asset.  Extensions missing from this table are served
# with an empty Content-Type and reported as "unknown content type" by main().
CONTENT_TYPES = {
    '.css': "text/css;charset=UTF-8",
    '.html': "text/html;charset=UTF-8",
    # Scripts must not be labelled as HTML: browsers that enforce strict MIME
    # checking (X-Content-Type-Options: nosniff) refuse to execute them.
    '.js': "text/javascript;charset=UTF-8",
    '.png': "image/png;charset=UTF-8",
    '.woff': "application/x-font-woff",
}
Ed Tanous904063f2017-03-02 16:48:24 -080022
# Text written at the very top of the generated C++ file, before any asset
# data.
CPP_BEGIN_BUFFER = """
#include <webassets.hpp>

"""

# Opens the generated route-registration function.  main() writes it after all
# asset arrays; the function body is closed by CPP_END_BUFFER.
ROUTE_DECLARATION = """

void crow::webassets::request_routes(BmcAppType& app){
"""

# C++ statement substituted for {CACHE_FOREVER_HEADER} in CPP_MIDDLE_BUFFER
# when a route may be cached; max-age is roughly one year, in seconds.
CACHE_FOREVER_HEADER = """
 res.add_header("Cache-Control", "public, max-age=31556926");
"""
36
# str.format() template for one route handler; main() renders it once per
# asset.  Doubled braces ({{ and }}) are literal C++ braces.  Placeholders:
#   {relative_path_sha1}    URL the route is registered under
#   {CACHE_FOREVER_HEADER}  either CACHE_FOREVER_HEADER or an empty string
#   {sha1}                  value sent as ETag and compared with If-None-Match
#   {content_encoding}      value of the Content-Encoding header
#   {content_type}          value of the Content-Type header
#   {relative_path_escaped} name of the C++ string holding the asset bytes
CPP_MIDDLE_BUFFER = """
 CROW_ROUTE(app, "{relative_path_sha1}")([](const crow::request& req, crow::response& res) {{
 {CACHE_FOREVER_HEADER}

 res.add_header("ETag", "{sha1}");
 if (req.headers.count("If-None-Match") == 1) {{
 if (req.get_header_value("If-None-Match") == "{sha1}"){{
 res.code = 304;
 res.end();
 return;
 }}
 }}

 res.code = 200;
 // TODO, if you have a browser from the dark ages that doesn't support gzip,
 // unzip it before sending based on Accept-Encoding header
 res.add_header("Content-Encoding", "{content_encoding}");
 res.add_header("Content-Type", "{content_type}");

 res.write({relative_path_escaped});

 res.end();
 }});
"""
61
62
def twos_comp(val, bits):
    """Reinterpret ``val`` as a signed two's-complement number.

    ``val`` is treated as a ``bits``-wide pattern: when its top bit is set
    the matching negative value is returned, otherwise ``val`` is returned
    unchanged (e.g. for 8 bits, 128..255 map to -128..-1).
    """
    sign_mask = 1 << (bits - 1)
    if val & sign_mask:
        # Sign bit set: subtract 2**bits to obtain the negative value.
        return val - (1 << bits)
    return val
68
# Closes the function opened by ROUTE_DECLARATION; written last by main().
CPP_END_BUFFER = """
}
"""

# str.format() template declaring one asset as a C++ std::string.
# {relative_path_escaped} is the variable name and {file_bytes} the
# comma-separated byte values; the tripled braces produce a literal brace
# around the substituted byte list.
CPP_END_BUFFER2 = """const static std::string {relative_path_escaped}{{{file_bytes}}};
"""
75
def get_relative_path(full_filepath):
    """Split an asset path into its served URL path and a C++ identifier.

    Args:
        full_filepath: Path to an asset that contains a directory component
            named ``static``.

    Returns:
        A ``(relative_path, relative_path_escaped)`` tuple.
        ``relative_path`` is ``"static/"`` followed by everything after the
        first ``static`` component, always joined with forward slashes.
        ``relative_path_escaped`` is that trailing part with every character
        other than ASCII letters, digits and ``_`` replaced by ``_``.

    Raises:
        ValueError: If the path has no ``static`` component.
    """
    pathsplit = full_filepath.split(os.path.sep)
    try:
        static_index = pathsplit.index("static")
    except ValueError:
        raise ValueError(
            "{!r} is not inside a 'static' directory".format(full_filepath)
        ) from None

    # The result is used as a URL, so join with "/" whatever the OS separator.
    relative_path = "/".join(pathsplit[static_index + 1:])

    # The escaped name becomes a C++ variable name; mapping only '/', '.' and
    # '-' left names containing e.g. spaces or '@' as invalid identifiers.
    relative_path_escaped = re.sub(r"[^0-9A-Za-z_]", "_", relative_path)

    return "static/" + relative_path, relative_path_escaped
87
def get_sha1_path_from_relative(relative_path, sha1):
    """Return ``relative_path`` with a short hash inserted before its extension.

    The first ten characters of ``sha1`` are appended to the file stem after a
    ``-``.  An empty ``sha1`` returns ``relative_path`` unchanged.
    """
    if sha1 == "":
        return relative_path

    stem, suffix = os.path.splitext(relative_path)
    return "{}-{}{}".format(stem, sha1[:10], suffix)
94
def filter_html(sha1_list, file_content):
    """Point ``src``/``href`` attributes in an HTML document at hashed names.

    Args:
        sha1_list: Mapping of asset relative path to its SHA-1 hex digest.
        file_content: The HTML document as bytes.

    Returns:
        The document as bytes, with each quoted ``src=`` / ``href=`` value
        that equals a key of ``sha1_list`` replaced by the name produced by
        ``get_sha1_path_from_relative``.  Each replaced name is printed.
    """
    text = file_content.decode()
    for original_name, digest in sha1_list.items():
        hashed_name = get_sha1_path_from_relative(original_name, digest)
        pattern = "((src|href)=[\"'])(" + re.escape(original_name) + ")([\"'])"
        # Groups 1 and 4 keep the attribute prefix and the closing quote.
        rewritten = re.sub(pattern, "\\1" + hashed_name + "\\4", text)
        if rewritten == text:
            continue
        print(" Replaced {}".format(original_name))
        print(" With {}".format(hashed_name))
        text = rewritten

    return text.encode()
106
def filter_js(sha1_list, file_content):
    """Rewrite asset names inside a JavaScript file to their hashed names.

    Args:
        sha1_list: Mapping of asset relative path to its SHA-1 hex digest.
        file_content: The JavaScript source as bytes.

    Returns:
        The source as bytes, with every occurrence of a key of ``sha1_list``
        replaced by the name produced by ``get_sha1_path_from_relative``.
        Each replaced name is printed.
    """
    string_content = file_content.decode()
    for key, value in sha1_list.items():
        replace_name = get_sha1_path_from_relative(key, value)

        # Literal replacement: the key is a file name, not a regular
        # expression.  Passing it to re.sub() let '.' match any character
        # and made names containing regex metacharacters fail or over-match.
        string_content_new = string_content.replace(key, replace_name)
        if string_content_new != string_content:
            print(" Replaced {}".format(key))
            print(" With {}".format(replace_name))
            string_content = string_content_new
    return string_content.encode()
119
def compute_sha1_and_update_dict(sha1_list, file_content, relative_path):
    """Record the SHA-1 of ``file_content`` under ``relative_path``.

    ``sha1_list`` is updated in place with the lowercase hexadecimal digest;
    nothing is returned.
    """
    sha1_list[relative_path] = hashlib.sha1(file_content).hexdigest()
127
# Extensions in the order their files should be processed.
FILE_PRECIDENCE = ['.woff', '.png', '.css', '.js', '.html']


def sort_order(full_filepath):
    """Return the sort key of a file, derived from its extension.

    Extensions listed in ``FILE_PRECIDENCE`` get their 1-based position in
    that list; any other extension gets 0 and therefore sorts first.
    """
    extension = os.path.splitext(full_filepath)[1]
    try:
        return FILE_PRECIDENCE.index(extension) + 1
    except ValueError:
        return 0
136
137
def get_dependencies(dependency_list, full_filepath):
    """Return every direct and transitive dependency of ``full_filepath``.

    Args:
        dependency_list: Mapping of file path to the list of file paths it
            depends on directly.
        full_filepath: File whose dependencies are wanted.

    Returns:
        A list without duplicates in which each file appears after the files
        it depends on, so processing the list front to back handles
        dependencies first.  ``full_filepath`` itself is never included.
        Files missing from ``dependency_list`` are treated as having no
        dependencies.
    """
    ordered = []
    # Seeding with the start file breaks reference cycles (including a file
    # that references itself), which previously recursed without bound.
    visited = {full_filepath}

    def visit(path):
        for dependency in dependency_list.get(path, ()):
            if dependency in visited:
                continue
            visited.add(dependency)
            visit(dependency)
            # Appending after the recursive call yields dependencies-first
            # (post-order) output.
            ordered.append(dependency)

    visit(full_filepath)
    return ordered
147
def remove_duplicates_preserve_order(seq):
    """Return the items of ``seq`` as a list, keeping only first occurrences.

    The relative order of the kept items is unchanged.
    """
    unique = []
    seen = set()
    for item in seq:
        if item in seen:
            continue
        seen.add(item)
        unique.append(item)
    return unique
Ed Tanous1ccd57c2017-03-21 13:15:58 -0700152
def _find_dependencies(file_list):
    """Map each input file to the other input files it references.

    Only ``.html`` and ``.js`` files are scanned: HTML for quoted ``src=`` /
    ``href=`` values, JavaScript for quoted strings, each equal to the
    relative path of another input file.  Every file in ``file_list`` gets an
    entry, empty when nothing was found or the file is not scanned.
    """
    # File names are matched literally, so escape them once up front.
    escaped_names = [
        (candidate, re.escape(get_relative_path(candidate)[0]))
        for candidate in file_list
    ]

    depends_on = {}
    for full_filepath in file_list:
        depends_on[full_filepath] = []
        ext = os.path.splitext(full_filepath)[1]
        if ext == ".html":
            prefix = "((src|href)=[\"'])("
        elif ext == ".js":
            prefix = "([\"'])("
        else:
            continue

        # The rewrite filters decode assets as UTF-8, so scan them the same
        # way rather than with the locale's default encoding.
        with open(full_filepath, 'r', encoding='utf-8') as input_file:
            file_content = input_file.read()

        for candidate, escaped_name in escaped_names:
            if candidate == full_filepath:
                # A file that mentions its own name is not its own dependency.
                continue
            if re.search(prefix + escaped_name + ")([\"'])", file_content):
                depends_on[full_filepath].append(candidate)
    return depends_on


def main():
    """Generate a C++ source file that embeds static web assets.

    Command line: ``-i/--input`` one or more asset files located under a
    ``static`` directory, ``-o/--output`` the C++ file to write, and
    ``-d/--debug`` to embed the assets uncompressed.

    Each asset is written as a ``std::string`` of byte values, followed by a
    ``request_routes`` function that registers one route per asset.  HTML and
    JavaScript assets have references to other assets rewritten to names that
    include a content hash, which is why referenced files are processed
    before the files that reference them.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the script used to fail later
    # with a TypeError on None.
    parser.add_argument('-i', '--input', nargs='+', type=str, required=True)
    parser.add_argument('-o', '--output', type=str, required=True)
    parser.add_argument('-d', '--debug', action='store_true')
    args = parser.parse_args()

    file_list = [os.path.realpath(f) for f in args.input]
    file_list.sort(key=sort_order)

    depends_on = _find_dependencies(file_list)

    dependency_ordered_file_list = []
    for full_filepath in file_list:
        dependency_ordered_file_list.extend(
            get_dependencies(depends_on, full_filepath))
        dependency_ordered_file_list.append(full_filepath)
    dependency_ordered_file_list = remove_duplicates_preserve_order(
        dependency_ordered_file_list)

    # relative path -> SHA-1 hex digest of the bytes that get embedded
    sha1_list = {}
    # NOTE(review): 'none' is not a registered content coding; confirm that
    # debug builds are handled correctly by the browsers in use.
    content_encoding = 'none' if args.debug else 'gzip'

    with open(args.output, 'w') as cpp_output:
        cpp_output.write(CPP_BEGIN_BUFFER)

        for full_filepath in dependency_ordered_file_list:
            with open(full_filepath, 'rb') as input_file:
                file_content = input_file.read()
            relative_path, relative_path_escaped = get_relative_path(full_filepath)

            print("Including {:<40} size {:>7}".format(relative_path, len(file_content)))

            if relative_path.endswith(".html"):
                file_content = filter_html(sha1_list, file_content)
            elif relative_path.endswith(".js"):
                file_content = filter_js(sha1_list, file_content)

            if not args.debug:
                # NOTE(review): the gzip header presumably carries a
                # timestamp, which would make the hash below differ between
                # builds -- confirm.
                file_content = gzip.compress(file_content)

            compute_sha1_and_update_dict(sha1_list, file_content, relative_path)
            # compute the 2s complement. If you don't, you get narrowing
            # warnings from gcc/clang
            array_binary_text = ', '.join(str(twos_comp(x, 8)) for x in file_content)

            cpp_output.write(
                CPP_END_BUFFER2.format(
                    relative_path=relative_path,
                    file_bytes=array_binary_text,
                    relative_path_escaped=relative_path_escaped
                )
            )

        cpp_output.write(ROUTE_DECLARATION)

        for full_filepath in dependency_ordered_file_list:
            relative_path, relative_path_escaped = get_relative_path(full_filepath)
            sha1 = sha1_list.get(relative_path, '')

            content_type = CONTENT_TYPES.get(os.path.splitext(relative_path)[1], "")
            if content_type == "":
                print("unknown content type for {}".format(relative_path))

            # True only when the route URL embeds the content hash.
            fingerprinted = False
            # handle the default routes
            if relative_path == "static/index.html":
                relative_path = "/"
                relative_path_sha1 = "/"
            # TODO(ed), handle woff files better. They are referenced in CSS,
            # which at this point isn't scrubbed with a find and replace
            # algorithm
            elif relative_path.endswith(".woff"):
                # Served under the plain name, but the route still needs the
                # leading "/" that request URLs carry.
                relative_path_sha1 = "/" + relative_path
            else:
                relative_path_sha1 = "/" + get_sha1_path_from_relative(relative_path, sha1)
                fingerprinted = sha1 != ""

            environment = {
                'relative_path': relative_path,
                'relative_path_escaped': relative_path_escaped,
                'relative_path_sha1': relative_path_sha1,
                'sha1': sha1,
                'sha1_short': sha1[:20],
                'content_type': content_type,
                'content_encoding': content_encoding,
                # if we have a valid sha1, and we have a unique path to the
                # resource it can be safely cached forever
                'CACHE_FOREVER_HEADER': (
                    CACHE_FOREVER_HEADER
                    if ENABLE_CACHING and fingerprinted else ""
                ),
            }

            cpp_output.write(CPP_MIDDLE_BUFFER.format(**environment))

        cpp_output.write(CPP_END_BUFFER)
282
283
Ed Tanous904063f2017-03-02 16:48:24 -0800284
285if __name__ == "__main__":
286 main()