Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 1 | From 7bf9525528c8f4a47413d7f82214d76f95f0c5f6 Mon Sep 17 00:00:00 2001 |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 2 | From: Mingke Wang <mingke.wang@freescale.com> |
| 3 | Date: Thu, 19 Mar 2015 14:17:10 +0800 |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 4 | Subject: [PATCH] ssaparse: enhance SSA text lines parsing. |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 5 | |
| 6 | Some parsers pass in the original SSA text line, which starts with "Dialogue:", |
| 7 | and there may be multiple Dialogue lines in one input buffer. |
| 8 | |
| 9 | Upstream-Status: Submitted [https://bugzilla.gnome.org/show_bug.cgi?id=747496] |
| 10 | |
| 11 | Signed-off-by: Mingke Wang <mingke.wang@freescale.com> |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 12 | |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 13 | --- |
Andrew Geissler | 6ce62a2 | 2020-11-30 19:58:47 -0600 | [diff] [blame] | 14 | gst/subparse/gstssaparse.c | 150 +++++++++++++++++++++++++++++++++---- |
Brad Bishop | 1a4b7ee | 2018-12-16 17:11:34 -0800 | [diff] [blame] | 15 | 1 file changed, 134 insertions(+), 16 deletions(-) |
| 16 | mode change 100644 => 100755 gst/subparse/gstssaparse.c |
| 17 | |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 18 | diff --git a/gst/subparse/gstssaparse.c b/gst/subparse/gstssaparse.c |
| 19 | old mode 100644 |
| 20 | new mode 100755 |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 21 | index d6fdb9c..5ebe678 |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 22 | --- a/gst/subparse/gstssaparse.c |
| 23 | +++ b/gst/subparse/gstssaparse.c |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 24 | @@ -270,6 +270,7 @@ gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt) |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 25 | * gst_ssa_parse_push_line: |
| 26 | * @parse: caller element |
| 27 | * @txt: text to push |
| 28 | + * @size: size of the text that needs to be parsed |
| 29 | * @start: timestamp for the buffer |
| 30 | * @duration: duration for the buffer |
| 31 | * |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 32 | @@ -279,27 +280,133 @@ gst_ssa_parse_remove_override_codes (GstSsaParse * parse, gchar * txt) |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 33 | * Returns: result of the push of the created buffer |
| 34 | */ |
| 35 | static GstFlowReturn |
| 36 | -gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, |
| 37 | +gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, gint size, |
| 38 | GstClockTime start, GstClockTime duration) |
| 39 | { |
| 40 | GstFlowReturn ret; |
| 41 | GstBuffer *buf; |
| 42 | - gchar *t, *escaped; |
| 43 | + gchar *t, *text, *p, *escaped, *p_start, *p_end; |
| 44 | gint num, i, len; |
| 45 | + GstClockTime start_time = G_MAXUINT64, end_time = 0; |
| 46 | |
| 47 | - num = atoi (txt); |
| 48 | - GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT, |
| 49 | - num, GST_TIME_ARGS (start)); |
| 50 | - |
| 51 | - /* skip all non-text fields before the actual text */ |
| 52 | + p = text = g_malloc(size + 1); |
| 53 | + *p = '\0'; |
| 54 | t = txt; |
| 55 | - for (i = 0; i < 8; ++i) { |
| 56 | - t = strchr (t, ','); |
| 57 | + |
| 58 | + /* there may be multiple dialogue lines at a time */ |
| 59 | + while (*t) { |
| 60 | + /* ignore leading white space characters */ |
| 61 | + while (isspace(*t)) |
| 62 | + t++; |
| 63 | + |
| 64 | + /* ignore Format: and Style: lines */ |
| 65 | + if (strncmp(t, "Format:", 7) == 0 || strncmp(t, "Style:", 6) == 0) { |
| 66 | + while (*t != '\0' && *t != '\n') { |
| 67 | + t++; |
| 68 | + } |
| 69 | + } |
| 70 | + |
| 71 | + if (*t == '\0') |
| 72 | + break; |
| 73 | + |
| 74 | + /* continue with next line */ |
| 75 | + if (*t == '\n') { |
| 76 | + t++; |
| 77 | + continue; |
| 78 | + } |
| 79 | + |
| 80 | + if(strncmp(t, "Dialogue:", 9) != 0) { |
| 81 | + /* not started with "Dialogue:", it must be a line trimmed by demuxer */ |
| 82 | + num = atoi (t); |
| 83 | + GST_LOG_OBJECT (parse, "Parsing line #%d at %" GST_TIME_FORMAT, |
| 84 | + num, GST_TIME_ARGS (start)); |
| 85 | + |
| 86 | + /* skip all non-text fields before the actual text */ |
| 87 | + for (i = 0; i < 8; ++i) { |
| 88 | + t = strchr (t, ','); |
| 89 | + if (t == NULL) |
| 90 | + break; |
| 91 | + ++t; |
| 92 | + } |
| 93 | + } else { |
| 94 | + /* started with "Dialogue:", update timestamp and duration */ |
| 95 | + /* time format are like Dialog:Mark,0:00:01.02,0:00:03.04,xx,xxx,... */ |
| 96 | + guint hour, min, sec, msec, len; |
| 97 | + GstClockTime tmp; |
| 98 | + gchar t_str[12] = {0}; |
| 99 | + |
| 100 | + /* find the first ',' */ |
| 101 | + p_start = strchr (t, ','); |
| 102 | + if (p_start) |
| 103 | + p_end = strchr (++p_start, ','); |
| 104 | + |
| 105 | + if (p_start && p_end) { |
| 106 | + /* copy text between first ',' and second ',' */ |
| 107 | + strncpy(t_str, p_start, p_end - p_start); |
| 108 | + if (sscanf (t_str, "%u:%u:%u.%u", &hour, &min, &sec, &msec) == 4) { |
| 109 | + tmp = ((hour*3600) + (min*60) + sec) * GST_SECOND + msec*GST_MSECOND; |
| 110 | + GST_DEBUG_OBJECT (parse, "Get start time:%02d:%02d:%02d:%03d\n", |
| 111 | + hour, min, sec, msec); |
| 112 | + if (start_time > tmp) |
| 113 | + start_time = tmp; |
| 114 | + } else { |
| 115 | + GST_WARNING_OBJECT (parse, |
| 116 | + "failed to parse ssa start timestamp string :%s", t_str); |
| 117 | + } |
| 118 | + |
| 119 | + p_start = p_end; |
| 120 | + p_end = strchr (++p_start, ','); |
| 121 | + if (p_end) { |
| 122 | + /* copy text between second ',' and third ',' */ |
| 123 | + strncpy(t_str, p_start, p_end - p_start); |
| 124 | + if (sscanf (t_str, "%u:%u:%u.%u", &hour, &min, &sec, &msec) == 4) { |
| 125 | + tmp = ((hour*3600) + (min*60) + sec)*GST_SECOND + msec*GST_MSECOND; |
| 126 | + GST_DEBUG_OBJECT(parse, "Get end time:%02d:%02d:%02d:%03d\n", |
| 127 | + hour, min, sec, msec); |
| 128 | + if (end_time < tmp) |
| 129 | + end_time = tmp; |
| 130 | + } else { |
| 131 | + GST_WARNING_OBJECT (parse, |
| 132 | + "failed to parse ssa end timestamp string :%s", t_str); |
| 133 | + } |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + /* now skip all non-text fields before the actual text */ |
| 138 | + for (i = 0; i <= 8; ++i) { |
| 139 | + t = strchr (t, ','); |
| 140 | + if (t == NULL) |
| 141 | + break; |
| 142 | + ++t; |
| 143 | + } |
| 144 | + } |
| 145 | + |
| 146 | + /* line end before expected number of ',', not a Dialogue line */ |
| 147 | if (t == NULL) |
| 148 | - return GST_FLOW_ERROR; |
| 149 | - ++t; |
| 150 | + break; |
| 151 | + |
| 152 | + /* if not the first line, and the last character of previous line is '\0', |
| 153 | + * then replace it with '\N' */ |
| 154 | + if (p != text && *p == '\0') { |
| 155 | + *p++ = '\\'; |
| 156 | + *p++ = 'N'; |
| 157 | + } |
| 158 | + |
| 159 | + /* copy all actual text of this line */ |
| 160 | + while ((*t != '\0') && (*t != '\n')) |
| 161 | + *p++ = *t++; |
| 162 | + |
| 163 | + /* add a terminator at the end */ |
| 164 | + *p = '\0'; |
| 165 | + } |
| 166 | + |
| 167 | + /* no valid text found in this buffer, return an error to the caller */ |
| 168 | + if (strlen(text) <= 0) { |
| 169 | + GST_WARNING_OBJECT (parse, "Not valid text found in this buffer\n"); |
| 170 | + return GST_FLOW_ERROR; |
| 171 | } |
| 172 | |
| 173 | + t = text; |
| 174 | GST_LOG_OBJECT (parse, "Text : %s", t); |
| 175 | |
| 176 | if (gst_ssa_parse_remove_override_codes (parse, t)) { |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 177 | @@ -317,13 +424,22 @@ gst_ssa_parse_push_line (GstSsaParse * parse, gchar * txt, |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 178 | gst_buffer_fill (buf, 0, escaped, len + 1); |
| 179 | gst_buffer_set_size (buf, len); |
| 180 | g_free (escaped); |
| 181 | + g_free(t); |
| 182 | + |
| 183 | + if (start_time != G_MAXUINT64) |
| 184 | + GST_BUFFER_TIMESTAMP (buf) = start_time; |
| 185 | + else |
| 186 | + GST_BUFFER_TIMESTAMP (buf) = start; |
| 187 | |
| 188 | - GST_BUFFER_TIMESTAMP (buf) = start; |
| 189 | - GST_BUFFER_DURATION (buf) = duration; |
| 190 | + if (end_time > start_time) |
| 191 | + GST_BUFFER_DURATION (buf) = end_time - start_time; |
| 192 | + else |
| 193 | + GST_BUFFER_DURATION (buf) = duration; |
| 194 | |
| 195 | GST_LOG_OBJECT (parse, "Pushing buffer with timestamp %" GST_TIME_FORMAT |
| 196 | - " and duration %" GST_TIME_FORMAT, GST_TIME_ARGS (start), |
| 197 | - GST_TIME_ARGS (duration)); |
| 198 | + " and duration %" GST_TIME_FORMAT, |
| 199 | + GST_TIME_ARGS (GST_BUFFER_TIMESTAMP (buf)), |
| 200 | + GST_TIME_ARGS (GST_BUFFER_DURATION (buf))); |
| 201 | |
| 202 | ret = gst_pad_push (parse->srcpad, buf); |
| 203 | |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 204 | @@ -343,6 +459,7 @@ gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf) |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 205 | GstClockTime ts; |
| 206 | gchar *txt; |
| 207 | GstMapInfo map; |
| 208 | + gint size; |
| 209 | |
| 210 | if (G_UNLIKELY (!parse->framed)) |
| 211 | goto not_framed; |
Patrick Williams | 73bd93f | 2024-02-20 08:07:48 -0600 | [diff] [blame] | 212 | @@ -360,13 +477,14 @@ gst_ssa_parse_chain (GstPad * sinkpad, GstObject * parent, GstBuffer * buf) |
Patrick Williams | c124f4f | 2015-09-15 14:41:29 -0500 | [diff] [blame] | 213 | /* make double-sure it's 0-terminated and all */ |
| 214 | gst_buffer_map (buf, &map, GST_MAP_READ); |
| 215 | txt = g_strndup ((gchar *) map.data, map.size); |
| 216 | + size = map.size; |
| 217 | gst_buffer_unmap (buf, &map); |
| 218 | |
| 219 | if (txt == NULL) |
| 220 | goto empty_text; |
| 221 | |
| 222 | ts = GST_BUFFER_TIMESTAMP (buf); |
| 223 | - ret = gst_ssa_parse_push_line (parse, txt, ts, GST_BUFFER_DURATION (buf)); |
| 224 | + ret = gst_ssa_parse_push_line (parse, txt, size, ts, GST_BUFFER_DURATION (buf)); |
| 225 | |
| 226 | if (ret != GST_FLOW_OK && GST_CLOCK_TIME_IS_VALID (ts)) { |
| 227 | GstSegment segment; |