Added markdown rendering and updated index link to README
16 files changed, 9691 insertions(+), 5 deletions(-) | |||
---|---|---|---|
M | Makefile | +8 | -2 |
A | entity.c | +2190 | -0 |
A | entity.h | +42 | -0 |
A | entity.o | +0 | -0 |
A | md4c-html.c | +573 | -0 |
A | md4c-html.h | +68 | -0 |
A | md4c-html.o | +0 | -0 |
A | md4c.c | +6348 | -0 |
A | md4c.h | +405 | -0 |
A | md4c.o | +0 | -0 |
M | stagit | +0 | -0 |
M | stagit-index | +0 | -0 |
M | stagit-index.c | +1 | -1 |
M | stagit-index.o | +0 | -0 |
M | stagit.c | +56 | -2 |
M | stagit.o | +0 | -0 |
1@@ -22,7 +22,10 @@ SRC = \
2 COMPATSRC = \
3 reallocarray.c\
4 strlcat.c\
5- strlcpy.c
6+ strlcpy.c\
7+ entity.c\
8+ md4c.c\
9+ md4c-html.c
10 BIN = \
11 stagit\
12 stagit-index
13@@ -37,7 +40,10 @@ HDR = compat.h
14 COMPATOBJ = \
15 reallocarray.o\
16 strlcat.o\
17- strlcpy.o
18+ strlcpy.o\
19+ entity.o\
20+ md4c.o\
21+ md4c-html.o
22
23 OBJ = ${SRC:.c=.o} ${COMPATOBJ}
24
A · entity.c
+2190, -0 1@@ -0,0 +1,2190 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2017 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#include "entity.h"
28+#include <string.h>
29+
30+
31+/* The table is generated from https://html.spec.whatwg.org/entities.json */
32+static const struct entity entity_table[] = {
33+ { "Æ", { 198, 0 } },
34+ { "&", { 38, 0 } },
35+ { "Á", { 193, 0 } },
36+ { "Ă", { 258, 0 } },
37+ { "Â", { 194, 0 } },
38+ { "А", { 1040, 0 } },
39+ { "𝔄", { 120068, 0 } },
40+ { "À", { 192, 0 } },
41+ { "Α", { 913, 0 } },
42+ { "Ā", { 256, 0 } },
43+ { "⩓", { 10835, 0 } },
44+ { "Ą", { 260, 0 } },
45+ { "𝔸", { 120120, 0 } },
46+ { "⁡", { 8289, 0 } },
47+ { "Å", { 197, 0 } },
48+ { "𝒜", { 119964, 0 } },
49+ { "≔", { 8788, 0 } },
50+ { "Ã", { 195, 0 } },
51+ { "Ä", { 196, 0 } },
52+ { "∖", { 8726, 0 } },
53+ { "⫧", { 10983, 0 } },
54+ { "⌆", { 8966, 0 } },
55+ { "Б", { 1041, 0 } },
56+ { "∵", { 8757, 0 } },
57+ { "ℬ", { 8492, 0 } },
58+ { "Β", { 914, 0 } },
59+ { "𝔅", { 120069, 0 } },
60+ { "𝔹", { 120121, 0 } },
61+ { "˘", { 728, 0 } },
62+ { "ℬ", { 8492, 0 } },
63+ { "≎", { 8782, 0 } },
64+ { "Ч", { 1063, 0 } },
65+ { "©", { 169, 0 } },
66+ { "Ć", { 262, 0 } },
67+ { "⋒", { 8914, 0 } },
68+ { "ⅅ", { 8517, 0 } },
69+ { "ℭ", { 8493, 0 } },
70+ { "Č", { 268, 0 } },
71+ { "Ç", { 199, 0 } },
72+ { "Ĉ", { 264, 0 } },
73+ { "∰", { 8752, 0 } },
74+ { "Ċ", { 266, 0 } },
75+ { "¸", { 184, 0 } },
76+ { "·", { 183, 0 } },
77+ { "ℭ", { 8493, 0 } },
78+ { "Χ", { 935, 0 } },
79+ { "⊙", { 8857, 0 } },
80+ { "⊖", { 8854, 0 } },
81+ { "⊕", { 8853, 0 } },
82+ { "⊗", { 8855, 0 } },
83+ { "∲", { 8754, 0 } },
84+ { "”", { 8221, 0 } },
85+ { "’", { 8217, 0 } },
86+ { "∷", { 8759, 0 } },
87+ { "⩴", { 10868, 0 } },
88+ { "≡", { 8801, 0 } },
89+ { "∯", { 8751, 0 } },
90+ { "∮", { 8750, 0 } },
91+ { "ℂ", { 8450, 0 } },
92+ { "∐", { 8720, 0 } },
93+ { "∳", { 8755, 0 } },
94+ { "⨯", { 10799, 0 } },
95+ { "𝒞", { 119966, 0 } },
96+ { "⋓", { 8915, 0 } },
97+ { "≍", { 8781, 0 } },
98+ { "ⅅ", { 8517, 0 } },
99+ { "⤑", { 10513, 0 } },
100+ { "Ђ", { 1026, 0 } },
101+ { "Ѕ", { 1029, 0 } },
102+ { "Џ", { 1039, 0 } },
103+ { "‡", { 8225, 0 } },
104+ { "↡", { 8609, 0 } },
105+ { "⫤", { 10980, 0 } },
106+ { "Ď", { 270, 0 } },
107+ { "Д", { 1044, 0 } },
108+ { "∇", { 8711, 0 } },
109+ { "Δ", { 916, 0 } },
110+ { "𝔇", { 120071, 0 } },
111+ { "´", { 180, 0 } },
112+ { "˙", { 729, 0 } },
113+ { "˝", { 733, 0 } },
114+ { "`", { 96, 0 } },
115+ { "˜", { 732, 0 } },
116+ { "⋄", { 8900, 0 } },
117+ { "ⅆ", { 8518, 0 } },
118+ { "𝔻", { 120123, 0 } },
119+ { "¨", { 168, 0 } },
120+ { "⃜", { 8412, 0 } },
121+ { "≐", { 8784, 0 } },
122+ { "∯", { 8751, 0 } },
123+ { "¨", { 168, 0 } },
124+ { "⇓", { 8659, 0 } },
125+ { "⇐", { 8656, 0 } },
126+ { "⇔", { 8660, 0 } },
127+ { "⫤", { 10980, 0 } },
128+ { "⟸", { 10232, 0 } },
129+ { "⟺", { 10234, 0 } },
130+ { "⟹", { 10233, 0 } },
131+ { "⇒", { 8658, 0 } },
132+ { "⊨", { 8872, 0 } },
133+ { "⇑", { 8657, 0 } },
134+ { "⇕", { 8661, 0 } },
135+ { "∥", { 8741, 0 } },
136+ { "↓", { 8595, 0 } },
137+ { "⤓", { 10515, 0 } },
138+ { "⇵", { 8693, 0 } },
139+ { "̑", { 785, 0 } },
140+ { "⥐", { 10576, 0 } },
141+ { "⥞", { 10590, 0 } },
142+ { "↽", { 8637, 0 } },
143+ { "⥖", { 10582, 0 } },
144+ { "⥟", { 10591, 0 } },
145+ { "⇁", { 8641, 0 } },
146+ { "⥗", { 10583, 0 } },
147+ { "⊤", { 8868, 0 } },
148+ { "↧", { 8615, 0 } },
149+ { "⇓", { 8659, 0 } },
150+ { "𝒟", { 119967, 0 } },
151+ { "Đ", { 272, 0 } },
152+ { "Ŋ", { 330, 0 } },
153+ { "Ð", { 208, 0 } },
154+ { "É", { 201, 0 } },
155+ { "Ě", { 282, 0 } },
156+ { "Ê", { 202, 0 } },
157+ { "Э", { 1069, 0 } },
158+ { "Ė", { 278, 0 } },
159+ { "𝔈", { 120072, 0 } },
160+ { "È", { 200, 0 } },
161+ { "∈", { 8712, 0 } },
162+ { "Ē", { 274, 0 } },
163+ { "◻", { 9723, 0 } },
164+ { "▫", { 9643, 0 } },
165+ { "Ę", { 280, 0 } },
166+ { "𝔼", { 120124, 0 } },
167+ { "Ε", { 917, 0 } },
168+ { "⩵", { 10869, 0 } },
169+ { "≂", { 8770, 0 } },
170+ { "⇌", { 8652, 0 } },
171+ { "ℰ", { 8496, 0 } },
172+ { "⩳", { 10867, 0 } },
173+ { "Η", { 919, 0 } },
174+ { "Ë", { 203, 0 } },
175+ { "∃", { 8707, 0 } },
176+ { "ⅇ", { 8519, 0 } },
177+ { "Ф", { 1060, 0 } },
178+ { "𝔉", { 120073, 0 } },
179+ { "◼", { 9724, 0 } },
180+ { "▪", { 9642, 0 } },
181+ { "𝔽", { 120125, 0 } },
182+ { "∀", { 8704, 0 } },
183+ { "ℱ", { 8497, 0 } },
184+ { "ℱ", { 8497, 0 } },
185+ { "Ѓ", { 1027, 0 } },
186+ { ">", { 62, 0 } },
187+ { "Γ", { 915, 0 } },
188+ { "Ϝ", { 988, 0 } },
189+ { "Ğ", { 286, 0 } },
190+ { "Ģ", { 290, 0 } },
191+ { "Ĝ", { 284, 0 } },
192+ { "Г", { 1043, 0 } },
193+ { "Ġ", { 288, 0 } },
194+ { "𝔊", { 120074, 0 } },
195+ { "⋙", { 8921, 0 } },
196+ { "𝔾", { 120126, 0 } },
197+ { "≥", { 8805, 0 } },
198+ { "⋛", { 8923, 0 } },
199+ { "≧", { 8807, 0 } },
200+ { "⪢", { 10914, 0 } },
201+ { "≷", { 8823, 0 } },
202+ { "⩾", { 10878, 0 } },
203+ { "≳", { 8819, 0 } },
204+ { "𝒢", { 119970, 0 } },
205+ { "≫", { 8811, 0 } },
206+ { "Ъ", { 1066, 0 } },
207+ { "ˇ", { 711, 0 } },
208+ { "^", { 94, 0 } },
209+ { "Ĥ", { 292, 0 } },
210+ { "ℌ", { 8460, 0 } },
211+ { "ℋ", { 8459, 0 } },
212+ { "ℍ", { 8461, 0 } },
213+ { "─", { 9472, 0 } },
214+ { "ℋ", { 8459, 0 } },
215+ { "Ħ", { 294, 0 } },
216+ { "≎", { 8782, 0 } },
217+ { "≏", { 8783, 0 } },
218+ { "Е", { 1045, 0 } },
219+ { "IJ", { 306, 0 } },
220+ { "Ё", { 1025, 0 } },
221+ { "Í", { 205, 0 } },
222+ { "Î", { 206, 0 } },
223+ { "И", { 1048, 0 } },
224+ { "İ", { 304, 0 } },
225+ { "ℑ", { 8465, 0 } },
226+ { "Ì", { 204, 0 } },
227+ { "ℑ", { 8465, 0 } },
228+ { "Ī", { 298, 0 } },
229+ { "ⅈ", { 8520, 0 } },
230+ { "⇒", { 8658, 0 } },
231+ { "∬", { 8748, 0 } },
232+ { "∫", { 8747, 0 } },
233+ { "⋂", { 8898, 0 } },
234+ { "⁣", { 8291, 0 } },
235+ { "⁢", { 8290, 0 } },
236+ { "Į", { 302, 0 } },
237+ { "𝕀", { 120128, 0 } },
238+ { "Ι", { 921, 0 } },
239+ { "ℐ", { 8464, 0 } },
240+ { "Ĩ", { 296, 0 } },
241+ { "І", { 1030, 0 } },
242+ { "Ï", { 207, 0 } },
243+ { "Ĵ", { 308, 0 } },
244+ { "Й", { 1049, 0 } },
245+ { "𝔍", { 120077, 0 } },
246+ { "𝕁", { 120129, 0 } },
247+ { "𝒥", { 119973, 0 } },
248+ { "Ј", { 1032, 0 } },
249+ { "Є", { 1028, 0 } },
250+ { "Х", { 1061, 0 } },
251+ { "Ќ", { 1036, 0 } },
252+ { "Κ", { 922, 0 } },
253+ { "Ķ", { 310, 0 } },
254+ { "К", { 1050, 0 } },
255+ { "𝔎", { 120078, 0 } },
256+ { "𝕂", { 120130, 0 } },
257+ { "𝒦", { 119974, 0 } },
258+ { "Љ", { 1033, 0 } },
259+ { "<", { 60, 0 } },
260+ { "Ĺ", { 313, 0 } },
261+ { "Λ", { 923, 0 } },
262+ { "⟪", { 10218, 0 } },
263+ { "ℒ", { 8466, 0 } },
264+ { "↞", { 8606, 0 } },
265+ { "Ľ", { 317, 0 } },
266+ { "Ļ", { 315, 0 } },
267+ { "Л", { 1051, 0 } },
268+ { "⟨", { 10216, 0 } },
269+ { "←", { 8592, 0 } },
270+ { "⇤", { 8676, 0 } },
271+ { "⇆", { 8646, 0 } },
272+ { "⌈", { 8968, 0 } },
273+ { "⟦", { 10214, 0 } },
274+ { "⥡", { 10593, 0 } },
275+ { "⇃", { 8643, 0 } },
276+ { "⥙", { 10585, 0 } },
277+ { "⌊", { 8970, 0 } },
278+ { "↔", { 8596, 0 } },
279+ { "⥎", { 10574, 0 } },
280+ { "⊣", { 8867, 0 } },
281+ { "↤", { 8612, 0 } },
282+ { "⥚", { 10586, 0 } },
283+ { "⊲", { 8882, 0 } },
284+ { "⧏", { 10703, 0 } },
285+ { "⊴", { 8884, 0 } },
286+ { "⥑", { 10577, 0 } },
287+ { "⥠", { 10592, 0 } },
288+ { "↿", { 8639, 0 } },
289+ { "⥘", { 10584, 0 } },
290+ { "↼", { 8636, 0 } },
291+ { "⥒", { 10578, 0 } },
292+ { "⇐", { 8656, 0 } },
293+ { "⇔", { 8660, 0 } },
294+ { "⋚", { 8922, 0 } },
295+ { "≦", { 8806, 0 } },
296+ { "≶", { 8822, 0 } },
297+ { "⪡", { 10913, 0 } },
298+ { "⩽", { 10877, 0 } },
299+ { "≲", { 8818, 0 } },
300+ { "𝔏", { 120079, 0 } },
301+ { "⋘", { 8920, 0 } },
302+ { "⇚", { 8666, 0 } },
303+ { "Ŀ", { 319, 0 } },
304+ { "⟵", { 10229, 0 } },
305+ { "⟷", { 10231, 0 } },
306+ { "⟶", { 10230, 0 } },
307+ { "⟸", { 10232, 0 } },
308+ { "⟺", { 10234, 0 } },
309+ { "⟹", { 10233, 0 } },
310+ { "𝕃", { 120131, 0 } },
311+ { "↙", { 8601, 0 } },
312+ { "↘", { 8600, 0 } },
313+ { "ℒ", { 8466, 0 } },
314+ { "↰", { 8624, 0 } },
315+ { "Ł", { 321, 0 } },
316+ { "≪", { 8810, 0 } },
317+ { "⤅", { 10501, 0 } },
318+ { "М", { 1052, 0 } },
319+ { " ", { 8287, 0 } },
320+ { "ℳ", { 8499, 0 } },
321+ { "𝔐", { 120080, 0 } },
322+ { "∓", { 8723, 0 } },
323+ { "𝕄", { 120132, 0 } },
324+ { "ℳ", { 8499, 0 } },
325+ { "Μ", { 924, 0 } },
326+ { "Њ", { 1034, 0 } },
327+ { "Ń", { 323, 0 } },
328+ { "Ň", { 327, 0 } },
329+ { "Ņ", { 325, 0 } },
330+ { "Н", { 1053, 0 } },
331+ { "​", { 8203, 0 } },
332+ { "​", { 8203, 0 } },
333+ { "​", { 8203, 0 } },
334+ { "​", { 8203, 0 } },
335+ { "≫", { 8811, 0 } },
336+ { "≪", { 8810, 0 } },
337+ { "
", { 10, 0 } },
338+ { "𝔑", { 120081, 0 } },
339+ { "⁠", { 8288, 0 } },
340+ { " ", { 160, 0 } },
341+ { "ℕ", { 8469, 0 } },
342+ { "⫬", { 10988, 0 } },
343+ { "≢", { 8802, 0 } },
344+ { "≭", { 8813, 0 } },
345+ { "∦", { 8742, 0 } },
346+ { "∉", { 8713, 0 } },
347+ { "≠", { 8800, 0 } },
348+ { "≂̸", { 8770, 824 } },
349+ { "∄", { 8708, 0 } },
350+ { "≯", { 8815, 0 } },
351+ { "≱", { 8817, 0 } },
352+ { "≧̸", { 8807, 824 } },
353+ { "≫̸", { 8811, 824 } },
354+ { "≹", { 8825, 0 } },
355+ { "⩾̸", { 10878, 824 } },
356+ { "≵", { 8821, 0 } },
357+ { "≎̸", { 8782, 824 } },
358+ { "≏̸", { 8783, 824 } },
359+ { "⋪", { 8938, 0 } },
360+ { "⧏̸", { 10703, 824 } },
361+ { "⋬", { 8940, 0 } },
362+ { "≮", { 8814, 0 } },
363+ { "≰", { 8816, 0 } },
364+ { "≸", { 8824, 0 } },
365+ { "≪̸", { 8810, 824 } },
366+ { "⩽̸", { 10877, 824 } },
367+ { "≴", { 8820, 0 } },
368+ { "⪢̸", { 10914, 824 } },
369+ { "⪡̸", { 10913, 824 } },
370+ { "⊀", { 8832, 0 } },
371+ { "⪯̸", { 10927, 824 } },
372+ { "⋠", { 8928, 0 } },
373+ { "∌", { 8716, 0 } },
374+ { "⋫", { 8939, 0 } },
375+ { "⧐̸", { 10704, 824 } },
376+ { "⋭", { 8941, 0 } },
377+ { "⊏̸", { 8847, 824 } },
378+ { "⋢", { 8930, 0 } },
379+ { "⊐̸", { 8848, 824 } },
380+ { "⋣", { 8931, 0 } },
381+ { "⊂⃒", { 8834, 8402 } },
382+ { "⊈", { 8840, 0 } },
383+ { "⊁", { 8833, 0 } },
384+ { "⪰̸", { 10928, 824 } },
385+ { "⋡", { 8929, 0 } },
386+ { "≿̸", { 8831, 824 } },
387+ { "⊃⃒", { 8835, 8402 } },
388+ { "⊉", { 8841, 0 } },
389+ { "≁", { 8769, 0 } },
390+ { "≄", { 8772, 0 } },
391+ { "≇", { 8775, 0 } },
392+ { "≉", { 8777, 0 } },
393+ { "∤", { 8740, 0 } },
394+ { "𝒩", { 119977, 0 } },
395+ { "Ñ", { 209, 0 } },
396+ { "Ν", { 925, 0 } },
397+ { "Œ", { 338, 0 } },
398+ { "Ó", { 211, 0 } },
399+ { "Ô", { 212, 0 } },
400+ { "О", { 1054, 0 } },
401+ { "Ő", { 336, 0 } },
402+ { "𝔒", { 120082, 0 } },
403+ { "Ò", { 210, 0 } },
404+ { "Ō", { 332, 0 } },
405+ { "Ω", { 937, 0 } },
406+ { "Ο", { 927, 0 } },
407+ { "𝕆", { 120134, 0 } },
408+ { "“", { 8220, 0 } },
409+ { "‘", { 8216, 0 } },
410+ { "⩔", { 10836, 0 } },
411+ { "𝒪", { 119978, 0 } },
412+ { "Ø", { 216, 0 } },
413+ { "Õ", { 213, 0 } },
414+ { "⨷", { 10807, 0 } },
415+ { "Ö", { 214, 0 } },
416+ { "‾", { 8254, 0 } },
417+ { "⏞", { 9182, 0 } },
418+ { "⎴", { 9140, 0 } },
419+ { "⏜", { 9180, 0 } },
420+ { "∂", { 8706, 0 } },
421+ { "П", { 1055, 0 } },
422+ { "𝔓", { 120083, 0 } },
423+ { "Φ", { 934, 0 } },
424+ { "Π", { 928, 0 } },
425+ { "±", { 177, 0 } },
426+ { "ℌ", { 8460, 0 } },
427+ { "ℙ", { 8473, 0 } },
428+ { "⪻", { 10939, 0 } },
429+ { "≺", { 8826, 0 } },
430+ { "⪯", { 10927, 0 } },
431+ { "≼", { 8828, 0 } },
432+ { "≾", { 8830, 0 } },
433+ { "″", { 8243, 0 } },
434+ { "∏", { 8719, 0 } },
435+ { "∷", { 8759, 0 } },
436+ { "∝", { 8733, 0 } },
437+ { "𝒫", { 119979, 0 } },
438+ { "Ψ", { 936, 0 } },
439+ { """, { 34, 0 } },
440+ { "𝔔", { 120084, 0 } },
441+ { "ℚ", { 8474, 0 } },
442+ { "𝒬", { 119980, 0 } },
443+ { "⤐", { 10512, 0 } },
444+ { "®", { 174, 0 } },
445+ { "Ŕ", { 340, 0 } },
446+ { "⟫", { 10219, 0 } },
447+ { "↠", { 8608, 0 } },
448+ { "⤖", { 10518, 0 } },
449+ { "Ř", { 344, 0 } },
450+ { "Ŗ", { 342, 0 } },
451+ { "Р", { 1056, 0 } },
452+ { "ℜ", { 8476, 0 } },
453+ { "∋", { 8715, 0 } },
454+ { "⇋", { 8651, 0 } },
455+ { "⥯", { 10607, 0 } },
456+ { "ℜ", { 8476, 0 } },
457+ { "Ρ", { 929, 0 } },
458+ { "⟩", { 10217, 0 } },
459+ { "→", { 8594, 0 } },
460+ { "⇥", { 8677, 0 } },
461+ { "⇄", { 8644, 0 } },
462+ { "⌉", { 8969, 0 } },
463+ { "⟧", { 10215, 0 } },
464+ { "⥝", { 10589, 0 } },
465+ { "⇂", { 8642, 0 } },
466+ { "⥕", { 10581, 0 } },
467+ { "⌋", { 8971, 0 } },
468+ { "⊢", { 8866, 0 } },
469+ { "↦", { 8614, 0 } },
470+ { "⥛", { 10587, 0 } },
471+ { "⊳", { 8883, 0 } },
472+ { "⧐", { 10704, 0 } },
473+ { "⊵", { 8885, 0 } },
474+ { "⥏", { 10575, 0 } },
475+ { "⥜", { 10588, 0 } },
476+ { "↾", { 8638, 0 } },
477+ { "⥔", { 10580, 0 } },
478+ { "⇀", { 8640, 0 } },
479+ { "⥓", { 10579, 0 } },
480+ { "⇒", { 8658, 0 } },
481+ { "ℝ", { 8477, 0 } },
482+ { "⥰", { 10608, 0 } },
483+ { "⇛", { 8667, 0 } },
484+ { "ℛ", { 8475, 0 } },
485+ { "↱", { 8625, 0 } },
486+ { "⧴", { 10740, 0 } },
487+ { "Щ", { 1065, 0 } },
488+ { "Ш", { 1064, 0 } },
489+ { "Ь", { 1068, 0 } },
490+ { "Ś", { 346, 0 } },
491+ { "⪼", { 10940, 0 } },
492+ { "Š", { 352, 0 } },
493+ { "Ş", { 350, 0 } },
494+ { "Ŝ", { 348, 0 } },
495+ { "С", { 1057, 0 } },
496+ { "𝔖", { 120086, 0 } },
497+ { "↓", { 8595, 0 } },
498+ { "←", { 8592, 0 } },
499+ { "→", { 8594, 0 } },
500+ { "↑", { 8593, 0 } },
501+ { "Σ", { 931, 0 } },
502+ { "∘", { 8728, 0 } },
503+ { "𝕊", { 120138, 0 } },
504+ { "√", { 8730, 0 } },
505+ { "□", { 9633, 0 } },
506+ { "⊓", { 8851, 0 } },
507+ { "⊏", { 8847, 0 } },
508+ { "⊑", { 8849, 0 } },
509+ { "⊐", { 8848, 0 } },
510+ { "⊒", { 8850, 0 } },
511+ { "⊔", { 8852, 0 } },
512+ { "𝒮", { 119982, 0 } },
513+ { "⋆", { 8902, 0 } },
514+ { "⋐", { 8912, 0 } },
515+ { "⋐", { 8912, 0 } },
516+ { "⊆", { 8838, 0 } },
517+ { "≻", { 8827, 0 } },
518+ { "⪰", { 10928, 0 } },
519+ { "≽", { 8829, 0 } },
520+ { "≿", { 8831, 0 } },
521+ { "∋", { 8715, 0 } },
522+ { "∑", { 8721, 0 } },
523+ { "⋑", { 8913, 0 } },
524+ { "⊃", { 8835, 0 } },
525+ { "⊇", { 8839, 0 } },
526+ { "⋑", { 8913, 0 } },
527+ { "Þ", { 222, 0 } },
528+ { "™", { 8482, 0 } },
529+ { "Ћ", { 1035, 0 } },
530+ { "Ц", { 1062, 0 } },
531+ { "	", { 9, 0 } },
532+ { "Τ", { 932, 0 } },
533+ { "Ť", { 356, 0 } },
534+ { "Ţ", { 354, 0 } },
535+ { "Т", { 1058, 0 } },
536+ { "𝔗", { 120087, 0 } },
537+ { "∴", { 8756, 0 } },
538+ { "Θ", { 920, 0 } },
539+ { "  ", { 8287, 8202 } },
540+ { " ", { 8201, 0 } },
541+ { "∼", { 8764, 0 } },
542+ { "≃", { 8771, 0 } },
543+ { "≅", { 8773, 0 } },
544+ { "≈", { 8776, 0 } },
545+ { "𝕋", { 120139, 0 } },
546+ { "⃛", { 8411, 0 } },
547+ { "𝒯", { 119983, 0 } },
548+ { "Ŧ", { 358, 0 } },
549+ { "Ú", { 218, 0 } },
550+ { "↟", { 8607, 0 } },
551+ { "⥉", { 10569, 0 } },
552+ { "Ў", { 1038, 0 } },
553+ { "Ŭ", { 364, 0 } },
554+ { "Û", { 219, 0 } },
555+ { "У", { 1059, 0 } },
556+ { "Ű", { 368, 0 } },
557+ { "𝔘", { 120088, 0 } },
558+ { "Ù", { 217, 0 } },
559+ { "Ū", { 362, 0 } },
560+ { "_", { 95, 0 } },
561+ { "⏟", { 9183, 0 } },
562+ { "⎵", { 9141, 0 } },
563+ { "⏝", { 9181, 0 } },
564+ { "⋃", { 8899, 0 } },
565+ { "⊎", { 8846, 0 } },
566+ { "Ų", { 370, 0 } },
567+ { "𝕌", { 120140, 0 } },
568+ { "↑", { 8593, 0 } },
569+ { "⤒", { 10514, 0 } },
570+ { "⇅", { 8645, 0 } },
571+ { "↕", { 8597, 0 } },
572+ { "⥮", { 10606, 0 } },
573+ { "⊥", { 8869, 0 } },
574+ { "↥", { 8613, 0 } },
575+ { "⇑", { 8657, 0 } },
576+ { "⇕", { 8661, 0 } },
577+ { "↖", { 8598, 0 } },
578+ { "↗", { 8599, 0 } },
579+ { "ϒ", { 978, 0 } },
580+ { "Υ", { 933, 0 } },
581+ { "Ů", { 366, 0 } },
582+ { "𝒰", { 119984, 0 } },
583+ { "Ũ", { 360, 0 } },
584+ { "Ü", { 220, 0 } },
585+ { "⊫", { 8875, 0 } },
586+ { "⫫", { 10987, 0 } },
587+ { "В", { 1042, 0 } },
588+ { "⊩", { 8873, 0 } },
589+ { "⫦", { 10982, 0 } },
590+ { "⋁", { 8897, 0 } },
591+ { "‖", { 8214, 0 } },
592+ { "‖", { 8214, 0 } },
593+ { "∣", { 8739, 0 } },
594+ { "|", { 124, 0 } },
595+ { "❘", { 10072, 0 } },
596+ { "≀", { 8768, 0 } },
597+ { " ", { 8202, 0 } },
598+ { "𝔙", { 120089, 0 } },
599+ { "𝕍", { 120141, 0 } },
600+ { "𝒱", { 119985, 0 } },
601+ { "⊪", { 8874, 0 } },
602+ { "Ŵ", { 372, 0 } },
603+ { "⋀", { 8896, 0 } },
604+ { "𝔚", { 120090, 0 } },
605+ { "𝕎", { 120142, 0 } },
606+ { "𝒲", { 119986, 0 } },
607+ { "𝔛", { 120091, 0 } },
608+ { "Ξ", { 926, 0 } },
609+ { "𝕏", { 120143, 0 } },
610+ { "𝒳", { 119987, 0 } },
611+ { "Я", { 1071, 0 } },
612+ { "Ї", { 1031, 0 } },
613+ { "Ю", { 1070, 0 } },
614+ { "Ý", { 221, 0 } },
615+ { "Ŷ", { 374, 0 } },
616+ { "Ы", { 1067, 0 } },
617+ { "𝔜", { 120092, 0 } },
618+ { "𝕐", { 120144, 0 } },
619+ { "𝒴", { 119988, 0 } },
620+ { "Ÿ", { 376, 0 } },
621+ { "Ж", { 1046, 0 } },
622+ { "Ź", { 377, 0 } },
623+ { "Ž", { 381, 0 } },
624+ { "З", { 1047, 0 } },
625+ { "Ż", { 379, 0 } },
626+ { "​", { 8203, 0 } },
627+ { "Ζ", { 918, 0 } },
628+ { "ℨ", { 8488, 0 } },
629+ { "ℤ", { 8484, 0 } },
630+ { "𝒵", { 119989, 0 } },
631+ { "á", { 225, 0 } },
632+ { "ă", { 259, 0 } },
633+ { "∾", { 8766, 0 } },
634+ { "∾̳", { 8766, 819 } },
635+ { "∿", { 8767, 0 } },
636+ { "â", { 226, 0 } },
637+ { "´", { 180, 0 } },
638+ { "а", { 1072, 0 } },
639+ { "æ", { 230, 0 } },
640+ { "⁡", { 8289, 0 } },
641+ { "𝔞", { 120094, 0 } },
642+ { "à", { 224, 0 } },
643+ { "ℵ", { 8501, 0 } },
644+ { "ℵ", { 8501, 0 } },
645+ { "α", { 945, 0 } },
646+ { "ā", { 257, 0 } },
647+ { "⨿", { 10815, 0 } },
648+ { "&", { 38, 0 } },
649+ { "∧", { 8743, 0 } },
650+ { "⩕", { 10837, 0 } },
651+ { "⩜", { 10844, 0 } },
652+ { "⩘", { 10840, 0 } },
653+ { "⩚", { 10842, 0 } },
654+ { "∠", { 8736, 0 } },
655+ { "⦤", { 10660, 0 } },
656+ { "∠", { 8736, 0 } },
657+ { "∡", { 8737, 0 } },
658+ { "⦨", { 10664, 0 } },
659+ { "⦩", { 10665, 0 } },
660+ { "⦪", { 10666, 0 } },
661+ { "⦫", { 10667, 0 } },
662+ { "⦬", { 10668, 0 } },
663+ { "⦭", { 10669, 0 } },
664+ { "⦮", { 10670, 0 } },
665+ { "⦯", { 10671, 0 } },
666+ { "∟", { 8735, 0 } },
667+ { "⊾", { 8894, 0 } },
668+ { "⦝", { 10653, 0 } },
669+ { "∢", { 8738, 0 } },
670+ { "Å", { 197, 0 } },
671+ { "⍼", { 9084, 0 } },
672+ { "ą", { 261, 0 } },
673+ { "𝕒", { 120146, 0 } },
674+ { "≈", { 8776, 0 } },
675+ { "⩰", { 10864, 0 } },
676+ { "⩯", { 10863, 0 } },
677+ { "≊", { 8778, 0 } },
678+ { "≋", { 8779, 0 } },
679+ { "'", { 39, 0 } },
680+ { "≈", { 8776, 0 } },
681+ { "≊", { 8778, 0 } },
682+ { "å", { 229, 0 } },
683+ { "𝒶", { 119990, 0 } },
684+ { "*", { 42, 0 } },
685+ { "≈", { 8776, 0 } },
686+ { "≍", { 8781, 0 } },
687+ { "ã", { 227, 0 } },
688+ { "ä", { 228, 0 } },
689+ { "∳", { 8755, 0 } },
690+ { "⨑", { 10769, 0 } },
691+ { "⫭", { 10989, 0 } },
692+ { "≌", { 8780, 0 } },
693+ { "϶", { 1014, 0 } },
694+ { "‵", { 8245, 0 } },
695+ { "∽", { 8765, 0 } },
696+ { "⋍", { 8909, 0 } },
697+ { "⊽", { 8893, 0 } },
698+ { "⌅", { 8965, 0 } },
699+ { "⌅", { 8965, 0 } },
700+ { "⎵", { 9141, 0 } },
701+ { "⎶", { 9142, 0 } },
702+ { "≌", { 8780, 0 } },
703+ { "б", { 1073, 0 } },
704+ { "„", { 8222, 0 } },
705+ { "∵", { 8757, 0 } },
706+ { "∵", { 8757, 0 } },
707+ { "⦰", { 10672, 0 } },
708+ { "϶", { 1014, 0 } },
709+ { "ℬ", { 8492, 0 } },
710+ { "β", { 946, 0 } },
711+ { "ℶ", { 8502, 0 } },
712+ { "≬", { 8812, 0 } },
713+ { "𝔟", { 120095, 0 } },
714+ { "⋂", { 8898, 0 } },
715+ { "◯", { 9711, 0 } },
716+ { "⋃", { 8899, 0 } },
717+ { "⨀", { 10752, 0 } },
718+ { "⨁", { 10753, 0 } },
719+ { "⨂", { 10754, 0 } },
720+ { "⨆", { 10758, 0 } },
721+ { "★", { 9733, 0 } },
722+ { "▽", { 9661, 0 } },
723+ { "△", { 9651, 0 } },
724+ { "⨄", { 10756, 0 } },
725+ { "⋁", { 8897, 0 } },
726+ { "⋀", { 8896, 0 } },
727+ { "⤍", { 10509, 0 } },
728+ { "⧫", { 10731, 0 } },
729+ { "▪", { 9642, 0 } },
730+ { "▴", { 9652, 0 } },
731+ { "▾", { 9662, 0 } },
732+ { "◂", { 9666, 0 } },
733+ { "▸", { 9656, 0 } },
734+ { "␣", { 9251, 0 } },
735+ { "▒", { 9618, 0 } },
736+ { "░", { 9617, 0 } },
737+ { "▓", { 9619, 0 } },
738+ { "█", { 9608, 0 } },
739+ { "=⃥", { 61, 8421 } },
740+ { "≡⃥", { 8801, 8421 } },
741+ { "⌐", { 8976, 0 } },
742+ { "𝕓", { 120147, 0 } },
743+ { "⊥", { 8869, 0 } },
744+ { "⊥", { 8869, 0 } },
745+ { "⋈", { 8904, 0 } },
746+ { "╗", { 9559, 0 } },
747+ { "╔", { 9556, 0 } },
748+ { "╖", { 9558, 0 } },
749+ { "╓", { 9555, 0 } },
750+ { "═", { 9552, 0 } },
751+ { "╦", { 9574, 0 } },
752+ { "╩", { 9577, 0 } },
753+ { "╤", { 9572, 0 } },
754+ { "╧", { 9575, 0 } },
755+ { "╝", { 9565, 0 } },
756+ { "╚", { 9562, 0 } },
757+ { "╜", { 9564, 0 } },
758+ { "╙", { 9561, 0 } },
759+ { "║", { 9553, 0 } },
760+ { "╬", { 9580, 0 } },
761+ { "╣", { 9571, 0 } },
762+ { "╠", { 9568, 0 } },
763+ { "╫", { 9579, 0 } },
764+ { "╢", { 9570, 0 } },
765+ { "╟", { 9567, 0 } },
766+ { "⧉", { 10697, 0 } },
767+ { "╕", { 9557, 0 } },
768+ { "╒", { 9554, 0 } },
769+ { "┐", { 9488, 0 } },
770+ { "┌", { 9484, 0 } },
771+ { "─", { 9472, 0 } },
772+ { "╥", { 9573, 0 } },
773+ { "╨", { 9576, 0 } },
774+ { "┬", { 9516, 0 } },
775+ { "┴", { 9524, 0 } },
776+ { "⊟", { 8863, 0 } },
777+ { "⊞", { 8862, 0 } },
778+ { "⊠", { 8864, 0 } },
779+ { "╛", { 9563, 0 } },
780+ { "╘", { 9560, 0 } },
781+ { "┘", { 9496, 0 } },
782+ { "└", { 9492, 0 } },
783+ { "│", { 9474, 0 } },
784+ { "╪", { 9578, 0 } },
785+ { "╡", { 9569, 0 } },
786+ { "╞", { 9566, 0 } },
787+ { "┼", { 9532, 0 } },
788+ { "┤", { 9508, 0 } },
789+ { "├", { 9500, 0 } },
790+ { "‵", { 8245, 0 } },
791+ { "˘", { 728, 0 } },
792+ { "¦", { 166, 0 } },
793+ { "𝒷", { 119991, 0 } },
794+ { "⁏", { 8271, 0 } },
795+ { "∽", { 8765, 0 } },
796+ { "⋍", { 8909, 0 } },
797+ { "\", { 92, 0 } },
798+ { "⧅", { 10693, 0 } },
799+ { "⟈", { 10184, 0 } },
800+ { "•", { 8226, 0 } },
801+ { "•", { 8226, 0 } },
802+ { "≎", { 8782, 0 } },
803+ { "⪮", { 10926, 0 } },
804+ { "≏", { 8783, 0 } },
805+ { "≏", { 8783, 0 } },
806+ { "ć", { 263, 0 } },
807+ { "∩", { 8745, 0 } },
808+ { "⩄", { 10820, 0 } },
809+ { "⩉", { 10825, 0 } },
810+ { "⩋", { 10827, 0 } },
811+ { "⩇", { 10823, 0 } },
812+ { "⩀", { 10816, 0 } },
813+ { "∩︀", { 8745, 65024 } },
814+ { "⁁", { 8257, 0 } },
815+ { "ˇ", { 711, 0 } },
816+ { "⩍", { 10829, 0 } },
817+ { "č", { 269, 0 } },
818+ { "ç", { 231, 0 } },
819+ { "ĉ", { 265, 0 } },
820+ { "⩌", { 10828, 0 } },
821+ { "⩐", { 10832, 0 } },
822+ { "ċ", { 267, 0 } },
823+ { "¸", { 184, 0 } },
824+ { "⦲", { 10674, 0 } },
825+ { "¢", { 162, 0 } },
826+ { "·", { 183, 0 } },
827+ { "𝔠", { 120096, 0 } },
828+ { "ч", { 1095, 0 } },
829+ { "✓", { 10003, 0 } },
830+ { "✓", { 10003, 0 } },
831+ { "χ", { 967, 0 } },
832+ { "○", { 9675, 0 } },
833+ { "⧃", { 10691, 0 } },
834+ { "ˆ", { 710, 0 } },
835+ { "≗", { 8791, 0 } },
836+ { "↺", { 8634, 0 } },
837+ { "↻", { 8635, 0 } },
838+ { "®", { 174, 0 } },
839+ { "Ⓢ", { 9416, 0 } },
840+ { "⊛", { 8859, 0 } },
841+ { "⊚", { 8858, 0 } },
842+ { "⊝", { 8861, 0 } },
843+ { "≗", { 8791, 0 } },
844+ { "⨐", { 10768, 0 } },
845+ { "⫯", { 10991, 0 } },
846+ { "⧂", { 10690, 0 } },
847+ { "♣", { 9827, 0 } },
848+ { "♣", { 9827, 0 } },
849+ { ":", { 58, 0 } },
850+ { "≔", { 8788, 0 } },
851+ { "≔", { 8788, 0 } },
852+ { ",", { 44, 0 } },
853+ { "@", { 64, 0 } },
854+ { "∁", { 8705, 0 } },
855+ { "∘", { 8728, 0 } },
856+ { "∁", { 8705, 0 } },
857+ { "ℂ", { 8450, 0 } },
858+ { "≅", { 8773, 0 } },
859+ { "⩭", { 10861, 0 } },
860+ { "∮", { 8750, 0 } },
861+ { "𝕔", { 120148, 0 } },
862+ { "∐", { 8720, 0 } },
863+ { "©", { 169, 0 } },
864+ { "℗", { 8471, 0 } },
865+ { "↵", { 8629, 0 } },
866+ { "✗", { 10007, 0 } },
867+ { "𝒸", { 119992, 0 } },
868+ { "⫏", { 10959, 0 } },
869+ { "⫑", { 10961, 0 } },
870+ { "⫐", { 10960, 0 } },
871+ { "⫒", { 10962, 0 } },
872+ { "⋯", { 8943, 0 } },
873+ { "⤸", { 10552, 0 } },
874+ { "⤵", { 10549, 0 } },
875+ { "⋞", { 8926, 0 } },
876+ { "⋟", { 8927, 0 } },
877+ { "↶", { 8630, 0 } },
878+ { "⤽", { 10557, 0 } },
879+ { "∪", { 8746, 0 } },
880+ { "⩈", { 10824, 0 } },
881+ { "⩆", { 10822, 0 } },
882+ { "⩊", { 10826, 0 } },
883+ { "⊍", { 8845, 0 } },
884+ { "⩅", { 10821, 0 } },
885+ { "∪︀", { 8746, 65024 } },
886+ { "↷", { 8631, 0 } },
887+ { "⤼", { 10556, 0 } },
888+ { "⋞", { 8926, 0 } },
889+ { "⋟", { 8927, 0 } },
890+ { "⋎", { 8910, 0 } },
891+ { "⋏", { 8911, 0 } },
892+ { "¤", { 164, 0 } },
893+ { "↶", { 8630, 0 } },
894+ { "↷", { 8631, 0 } },
895+ { "⋎", { 8910, 0 } },
896+ { "⋏", { 8911, 0 } },
897+ { "∲", { 8754, 0 } },
898+ { "∱", { 8753, 0 } },
899+ { "⌭", { 9005, 0 } },
900+ { "⇓", { 8659, 0 } },
901+ { "⥥", { 10597, 0 } },
902+ { "†", { 8224, 0 } },
903+ { "ℸ", { 8504, 0 } },
904+ { "↓", { 8595, 0 } },
905+ { "‐", { 8208, 0 } },
906+ { "⊣", { 8867, 0 } },
907+ { "⤏", { 10511, 0 } },
908+ { "˝", { 733, 0 } },
909+ { "ď", { 271, 0 } },
910+ { "д", { 1076, 0 } },
911+ { "ⅆ", { 8518, 0 } },
912+ { "‡", { 8225, 0 } },
913+ { "⇊", { 8650, 0 } },
914+ { "⩷", { 10871, 0 } },
915+ { "°", { 176, 0 } },
916+ { "δ", { 948, 0 } },
917+ { "⦱", { 10673, 0 } },
918+ { "⥿", { 10623, 0 } },
919+ { "𝔡", { 120097, 0 } },
920+ { "⇃", { 8643, 0 } },
921+ { "⇂", { 8642, 0 } },
922+ { "⋄", { 8900, 0 } },
923+ { "⋄", { 8900, 0 } },
924+ { "♦", { 9830, 0 } },
925+ { "♦", { 9830, 0 } },
926+ { "¨", { 168, 0 } },
927+ { "ϝ", { 989, 0 } },
928+ { "⋲", { 8946, 0 } },
929+ { "÷", { 247, 0 } },
930+ { "÷", { 247, 0 } },
931+ { "⋇", { 8903, 0 } },
932+ { "⋇", { 8903, 0 } },
933+ { "ђ", { 1106, 0 } },
934+ { "⌞", { 8990, 0 } },
935+ { "⌍", { 8973, 0 } },
936+ { "$", { 36, 0 } },
937+ { "𝕕", { 120149, 0 } },
938+ { "˙", { 729, 0 } },
939+ { "≐", { 8784, 0 } },
940+ { "≑", { 8785, 0 } },
941+ { "∸", { 8760, 0 } },
942+ { "∔", { 8724, 0 } },
943+ { "⊡", { 8865, 0 } },
944+ { "⌆", { 8966, 0 } },
945+ { "↓", { 8595, 0 } },
946+ { "⇊", { 8650, 0 } },
947+ { "⇃", { 8643, 0 } },
948+ { "⇂", { 8642, 0 } },
949+ { "⤐", { 10512, 0 } },
950+ { "⌟", { 8991, 0 } },
951+ { "⌌", { 8972, 0 } },
952+ { "𝒹", { 119993, 0 } },
953+ { "ѕ", { 1109, 0 } },
954+ { "⧶", { 10742, 0 } },
955+ { "đ", { 273, 0 } },
956+ { "⋱", { 8945, 0 } },
957+ { "▿", { 9663, 0 } },
958+ { "▾", { 9662, 0 } },
959+ { "⇵", { 8693, 0 } },
960+ { "⥯", { 10607, 0 } },
961+ { "⦦", { 10662, 0 } },
962+ { "џ", { 1119, 0 } },
963+ { "⟿", { 10239, 0 } },
964+ { "⩷", { 10871, 0 } },
965+ { "≑", { 8785, 0 } },
966+ { "é", { 233, 0 } },
967+ { "⩮", { 10862, 0 } },
968+ { "ě", { 283, 0 } },
969+ { "≖", { 8790, 0 } },
970+ { "ê", { 234, 0 } },
971+ { "≕", { 8789, 0 } },
972+ { "э", { 1101, 0 } },
973+ { "ė", { 279, 0 } },
974+ { "ⅇ", { 8519, 0 } },
975+ { "≒", { 8786, 0 } },
976+ { "𝔢", { 120098, 0 } },
977+ { "⪚", { 10906, 0 } },
978+ { "è", { 232, 0 } },
979+ { "⪖", { 10902, 0 } },
980+ { "⪘", { 10904, 0 } },
981+ { "⪙", { 10905, 0 } },
982+ { "⏧", { 9191, 0 } },
983+ { "ℓ", { 8467, 0 } },
984+ { "⪕", { 10901, 0 } },
985+ { "⪗", { 10903, 0 } },
986+ { "ē", { 275, 0 } },
987+ { "∅", { 8709, 0 } },
988+ { "∅", { 8709, 0 } },
989+ { "∅", { 8709, 0 } },
990+ { " ", { 8196, 0 } },
991+ { " ", { 8197, 0 } },
992+ { " ", { 8195, 0 } },
993+ { "ŋ", { 331, 0 } },
994+ { " ", { 8194, 0 } },
995+ { "ę", { 281, 0 } },
996+ { "𝕖", { 120150, 0 } },
997+ { "⋕", { 8917, 0 } },
998+ { "⧣", { 10723, 0 } },
999+ { "⩱", { 10865, 0 } },
1000+ { "ε", { 949, 0 } },
1001+ { "ε", { 949, 0 } },
1002+ { "ϵ", { 1013, 0 } },
1003+ { "≖", { 8790, 0 } },
1004+ { "≕", { 8789, 0 } },
1005+ { "≂", { 8770, 0 } },
1006+ { "⪖", { 10902, 0 } },
1007+ { "⪕", { 10901, 0 } },
1008+ { "=", { 61, 0 } },
1009+ { "≟", { 8799, 0 } },
1010+ { "≡", { 8801, 0 } },
1011+ { "⩸", { 10872, 0 } },
1012+ { "⧥", { 10725, 0 } },
1013+ { "≓", { 8787, 0 } },
1014+ { "⥱", { 10609, 0 } },
1015+ { "ℯ", { 8495, 0 } },
1016+ { "≐", { 8784, 0 } },
1017+ { "≂", { 8770, 0 } },
1018+ { "η", { 951, 0 } },
1019+ { "ð", { 240, 0 } },
1020+ { "ë", { 235, 0 } },
1021+ { "€", { 8364, 0 } },
1022+ { "!", { 33, 0 } },
1023+ { "∃", { 8707, 0 } },
1024+ { "ℰ", { 8496, 0 } },
1025+ { "ⅇ", { 8519, 0 } },
1026+ { "≒", { 8786, 0 } },
1027+ { "ф", { 1092, 0 } },
1028+ { "♀", { 9792, 0 } },
1029+ { "ffi", { 64259, 0 } },
1030+ { "ff", { 64256, 0 } },
1031+ { "ffl", { 64260, 0 } },
1032+ { "𝔣", { 120099, 0 } },
1033+ { "fi", { 64257, 0 } },
1034+ { "fj", { 102, 106 } },
1035+ { "♭", { 9837, 0 } },
1036+ { "fl", { 64258, 0 } },
1037+ { "▱", { 9649, 0 } },
1038+ { "ƒ", { 402, 0 } },
1039+ { "𝕗", { 120151, 0 } },
1040+ { "∀", { 8704, 0 } },
1041+ { "⋔", { 8916, 0 } },
1042+ { "⫙", { 10969, 0 } },
1043+ { "⨍", { 10765, 0 } },
1044+ { "½", { 189, 0 } },
1045+ { "½", { 189, 0 } },
1046+ { "⅓", { 8531, 0 } },
1047+ { "¼", { 188, 0 } },
1048+ { "¼", { 188, 0 } },
1049+ { "⅕", { 8533, 0 } },
1050+ { "⅙", { 8537, 0 } },
1051+ { "⅛", { 8539, 0 } },
1052+ { "⅔", { 8532, 0 } },
1053+ { "⅖", { 8534, 0 } },
1054+ { "¾", { 190, 0 } },
1055+ { "¾", { 190, 0 } },
1056+ { "⅗", { 8535, 0 } },
1057+ { "⅜", { 8540, 0 } },
1058+ { "⅘", { 8536, 0 } },
1059+ { "⅚", { 8538, 0 } },
1060+ { "⅝", { 8541, 0 } },
1061+ { "⅞", { 8542, 0 } },
1062+ { "⁄", { 8260, 0 } },
1063+ { "⌢", { 8994, 0 } },
1064+ { "𝒻", { 119995, 0 } },
1065+ { "≧", { 8807, 0 } },
1066+ { "⪌", { 10892, 0 } },
1067+ { "ǵ", { 501, 0 } },
1068+ { "γ", { 947, 0 } },
1069+ { "ϝ", { 989, 0 } },
1070+ { "⪆", { 10886, 0 } },
1071+ { "ğ", { 287, 0 } },
1072+ { "ĝ", { 285, 0 } },
1073+ { "г", { 1075, 0 } },
1074+ { "ġ", { 289, 0 } },
1075+ { "≥", { 8805, 0 } },
1076+ { "⋛", { 8923, 0 } },
1077+ { "≥", { 8805, 0 } },
1078+ { "≧", { 8807, 0 } },
1079+ { "⩾", { 10878, 0 } },
1080+ { "⩾", { 10878, 0 } },
1081+ { "⪩", { 10921, 0 } },
1082+ { "⪀", { 10880, 0 } },
1083+ { "⪂", { 10882, 0 } },
1084+ { "⪄", { 10884, 0 } },
1085+ { "⋛︀", { 8923, 65024 } },
1086+ { "⪔", { 10900, 0 } },
1087+ { "𝔤", { 120100, 0 } },
1088+ { "≫", { 8811, 0 } },
1089+ { "⋙", { 8921, 0 } },
1090+ { "ℷ", { 8503, 0 } },
1091+ { "ѓ", { 1107, 0 } },
1092+ { "≷", { 8823, 0 } },
1093+ { "⪒", { 10898, 0 } },
1094+ { "⪥", { 10917, 0 } },
1095+ { "⪤", { 10916, 0 } },
1096+ { "≩", { 8809, 0 } },
1097+ { "⪊", { 10890, 0 } },
1098+ { "⪊", { 10890, 0 } },
1099+ { "⪈", { 10888, 0 } },
1100+ { "⪈", { 10888, 0 } },
1101+ { "≩", { 8809, 0 } },
1102+ { "⋧", { 8935, 0 } },
1103+ { "𝕘", { 120152, 0 } },
1104+ { "`", { 96, 0 } },
1105+ { "ℊ", { 8458, 0 } },
1106+ { "≳", { 8819, 0 } },
1107+ { "⪎", { 10894, 0 } },
1108+ { "⪐", { 10896, 0 } },
1109+ { ">", { 62, 0 } },
1110+ { "⪧", { 10919, 0 } },
1111+ { "⩺", { 10874, 0 } },
1112+ { "⋗", { 8919, 0 } },
1113+ { "⦕", { 10645, 0 } },
1114+ { "⩼", { 10876, 0 } },
1115+ { "⪆", { 10886, 0 } },
1116+ { "⥸", { 10616, 0 } },
1117+ { "⋗", { 8919, 0 } },
1118+ { "⋛", { 8923, 0 } },
1119+ { "⪌", { 10892, 0 } },
1120+ { "≷", { 8823, 0 } },
1121+ { "≳", { 8819, 0 } },
1122+ { "≩︀", { 8809, 65024 } },
1123+ { "≩︀", { 8809, 65024 } },
1124+ { "⇔", { 8660, 0 } },
1125+ { " ", { 8202, 0 } },
1126+ { "½", { 189, 0 } },
1127+ { "ℋ", { 8459, 0 } },
1128+ { "ъ", { 1098, 0 } },
1129+ { "↔", { 8596, 0 } },
1130+ { "⥈", { 10568, 0 } },
1131+ { "↭", { 8621, 0 } },
1132+ { "ℏ", { 8463, 0 } },
1133+ { "ĥ", { 293, 0 } },
1134+ { "♥", { 9829, 0 } },
1135+ { "♥", { 9829, 0 } },
1136+ { "…", { 8230, 0 } },
1137+ { "⊹", { 8889, 0 } },
1138+ { "𝔥", { 120101, 0 } },
1139+ { "⤥", { 10533, 0 } },
1140+ { "⤦", { 10534, 0 } },
1141+ { "⇿", { 8703, 0 } },
1142+ { "∻", { 8763, 0 } },
1143+ { "↩", { 8617, 0 } },
1144+ { "↪", { 8618, 0 } },
1145+ { "𝕙", { 120153, 0 } },
1146+ { "―", { 8213, 0 } },
1147+ { "𝒽", { 119997, 0 } },
1148+ { "ℏ", { 8463, 0 } },
1149+ { "ħ", { 295, 0 } },
1150+ { "⁃", { 8259, 0 } },
1151+ { "‐", { 8208, 0 } },
1152+ { "í", { 237, 0 } },
1153+ { "⁣", { 8291, 0 } },
1154+ { "î", { 238, 0 } },
1155+ { "и", { 1080, 0 } },
1156+ { "е", { 1077, 0 } },
1157+ { "¡", { 161, 0 } },
1158+ { "⇔", { 8660, 0 } },
1159+ { "𝔦", { 120102, 0 } },
1160+ { "ì", { 236, 0 } },
1161+ { "ⅈ", { 8520, 0 } },
1162+ { "⨌", { 10764, 0 } },
1163+ { "∭", { 8749, 0 } },
1164+ { "⧜", { 10716, 0 } },
1165+ { "℩", { 8489, 0 } },
1166+ { "ij", { 307, 0 } },
1167+ { "ī", { 299, 0 } },
1168+ { "ℑ", { 8465, 0 } },
1169+ { "ℐ", { 8464, 0 } },
1170+ { "ℑ", { 8465, 0 } },
1171+ { "ı", { 305, 0 } },
1172+ { "⊷", { 8887, 0 } },
1173+ { "Ƶ", { 437, 0 } },
1174+ { "∈", { 8712, 0 } },
1175+ { "℅", { 8453, 0 } },
1176+ { "∞", { 8734, 0 } },
1177+ { "⧝", { 10717, 0 } },
1178+ { "ı", { 305, 0 } },
1179+ { "∫", { 8747, 0 } },
1180+ { "⊺", { 8890, 0 } },
1181+ { "ℤ", { 8484, 0 } },
1182+ { "⊺", { 8890, 0 } },
1183+ { "⨗", { 10775, 0 } },
1184+ { "⨼", { 10812, 0 } },
1185+ { "ё", { 1105, 0 } },
1186+ { "į", { 303, 0 } },
1187+ { "𝕚", { 120154, 0 } },
1188+ { "ι", { 953, 0 } },
1189+ { "⨼", { 10812, 0 } },
1190+ { "¿", { 191, 0 } },
1191+ { "𝒾", { 119998, 0 } },
1192+ { "∈", { 8712, 0 } },
1193+ { "⋹", { 8953, 0 } },
1194+ { "⋵", { 8949, 0 } },
1195+ { "⋴", { 8948, 0 } },
1196+ { "⋳", { 8947, 0 } },
1197+ { "∈", { 8712, 0 } },
1198+ { "⁢", { 8290, 0 } },
1199+ { "ĩ", { 297, 0 } },
1200+ { "і", { 1110, 0 } },
1201+ { "ï", { 239, 0 } },
1202+ { "ĵ", { 309, 0 } },
1203+ { "й", { 1081, 0 } },
1204+ { "𝔧", { 120103, 0 } },
1205+ { "ȷ", { 567, 0 } },
1206+ { "𝕛", { 120155, 0 } },
1207+ { "𝒿", { 119999, 0 } },
1208+ { "ј", { 1112, 0 } },
1209+ { "є", { 1108, 0 } },
1210+ { "κ", { 954, 0 } },
1211+ { "ϰ", { 1008, 0 } },
1212+ { "ķ", { 311, 0 } },
1213+ { "к", { 1082, 0 } },
1214+ { "𝔨", { 120104, 0 } },
1215+ { "ĸ", { 312, 0 } },
1216+ { "х", { 1093, 0 } },
1217+ { "ќ", { 1116, 0 } },
1218+ { "𝕜", { 120156, 0 } },
1219+ { "𝓀", { 120000, 0 } },
1220+ { "⇚", { 8666, 0 } },
1221+ { "⇐", { 8656, 0 } },
1222+ { "⤛", { 10523, 0 } },
1223+ { "⤎", { 10510, 0 } },
1224+ { "≦", { 8806, 0 } },
1225+ { "⪋", { 10891, 0 } },
1226+ { "⥢", { 10594, 0 } },
1227+ { "ĺ", { 314, 0 } },
1228+ { "⦴", { 10676, 0 } },
1229+ { "ℒ", { 8466, 0 } },
1230+ { "λ", { 955, 0 } },
1231+ { "⟨", { 10216, 0 } },
1232+ { "⦑", { 10641, 0 } },
1233+ { "⟨", { 10216, 0 } },
1234+ { "⪅", { 10885, 0 } },
1235+ { "«", { 171, 0 } },
1236+ { "←", { 8592, 0 } },
1237+ { "⇤", { 8676, 0 } },
1238+ { "⤟", { 10527, 0 } },
1239+ { "⤝", { 10525, 0 } },
1240+ { "↩", { 8617, 0 } },
1241+ { "↫", { 8619, 0 } },
1242+ { "⤹", { 10553, 0 } },
1243+ { "⥳", { 10611, 0 } },
1244+ { "↢", { 8610, 0 } },
1245+ { "⪫", { 10923, 0 } },
1246+ { "⤙", { 10521, 0 } },
1247+ { "⪭", { 10925, 0 } },
1248+ { "⪭︀", { 10925, 65024 } },
1249+ { "⤌", { 10508, 0 } },
1250+ { "❲", { 10098, 0 } },
1251+ { "{", { 123, 0 } },
1252+ { "[", { 91, 0 } },
1253+ { "⦋", { 10635, 0 } },
1254+ { "⦏", { 10639, 0 } },
1255+ { "⦍", { 10637, 0 } },
1256+ { "ľ", { 318, 0 } },
1257+ { "ļ", { 316, 0 } },
1258+ { "⌈", { 8968, 0 } },
1259+ { "{", { 123, 0 } },
1260+ { "л", { 1083, 0 } },
1261+ { "⤶", { 10550, 0 } },
1262+ { "“", { 8220, 0 } },
1263+ { "„", { 8222, 0 } },
1264+ { "⥧", { 10599, 0 } },
1265+ { "⥋", { 10571, 0 } },
1266+ { "↲", { 8626, 0 } },
1267+ { "≤", { 8804, 0 } },
1268+ { "←", { 8592, 0 } },
1269+ { "↢", { 8610, 0 } },
1270+ { "↽", { 8637, 0 } },
1271+ { "↼", { 8636, 0 } },
1272+ { "⇇", { 8647, 0 } },
1273+ { "↔", { 8596, 0 } },
1274+ { "⇆", { 8646, 0 } },
1275+ { "⇋", { 8651, 0 } },
1276+ { "↭", { 8621, 0 } },
1277+ { "⋋", { 8907, 0 } },
1278+ { "⋚", { 8922, 0 } },
1279+ { "≤", { 8804, 0 } },
1280+ { "≦", { 8806, 0 } },
1281+ { "⩽", { 10877, 0 } },
1282+ { "⩽", { 10877, 0 } },
1283+ { "⪨", { 10920, 0 } },
1284+ { "⩿", { 10879, 0 } },
1285+ { "⪁", { 10881, 0 } },
1286+ { "⪃", { 10883, 0 } },
1287+ { "⋚︀", { 8922, 65024 } },
1288+ { "⪓", { 10899, 0 } },
1289+ { "⪅", { 10885, 0 } },
1290+ { "⋖", { 8918, 0 } },
1291+ { "⋚", { 8922, 0 } },
1292+ { "⪋", { 10891, 0 } },
1293+ { "≶", { 8822, 0 } },
1294+ { "≲", { 8818, 0 } },
1295+ { "⥼", { 10620, 0 } },
1296+ { "⌊", { 8970, 0 } },
1297+ { "𝔩", { 120105, 0 } },
1298+ { "≶", { 8822, 0 } },
1299+ { "⪑", { 10897, 0 } },
1300+ { "↽", { 8637, 0 } },
1301+ { "↼", { 8636, 0 } },
1302+ { "⥪", { 10602, 0 } },
1303+ { "▄", { 9604, 0 } },
1304+ { "љ", { 1113, 0 } },
1305+ { "≪", { 8810, 0 } },
1306+ { "⇇", { 8647, 0 } },
1307+ { "⌞", { 8990, 0 } },
1308+ { "⥫", { 10603, 0 } },
1309+ { "◺", { 9722, 0 } },
1310+ { "ŀ", { 320, 0 } },
1311+ { "⎰", { 9136, 0 } },
1312+ { "⎰", { 9136, 0 } },
1313+ { "≨", { 8808, 0 } },
1314+ { "⪉", { 10889, 0 } },
1315+ { "⪉", { 10889, 0 } },
1316+ { "⪇", { 10887, 0 } },
1317+ { "⪇", { 10887, 0 } },
1318+ { "≨", { 8808, 0 } },
1319+ { "⋦", { 8934, 0 } },
1320+ { "⟬", { 10220, 0 } },
1321+ { "⇽", { 8701, 0 } },
1322+ { "⟦", { 10214, 0 } },
1323+ { "⟵", { 10229, 0 } },
1324+ { "⟷", { 10231, 0 } },
1325+ { "⟼", { 10236, 0 } },
1326+ { "⟶", { 10230, 0 } },
1327+ { "↫", { 8619, 0 } },
1328+ { "↬", { 8620, 0 } },
1329+ { "⦅", { 10629, 0 } },
1330+ { "𝕝", { 120157, 0 } },
1331+ { "⨭", { 10797, 0 } },
1332+ { "⨴", { 10804, 0 } },
1333+ { "∗", { 8727, 0 } },
1334+ { "_", { 95, 0 } },
1335+ { "◊", { 9674, 0 } },
1336+ { "◊", { 9674, 0 } },
1337+ { "⧫", { 10731, 0 } },
1338+ { "(", { 40, 0 } },
1339+ { "⦓", { 10643, 0 } },
1340+ { "⇆", { 8646, 0 } },
1341+ { "⌟", { 8991, 0 } },
1342+ { "⇋", { 8651, 0 } },
1343+ { "⥭", { 10605, 0 } },
1344+ { "‎", { 8206, 0 } },
1345+ { "⊿", { 8895, 0 } },
1346+ { "‹", { 8249, 0 } },
1347+ { "𝓁", { 120001, 0 } },
1348+ { "↰", { 8624, 0 } },
1349+ { "≲", { 8818, 0 } },
1350+ { "⪍", { 10893, 0 } },
1351+ { "⪏", { 10895, 0 } },
1352+ { "[", { 91, 0 } },
1353+ { "‘", { 8216, 0 } },
1354+ { "‚", { 8218, 0 } },
1355+ { "ł", { 322, 0 } },
1356+ { "<", { 60, 0 } },
1357+ { "⪦", { 10918, 0 } },
1358+ { "⩹", { 10873, 0 } },
1359+ { "⋖", { 8918, 0 } },
1360+ { "⋋", { 8907, 0 } },
1361+ { "⋉", { 8905, 0 } },
1362+ { "⥶", { 10614, 0 } },
1363+ { "⩻", { 10875, 0 } },
1364+ { "⦖", { 10646, 0 } },
1365+ { "◃", { 9667, 0 } },
1366+ { "⊴", { 8884, 0 } },
1367+ { "◂", { 9666, 0 } },
1368+ { "⥊", { 10570, 0 } },
1369+ { "⥦", { 10598, 0 } },
1370+ { "≨︀", { 8808, 65024 } },
1371+ { "≨︀", { 8808, 65024 } },
1372+ { "∺", { 8762, 0 } },
1373+ { "¯", { 175, 0 } },
1374+ { "♂", { 9794, 0 } },
1375+ { "✠", { 10016, 0 } },
1376+ { "✠", { 10016, 0 } },
1377+ { "↦", { 8614, 0 } },
1378+ { "↦", { 8614, 0 } },
1379+ { "↧", { 8615, 0 } },
1380+ { "↤", { 8612, 0 } },
1381+ { "↥", { 8613, 0 } },
1382+ { "▮", { 9646, 0 } },
1383+ { "⨩", { 10793, 0 } },
1384+ { "м", { 1084, 0 } },
1385+ { "—", { 8212, 0 } },
1386+ { "∡", { 8737, 0 } },
1387+ { "𝔪", { 120106, 0 } },
1388+ { "℧", { 8487, 0 } },
1389+ { "µ", { 181, 0 } },
1390+ { "∣", { 8739, 0 } },
1391+ { "*", { 42, 0 } },
1392+ { "⫰", { 10992, 0 } },
1393+ { "·", { 183, 0 } },
1394+ { "−", { 8722, 0 } },
1395+ { "⊟", { 8863, 0 } },
1396+ { "∸", { 8760, 0 } },
1397+ { "⨪", { 10794, 0 } },
1398+ { "⫛", { 10971, 0 } },
1399+ { "…", { 8230, 0 } },
1400+ { "∓", { 8723, 0 } },
1401+ { "⊧", { 8871, 0 } },
1402+ { "𝕞", { 120158, 0 } },
1403+ { "∓", { 8723, 0 } },
1404+ { "𝓂", { 120002, 0 } },
1405+ { "∾", { 8766, 0 } },
1406+ { "μ", { 956, 0 } },
1407+ { "⊸", { 8888, 0 } },
1408+ { "⊸", { 8888, 0 } },
1409+ { "⋙̸", { 8921, 824 } },
1410+ { "≫⃒", { 8811, 8402 } },
1411+ { "≫̸", { 8811, 824 } },
1412+ { "⇍", { 8653, 0 } },
1413+ { "⇎", { 8654, 0 } },
1414+ { "⋘̸", { 8920, 824 } },
1415+ { "≪⃒", { 8810, 8402 } },
1416+ { "≪̸", { 8810, 824 } },
1417+ { "⇏", { 8655, 0 } },
1418+ { "⊯", { 8879, 0 } },
1419+ { "⊮", { 8878, 0 } },
1420+ { "∇", { 8711, 0 } },
1421+ { "ń", { 324, 0 } },
1422+ { "∠⃒", { 8736, 8402 } },
1423+ { "≉", { 8777, 0 } },
1424+ { "⩰̸", { 10864, 824 } },
1425+ { "≋̸", { 8779, 824 } },
1426+ { "ʼn", { 329, 0 } },
1427+ { "≉", { 8777, 0 } },
1428+ { "♮", { 9838, 0 } },
1429+ { "♮", { 9838, 0 } },
1430+ { "ℕ", { 8469, 0 } },
1431+ { " ", { 160, 0 } },
1432+ { "≎̸", { 8782, 824 } },
1433+ { "≏̸", { 8783, 824 } },
1434+ { "⩃", { 10819, 0 } },
1435+ { "ň", { 328, 0 } },
1436+ { "ņ", { 326, 0 } },
1437+ { "≇", { 8775, 0 } },
1438+ { "⩭̸", { 10861, 824 } },
1439+ { "⩂", { 10818, 0 } },
1440+ { "н", { 1085, 0 } },
1441+ { "–", { 8211, 0 } },
1442+ { "≠", { 8800, 0 } },
1443+ { "⇗", { 8663, 0 } },
1444+ { "⤤", { 10532, 0 } },
1445+ { "↗", { 8599, 0 } },
1446+ { "↗", { 8599, 0 } },
1447+ { "≐̸", { 8784, 824 } },
1448+ { "≢", { 8802, 0 } },
1449+ { "⤨", { 10536, 0 } },
1450+ { "≂̸", { 8770, 824 } },
1451+ { "∄", { 8708, 0 } },
1452+ { "∄", { 8708, 0 } },
1453+ { "𝔫", { 120107, 0 } },
1454+ { "≧̸", { 8807, 824 } },
1455+ { "≱", { 8817, 0 } },
1456+ { "≱", { 8817, 0 } },
1457+ { "≧̸", { 8807, 824 } },
1458+ { "⩾̸", { 10878, 824 } },
1459+ { "⩾̸", { 10878, 824 } },
1460+ { "≵", { 8821, 0 } },
1461+ { "≯", { 8815, 0 } },
1462+ { "≯", { 8815, 0 } },
1463+ { "⇎", { 8654, 0 } },
1464+ { "↮", { 8622, 0 } },
1465+ { "⫲", { 10994, 0 } },
1466+ { "∋", { 8715, 0 } },
1467+ { "⋼", { 8956, 0 } },
1468+ { "⋺", { 8954, 0 } },
1469+ { "∋", { 8715, 0 } },
1470+ { "њ", { 1114, 0 } },
1471+ { "⇍", { 8653, 0 } },
1472+ { "≦̸", { 8806, 824 } },
1473+ { "↚", { 8602, 0 } },
1474+ { "‥", { 8229, 0 } },
1475+ { "≰", { 8816, 0 } },
1476+ { "↚", { 8602, 0 } },
1477+ { "↮", { 8622, 0 } },
1478+ { "≰", { 8816, 0 } },
1479+ { "≦̸", { 8806, 824 } },
1480+ { "⩽̸", { 10877, 824 } },
1481+ { "⩽̸", { 10877, 824 } },
1482+ { "≮", { 8814, 0 } },
1483+ { "≴", { 8820, 0 } },
1484+ { "≮", { 8814, 0 } },
1485+ { "⋪", { 8938, 0 } },
1486+ { "⋬", { 8940, 0 } },
1487+ { "∤", { 8740, 0 } },
1488+ { "𝕟", { 120159, 0 } },
1489+ { "¬", { 172, 0 } },
1490+ { "∉", { 8713, 0 } },
1491+ { "⋹̸", { 8953, 824 } },
1492+ { "⋵̸", { 8949, 824 } },
1493+ { "∉", { 8713, 0 } },
1494+ { "⋷", { 8951, 0 } },
1495+ { "⋶", { 8950, 0 } },
1496+ { "∌", { 8716, 0 } },
1497+ { "∌", { 8716, 0 } },
1498+ { "⋾", { 8958, 0 } },
1499+ { "⋽", { 8957, 0 } },
1500+ { "∦", { 8742, 0 } },
1501+ { "∦", { 8742, 0 } },
1502+ { "⫽⃥", { 11005, 8421 } },
1503+ { "∂̸", { 8706, 824 } },
1504+ { "⨔", { 10772, 0 } },
1505+ { "⊀", { 8832, 0 } },
1506+ { "⋠", { 8928, 0 } },
1507+ { "⪯̸", { 10927, 824 } },
1508+ { "⊀", { 8832, 0 } },
1509+ { "⪯̸", { 10927, 824 } },
1510+ { "⇏", { 8655, 0 } },
1511+ { "↛", { 8603, 0 } },
1512+ { "⤳̸", { 10547, 824 } },
1513+ { "↝̸", { 8605, 824 } },
1514+ { "↛", { 8603, 0 } },
1515+ { "⋫", { 8939, 0 } },
1516+ { "⋭", { 8941, 0 } },
1517+ { "⊁", { 8833, 0 } },
1518+ { "⋡", { 8929, 0 } },
1519+ { "⪰̸", { 10928, 824 } },
1520+ { "𝓃", { 120003, 0 } },
1521+ { "∤", { 8740, 0 } },
1522+ { "∦", { 8742, 0 } },
1523+ { "≁", { 8769, 0 } },
1524+ { "≄", { 8772, 0 } },
1525+ { "≄", { 8772, 0 } },
1526+ { "∤", { 8740, 0 } },
1527+ { "∦", { 8742, 0 } },
1528+ { "⋢", { 8930, 0 } },
1529+ { "⋣", { 8931, 0 } },
1530+ { "⊄", { 8836, 0 } },
1531+ { "⫅̸", { 10949, 824 } },
1532+ { "⊈", { 8840, 0 } },
1533+ { "⊂⃒", { 8834, 8402 } },
1534+ { "⊈", { 8840, 0 } },
1535+ { "⫅̸", { 10949, 824 } },
1536+ { "⊁", { 8833, 0 } },
1537+ { "⪰̸", { 10928, 824 } },
1538+ { "⊅", { 8837, 0 } },
1539+ { "⫆̸", { 10950, 824 } },
1540+ { "⊉", { 8841, 0 } },
1541+ { "⊃⃒", { 8835, 8402 } },
1542+ { "⊉", { 8841, 0 } },
1543+ { "⫆̸", { 10950, 824 } },
1544+ { "≹", { 8825, 0 } },
1545+ { "ñ", { 241, 0 } },
1546+ { "≸", { 8824, 0 } },
1547+ { "⋪", { 8938, 0 } },
1548+ { "⋬", { 8940, 0 } },
1549+ { "⋫", { 8939, 0 } },
1550+ { "⋭", { 8941, 0 } },
1551+ { "ν", { 957, 0 } },
1552+ { "#", { 35, 0 } },
1553+ { "№", { 8470, 0 } },
1554+ { " ", { 8199, 0 } },
1555+ { "⊭", { 8877, 0 } },
1556+ { "⤄", { 10500, 0 } },
1557+ { "≍⃒", { 8781, 8402 } },
1558+ { "⊬", { 8876, 0 } },
1559+ { "≥⃒", { 8805, 8402 } },
1560+ { ">⃒", { 62, 8402 } },
1561+ { "⧞", { 10718, 0 } },
1562+ { "⤂", { 10498, 0 } },
1563+ { "≤⃒", { 8804, 8402 } },
1564+ { "<⃒", { 60, 8402 } },
1565+ { "⊴⃒", { 8884, 8402 } },
1566+ { "⤃", { 10499, 0 } },
1567+ { "⊵⃒", { 8885, 8402 } },
1568+ { "∼⃒", { 8764, 8402 } },
1569+ { "⇖", { 8662, 0 } },
1570+ { "⤣", { 10531, 0 } },
1571+ { "↖", { 8598, 0 } },
1572+ { "↖", { 8598, 0 } },
1573+ { "⤧", { 10535, 0 } },
1574+ { "Ⓢ", { 9416, 0 } },
1575+ { "ó", { 243, 0 } },
1576+ { "⊛", { 8859, 0 } },
1577+ { "⊚", { 8858, 0 } },
1578+ { "ô", { 244, 0 } },
1579+ { "о", { 1086, 0 } },
1580+ { "⊝", { 8861, 0 } },
1581+ { "ő", { 337, 0 } },
1582+ { "⨸", { 10808, 0 } },
1583+ { "⊙", { 8857, 0 } },
1584+ { "⦼", { 10684, 0 } },
1585+ { "œ", { 339, 0 } },
1586+ { "⦿", { 10687, 0 } },
1587+ { "𝔬", { 120108, 0 } },
1588+ { "˛", { 731, 0 } },
1589+ { "ò", { 242, 0 } },
1590+ { "⧁", { 10689, 0 } },
1591+ { "⦵", { 10677, 0 } },
1592+ { "Ω", { 937, 0 } },
1593+ { "∮", { 8750, 0 } },
1594+ { "↺", { 8634, 0 } },
1595+ { "⦾", { 10686, 0 } },
1596+ { "⦻", { 10683, 0 } },
1597+ { "‾", { 8254, 0 } },
1598+ { "⧀", { 10688, 0 } },
1599+ { "ō", { 333, 0 } },
1600+ { "ω", { 969, 0 } },
1601+ { "ο", { 959, 0 } },
1602+ { "⦶", { 10678, 0 } },
1603+ { "⊖", { 8854, 0 } },
1604+ { "𝕠", { 120160, 0 } },
1605+ { "⦷", { 10679, 0 } },
1606+ { "⦹", { 10681, 0 } },
1607+ { "⊕", { 8853, 0 } },
1608+ { "∨", { 8744, 0 } },
1609+ { "↻", { 8635, 0 } },
1610+ { "⩝", { 10845, 0 } },
1611+ { "ℴ", { 8500, 0 } },
1612+ { "ℴ", { 8500, 0 } },
1613+ { "ª", { 170, 0 } },
1614+ { "º", { 186, 0 } },
1615+ { "⊶", { 8886, 0 } },
1616+ { "⩖", { 10838, 0 } },
1617+ { "⩗", { 10839, 0 } },
1618+ { "⩛", { 10843, 0 } },
1619+ { "ℴ", { 8500, 0 } },
1620+ { "ø", { 248, 0 } },
1621+ { "⊘", { 8856, 0 } },
1622+ { "õ", { 245, 0 } },
1623+ { "⊗", { 8855, 0 } },
1624+ { "⨶", { 10806, 0 } },
1625+ { "ö", { 246, 0 } },
1626+ { "⌽", { 9021, 0 } },
1627+ { "∥", { 8741, 0 } },
1628+ { "¶", { 182, 0 } },
1629+ { "∥", { 8741, 0 } },
1630+ { "⫳", { 10995, 0 } },
1631+ { "⫽", { 11005, 0 } },
1632+ { "∂", { 8706, 0 } },
1633+ { "п", { 1087, 0 } },
1634+ { "%", { 37, 0 } },
1635+ { ".", { 46, 0 } },
1636+ { "‰", { 8240, 0 } },
1637+ { "⊥", { 8869, 0 } },
1638+ { "‱", { 8241, 0 } },
1639+ { "𝔭", { 120109, 0 } },
1640+ { "φ", { 966, 0 } },
1641+ { "ϕ", { 981, 0 } },
1642+ { "ℳ", { 8499, 0 } },
1643+ { "☎", { 9742, 0 } },
1644+ { "π", { 960, 0 } },
1645+ { "⋔", { 8916, 0 } },
1646+ { "ϖ", { 982, 0 } },
1647+ { "ℏ", { 8463, 0 } },
1648+ { "ℎ", { 8462, 0 } },
1649+ { "ℏ", { 8463, 0 } },
1650+ { "+", { 43, 0 } },
1651+ { "⨣", { 10787, 0 } },
1652+ { "⊞", { 8862, 0 } },
1653+ { "⨢", { 10786, 0 } },
1654+ { "∔", { 8724, 0 } },
1655+ { "⨥", { 10789, 0 } },
1656+ { "⩲", { 10866, 0 } },
1657+ { "±", { 177, 0 } },
1658+ { "⨦", { 10790, 0 } },
1659+ { "⨧", { 10791, 0 } },
1660+ { "±", { 177, 0 } },
1661+ { "⨕", { 10773, 0 } },
1662+ { "𝕡", { 120161, 0 } },
1663+ { "£", { 163, 0 } },
1664+ { "≺", { 8826, 0 } },
1665+ { "⪳", { 10931, 0 } },
1666+ { "⪷", { 10935, 0 } },
1667+ { "≼", { 8828, 0 } },
1668+ { "⪯", { 10927, 0 } },
1669+ { "≺", { 8826, 0 } },
1670+ { "⪷", { 10935, 0 } },
1671+ { "≼", { 8828, 0 } },
1672+ { "⪯", { 10927, 0 } },
1673+ { "⪹", { 10937, 0 } },
1674+ { "⪵", { 10933, 0 } },
1675+ { "⋨", { 8936, 0 } },
1676+ { "≾", { 8830, 0 } },
1677+ { "′", { 8242, 0 } },
1678+ { "ℙ", { 8473, 0 } },
1679+ { "⪵", { 10933, 0 } },
1680+ { "⪹", { 10937, 0 } },
1681+ { "⋨", { 8936, 0 } },
1682+ { "∏", { 8719, 0 } },
1683+ { "⌮", { 9006, 0 } },
1684+ { "⌒", { 8978, 0 } },
1685+ { "⌓", { 8979, 0 } },
1686+ { "∝", { 8733, 0 } },
1687+ { "∝", { 8733, 0 } },
1688+ { "≾", { 8830, 0 } },
1689+ { "⊰", { 8880, 0 } },
1690+ { "𝓅", { 120005, 0 } },
1691+ { "ψ", { 968, 0 } },
1692+ { " ", { 8200, 0 } },
1693+ { "𝔮", { 120110, 0 } },
1694+ { "⨌", { 10764, 0 } },
1695+ { "𝕢", { 120162, 0 } },
1696+ { "⁗", { 8279, 0 } },
1697+ { "𝓆", { 120006, 0 } },
1698+ { "ℍ", { 8461, 0 } },
1699+ { "⨖", { 10774, 0 } },
1700+ { "?", { 63, 0 } },
1701+ { "≟", { 8799, 0 } },
1702+ { """, { 34, 0 } },
1703+ { "⇛", { 8667, 0 } },
1704+ { "⇒", { 8658, 0 } },
1705+ { "⤜", { 10524, 0 } },
1706+ { "⤏", { 10511, 0 } },
1707+ { "⥤", { 10596, 0 } },
1708+ { "∽̱", { 8765, 817 } },
1709+ { "ŕ", { 341, 0 } },
1710+ { "√", { 8730, 0 } },
1711+ { "⦳", { 10675, 0 } },
1712+ { "⟩", { 10217, 0 } },
1713+ { "⦒", { 10642, 0 } },
1714+ { "⦥", { 10661, 0 } },
1715+ { "⟩", { 10217, 0 } },
1716+ { "»", { 187, 0 } },
1717+ { "→", { 8594, 0 } },
1718+ { "⥵", { 10613, 0 } },
1719+ { "⇥", { 8677, 0 } },
1720+ { "⤠", { 10528, 0 } },
1721+ { "⤳", { 10547, 0 } },
1722+ { "⤞", { 10526, 0 } },
1723+ { "↪", { 8618, 0 } },
1724+ { "↬", { 8620, 0 } },
1725+ { "⥅", { 10565, 0 } },
1726+ { "⥴", { 10612, 0 } },
1727+ { "↣", { 8611, 0 } },
1728+ { "↝", { 8605, 0 } },
1729+ { "⤚", { 10522, 0 } },
1730+ { "∶", { 8758, 0 } },
1731+ { "ℚ", { 8474, 0 } },
1732+ { "⤍", { 10509, 0 } },
1733+ { "❳", { 10099, 0 } },
1734+ { "}", { 125, 0 } },
1735+ { "]", { 93, 0 } },
1736+ { "⦌", { 10636, 0 } },
1737+ { "⦎", { 10638, 0 } },
1738+ { "⦐", { 10640, 0 } },
1739+ { "ř", { 345, 0 } },
1740+ { "ŗ", { 343, 0 } },
1741+ { "⌉", { 8969, 0 } },
1742+ { "}", { 125, 0 } },
1743+ { "р", { 1088, 0 } },
1744+ { "⤷", { 10551, 0 } },
1745+ { "⥩", { 10601, 0 } },
1746+ { "”", { 8221, 0 } },
1747+ { "”", { 8221, 0 } },
1748+ { "↳", { 8627, 0 } },
1749+ { "ℜ", { 8476, 0 } },
1750+ { "ℛ", { 8475, 0 } },
1751+ { "ℜ", { 8476, 0 } },
1752+ { "ℝ", { 8477, 0 } },
1753+ { "▭", { 9645, 0 } },
1754+ { "®", { 174, 0 } },
1755+ { "⥽", { 10621, 0 } },
1756+ { "⌋", { 8971, 0 } },
1757+ { "𝔯", { 120111, 0 } },
1758+ { "⇁", { 8641, 0 } },
1759+ { "⇀", { 8640, 0 } },
1760+ { "⥬", { 10604, 0 } },
1761+ { "ρ", { 961, 0 } },
1762+ { "ϱ", { 1009, 0 } },
1763+ { "→", { 8594, 0 } },
1764+ { "↣", { 8611, 0 } },
1765+ { "⇁", { 8641, 0 } },
1766+ { "⇀", { 8640, 0 } },
1767+ { "⇄", { 8644, 0 } },
1768+ { "⇌", { 8652, 0 } },
1769+ { "⇉", { 8649, 0 } },
1770+ { "↝", { 8605, 0 } },
1771+ { "⋌", { 8908, 0 } },
1772+ { "˚", { 730, 0 } },
1773+ { "≓", { 8787, 0 } },
1774+ { "⇄", { 8644, 0 } },
1775+ { "⇌", { 8652, 0 } },
1776+ { "‏", { 8207, 0 } },
1777+ { "⎱", { 9137, 0 } },
1778+ { "⎱", { 9137, 0 } },
1779+ { "⫮", { 10990, 0 } },
1780+ { "⟭", { 10221, 0 } },
1781+ { "⇾", { 8702, 0 } },
1782+ { "⟧", { 10215, 0 } },
1783+ { "⦆", { 10630, 0 } },
1784+ { "𝕣", { 120163, 0 } },
1785+ { "⨮", { 10798, 0 } },
1786+ { "⨵", { 10805, 0 } },
1787+ { ")", { 41, 0 } },
1788+ { "⦔", { 10644, 0 } },
1789+ { "⨒", { 10770, 0 } },
1790+ { "⇉", { 8649, 0 } },
1791+ { "›", { 8250, 0 } },
1792+ { "𝓇", { 120007, 0 } },
1793+ { "↱", { 8625, 0 } },
1794+ { "]", { 93, 0 } },
1795+ { "’", { 8217, 0 } },
1796+ { "’", { 8217, 0 } },
1797+ { "⋌", { 8908, 0 } },
1798+ { "⋊", { 8906, 0 } },
1799+ { "▹", { 9657, 0 } },
1800+ { "⊵", { 8885, 0 } },
1801+ { "▸", { 9656, 0 } },
1802+ { "⧎", { 10702, 0 } },
1803+ { "⥨", { 10600, 0 } },
1804+ { "℞", { 8478, 0 } },
1805+ { "ś", { 347, 0 } },
1806+ { "‚", { 8218, 0 } },
1807+ { "≻", { 8827, 0 } },
1808+ { "⪴", { 10932, 0 } },
1809+ { "⪸", { 10936, 0 } },
1810+ { "š", { 353, 0 } },
1811+ { "≽", { 8829, 0 } },
1812+ { "⪰", { 10928, 0 } },
1813+ { "ş", { 351, 0 } },
1814+ { "ŝ", { 349, 0 } },
1815+ { "⪶", { 10934, 0 } },
1816+ { "⪺", { 10938, 0 } },
1817+ { "⋩", { 8937, 0 } },
1818+ { "⨓", { 10771, 0 } },
1819+ { "≿", { 8831, 0 } },
1820+ { "с", { 1089, 0 } },
1821+ { "⋅", { 8901, 0 } },
1822+ { "⊡", { 8865, 0 } },
1823+ { "⩦", { 10854, 0 } },
1824+ { "⇘", { 8664, 0 } },
1825+ { "⤥", { 10533, 0 } },
1826+ { "↘", { 8600, 0 } },
1827+ { "↘", { 8600, 0 } },
1828+ { "§", { 167, 0 } },
1829+ { ";", { 59, 0 } },
1830+ { "⤩", { 10537, 0 } },
1831+ { "∖", { 8726, 0 } },
1832+ { "∖", { 8726, 0 } },
1833+ { "✶", { 10038, 0 } },
1834+ { "𝔰", { 120112, 0 } },
1835+ { "⌢", { 8994, 0 } },
1836+ { "♯", { 9839, 0 } },
1837+ { "щ", { 1097, 0 } },
1838+ { "ш", { 1096, 0 } },
1839+ { "∣", { 8739, 0 } },
1840+ { "∥", { 8741, 0 } },
1841+ { "­", { 173, 0 } },
1842+ { "σ", { 963, 0 } },
1843+ { "ς", { 962, 0 } },
1844+ { "ς", { 962, 0 } },
1845+ { "∼", { 8764, 0 } },
1846+ { "⩪", { 10858, 0 } },
1847+ { "≃", { 8771, 0 } },
1848+ { "≃", { 8771, 0 } },
1849+ { "⪞", { 10910, 0 } },
1850+ { "⪠", { 10912, 0 } },
1851+ { "⪝", { 10909, 0 } },
1852+ { "⪟", { 10911, 0 } },
1853+ { "≆", { 8774, 0 } },
1854+ { "⨤", { 10788, 0 } },
1855+ { "⥲", { 10610, 0 } },
1856+ { "←", { 8592, 0 } },
1857+ { "∖", { 8726, 0 } },
1858+ { "⨳", { 10803, 0 } },
1859+ { "⧤", { 10724, 0 } },
1860+ { "∣", { 8739, 0 } },
1861+ { "⌣", { 8995, 0 } },
1862+ { "⪪", { 10922, 0 } },
1863+ { "⪬", { 10924, 0 } },
1864+ { "⪬︀", { 10924, 65024 } },
1865+ { "ь", { 1100, 0 } },
1866+ { "/", { 47, 0 } },
1867+ { "⧄", { 10692, 0 } },
1868+ { "⌿", { 9023, 0 } },
1869+ { "𝕤", { 120164, 0 } },
1870+ { "♠", { 9824, 0 } },
1871+ { "♠", { 9824, 0 } },
1872+ { "∥", { 8741, 0 } },
1873+ { "⊓", { 8851, 0 } },
1874+ { "⊓︀", { 8851, 65024 } },
1875+ { "⊔", { 8852, 0 } },
1876+ { "⊔︀", { 8852, 65024 } },
1877+ { "⊏", { 8847, 0 } },
1878+ { "⊑", { 8849, 0 } },
1879+ { "⊏", { 8847, 0 } },
1880+ { "⊑", { 8849, 0 } },
1881+ { "⊐", { 8848, 0 } },
1882+ { "⊒", { 8850, 0 } },
1883+ { "⊐", { 8848, 0 } },
1884+ { "⊒", { 8850, 0 } },
1885+ { "□", { 9633, 0 } },
1886+ { "□", { 9633, 0 } },
1887+ { "▪", { 9642, 0 } },
1888+ { "▪", { 9642, 0 } },
1889+ { "→", { 8594, 0 } },
1890+ { "𝓈", { 120008, 0 } },
1891+ { "∖", { 8726, 0 } },
1892+ { "⌣", { 8995, 0 } },
1893+ { "⋆", { 8902, 0 } },
1894+ { "☆", { 9734, 0 } },
1895+ { "★", { 9733, 0 } },
1896+ { "ϵ", { 1013, 0 } },
1897+ { "ϕ", { 981, 0 } },
1898+ { "¯", { 175, 0 } },
1899+ { "⊂", { 8834, 0 } },
1900+ { "⫅", { 10949, 0 } },
1901+ { "⪽", { 10941, 0 } },
1902+ { "⊆", { 8838, 0 } },
1903+ { "⫃", { 10947, 0 } },
1904+ { "⫁", { 10945, 0 } },
1905+ { "⫋", { 10955, 0 } },
1906+ { "⊊", { 8842, 0 } },
1907+ { "⪿", { 10943, 0 } },
1908+ { "⥹", { 10617, 0 } },
1909+ { "⊂", { 8834, 0 } },
1910+ { "⊆", { 8838, 0 } },
1911+ { "⫅", { 10949, 0 } },
1912+ { "⊊", { 8842, 0 } },
1913+ { "⫋", { 10955, 0 } },
1914+ { "⫇", { 10951, 0 } },
1915+ { "⫕", { 10965, 0 } },
1916+ { "⫓", { 10963, 0 } },
1917+ { "≻", { 8827, 0 } },
1918+ { "⪸", { 10936, 0 } },
1919+ { "≽", { 8829, 0 } },
1920+ { "⪰", { 10928, 0 } },
1921+ { "⪺", { 10938, 0 } },
1922+ { "⪶", { 10934, 0 } },
1923+ { "⋩", { 8937, 0 } },
1924+ { "≿", { 8831, 0 } },
1925+ { "∑", { 8721, 0 } },
1926+ { "♪", { 9834, 0 } },
1927+ { "¹", { 185, 0 } },
1928+ { "¹", { 185, 0 } },
1929+ { "²", { 178, 0 } },
1930+ { "²", { 178, 0 } },
1931+ { "³", { 179, 0 } },
1932+ { "³", { 179, 0 } },
1933+ { "⊃", { 8835, 0 } },
1934+ { "⫆", { 10950, 0 } },
1935+ { "⪾", { 10942, 0 } },
1936+ { "⫘", { 10968, 0 } },
1937+ { "⊇", { 8839, 0 } },
1938+ { "⫄", { 10948, 0 } },
1939+ { "⟉", { 10185, 0 } },
1940+ { "⫗", { 10967, 0 } },
1941+ { "⥻", { 10619, 0 } },
1942+ { "⫂", { 10946, 0 } },
1943+ { "⫌", { 10956, 0 } },
1944+ { "⊋", { 8843, 0 } },
1945+ { "⫀", { 10944, 0 } },
1946+ { "⊃", { 8835, 0 } },
1947+ { "⊇", { 8839, 0 } },
1948+ { "⫆", { 10950, 0 } },
1949+ { "⊋", { 8843, 0 } },
1950+ { "⫌", { 10956, 0 } },
1951+ { "⫈", { 10952, 0 } },
1952+ { "⫔", { 10964, 0 } },
1953+ { "⫖", { 10966, 0 } },
1954+ { "⇙", { 8665, 0 } },
1955+ { "⤦", { 10534, 0 } },
1956+ { "↙", { 8601, 0 } },
1957+ { "↙", { 8601, 0 } },
1958+ { "⤪", { 10538, 0 } },
1959+ { "ß", { 223, 0 } },
1960+ { "⌖", { 8982, 0 } },
1961+ { "τ", { 964, 0 } },
1962+ { "⎴", { 9140, 0 } },
1963+ { "ť", { 357, 0 } },
1964+ { "ţ", { 355, 0 } },
1965+ { "т", { 1090, 0 } },
1966+ { "⃛", { 8411, 0 } },
1967+ { "⌕", { 8981, 0 } },
1968+ { "𝔱", { 120113, 0 } },
1969+ { "∴", { 8756, 0 } },
1970+ { "∴", { 8756, 0 } },
1971+ { "θ", { 952, 0 } },
1972+ { "ϑ", { 977, 0 } },
1973+ { "ϑ", { 977, 0 } },
1974+ { "≈", { 8776, 0 } },
1975+ { "∼", { 8764, 0 } },
1976+ { " ", { 8201, 0 } },
1977+ { "≈", { 8776, 0 } },
1978+ { "∼", { 8764, 0 } },
1979+ { "þ", { 254, 0 } },
1980+ { "˜", { 732, 0 } },
1981+ { "×", { 215, 0 } },
1982+ { "⊠", { 8864, 0 } },
1983+ { "⨱", { 10801, 0 } },
1984+ { "⨰", { 10800, 0 } },
1985+ { "∭", { 8749, 0 } },
1986+ { "⤨", { 10536, 0 } },
1987+ { "⊤", { 8868, 0 } },
1988+ { "⌶", { 9014, 0 } },
1989+ { "⫱", { 10993, 0 } },
1990+ { "𝕥", { 120165, 0 } },
1991+ { "⫚", { 10970, 0 } },
1992+ { "⤩", { 10537, 0 } },
1993+ { "‴", { 8244, 0 } },
1994+ { "™", { 8482, 0 } },
1995+ { "▵", { 9653, 0 } },
1996+ { "▿", { 9663, 0 } },
1997+ { "◃", { 9667, 0 } },
1998+ { "⊴", { 8884, 0 } },
1999+ { "≜", { 8796, 0 } },
2000+ { "▹", { 9657, 0 } },
2001+ { "⊵", { 8885, 0 } },
2002+ { "◬", { 9708, 0 } },
2003+ { "≜", { 8796, 0 } },
2004+ { "⨺", { 10810, 0 } },
2005+ { "⨹", { 10809, 0 } },
2006+ { "⧍", { 10701, 0 } },
2007+ { "⨻", { 10811, 0 } },
2008+ { "⏢", { 9186, 0 } },
2009+ { "𝓉", { 120009, 0 } },
2010+ { "ц", { 1094, 0 } },
2011+ { "ћ", { 1115, 0 } },
2012+ { "ŧ", { 359, 0 } },
2013+ { "≬", { 8812, 0 } },
2014+ { "↞", { 8606, 0 } },
2015+ { "↠", { 8608, 0 } },
2016+ { "⇑", { 8657, 0 } },
2017+ { "⥣", { 10595, 0 } },
2018+ { "ú", { 250, 0 } },
2019+ { "↑", { 8593, 0 } },
2020+ { "ў", { 1118, 0 } },
2021+ { "ŭ", { 365, 0 } },
2022+ { "û", { 251, 0 } },
2023+ { "у", { 1091, 0 } },
2024+ { "⇅", { 8645, 0 } },
2025+ { "ű", { 369, 0 } },
2026+ { "⥮", { 10606, 0 } },
2027+ { "⥾", { 10622, 0 } },
2028+ { "𝔲", { 120114, 0 } },
2029+ { "ù", { 249, 0 } },
2030+ { "↿", { 8639, 0 } },
2031+ { "↾", { 8638, 0 } },
2032+ { "▀", { 9600, 0 } },
2033+ { "⌜", { 8988, 0 } },
2034+ { "⌜", { 8988, 0 } },
2035+ { "⌏", { 8975, 0 } },
2036+ { "◸", { 9720, 0 } },
2037+ { "ū", { 363, 0 } },
2038+ { "¨", { 168, 0 } },
2039+ { "ų", { 371, 0 } },
2040+ { "𝕦", { 120166, 0 } },
2041+ { "↑", { 8593, 0 } },
2042+ { "↕", { 8597, 0 } },
2043+ { "↿", { 8639, 0 } },
2044+ { "↾", { 8638, 0 } },
2045+ { "⊎", { 8846, 0 } },
2046+ { "υ", { 965, 0 } },
2047+ { "ϒ", { 978, 0 } },
2048+ { "υ", { 965, 0 } },
2049+ { "⇈", { 8648, 0 } },
2050+ { "⌝", { 8989, 0 } },
2051+ { "⌝", { 8989, 0 } },
2052+ { "⌎", { 8974, 0 } },
2053+ { "ů", { 367, 0 } },
2054+ { "◹", { 9721, 0 } },
2055+ { "𝓊", { 120010, 0 } },
2056+ { "⋰", { 8944, 0 } },
2057+ { "ũ", { 361, 0 } },
2058+ { "▵", { 9653, 0 } },
2059+ { "▴", { 9652, 0 } },
2060+ { "⇈", { 8648, 0 } },
2061+ { "ü", { 252, 0 } },
2062+ { "⦧", { 10663, 0 } },
2063+ { "⇕", { 8661, 0 } },
2064+ { "⫨", { 10984, 0 } },
2065+ { "⫩", { 10985, 0 } },
2066+ { "⊨", { 8872, 0 } },
2067+ { "⦜", { 10652, 0 } },
2068+ { "ϵ", { 1013, 0 } },
2069+ { "ϰ", { 1008, 0 } },
2070+ { "∅", { 8709, 0 } },
2071+ { "ϕ", { 981, 0 } },
2072+ { "ϖ", { 982, 0 } },
2073+ { "∝", { 8733, 0 } },
2074+ { "↕", { 8597, 0 } },
2075+ { "ϱ", { 1009, 0 } },
2076+ { "ς", { 962, 0 } },
2077+ { "⊊︀", { 8842, 65024 } },
2078+ { "⫋︀", { 10955, 65024 } },
2079+ { "⊋︀", { 8843, 65024 } },
2080+ { "⫌︀", { 10956, 65024 } },
2081+ { "ϑ", { 977, 0 } },
2082+ { "⊲", { 8882, 0 } },
2083+ { "⊳", { 8883, 0 } },
2084+ { "в", { 1074, 0 } },
2085+ { "⊢", { 8866, 0 } },
2086+ { "∨", { 8744, 0 } },
2087+ { "⊻", { 8891, 0 } },
2088+ { "≚", { 8794, 0 } },
2089+ { "⋮", { 8942, 0 } },
2090+ { "|", { 124, 0 } },
2091+ { "|", { 124, 0 } },
2092+ { "𝔳", { 120115, 0 } },
2093+ { "⊲", { 8882, 0 } },
2094+ { "⊂⃒", { 8834, 8402 } },
2095+ { "⊃⃒", { 8835, 8402 } },
2096+ { "𝕧", { 120167, 0 } },
2097+ { "∝", { 8733, 0 } },
2098+ { "⊳", { 8883, 0 } },
2099+ { "𝓋", { 120011, 0 } },
2100+ { "⫋︀", { 10955, 65024 } },
2101+ { "⊊︀", { 8842, 65024 } },
2102+ { "⫌︀", { 10956, 65024 } },
2103+ { "⊋︀", { 8843, 65024 } },
2104+ { "⦚", { 10650, 0 } },
2105+ { "ŵ", { 373, 0 } },
2106+ { "⩟", { 10847, 0 } },
2107+ { "∧", { 8743, 0 } },
2108+ { "≙", { 8793, 0 } },
2109+ { "℘", { 8472, 0 } },
2110+ { "𝔴", { 120116, 0 } },
2111+ { "𝕨", { 120168, 0 } },
2112+ { "℘", { 8472, 0 } },
2113+ { "≀", { 8768, 0 } },
2114+ { "≀", { 8768, 0 } },
2115+ { "𝓌", { 120012, 0 } },
2116+ { "⋂", { 8898, 0 } },
2117+ { "◯", { 9711, 0 } },
2118+ { "⋃", { 8899, 0 } },
2119+ { "▽", { 9661, 0 } },
2120+ { "𝔵", { 120117, 0 } },
2121+ { "⟺", { 10234, 0 } },
2122+ { "⟷", { 10231, 0 } },
2123+ { "ξ", { 958, 0 } },
2124+ { "⟸", { 10232, 0 } },
2125+ { "⟵", { 10229, 0 } },
2126+ { "⟼", { 10236, 0 } },
2127+ { "⋻", { 8955, 0 } },
2128+ { "⨀", { 10752, 0 } },
2129+ { "𝕩", { 120169, 0 } },
2130+ { "⨁", { 10753, 0 } },
2131+ { "⨂", { 10754, 0 } },
2132+ { "⟹", { 10233, 0 } },
2133+ { "⟶", { 10230, 0 } },
2134+ { "𝓍", { 120013, 0 } },
2135+ { "⨆", { 10758, 0 } },
2136+ { "⨄", { 10756, 0 } },
2137+ { "△", { 9651, 0 } },
2138+ { "⋁", { 8897, 0 } },
2139+ { "⋀", { 8896, 0 } },
2140+ { "ý", { 253, 0 } },
2141+ { "я", { 1103, 0 } },
2142+ { "ŷ", { 375, 0 } },
2143+ { "ы", { 1099, 0 } },
2144+ { "¥", { 165, 0 } },
2145+ { "𝔶", { 120118, 0 } },
2146+ { "ї", { 1111, 0 } },
2147+ { "𝕪", { 120170, 0 } },
2148+ { "𝓎", { 120014, 0 } },
2149+ { "ю", { 1102, 0 } },
2150+ { "ÿ", { 255, 0 } },
2151+ { "ź", { 378, 0 } },
2152+ { "ž", { 382, 0 } },
2153+ { "з", { 1079, 0 } },
2154+ { "ż", { 380, 0 } },
2155+ { "ℨ", { 8488, 0 } },
2156+ { "ζ", { 950, 0 } },
2157+ { "𝔷", { 120119, 0 } },
2158+ { "ж", { 1078, 0 } },
2159+ { "⇝", { 8669, 0 } },
2160+ { "𝕫", { 120171, 0 } },
2161+ { "𝓏", { 120015, 0 } },
2162+ { "‍", { 8205, 0 } },
2163+ { "‌", { 8204, 0 } }
2164+};
2165+
2166+
2167+struct entity_key { /* search key that entity_lookup() hands to bsearch() */
2168+ const char* name; /* start of the name being looked up */
2169+ size_t name_size; /* number of bytes of name that entity_cmp() compares */
2170+};
2171+/* bsearch() comparator: p_key is a struct entity_key, p_entity a struct entity; returns 0 only when the key equals the whole entity name. */
2172+static int
2173+entity_cmp(const void* p_key, const void* p_entity)
2174+{
2175+ const struct entity_key* key = (const struct entity_key*) p_key;
2176+ const struct entity* ent = (const struct entity*) p_entity;
2177+ int cmp = strncmp(key->name, ent->name, key->name_size);
2178+ return (cmp == 0 && strlen(ent->name) > key->name_size) ? -1 : cmp; /* key is only a proper prefix (or empty): it sorts before the entry */
2179+}
2180+
2181+/* Searches entity_table with bsearch() and entity_cmp for the name_size bytes at name; returns the entry found, or NULL when there is none. */
2182+const struct entity*
2183+entity_lookup(const char* name, size_t name_size)
2184+{
2185+ const size_t entity_count = sizeof(entity_table) / sizeof(entity_table[0]);
2186+ struct entity_key needle;
2187+
2188+ needle.name = name;
2189+ needle.name_size = name_size;
2190+ return bsearch(&needle, entity_table, entity_count, sizeof(struct entity), entity_cmp);
2191+}
A · entity.h
+42, -0 1@@ -0,0 +1,42 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2019 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#ifndef MD4C_ENTITY_H
28+#define MD4C_ENTITY_H
29+
30+#include <stdlib.h>
31+
32+
33+/* Most entities are formed by a single Unicode codepoint, a few by two codepoints.
34+ * Single-codepoint entities have codepoints[1] set to zero. */
35+struct entity {
36+ const char* name; /* entity name; the string entity_lookup() matches against */
37+ unsigned codepoints[2]; /* codepoint(s) the entity stands for; [1] is 0 when there is only one */
38+};
39+
40+const struct entity* entity_lookup(const char* name, size_t name_size);
41+
42+
43+#endif /* MD4C_ENTITY_H */
A · entity.o
+0, -0
A · md4c-html.c
+573, -0 1@@ -0,0 +1,573 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2019 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#include <stdio.h>
28+#include <string.h>
29+
30+#include "md4c-html.h"
31+#include "entity.h"
32+
33+
34+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
35+ /* C89/90 or old compilers in general may not understand "inline". */
36+ #if defined __GNUC__
37+ #define inline __inline__
38+ #elif defined _MSC_VER
39+ #define inline __inline
40+ #else
41+ #define inline
42+ #endif
43+#endif
44+
45+#ifdef _WIN32
46+ #define snprintf _snprintf
47+#endif
48+
49+
50+
51+typedef struct MD_HTML_tag MD_HTML;
52+struct MD_HTML_tag {
53+ void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
54+ void* userdata;
55+ unsigned flags;
56+ int image_nesting_level;
57+ char escape_map[256];
58+};
59+
60+#define NEED_HTML_ESC_FLAG 0x1
61+#define NEED_URL_ESC_FLAG 0x2
62+
63+
64+/*****************************************
65+ *** HTML rendering helper functions ***
66+ *****************************************/
67+
68+#define ISDIGIT(ch) ('0' <= (ch) && (ch) <= '9')
69+#define ISLOWER(ch) ('a' <= (ch) && (ch) <= 'z')
70+#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
71+#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
72+
73+
74+static inline void
75+render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
76+{
77+ r->process_output(text, size, r->userdata);
78+}
79+
80+/* Keep this as a macro. Most compilers should then be smart enough to replace
81+ * the strlen() call with a compile-time constant if the string is a C literal. */
82+#define RENDER_VERBATIM(r, verbatim) \
83+ render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
84+
85+
86+static void
87+render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
88+{
89+ MD_OFFSET beg = 0;
90+ MD_OFFSET off = 0;
91+
92+ /* Some characters need to be escaped in normal HTML text. */
93+ #define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
94+
95+ while(1) {
96+ /* Optimization: Use some loop unrolling. */
97+ while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1])
98+ && !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3]))
99+ off += 4;
100+ while(off < size && !NEED_HTML_ESC(data[off]))
101+ off++;
102+
103+ if(off > beg)
104+ render_verbatim(r, data + beg, off - beg);
105+
106+ if(off < size) {
107+ switch(data[off]) {
108+ case '&': RENDER_VERBATIM(r, "&amp;"); break;
109+ case '<': RENDER_VERBATIM(r, "&lt;"); break;
110+ case '>': RENDER_VERBATIM(r, "&gt;"); break;
111+ case '"': RENDER_VERBATIM(r, "&quot;"); break;
112+ }
113+ off++;
114+ } else {
115+ break;
116+ }
117+ beg = off;
118+ }
119+}
120+
121+static void
122+render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
123+{
124+ static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
125+ MD_OFFSET beg = 0;
126+ MD_OFFSET off = 0;
127+
128+ /* Some characters need to be escaped in URL attributes. */
129+ #define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
130+
131+ while(1) {
132+ while(off < size && !NEED_URL_ESC(data[off]))
133+ off++;
134+ if(off > beg)
135+ render_verbatim(r, data + beg, off - beg);
136+
137+ if(off < size) {
138+ char hex[3];
139+
140+ switch(data[off]) {
141+ case '&': RENDER_VERBATIM(r, "&amp;"); break;
142+ default:
143+ hex[0] = '%';
144+ hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
145+ hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
146+ render_verbatim(r, hex, 3);
147+ break;
148+ }
149+ off++;
150+ } else {
151+ break;
152+ }
153+
154+ beg = off;
155+ }
156+}
157+
158+static unsigned
159+hex_val(char ch)
160+{
161+ if('0' <= ch && ch <= '9')
162+ return ch - '0';
163+ if('A' <= ch && ch <= 'Z')
164+ return ch - 'A' + 10;
165+ else
166+ return ch - 'a' + 10;
167+}
168+
169+static void
170+render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
171+ void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
172+{
173+ static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
174+
175+ unsigned char utf8[4];
176+ size_t n;
177+
178+ if(codepoint <= 0x7f) {
179+ n = 1;
180+ utf8[0] = codepoint;
181+ } else if(codepoint <= 0x7ff) {
182+ n = 2;
183+ utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
184+ utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
185+ } else if(codepoint <= 0xffff) {
186+ n = 3;
187+ utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
188+ utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
189+ utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
190+ } else {
191+ n = 4;
192+ utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
193+ utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
194+ utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
195+ utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
196+ }
197+
198+ if(0 < codepoint && codepoint <= 0x10ffff)
199+ fn_append(r, (char*)utf8, n);
200+ else
201+ fn_append(r, utf8_replacement_char, 3);
202+}
203+
204+/* Translate entity to its UTF-8 equivalent, or output the verbatim one
205+ * if such entity is unknown (or if the translation is disabled). */
206+static void
207+render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
208+ void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
209+{
210+ if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
211+ render_verbatim(r, text, size);
212+ return;
213+ }
214+
215+ /* We assume UTF-8 output is what is desired. */
216+ if(size > 3 && text[1] == '#') {
217+ unsigned codepoint = 0;
218+
219+ if(text[2] == 'x' || text[2] == 'X') {
220+ /* Hexadecimal entity (e.g. "&#x1234abcd;"). */
221+ MD_SIZE i;
222+ for(i = 3; i < size-1; i++)
223+ codepoint = 16 * codepoint + hex_val(text[i]);
224+ } else {
225+ /* Decimal entity (e.g. "&#1234;") */
226+ MD_SIZE i;
227+ for(i = 2; i < size-1; i++)
228+ codepoint = 10 * codepoint + (text[i] - '0');
229+ }
230+
231+ render_utf8_codepoint(r, codepoint, fn_append);
232+ return;
233+ } else {
234+ /* Named entity (e.g. "&nbsp;"). */
235+ const struct entity* ent;
236+
237+ ent = entity_lookup(text, size);
238+ if(ent != NULL) {
239+ render_utf8_codepoint(r, ent->codepoints[0], fn_append);
240+ if(ent->codepoints[1])
241+ render_utf8_codepoint(r, ent->codepoints[1], fn_append);
242+ return;
243+ }
244+ }
245+
246+ fn_append(r, text, size);
247+}
248+
249+static void
250+render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
251+ void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
252+{
253+ int i;
254+
255+ for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
256+ MD_TEXTTYPE type = attr->substr_types[i];
257+ MD_OFFSET off = attr->substr_offsets[i];
258+ MD_SIZE size = attr->substr_offsets[i+1] - off;
259+ const MD_CHAR* text = attr->text + off;
260+
261+ switch(type) {
262+ case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
263+ case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break;
264+ default: fn_append(r, text, size); break;
265+ }
266+ }
267+}
268+
269+
270+static void
271+render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
272+{
273+ char buf[64];
274+
275+ if(det->start == 1) {
276+ RENDER_VERBATIM(r, "<ol>\n");
277+ return;
278+ }
279+
280+ snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
281+ RENDER_VERBATIM(r, buf);
282+}
283+
284+static void
285+render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
286+{
287+ if(det->is_task) {
288+ RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
289+ "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
290+ if(det->task_mark == 'x' || det->task_mark == 'X')
291+ RENDER_VERBATIM(r, " checked");
292+ RENDER_VERBATIM(r, ">");
293+ } else {
294+ RENDER_VERBATIM(r, "<li>");
295+ }
296+}
297+
298+static void
299+render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
300+{
301+ RENDER_VERBATIM(r, "<pre><code");
302+
303+ /* If known, output the HTML 5 attribute class="language-LANGNAME". */
304+ if(det->lang.text != NULL) {
305+ RENDER_VERBATIM(r, " class=\"language-");
306+ render_attribute(r, &det->lang, render_html_escaped);
307+ RENDER_VERBATIM(r, "\"");
308+ }
309+
310+ RENDER_VERBATIM(r, ">");
311+}
312+
313+static void
314+render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
315+{
316+ RENDER_VERBATIM(r, "<");
317+ RENDER_VERBATIM(r, cell_type);
318+
319+ switch(det->align) {
320+ case MD_ALIGN_LEFT: RENDER_VERBATIM(r, " align=\"left\">"); break;
321+ case MD_ALIGN_CENTER: RENDER_VERBATIM(r, " align=\"center\">"); break;
322+ case MD_ALIGN_RIGHT: RENDER_VERBATIM(r, " align=\"right\">"); break;
323+ default: RENDER_VERBATIM(r, ">"); break;
324+ }
325+}
326+
327+static void
328+render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
329+{
330+ RENDER_VERBATIM(r, "<a href=\"");
331+ render_attribute(r, &det->href, render_url_escaped);
332+
333+ if(det->title.text != NULL) {
334+ RENDER_VERBATIM(r, "\" title=\"");
335+ render_attribute(r, &det->title, render_html_escaped);
336+ }
337+
338+ RENDER_VERBATIM(r, "\">");
339+}
340+
341+static void
342+render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
343+{
344+ RENDER_VERBATIM(r, "<img src=\"");
345+ render_attribute(r, &det->src, render_url_escaped);
346+
347+ RENDER_VERBATIM(r, "\" alt=\"");
348+
349+ r->image_nesting_level++;
350+}
351+
352+static void
353+render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
354+{
355+ if(det->title.text != NULL) {
356+ RENDER_VERBATIM(r, "\" title=\"");
357+ render_attribute(r, &det->title, render_html_escaped);
358+ }
359+
360+ RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
361+
362+ r->image_nesting_level--;
363+}
364+
365+static void
366+render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
367+{
368+ RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
369+ render_attribute(r, &det->target, render_html_escaped);
370+
371+ RENDER_VERBATIM(r, "\">");
372+}
373+
374+
375+/**************************************
376+ *** HTML renderer implementation ***
377+ **************************************/
378+
379+static int
380+enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
381+{
382+ static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
383+ MD_HTML* r = (MD_HTML*) userdata;
384+
385+ switch(type) {
386+ case MD_BLOCK_DOC: /* noop */ break;
387+ case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "<blockquote>\n"); break;
388+ case MD_BLOCK_UL: RENDER_VERBATIM(r, "<ul>\n"); break;
389+ case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
390+ case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
391+ case MD_BLOCK_HR: RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"); break;
392+ case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
393+ case MD_BLOCK_CODE: render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break;
394+ case MD_BLOCK_HTML: /* noop */ break;
395+ case MD_BLOCK_P: RENDER_VERBATIM(r, "<p>"); break;
396+ case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "<table>\n"); break;
397+ case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "<thead>\n"); break;
398+ case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "<tbody>\n"); break;
399+ case MD_BLOCK_TR: RENDER_VERBATIM(r, "<tr>\n"); break;
400+ case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break;
401+ case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break;
402+ }
403+
404+ return 0;
405+}
406+
407+static int
408+leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
409+{
410+ static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
411+ MD_HTML* r = (MD_HTML*) userdata;
412+
413+ switch(type) {
414+ case MD_BLOCK_DOC: /*noop*/ break;
415+ case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "</blockquote>\n"); break;
416+ case MD_BLOCK_UL: RENDER_VERBATIM(r, "</ul>\n"); break;
417+ case MD_BLOCK_OL: RENDER_VERBATIM(r, "</ol>\n"); break;
418+ case MD_BLOCK_LI: RENDER_VERBATIM(r, "</li>\n"); break;
419+ case MD_BLOCK_HR: /*noop*/ break;
420+ case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
421+ case MD_BLOCK_CODE: RENDER_VERBATIM(r, "</code></pre>\n"); break;
422+ case MD_BLOCK_HTML: /* noop */ break;
423+ case MD_BLOCK_P: RENDER_VERBATIM(r, "</p>\n"); break;
424+ case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "</table>\n"); break;
425+ case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "</thead>\n"); break;
426+ case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "</tbody>\n"); break;
427+ case MD_BLOCK_TR: RENDER_VERBATIM(r, "</tr>\n"); break;
428+ case MD_BLOCK_TH: RENDER_VERBATIM(r, "</th>\n"); break;
429+ case MD_BLOCK_TD: RENDER_VERBATIM(r, "</td>\n"); break;
430+ }
431+
432+ return 0;
433+}
434+
435+static int
436+enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
437+{
438+ MD_HTML* r = (MD_HTML*) userdata;
439+
440+ if(r->image_nesting_level > 0) {
441+ /* We are inside a Markdown image label. Markdown allows the use of any
442+ * emphasis and other rich content in that context, just as in
443+ * any link label.
444+ *
445+ * However, unlike in the case of links (where that content becomes
446+ * the content of the <a>...</a> tag), in the case of images the content
447+ * is supposed to fall into the attribute alt: <img alt="...">.
448+ *
449+ * In that context we naturally cannot output nested HTML tags. So let's
450+ * suppress them and only output the plain text (i.e. what falls into
451+ * the text() callback).
452+ *
453+ * This make-it-a-plain-text approach is the recommended practice by
454+ * CommonMark specification (for HTML output).
455+ */
456+ return 0;
457+ }
458+
459+ switch(type) {
460+ case MD_SPAN_EM: RENDER_VERBATIM(r, "<em>"); break;
461+ case MD_SPAN_STRONG: RENDER_VERBATIM(r, "<strong>"); break;
462+ case MD_SPAN_U: RENDER_VERBATIM(r, "<u>"); break;
463+ case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
464+ case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
465+ case MD_SPAN_CODE: RENDER_VERBATIM(r, "<code>"); break;
466+ case MD_SPAN_DEL: RENDER_VERBATIM(r, "<del>"); break;
467+ case MD_SPAN_LATEXMATH: RENDER_VERBATIM(r, "<x-equation>"); break;
468+ case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
469+ case MD_SPAN_WIKILINK: render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
470+ }
471+
472+ return 0;
473+}
474+
475+static int
476+leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
477+{
478+ MD_HTML* r = (MD_HTML*) userdata;
479+
480+ if(r->image_nesting_level > 0) {
481+ /* Ditto as in enter_span_callback(), except we have to allow the
482+ * end of the <img> tag. */
483+ if(r->image_nesting_level == 1 && type == MD_SPAN_IMG)
484+ render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
485+ return 0;
486+ }
487+
488+ switch(type) {
489+ case MD_SPAN_EM: RENDER_VERBATIM(r, "</em>"); break;
490+ case MD_SPAN_STRONG: RENDER_VERBATIM(r, "</strong>"); break;
491+ case MD_SPAN_U: RENDER_VERBATIM(r, "</u>"); break;
492+ case MD_SPAN_A: RENDER_VERBATIM(r, "</a>"); break;
493+ case MD_SPAN_IMG: /*noop, handled above*/ break;
494+ case MD_SPAN_CODE: RENDER_VERBATIM(r, "</code>"); break;
495+ case MD_SPAN_DEL: RENDER_VERBATIM(r, "</del>"); break;
496+ case MD_SPAN_LATEXMATH: /*fall through*/
497+ case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
498+ case MD_SPAN_WIKILINK: RENDER_VERBATIM(r, "</x-wikilink>"); break;
499+ }
500+
501+ return 0;
502+}
503+
504+static int
505+text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
506+{
507+ MD_HTML* r = (MD_HTML*) userdata;
508+
509+ switch(type) {
510+ case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
511+ case MD_TEXT_BR: RENDER_VERBATIM(r, (r->image_nesting_level == 0
512+ ? ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")
513+ : " "));
514+ break;
515+ case MD_TEXT_SOFTBR: RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); break;
516+ case MD_TEXT_HTML: render_verbatim(r, text, size); break;
517+ case MD_TEXT_ENTITY: render_entity(r, text, size, render_html_escaped); break;
518+ default: render_html_escaped(r, text, size); break;
519+ }
520+
521+ return 0;
522+}
523+
524+static void
525+debug_log_callback(const char* msg, void* userdata)
526+{
527+ MD_HTML* r = (MD_HTML*) userdata;
528+ if(r->flags & MD_HTML_FLAG_DEBUG)
529+ fprintf(stderr, "MD4C: %s\n", msg);
530+}
531+
532+int
533+md_html(const MD_CHAR* input, MD_SIZE input_size,
534+ void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
535+ void* userdata, unsigned parser_flags, unsigned renderer_flags)
536+{
537+ MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
538+ int i;
539+
540+ MD_PARSER parser = {
541+ 0,
542+ parser_flags,
543+ enter_block_callback,
544+ leave_block_callback,
545+ enter_span_callback,
546+ leave_span_callback,
547+ text_callback,
548+ debug_log_callback,
549+ NULL
550+ };
551+
552+ /* Build map of characters which need escaping. */
553+ for(i = 0; i < 256; i++) {
554+ unsigned char ch = (unsigned char) i;
555+
556+ if(strchr("\"&<>", ch) != NULL)
557+ render.escape_map[i] |= NEED_HTML_ESC_FLAG;
558+
559+ if(!ISALNUM(ch) && strchr("-_.+!*(),%#@?=;:/,+$", ch) == NULL)
560+ render.escape_map[i] |= NEED_URL_ESC_FLAG;
561+ }
562+
563+ /* Consider skipping UTF-8 byte order mark (BOM). */
564+ if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) {
565+ static const MD_CHAR bom[3] = { 0xef, 0xbb, 0xbf };
566+ if(input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) {
567+ input += sizeof(bom);
568+ input_size -= sizeof(bom);
569+ }
570+ }
571+
572+ return md_parse(input, input_size, &parser, (void*) &render);
573+}
574+
A · md4c-html.h
+68, -0 1@@ -0,0 +1,68 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2017 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#ifndef MD4C_HTML_H
28+#define MD4C_HTML_H
29+
30+#include "md4c.h"
31+
32+#ifdef __cplusplus
33+ extern "C" {
34+#endif
35+
36+
37+/* If set, debug output from md_parse() is sent to stderr. */
38+#define MD_HTML_FLAG_DEBUG 0x0001
39+#define MD_HTML_FLAG_VERBATIM_ENTITIES 0x0002
40+#define MD_HTML_FLAG_SKIP_UTF8_BOM 0x0004
41+#define MD_HTML_FLAG_XHTML 0x0008
42+
43+
44+/* Render Markdown into HTML.
45+ *
45+ * Note that only the contents of the <body> tag are generated. The caller must
46+ * generate the HTML header/footer manually before/after calling md_html().
48+ *
49+ * Params input and input_size specify the Markdown input.
50+ * Callback process_output() gets called with chunks of HTML output.
51+ * (Typical implementation may just output the bytes to a file or append to
52+ * some buffer).
53+ * Param userdata is just propagated back to the process_output() callback.
54+ * Param parser_flags are flags from md4c.h propagated to md_parse().
55+ * Param renderer_flags is a bitmask of MD_HTML_FLAG_xxxx.
56+ *
57+ * Returns -1 on error (if md_parse() fails.)
58+ * Returns 0 on success.
59+ */
60+int md_html(const MD_CHAR* input, MD_SIZE input_size,
61+ void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
62+ void* userdata, unsigned parser_flags, unsigned renderer_flags);
63+
64+
65+#ifdef __cplusplus
66+ } /* extern "C" { */
67+#endif
68+
69+#endif /* MD4C_HTML_H */
A · md4c-html.o
+0, -0
A · md4c.c
+6348, -0 1@@ -0,0 +1,6348 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2020 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#include "md4c.h"
28+
29+#include <limits.h>
30+#include <stdio.h>
31+#include <stdlib.h>
32+#include <string.h>
33+
34+
35+/*****************************
36+ *** Miscellaneous Stuff ***
37+ *****************************/
38+
39+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
40+ /* C89/90 or old compilers in general may not understand "inline". */
41+ #if defined __GNUC__
42+ #define inline __inline__
43+ #elif defined _MSC_VER
44+ #define inline __inline
45+ #else
46+ #define inline
47+ #endif
48+#endif
49+
50+/* Make the UTF-8 support the default. */
51+#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
52+ #define MD4C_USE_UTF8
53+#endif
54+
55+/* Magic for making wide literals with MD4C_USE_UTF16. */
56+#ifdef _T
57+ #undef _T
58+#endif
59+#if defined MD4C_USE_UTF16
60+ #define _T(x) L##x
61+#else
62+ #define _T(x) x
63+#endif
64+
65+/* Misc. macros. */
66+#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
67+
68+#define STRINGIZE_(x) #x
69+#define STRINGIZE(x) STRINGIZE_(x)
70+
71+#ifndef TRUE
72+ #define TRUE 1
73+ #define FALSE 0
74+#endif
75+
76+#define MD_LOG(msg) \
77+ do { \
78+ if(ctx->parser.debug_log != NULL) \
79+ ctx->parser.debug_log((msg), ctx->userdata); \
80+ } while(0)
81+
82+#ifdef DEBUG
83+ #define MD_ASSERT(cond) \
84+ do { \
85+ if(!(cond)) { \
86+ MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": " \
87+ "Assertion '" STRINGIZE(cond) "' failed."); \
88+ exit(1); \
89+ } \
90+ } while(0)
91+
92+ #define MD_UNREACHABLE() MD_ASSERT(1 == 0)
93+#else
94+ #ifdef __GNUC__
95+ #define MD_ASSERT(cond) do { if(!(cond)) __builtin_unreachable(); } while(0)
96+ #define MD_UNREACHABLE() do { __builtin_unreachable(); } while(0)
97+ #elif defined _MSC_VER && _MSC_VER > 120
98+ #define MD_ASSERT(cond) do { __assume(cond); } while(0)
99+ #define MD_UNREACHABLE() do { __assume(0); } while(0)
100+ #else
101+ #define MD_ASSERT(cond) do {} while(0)
102+ #define MD_UNREACHABLE() do {} while(0)
103+ #endif
104+#endif
105+
106+/* For falling through case labels in switch statements. */
107+#if defined __clang__ && __clang_major__ >= 12
108+ #define MD_FALLTHROUGH() __attribute__((fallthrough))
109+#elif defined __GNUC__ && __GNUC__ >= 7
110+ #define MD_FALLTHROUGH() __attribute__((fallthrough))
111+#else
112+ #define MD_FALLTHROUGH() ((void)0)
113+#endif
114+
115+/* Suppress "unused parameter" warnings. */
116+#define MD_UNUSED(x) ((void)x)
117+
118+
119+/************************
120+ *** Internal Types ***
121+ ************************/
122+
123+/* These are omnipresent so let's save some typing. */
124+#define CHAR MD_CHAR
125+#define SZ MD_SIZE
126+#define OFF MD_OFFSET
127+
128+typedef struct MD_MARK_tag MD_MARK;
129+typedef struct MD_BLOCK_tag MD_BLOCK;
130+typedef struct MD_CONTAINER_tag MD_CONTAINER;
131+typedef struct MD_REF_DEF_tag MD_REF_DEF;
132+
133+
134+/* During analysis of inline marks, we need to manage some "mark chains",
135+ * of (yet unresolved) openers. This structure holds start/end of the chain.
136+ * The chain internals are then realized through MD_MARK::prev and ::next.
137+ */
138+typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
139+struct MD_MARKCHAIN_tag {
140+ int head; /* Index of first mark in the chain, or -1 if empty. */
141+ int tail; /* Index of last mark in the chain, or -1 if empty. */
142+};
143+
144+/* Context propagated through all the parsing. */
145+typedef struct MD_CTX_tag MD_CTX;
146+struct MD_CTX_tag {
147+ /* Immutable stuff (parameters of md_parse()). */
148+ const CHAR* text;
149+ SZ size;
150+ MD_PARSER parser;
151+ void* userdata;
152+
153+ /* When this is true, it allows some optimizations. */
154+ int doc_ends_with_newline;
155+
156+ /* Helper temporary growing buffer. */
157+ CHAR* buffer;
158+ unsigned alloc_buffer;
159+
160+ /* Reference definitions. */
161+ MD_REF_DEF* ref_defs;
162+ int n_ref_defs;
163+ int alloc_ref_defs;
164+ void** ref_def_hashtable;
165+ int ref_def_hashtable_size;
166+
167+ /* Stack of inline/span markers.
168+ * This is only used for parsing a single block contents but by storing it
169+ * here we may reuse the stack for subsequent blocks; i.e. we have fewer
170+ * (re)allocations. */
171+ MD_MARK* marks;
172+ int n_marks;
173+ int alloc_marks;
174+
175+#if defined MD4C_USE_UTF16
176+ char mark_char_map[128];
177+#else
178+ char mark_char_map[256];
179+#endif
180+
181+ /* For resolving of inline spans. */
182+ MD_MARKCHAIN mark_chains[13];
183+#define PTR_CHAIN (ctx->mark_chains[0])
184+#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
185+#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
186+#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
187+#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
188+#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
189+#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
190+#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
191+#define UNDERSCORE_OPENERS (ctx->mark_chains[8])
192+#define TILDE_OPENERS_1 (ctx->mark_chains[9])
193+#define TILDE_OPENERS_2 (ctx->mark_chains[10])
194+#define BRACKET_OPENERS (ctx->mark_chains[11])
195+#define DOLLAR_OPENERS (ctx->mark_chains[12])
196+#define OPENERS_CHAIN_FIRST 2
197+#define OPENERS_CHAIN_LAST 12
198+
199+ int n_table_cell_boundaries;
200+
201+ /* For resolving links. */
202+ int unresolved_link_head;
203+ int unresolved_link_tail;
204+
205+ /* For resolving raw HTML. */
206+ OFF html_comment_horizon;
207+ OFF html_proc_instr_horizon;
208+ OFF html_decl_horizon;
209+ OFF html_cdata_horizon;
210+
211+ /* For block analysis.
212+ * Notes:
213+ * -- It holds MD_BLOCK as well as MD_LINE structures. After each
214+ * MD_BLOCK, its (multiple) MD_LINE(s) follow.
215+ * -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
216+ * instead of MD_LINE(s).
217+ */
218+ void* block_bytes;
219+ MD_BLOCK* current_block;
220+ int n_block_bytes;
221+ int alloc_block_bytes;
222+
223+ /* For container block analysis. */
224+ MD_CONTAINER* containers;
225+ int n_containers;
226+ int alloc_containers;
227+
228+ /* Minimal indentation to call the block "indented code block". */
229+ unsigned code_indent_offset;
230+
231+ /* Contextual info for line analysis. */
232+ SZ code_fence_length; /* For checking closing fence length. */
233+ int html_block_type; /* For checking closing raw HTML condition. */
234+ int last_line_has_list_loosening_effect;
235+ int last_list_item_starts_with_two_blank_lines;
236+};
237+
238+enum MD_LINETYPE_tag {
239+ MD_LINE_BLANK,
240+ MD_LINE_HR,
241+ MD_LINE_ATXHEADER,
242+ MD_LINE_SETEXTHEADER,
243+ MD_LINE_SETEXTUNDERLINE,
244+ MD_LINE_INDENTEDCODE,
245+ MD_LINE_FENCEDCODE,
246+ MD_LINE_HTML,
247+ MD_LINE_TEXT,
248+ MD_LINE_TABLE,
249+ MD_LINE_TABLEUNDERLINE
250+};
251+typedef enum MD_LINETYPE_tag MD_LINETYPE;
252+
253+typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
254+struct MD_LINE_ANALYSIS_tag {
255+ MD_LINETYPE type : 16;
256+ unsigned data : 16;
257+ OFF beg;
258+ OFF end;
259+ unsigned indent; /* Indentation level. */
260+};
261+
262+typedef struct MD_LINE_tag MD_LINE;
263+struct MD_LINE_tag {
264+ OFF beg;
265+ OFF end;
266+};
267+
268+typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
269+struct MD_VERBATIMLINE_tag {
270+ OFF beg;
271+ OFF end;
272+ OFF indent;
273+};
274+
275+
276+/*****************
277+ *** Helpers ***
278+ *****************/
279+
280+/* Character accessors. */
281+#define CH(off) (ctx->text[(off)])
282+#define STR(off) (ctx->text + (off))
283+
284+/* Character classification.
285+ * Note we assume ASCII compatibility of code points < 128 here. */
286+#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
287+#define ISANYOF_(ch, palette) ((ch) != _T('\0') && md_strchr((palette), (ch)) != NULL)
288+#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2))
289+#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
290+#define ISASCII_(ch) ((unsigned)(ch) <= 127)
291+#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t')))
292+#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n')))
293+#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
294+#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
295+#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
296+#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z')))
297+#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z')))
298+#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch))
299+#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9')))
300+#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
301+#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch))
302+
303+#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette))
304+#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2))
305+#define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
306+#define ISASCII(off) ISASCII_(CH(off))
307+#define ISBLANK(off) ISBLANK_(CH(off))
308+#define ISNEWLINE(off) ISNEWLINE_(CH(off))
309+#define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
310+#define ISCNTRL(off) ISCNTRL_(CH(off))
311+#define ISPUNCT(off) ISPUNCT_(CH(off))
312+#define ISUPPER(off) ISUPPER_(CH(off))
313+#define ISLOWER(off) ISLOWER_(CH(off))
314+#define ISALPHA(off) ISALPHA_(CH(off))
315+#define ISDIGIT(off) ISDIGIT_(CH(off))
316+#define ISXDIGIT(off) ISXDIGIT_(CH(off))
317+#define ISALNUM(off) ISALNUM_(CH(off))
318+
319+
320+#if defined MD4C_USE_UTF16
321+ #define md_strchr wcschr
322+#else
323+ #define md_strchr strchr
324+#endif
325+
326+
327+/* Case insensitive check of string equality. */
328+static inline int
329+md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
330+{
331+ OFF i;
332+ for(i = 0; i < n; i++) {
333+ CHAR ch1 = s1[i];
334+ CHAR ch2 = s2[i];
335+
336+ if(ISLOWER_(ch1))
337+ ch1 += ('A'-'a');
338+ if(ISLOWER_(ch2))
339+ ch2 += ('A'-'a');
340+ if(ch1 != ch2)
341+ return FALSE;
342+ }
343+ return TRUE;
344+}
345+/* Exact (case-sensitive) comparison of the first n characters of s1 and s2; returns non-zero when they are identical. */
346+static inline int
347+md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
348+{
349+ return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
350+}
351+/* Feed str[0..size) to the text() callback, reporting every NUL character as a separate MD_TEXT_NULLCHAR event instead of passing it through. Returns 0 on success or the first non-zero callback result. */
352+static int
353+md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
354+{
355+    OFF off = 0;
356+    int ret = 0;
357+
358+    while(1) {
359+        while(off < size && str[off] != _T('\0'))
360+            off++;
361+
362+        if(off > 0) {   /* Flush the run of ordinary characters found so far. */
363+            ret = ctx->parser.text(type, str, off, ctx->userdata);
364+            if(ret != 0)
365+                return ret;
366+            str += off;
367+            size -= off;
368+            off = 0;
369+        }
370+
371+        if(off >= size)
372+            return 0;
373+        /* str[0] is a NUL here: report it, then step over it so that it is not emitted again as part of the next text run. */
374+        ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
375+        if(ret != 0)
376+            return ret;
377+        str++;
378+        size--;
379+    }
380+}
381+
382+
383+#define MD_CHECK(func) \
384+ do { \
385+ ret = (func); \
386+ if(ret < 0) \
387+ goto abort; \
388+ } while(0)
389+
390+/* Make sure ctx->buffer has room for at least sz; on realloc() failure set ret = -1 and jump to the caller's abort label. sz is evaluated more than once. NOTE(review): new_size is handed to realloc() as a byte count, so sz is presumably in bytes -- confirm at the call sites. */
391+#define MD_TEMP_BUFFER(sz) \
392+    do { \
393+        if((sz) > ctx->alloc_buffer) { \
394+            CHAR* new_buffer; \
395+            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127; \
396+            /* ^ 50% slack plus 128, then truncated to a multiple of 128. */ \
397+            new_buffer = realloc(ctx->buffer, new_size); \
398+            if(new_buffer == NULL) { \
399+                MD_LOG("realloc() failed."); \
400+                ret = -1; \
401+                goto abort; \
402+            } \
403+            /* Only replace the old pointer once realloc() has succeeded. */ \
404+            ctx->buffer = new_buffer; \
405+            ctx->alloc_buffer = new_size; \
406+        } \
407+    } while(0)
408+
409+
410+#define MD_ENTER_BLOCK(type, arg) \
411+ do { \
412+ ret = ctx->parser.enter_block((type), (arg), ctx->userdata); \
413+ if(ret != 0) { \
414+ MD_LOG("Aborted from enter_block() callback."); \
415+ goto abort; \
416+ } \
417+ } while(0)
418+
419+#define MD_LEAVE_BLOCK(type, arg) \
420+ do { \
421+ ret = ctx->parser.leave_block((type), (arg), ctx->userdata); \
422+ if(ret != 0) { \
423+ MD_LOG("Aborted from leave_block() callback."); \
424+ goto abort; \
425+ } \
426+ } while(0)
427+
428+#define MD_ENTER_SPAN(type, arg) \
429+ do { \
430+ ret = ctx->parser.enter_span((type), (arg), ctx->userdata); \
431+ if(ret != 0) { \
432+ MD_LOG("Aborted from enter_span() callback."); \
433+ goto abort; \
434+ } \
435+ } while(0)
436+
437+#define MD_LEAVE_SPAN(type, arg) \
438+ do { \
439+ ret = ctx->parser.leave_span((type), (arg), ctx->userdata); \
440+ if(ret != 0) { \
441+ MD_LOG("Aborted from leave_span() callback."); \
442+ goto abort; \
443+ } \
444+ } while(0)
445+/* Pass non-empty text to the text() callback; a non-zero result is stored in ret, logged, and control jumps to the caller's abort label. size is evaluated twice. */
446+#define MD_TEXT(type, str, size) \
447+    do { \
448+        if((size) > 0) { \
449+            ret = ctx->parser.text((type), (str), (size), ctx->userdata); \
450+            if(ret != 0) { \
451+                MD_LOG("Aborted from text() callback."); \
452+                goto abort; \
453+            } \
454+        } \
455+    } while(0)
456+/* Like MD_TEXT, but routes the text through md_text_with_null_replacement() so that NUL characters are reported as MD_TEXT_NULLCHAR. size is evaluated twice. */
457+#define MD_TEXT_INSECURE(type, str, size) \
458+    do { \
459+        if((size) > 0) { \
460+            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
461+            if(ret != 0) { \
462+                MD_LOG("Aborted from text() callback."); \
463+                goto abort; \
464+            } \
465+        } \
466+    } while(0)
467+
468+
469+
470+/*************************
471+ *** Unicode Support ***
472+ *************************/
473+
474+typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
475+struct MD_UNICODE_FOLD_INFO_tag {
476+ unsigned codepoints[3];
477+ unsigned n_codepoints;
478+};
479+
480+
481+#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
482+ /* Binary search over sorted "map" of codepoints. Consecutive sequences
483+ * of codepoints may be encoded in the map by just using the
484+ * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
485+ *
486+ * Returns index of the found record in the map (in the case of ranges,
487+ * the minimal value is used); or -1 on failure. */
488+ static int
489+ md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
490+ {
491+ int beg, end;
492+ int pivot_beg, pivot_end;
493+
494+ beg = 0;
495+ end = (int) map_size-1;
496+ while(beg <= end) {
497+ /* Pivot may be a range, not just a single value. */
498+ pivot_beg = pivot_end = (beg + end) / 2;
499+ if(map[pivot_end] & 0x40000000)
500+ pivot_end++;
501+ if(map[pivot_beg] & 0x80000000)
502+ pivot_beg--;
503+
504+ if(codepoint < (map[pivot_beg] & 0x00ffffff))
505+ end = pivot_beg - 1;
506+ else if(codepoint > (map[pivot_end] & 0x00ffffff))
507+ beg = pivot_end + 1;
508+ else
509+ return pivot_beg;
510+ }
511+
512+ return -1;
513+ }
514+
515+ static int
516+ md_is_unicode_whitespace__(unsigned codepoint)
517+ {
518+#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
519+#define S(cp) (cp)
520+ /* Unicode "Zs" category.
521+ * (generated by scripts/build_whitespace_map.py) */
522+ static const unsigned WHITESPACE_MAP[] = {
523+ S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
524+ };
525+#undef R
526+#undef S
527+
528+ /* The ASCII ones are the most frequently used ones, also CommonMark
529+ * specification requests few more in this range. */
530+ if(codepoint <= 0x7f)
531+ return ISWHITESPACE_(codepoint);
532+
533+ return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);
534+ }
535+
536+ static int
537+ md_is_unicode_punct__(unsigned codepoint)
538+ {
539+#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
540+#define S(cp) (cp)
541+ /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
542+ * (generated by scripts/build_punct_map.py) */
543+ static const unsigned PUNCT_MAP[] = {
544+ R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
545+ R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
546+ S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
547+ S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
548+ R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
549+ R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
550+ R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
551+ R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
552+ R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
553+ R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
554+ R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
555+ R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
556+ R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
557+ R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
558+ R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
559+ S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
560+ R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
561+ S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
562+ S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
563+ R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
564+ R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
565+ S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
566+ R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
567+ R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
568+ R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
569+ R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
570+ R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
571+ R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
572+ S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
573+ R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
574+ };
575+#undef R
576+#undef S
577+
578+ /* The ASCII ones are the most frequently used ones, also CommonMark
579+ * specification requests few more in this range. */
580+ if(codepoint <= 0x7f)
581+ return ISPUNCT_(codepoint);
582+
583+ return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
584+ }
585+
586+ static void
587+ md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
588+ {
589+#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
590+#define S(cp) (cp)
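591+ /* Unicode case folding data: FOLD_MAP_n lists the codepoints whose folded form is n codepoints long; FOLD_MAP_n_DATA holds the folded forms, n entries per map entry.
592+ * (generated by a script; the name "scripts/build_punct_map.py" given here looks copied from md_is_unicode_punct__() -- verify the actual generator) */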
593+ static const unsigned FOLD_MAP_1[] = {
594+ R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
595+ R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
596+ S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
597+ S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
598+ R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
599+ S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
600+ S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
601+ R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
602+ S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
603+ S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
604+ S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
605+ S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
606+ R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
607+ R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
608+ S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
609+ R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
610+ R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
611+ R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
612+ S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
613+ S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
614+ R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
615+ S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
616+ S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
617+ S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
618+ R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
619+ S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
620+ R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
621+ R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
622+ };
623+ static const unsigned FOLD_MAP_1_DATA[] = {
624+ 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
625+ 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
626+ 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
627+ 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
628+ 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
629+ 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
630+ 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
631+ 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
632+ 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
633+ 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
634+ 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
635+ 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
636+ 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
637+ 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
638+ 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
639+ 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
640+ 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
641+ 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
642+ 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
643+ 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
644+ 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
645+ 0x1e943
646+ };
647+ static const unsigned FOLD_MAP_2[] = {
648+ S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
649+ S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
650+ R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
651+ S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
652+ S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
653+ S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
654+ };
655+ static const unsigned FOLD_MAP_2_DATA[] = {
656+ 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
657+ 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
658+ 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
659+ 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
660+ 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
661+ 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
662+ 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
663+ 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
664+ };
665+ static const unsigned FOLD_MAP_3[] = {
666+ S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
667+ S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
668+ };
669+ static const unsigned FOLD_MAP_3_DATA[] = {
670+ 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
671+ 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
672+ 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
673+ 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
674+ };
675+#undef R
676+#undef S
677+ static const struct {
678+ const unsigned* map;
679+ const unsigned* data;
680+ size_t map_size;
681+ unsigned n_codepoints;
682+ } FOLD_MAP_LIST[] = {
683+ { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
684+ { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
685+ { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
686+ };
687+
688+ int i;
689+
690+ /* Fast path for ASCII characters. */
691+ if(codepoint <= 0x7f) {
692+ info->codepoints[0] = codepoint;
693+ if(ISUPPER_(codepoint))
694+ info->codepoints[0] += 'a' - 'A';
695+ info->n_codepoints = 1;
696+ return;
697+ }
698+
699+ /* Try to locate the codepoint in any of the maps. */
700+ for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
701+ int index;
702+
703+ index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
704+ if(index >= 0) {
705+ /* Found the mapping. */
706+ unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
707+ const unsigned* map = FOLD_MAP_LIST[i].map;
708+ const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
709+
710+ memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
711+ info->n_codepoints = n_codepoints;
712+
713+ if(FOLD_MAP_LIST[i].map[index] != codepoint) {
714+ /* The found mapping maps whole range of codepoints,
715+ * i.e. we have to offset info->codepoints[0] accordingly. */
716+ if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
717+ /* Alternating type of the range. */
718+ info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
719+ } else {
720+ /* Range to range kind of mapping. */
721+ info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
722+ }
723+ }
724+
725+ return;
726+ }
727+ }
728+
729+ /* No mapping found. Map the codepoint to itself. */
730+ info->codepoints[0] = codepoint;
731+ info->n_codepoints = 1;
732+ }
733+#endif
734+
735+
736+#if defined MD4C_USE_UTF16
737+ #define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800)
738+ #define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00)
739+ #define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
740+
741+ static unsigned
742+ md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
743+ {
744+ if(IS_UTF16_SURROGATE_HI(str[0])) {
745+ if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
746+ if(p_size != NULL)
747+ *p_size = 2;
748+ return UTF16_DECODE_SURROGATE(str[0], str[1]);
749+ }
750+ }
751+
752+ if(p_size != NULL)
753+ *p_size = 1;
754+ return str[0];
755+ }
756+    /* Decode the codepoint that ends just before offset off: a surrogate pair at off-2..off-1 if there is one, otherwise the single unit at off-1. Reads CH(off-1), so off must be > 0. */
757+    static unsigned
758+    md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
759+    {
760+        if(off > 1 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
761+            return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
762+        /* Not preceded by a surrogate pair: the previous character is the unit at off-1 (not the one at off). */
763+        return CH(off-1);
764+    }
765+
766+ /* No whitespace uses surrogates, so no decoding needed here. */
767+ #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
768+ #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off))
769+ #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1))
770+
771+ #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
772+ #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
773+    /* Decode the codepoint starting at str[off]; the number of units it occupies is stored via p_char_size (ignored when NULL). Returns unsigned, matching md_decode_utf16le__() and the other md_decode_unicode() variants. */
774+    static inline unsigned
775+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
776+    {
777+        return md_decode_utf16le__(str+off, str_size-off, p_char_size);
778+    }
779+#elif defined MD4C_USE_UTF8
780+ #define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f)
781+ #define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0)
782+ #define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0)
783+ #define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0)
784+ #define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80)
785+
786+ static unsigned
787+ md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
788+ {
789+ if(!IS_UTF8_LEAD1(str[0])) {
790+ if(IS_UTF8_LEAD2(str[0])) {
791+ if(1 < str_size && IS_UTF8_TAIL(str[1])) {
792+ if(p_size != NULL)
793+ *p_size = 2;
794+
795+ return (((unsigned int)str[0] & 0x1f) << 6) |
796+ (((unsigned int)str[1] & 0x3f) << 0);
797+ }
798+ } else if(IS_UTF8_LEAD3(str[0])) {
799+ if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
800+ if(p_size != NULL)
801+ *p_size = 3;
802+
803+ return (((unsigned int)str[0] & 0x0f) << 12) |
804+ (((unsigned int)str[1] & 0x3f) << 6) |
805+ (((unsigned int)str[2] & 0x3f) << 0);
806+ }
807+ } else if(IS_UTF8_LEAD4(str[0])) {
808+ if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
809+ if(p_size != NULL)
810+ *p_size = 4;
811+
812+ return (((unsigned int)str[0] & 0x07) << 18) |
813+ (((unsigned int)str[1] & 0x3f) << 12) |
814+ (((unsigned int)str[2] & 0x3f) << 6) |
815+ (((unsigned int)str[3] & 0x3f) << 0);
816+ }
817+ }
818+ }
819+
820+ if(p_size != NULL)
821+ *p_size = 1;
822+ return (unsigned) str[0];
823+ }
824+ /* Decode the codepoint whose UTF-8 encoding ends just before offset off. NOTE(review): CH(off-1) is read unconditionally, so callers presumably guarantee off > 0 -- confirm. */
825+ static unsigned
826+ md_decode_utf8_before__(MD_CTX* ctx, OFF off)
827+ {
828+ if(!IS_UTF8_LEAD1(CH(off-1))) {
829+ if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
830+ return (((unsigned int)CH(off-2) & 0x1f) << 6) |
831+ (((unsigned int)CH(off-1) & 0x3f) << 0);
832+
833+ if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
834+ return (((unsigned int)CH(off-3) & 0x0f) << 12) |
835+ (((unsigned int)CH(off-2) & 0x3f) << 6) |
836+ (((unsigned int)CH(off-1) & 0x3f) << 0);
837+
838+ if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
839+ return (((unsigned int)CH(off-4) & 0x07) << 18) |
840+ (((unsigned int)CH(off-3) & 0x3f) << 12) |
841+ (((unsigned int)CH(off-2) & 0x3f) << 6) |
842+ (((unsigned int)CH(off-1) & 0x3f) << 0);
843+ }
844+ /* Single-byte character, or no 2- to 4-byte sequence matched above: return the previous unit as is. */
845+ return (unsigned) CH(off-1);
846+ }
847+
848+ #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
849+ #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
850+ #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
851+
852+ #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
853+ #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
854+
855+ static inline unsigned
856+ md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
857+ {
858+ return md_decode_utf8__(str+off, str_size-off, p_char_size);
859+ }
860+#else
861+ #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
862+ #define ISUNICODEWHITESPACE(off) ISWHITESPACE(off)
863+ #define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1)
864+
865+ #define ISUNICODEPUNCT(off) ISPUNCT(off)
866+ #define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1)
867+
868+ static inline void
869+ md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
870+ {
871+ info->codepoints[0] = codepoint;
872+ if(ISUPPER_(codepoint))
873+ info->codepoints[0] += 'a' - 'A';
874+ info->n_codepoints = 1;
875+ }
876+
877+ static inline unsigned
878+ md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
879+ {
880+ *p_size = 1;
881+ return (unsigned) str[off];
882+ }
883+#endif
884+
885+
886+/*************************************
887+ *** Helper string manipulations ***
888+ *************************************/
889+
890+/* Fill buffer with copy of the string between 'beg' and 'end' but replace any
891+ * line breaks with given replacement character.
892+ *
893+ * NOTE: The caller is responsible for making sure the buffer is large enough.
894+ * (Given the output is never longer than the input, (end - beg) is a good
895+ * estimate of what the caller should allocate.)
896+ */
897+static void
898+md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
899+ CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
900+{
901+ CHAR* ptr = buffer;
902+ int line_index = 0;
903+ OFF off = beg;
904+
905+ MD_UNUSED(n_lines);
906+
907+ while(1) {
908+ const MD_LINE* line = &lines[line_index];
909+ OFF line_end = line->end;
910+ if(end < line_end)
911+ line_end = end;
912+
913+ while(off < line_end) {
914+ *ptr = CH(off);
915+ ptr++;
916+ off++;
917+ }
918+
919+ if(off >= end) {
920+ *p_size = ptr - buffer;
921+ return;
922+ }
923+
924+ *ptr = line_break_replacement_char;
925+ ptr++;
926+
927+ line_index++;
928+ off = lines[line_index].beg;
929+ }
930+}
931+
932+/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
933+ */
934+static int
935+md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
936+ CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
937+{
938+ CHAR* buffer;
939+
940+ buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
941+ if(buffer == NULL) {
942+ MD_LOG("malloc() failed.");
943+ return -1;
944+ }
945+
946+ md_merge_lines(ctx, beg, end, lines, n_lines,
947+ line_break_replacement_char, buffer, p_size);
948+
949+ *p_str = buffer;
950+ return 0;
951+}
952+/* Starting at off, skip Unicode whitespace and newline characters in label[0..size); returns the offset of the first character that is neither, or an offset >= size if none is left. */
953+static OFF
954+md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
955+{
956+ SZ char_size;
957+ unsigned codepoint;
958+
959+ while(off < size) {
960+ codepoint = md_decode_unicode(label, off, size, &char_size);
961+ if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
962+ break;
963+ off += char_size;
964+ }
965+
966+ return off;
967+}
968+
969+
970+/******************************
971+ *** Recognizing raw HTML ***
972+ ******************************/
973+
974+/* md_is_html_tag() may be called when processing inlines (inline raw HTML)
975+ * or when breaking document to blocks (checking for start of HTML block type 7).
976+ *
977+ * When breaking document to blocks, we do not yet know line boundaries, but
978+ * in that case the whole tag has to live on a single line. We distinguish this
979+ * by n_lines == 0.
980+ */
981+static int
982+md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
983+{
984+ int attr_state;
985+ OFF off = beg;
986+ OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
987+ int i = 0;
988+
989+ MD_ASSERT(CH(beg) == _T('<'));
990+
991+ if(off + 1 >= line_end)
992+ return FALSE;
993+ off++;
994+
995+ /* For parsing attributes, we need a little state automaton below.
996+ * State -1: no attributes are allowed.
997+ * State 0: attribute could follow after some whitespace.
998+ * State 1: after a whitespace (attribute name may follow).
999+ * State 2: after attribute name ('=' MAY follow).
1000+ * State 3: after '=' (value specification MUST follow).
1001+ * State 41: in middle of unquoted attribute value.
1002+ * State 42: in middle of single-quoted attribute value.
1003+ * State 43: in middle of double-quoted attribute value.
1004+ */
1005+ attr_state = 0;
1006+
1007+ if(CH(off) == _T('/')) {
1008+ /* Closer tag "</ ... >". No attributes may be present. */
1009+ attr_state = -1;
1010+ off++;
1011+ }
1012+
1013+ /* Tag name */
1014+ if(off >= line_end || !ISALPHA(off))
1015+ return FALSE;
1016+ off++;
1017+ while(off < line_end && (ISALNUM(off) || CH(off) == _T('-')))
1018+ off++;
1019+
1020+ /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
1021+ * and final '>'. */
1022+ while(1) {
1023+ while(off < line_end && !ISNEWLINE(off)) {
1024+ if(attr_state > 40) {
1025+ if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
1026+ attr_state = 0;
1027+ off--; /* Put the char back for re-inspection in the new state. */
1028+ } else if(attr_state == 42 && CH(off) == _T('\'')) {
1029+ attr_state = 0;
1030+ } else if(attr_state == 43 && CH(off) == _T('"')) {
1031+ attr_state = 0;
1032+ }
1033+ off++;
1034+ } else if(ISWHITESPACE(off)) {
1035+ if(attr_state == 0)
1036+ attr_state = 1;
1037+ off++;
1038+ } else if(attr_state <= 2 && CH(off) == _T('>')) {
1039+ /* End. */
1040+ goto done;
1041+ } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
1042+ /* End with digraph '/>' */
1043+ off++;
1044+ goto done;
1045+ } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
1046+ off++;
1047+ /* Attribute name */
1048+ while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
1049+ off++;
1050+ attr_state = 2;
1051+ } else if(attr_state == 2 && CH(off) == _T('=')) {
1052+ /* Attribute assignment sign */
1053+ off++;
1054+ attr_state = 3;
1055+ } else if(attr_state == 3) {
1056+ /* Expecting start of attribute value. */
1057+ if(CH(off) == _T('"'))
1058+ attr_state = 43;
1059+ else if(CH(off) == _T('\''))
1060+ attr_state = 42;
1061+ else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
1062+ attr_state = 41;
1063+ else
1064+ return FALSE;
1065+ off++;
1066+ } else {
1067+ /* Anything unexpected. */
1068+ return FALSE;
1069+ }
1070+ }
1071+
1072+ /* We have to be on a single line. See definition of start condition
1073+ * of HTML block, type 7. */
1074+ if(n_lines == 0)
1075+ return FALSE;
1076+
1077+ i++;
1078+ if(i >= n_lines)
1079+ return FALSE;
1080+
1081+ off = lines[i].beg;
1082+ line_end = lines[i].end;
1083+
1084+ if(attr_state == 0 || attr_state == 41)
1085+ attr_state = 1;
1086+
1087+ if(off >= max_end)
1088+ return FALSE;
1089+ }
1090+
1091+done:
1092+ if(off >= max_end)
1093+ return FALSE;
1094+
1095+ *p_end = off+1;
1096+ return TRUE;
1097+}
1098+/* Search the given lines, from offset beg and not beyond max_end, for the literal str of length len. On success store the offset just past the match in *p_end and return TRUE. On failure record in *p_scan_horizon how far the scan got and return FALSE. */
1099+static int
1100+md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1101+ const MD_LINE* lines, int n_lines,
1102+ OFF beg, OFF max_end, OFF* p_end,
1103+ OFF* p_scan_horizon)
1104+{
1105+ OFF off = beg;
1106+ int i = 0;
1107+
1108+ if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) {
1109+ /* We have already scanned the range up to the max_end so we know
1110+ * there is nothing to see. */
1111+ return FALSE;
1112+ }
1113+ /* Scan line by line; a match has to lie entirely within one line and end at or before max_end. */
1114+ while(TRUE) {
1115+ while(off + len <= lines[i].end && off + len <= max_end) {
1116+ if(md_ascii_eq(STR(off), str, len)) {
1117+ /* Success. */
1118+ *p_end = off + len;
1119+ return TRUE;
1120+ }
1121+ off++;
1122+ }
1123+ /* Nothing on this line: continue on the next one unless max_end or the last line has been reached. */
1124+ i++;
1125+ if(off >= max_end || i >= n_lines) {
1126+ /* Failure. */
1127+ *p_scan_horizon = off;
1128+ return FALSE;
1129+ }
1130+
1131+ off = lines[i].beg;
1132+ }
1133+}
1134+
1135+static int
1136+md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1137+{
1138+ OFF off = beg;
1139+
1140+ MD_ASSERT(CH(beg) == _T('<'));
1141+
1142+ if(off + 4 >= lines[0].end)
1143+ return FALSE;
1144+ if(CH(off+1) != _T('!') || CH(off+2) != _T('-') || CH(off+3) != _T('-'))
1145+ return FALSE;
1146+ off += 4;
1147+
1148+ /* ">" and "->" must not follow the opening. */
1149+ if(off < lines[0].end && CH(off) == _T('>'))
1150+ return FALSE;
1151+ if(off+1 < lines[0].end && CH(off) == _T('-') && CH(off+1) == _T('>'))
1152+ return FALSE;
1153+
1154+ /* HTML comment must not contain "--", so we scan just for "--" instead
1155+ * of "-->" and verify manually that '>' follows. */
1156+ if(md_scan_for_html_closer(ctx, _T("--"), 2,
1157+ lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
1158+ {
1159+ if(*p_end < max_end && CH(*p_end) == _T('>')) {
1160+ *p_end = *p_end + 1;
1161+ return TRUE;
1162+ }
1163+ }
1164+
1165+ return FALSE;
1166+}
1167+
1168+static int
1169+md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1170+{
1171+ OFF off = beg;
1172+
1173+ if(off + 2 >= lines[0].end)
1174+ return FALSE;
1175+ if(CH(off+1) != _T('?'))
1176+ return FALSE;
1177+ off += 2;
1178+
1179+ return md_scan_for_html_closer(ctx, _T("?>"), 2,
1180+ lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1181+}
1182+
1183+static int
1184+md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1185+{
1186+ OFF off = beg;
1187+
1188+ if(off + 2 >= lines[0].end)
1189+ return FALSE;
1190+ if(CH(off+1) != _T('!'))
1191+ return FALSE;
1192+ off += 2;
1193+
1194+ /* Declaration name. */
1195+ if(off >= lines[0].end || !ISALPHA(off))
1196+ return FALSE;
1197+ off++;
1198+ while(off < lines[0].end && ISALPHA(off))
1199+ off++;
1200+ if(off < lines[0].end && !ISWHITESPACE(off))
1201+ return FALSE;
1202+
1203+ return md_scan_for_html_closer(ctx, _T(">"), 1,
1204+ lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1205+}
1206+
1207+static int
1208+md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1209+{
1210+ static const CHAR open_str[] = _T("<![CDATA[");
1211+ static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1212+
1213+ OFF off = beg;
1214+
1215+ if(off + open_size >= lines[0].end)
1216+ return FALSE;
1217+ if(memcmp(STR(off), open_str, open_size) != 0)
1218+ return FALSE;
1219+ off += open_size;
1220+
1221+ if(lines[n_lines-1].end < max_end)
1222+ max_end = lines[n_lines-1].end - 2;
1223+
1224+ return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1225+ lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1226+}
1227+
1228+static int
1229+md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1230+{
1231+ MD_ASSERT(CH(beg) == _T('<'));
1232+ return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) ||
1233+ md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) ||
1234+ md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) ||
1235+ md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) ||
1236+ md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1237+}
1238+
1239+
1240+/****************************
1241+ *** Recognizing Entity ***
1242+ ****************************/
1243+
1244+static int
1245+md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1246+{
1247+ OFF off = beg;
1248+ MD_UNUSED(ctx);
1249+
1250+ while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8)
1251+ off++;
1252+
1253+ if(1 <= off - beg && off - beg <= 6) {
1254+ *p_end = off;
1255+ return TRUE;
1256+ } else {
1257+ return FALSE;
1258+ }
1259+}
1260+
1261+static int
1262+md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1263+{
1264+ OFF off = beg;
1265+ MD_UNUSED(ctx);
1266+
1267+ while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8)
1268+ off++;
1269+
1270+ if(1 <= off - beg && off - beg <= 7) {
1271+ *p_end = off;
1272+ return TRUE;
1273+ } else {
1274+ return FALSE;
1275+ }
1276+}
1277+
1278+static int
1279+md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1280+{
1281+ OFF off = beg;
1282+ MD_UNUSED(ctx);
1283+
1284+ if(off < max_end && ISALPHA_(text[off]))
1285+ off++;
1286+ else
1287+ return FALSE;
1288+
1289+ while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48)
1290+ off++;
1291+
1292+ if(2 <= off - beg && off - beg <= 48) {
1293+ *p_end = off;
1294+ return TRUE;
1295+ } else {
1296+ return FALSE;
1297+ }
1298+}
1299+
1300+static int
1301+md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1302+{
1303+ int is_contents;
1304+ OFF off = beg;
1305+
1306+ MD_ASSERT(text[off] == _T('&'));
1307+ off++;
1308+
1309+ if(off+2 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X')))
1310+ is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1311+ else if(off+1 < max_end && text[off] == _T('#'))
1312+ is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1313+ else
1314+ is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1315+
1316+ if(is_contents && off < max_end && text[off] == _T(';')) {
1317+ *p_end = off+1;
1318+ return TRUE;
1319+ } else {
1320+ return FALSE;
1321+ }
1322+}
1323+
1324+static inline int
1325+md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1326+{
1327+ return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1328+}
1329+
1330+
1331+/******************************
1332+ *** Attribute Management ***
1333+ ******************************/
1334+
1335+typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
1336+struct MD_ATTRIBUTE_BUILD_tag {
1337+ CHAR* text;
1338+ MD_TEXTTYPE* substr_types;
1339+ OFF* substr_offsets;
1340+ int substr_count;
1341+ int substr_alloc;
1342+ MD_TEXTTYPE trivial_types[1];
1343+ OFF trivial_offsets[2];
1344+};
1345+
1346+
1347+#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1348+
1349+static int
1350+md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1351+ MD_TEXTTYPE type, OFF off)
1352+{
1353+ if(build->substr_count >= build->substr_alloc) {
1354+ MD_TEXTTYPE* new_substr_types;
1355+ OFF* new_substr_offsets;
1356+
1357+ build->substr_alloc = (build->substr_alloc > 0
1358+ ? build->substr_alloc + build->substr_alloc / 2
1359+ : 8);
1360+ new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1361+ build->substr_alloc * sizeof(MD_TEXTTYPE));
1362+ if(new_substr_types == NULL) {
1363+ MD_LOG("realloc() failed.");
1364+ return -1;
1365+ }
1366+ /* Note +1 to reserve space for final offset (== raw_size). */
1367+ new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1368+ (build->substr_alloc+1) * sizeof(OFF));
1369+ if(new_substr_offsets == NULL) {
1370+ MD_LOG("realloc() failed.");
1371+ free(new_substr_types);
1372+ return -1;
1373+ }
1374+
1375+ build->substr_types = new_substr_types;
1376+ build->substr_offsets = new_substr_offsets;
1377+ }
1378+
1379+ build->substr_types[build->substr_count] = type;
1380+ build->substr_offsets[build->substr_count] = off;
1381+ build->substr_count++;
1382+ return 0;
1383+}
1384+
1385+static void
1386+md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1387+{
1388+ MD_UNUSED(ctx);
1389+
1390+ if(build->substr_alloc > 0) {
1391+ free(build->text);
1392+ free(build->substr_types);
1393+ free(build->substr_offsets);
1394+ }
1395+}
1396+
1397+static int
1398+md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1399+ unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1400+{
1401+ OFF raw_off, off;
1402+ int is_trivial;
1403+ int ret = 0;
1404+
1405+ memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
1406+
1407+ /* If there is no backslash and no ampersand, build trivial attribute
1408+ * without any malloc(). */
1409+ is_trivial = TRUE;
1410+ for(raw_off = 0; raw_off < raw_size; raw_off++) {
1411+ if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
1412+ is_trivial = FALSE;
1413+ break;
1414+ }
1415+ }
1416+
1417+ if(is_trivial) {
1418+ build->text = (CHAR*) (raw_size ? raw_text : NULL);
1419+ build->substr_types = build->trivial_types;
1420+ build->substr_offsets = build->trivial_offsets;
1421+ build->substr_count = 1;
1422+ build->substr_alloc = 0;
1423+ build->trivial_types[0] = MD_TEXT_NORMAL;
1424+ build->trivial_offsets[0] = 0;
1425+ build->trivial_offsets[1] = raw_size;
1426+ off = raw_size;
1427+ } else {
1428+ build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
1429+ if(build->text == NULL) {
1430+ MD_LOG("malloc() failed.");
1431+ goto abort;
1432+ }
1433+
1434+ raw_off = 0;
1435+ off = 0;
1436+
1437+ while(raw_off < raw_size) {
1438+ if(raw_text[raw_off] == _T('\0')) {
1439+ MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1440+ memcpy(build->text + off, raw_text + raw_off, 1);
1441+ off++;
1442+ raw_off++;
1443+ continue;
1444+ }
1445+
1446+ if(raw_text[raw_off] == _T('&')) {
1447+ OFF ent_end;
1448+
1449+ if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1450+ MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1451+ memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1452+ off += ent_end - raw_off;
1453+ raw_off = ent_end;
1454+ continue;
1455+ }
1456+ }
1457+
1458+ if(build->substr_count == 0 || build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
1459+ MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1460+
1461+ if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1462+ raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size &&
1463+ (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
1464+ raw_off++;
1465+
1466+ build->text[off++] = raw_text[raw_off++];
1467+ }
1468+ build->substr_offsets[build->substr_count] = off;
1469+ }
1470+
1471+ attr->text = build->text;
1472+ attr->size = off;
1473+ attr->substr_offsets = build->substr_offsets;
1474+ attr->substr_types = build->substr_types;
1475+ return 0;
1476+
1477+abort:
1478+ md_free_attribute(ctx, build);
1479+ return -1;
1480+}
1481+
1482+
1483+/*********************************************
1484+ *** Dictionary of Reference Definitions ***
1485+ *********************************************/
1486+
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* FNV-1a hash of 'n' bytes at 'data'. 'base' is the starting hash value:
 * MD_FNV1A_BASE for a fresh hash, or the result of a previous call in order
 * to continue hashing more data. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* byte = (const unsigned char*) data;
    unsigned hash = base;

    for(; n > 0; n--, byte++)
        hash = (hash ^ *byte) * MD_FNV1A_PRIME;

    return hash;
}
1504+
1505+
1506+struct MD_REF_DEF_tag {
1507+ CHAR* label;
1508+ CHAR* title;
1509+ unsigned hash;
1510+ SZ label_size;
1511+ SZ title_size;
1512+ OFF dest_beg;
1513+ OFF dest_end;
1514+ unsigned char label_needs_free : 1;
1515+ unsigned char title_needs_free : 1;
1516+};
1517+
1518+/* Label equivalence is quite complicated with regards to whitespace and case
1519+ * folding. This complicates computing a hash of it as well as direct comparison
1520+ * of two labels. */
1521+
1522+static unsigned
1523+md_link_label_hash(const CHAR* label, SZ size)
1524+{
1525+ unsigned hash = MD_FNV1A_BASE;
1526+ OFF off;
1527+ unsigned codepoint;
1528+ int is_whitespace = FALSE;
1529+
1530+ off = md_skip_unicode_whitespace(label, 0, size);
1531+ while(off < size) {
1532+ SZ char_size;
1533+
1534+ codepoint = md_decode_unicode(label, off, size, &char_size);
1535+ is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1536+
1537+ if(is_whitespace) {
1538+ codepoint = ' ';
1539+ hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1540+ off = md_skip_unicode_whitespace(label, off, size);
1541+ } else {
1542+ MD_UNICODE_FOLD_INFO fold_info;
1543+
1544+ md_get_unicode_fold_info(codepoint, &fold_info);
1545+ hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1546+ off += char_size;
1547+ }
1548+ }
1549+
1550+ return hash;
1551+}
1552+
1553+static OFF
1554+md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1555+ MD_UNICODE_FOLD_INFO* fold_info)
1556+{
1557+ unsigned codepoint;
1558+ SZ char_size;
1559+
1560+ if(off >= size) {
1561+ /* Treat end of a link label as a whitespace. */
1562+ goto whitespace;
1563+ }
1564+
1565+ codepoint = md_decode_unicode(label, off, size, &char_size);
1566+ off += char_size;
1567+ if(ISUNICODEWHITESPACE_(codepoint)) {
1568+ /* Treat all whitespace as equivalent */
1569+ goto whitespace;
1570+ }
1571+
1572+ /* Get real folding info. */
1573+ md_get_unicode_fold_info(codepoint, fold_info);
1574+ return off;
1575+
1576+whitespace:
1577+ fold_info->codepoints[0] = _T(' ');
1578+ fold_info->n_codepoints = 1;
1579+ return md_skip_unicode_whitespace(label, off, size);
1580+}
1581+
1582+static int
1583+md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1584+{
1585+ OFF a_off;
1586+ OFF b_off;
1587+ MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1588+ MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1589+ OFF a_fi_off = 0;
1590+ OFF b_fi_off = 0;
1591+ int cmp;
1592+
1593+ a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1594+ b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1595+ while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1596+ b_off < b_size || b_fi_off < b_fi.n_codepoints)
1597+ {
1598+ /* If needed, load fold info for next char. */
1599+ if(a_fi_off >= a_fi.n_codepoints) {
1600+ a_fi_off = 0;
1601+ a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1602+ }
1603+ if(b_fi_off >= b_fi.n_codepoints) {
1604+ b_fi_off = 0;
1605+ b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1606+ }
1607+
1608+ cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1609+ if(cmp != 0)
1610+ return cmp;
1611+
1612+ a_fi_off++;
1613+ b_fi_off++;
1614+ }
1615+
1616+ return 0;
1617+}
1618+
1619+typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1620+struct MD_REF_DEF_LIST_tag {
1621+ int n_ref_defs;
1622+ int alloc_ref_defs;
1623+ MD_REF_DEF* ref_defs[]; /* Valid items always point into ctx->ref_defs[] */
1624+};
1625+
1626+static int
1627+md_ref_def_cmp(const void* a, const void* b)
1628+{
1629+ const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1630+ const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1631+
1632+ if(a_ref->hash < b_ref->hash)
1633+ return -1;
1634+ else if(a_ref->hash > b_ref->hash)
1635+ return +1;
1636+ else
1637+ return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1638+}
1639+
1640+static int
1641+md_ref_def_cmp_for_sort(const void* a, const void* b)
1642+{
1643+ int cmp;
1644+
1645+ cmp = md_ref_def_cmp(a, b);
1646+
1647+ /* Ensure stability of the sorting. */
1648+ if(cmp == 0) {
1649+ const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1650+ const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1651+
1652+ if(a_ref < b_ref)
1653+ cmp = -1;
1654+ else if(a_ref > b_ref)
1655+ cmp = +1;
1656+ else
1657+ cmp = 0;
1658+ }
1659+
1660+ return cmp;
1661+}
1662+
1663+static int
1664+md_build_ref_def_hashtable(MD_CTX* ctx)
1665+{
1666+ int i, j;
1667+
1668+ if(ctx->n_ref_defs == 0)
1669+ return 0;
1670+
1671+ ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
1672+ ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1673+ if(ctx->ref_def_hashtable == NULL) {
1674+ MD_LOG("malloc() failed.");
1675+ goto abort;
1676+ }
1677+ memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
1678+
1679+ /* Each member of ctx->ref_def_hashtable[] can be:
1680+ * -- NULL,
1681+ * -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1682+ * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1683+ * such MD_REF_DEFs.
1684+ */
1685+ for(i = 0; i < ctx->n_ref_defs; i++) {
1686+ MD_REF_DEF* def = &ctx->ref_defs[i];
1687+ void* bucket;
1688+ MD_REF_DEF_LIST* list;
1689+
1690+ def->hash = md_link_label_hash(def->label, def->label_size);
1691+ bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1692+
1693+ if(bucket == NULL) {
1694+ /* The bucket is empty. Make it just point to the def. */
1695+ ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1696+ continue;
1697+ }
1698+
1699+ if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1700+ /* The bucket already contains one ref. def. Lets see whether it
1701+ * is the same label (ref. def. duplicate) or different one
1702+ * (hash conflict). */
1703+ MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1704+
1705+ if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
1706+ /* Duplicate label: Ignore this ref. def. */
1707+ continue;
1708+ }
1709+
1710+ /* Make the bucket complex, i.e. able to hold more ref. defs. */
1711+ list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
1712+ if(list == NULL) {
1713+ MD_LOG("malloc() failed.");
1714+ goto abort;
1715+ }
1716+ list->ref_defs[0] = old_def;
1717+ list->ref_defs[1] = def;
1718+ list->n_ref_defs = 2;
1719+ list->alloc_ref_defs = 2;
1720+ ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1721+ continue;
1722+ }
1723+
1724+ /* Append the def to the complex bucket list.
1725+ *
1726+ * Note in this case we ignore potential duplicates to avoid expensive
1727+ * iterating over the complex bucket. Below, we revisit all the complex
1728+ * buckets and handle it more cheaply after the complex bucket contents
1729+ * is sorted. */
1730+ list = (MD_REF_DEF_LIST*) bucket;
1731+ if(list->n_ref_defs >= list->alloc_ref_defs) {
1732+ int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
1733+ MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1734+ sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1735+ if(list_tmp == NULL) {
1736+ MD_LOG("realloc() failed.");
1737+ goto abort;
1738+ }
1739+ list = list_tmp;
1740+ list->alloc_ref_defs = alloc_ref_defs;
1741+ ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1742+ }
1743+
1744+ list->ref_defs[list->n_ref_defs] = def;
1745+ list->n_ref_defs++;
1746+ }
1747+
1748+ /* Sort the complex buckets so we can use bsearch() with them. */
1749+ for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1750+ void* bucket = ctx->ref_def_hashtable[i];
1751+ MD_REF_DEF_LIST* list;
1752+
1753+ if(bucket == NULL)
1754+ continue;
1755+ if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1756+ continue;
1757+
1758+ list = (MD_REF_DEF_LIST*) bucket;
1759+ qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1760+
1761+ /* Disable all duplicates in the complex bucket by forcing all such
1762+ * records to point to the 1st such ref. def. I.e. no matter which
1763+ * record is found during the lookup, it will always point to the right
1764+ * ref. def. in ctx->ref_defs[]. */
1765+ for(j = 1; j < list->n_ref_defs; j++) {
1766+ if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
1767+ list->ref_defs[j] = list->ref_defs[j-1];
1768+ }
1769+ }
1770+
1771+ return 0;
1772+
1773+abort:
1774+ return -1;
1775+}
1776+
1777+static void
1778+md_free_ref_def_hashtable(MD_CTX* ctx)
1779+{
1780+ if(ctx->ref_def_hashtable != NULL) {
1781+ int i;
1782+
1783+ for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1784+ void* bucket = ctx->ref_def_hashtable[i];
1785+ if(bucket == NULL)
1786+ continue;
1787+ if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1788+ continue;
1789+ free(bucket);
1790+ }
1791+
1792+ free(ctx->ref_def_hashtable);
1793+ }
1794+}
1795+
1796+static const MD_REF_DEF*
1797+md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1798+{
1799+ unsigned hash;
1800+ void* bucket;
1801+
1802+ if(ctx->ref_def_hashtable_size == 0)
1803+ return NULL;
1804+
1805+ hash = md_link_label_hash(label, label_size);
1806+ bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1807+
1808+ if(bucket == NULL) {
1809+ return NULL;
1810+ } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1811+ const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1812+
1813+ if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1814+ return def;
1815+ else
1816+ return NULL;
1817+ } else {
1818+ MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1819+ MD_REF_DEF key_buf;
1820+ const MD_REF_DEF* key = &key_buf;
1821+ const MD_REF_DEF** ret;
1822+
1823+ key_buf.label = (CHAR*) label;
1824+ key_buf.label_size = label_size;
1825+ key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1826+
1827+ ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1828+ list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1829+ if(ret != NULL)
1830+ return *ret;
1831+ else
1832+ return NULL;
1833+ }
1834+}
1835+
1836+
1837+/***************************
1838+ *** Recognizing Links ***
1839+ ***************************/
1840+
1841+/* Note this code is partially shared between processing inlines and blocks
1842+ * as reference definitions and links share some helper parser functions.
1843+ */
1844+
1845+typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1846+struct MD_LINK_ATTR_tag {
1847+ OFF dest_beg;
1848+ OFF dest_end;
1849+
1850+ CHAR* title;
1851+ SZ title_size;
1852+ int title_needs_free;
1853+};
1854+
1855+
1856+static int
1857+md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1858+ OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1859+ OFF* p_contents_beg, OFF* p_contents_end)
1860+{
1861+ OFF off = beg;
1862+ OFF contents_beg = 0;
1863+ OFF contents_end = 0;
1864+ int line_index = 0;
1865+ int len = 0;
1866+
1867+ if(CH(off) != _T('['))
1868+ return FALSE;
1869+ off++;
1870+
1871+ while(1) {
1872+ OFF line_end = lines[line_index].end;
1873+
1874+ while(off < line_end) {
1875+ if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1876+ if(contents_end == 0) {
1877+ contents_beg = off;
1878+ *p_beg_line_index = line_index;
1879+ }
1880+ contents_end = off + 2;
1881+ off += 2;
1882+ } else if(CH(off) == _T('[')) {
1883+ return FALSE;
1884+ } else if(CH(off) == _T(']')) {
1885+ if(contents_beg < contents_end) {
1886+ /* Success. */
1887+ *p_contents_beg = contents_beg;
1888+ *p_contents_end = contents_end;
1889+ *p_end = off+1;
1890+ *p_end_line_index = line_index;
1891+ return TRUE;
1892+ } else {
1893+ /* Link label must have some non-whitespace contents. */
1894+ return FALSE;
1895+ }
1896+ } else {
1897+ unsigned codepoint;
1898+ SZ char_size;
1899+
1900+ codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1901+ if(!ISUNICODEWHITESPACE_(codepoint)) {
1902+ if(contents_end == 0) {
1903+ contents_beg = off;
1904+ *p_beg_line_index = line_index;
1905+ }
1906+ contents_end = off + char_size;
1907+ }
1908+
1909+ off += char_size;
1910+ }
1911+
1912+ len++;
1913+ if(len > 999)
1914+ return FALSE;
1915+ }
1916+
1917+ line_index++;
1918+ len++;
1919+ if(line_index < n_lines)
1920+ off = lines[line_index].beg;
1921+ else
1922+ break;
1923+ }
1924+
1925+ return FALSE;
1926+}
1927+
1928+static int
1929+md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1930+ OFF* p_contents_beg, OFF* p_contents_end)
1931+{
1932+ OFF off = beg;
1933+
1934+ if(off >= max_end || CH(off) != _T('<'))
1935+ return FALSE;
1936+ off++;
1937+
1938+ while(off < max_end) {
1939+ if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1940+ off += 2;
1941+ continue;
1942+ }
1943+
1944+ if(ISNEWLINE(off) || CH(off) == _T('<'))
1945+ return FALSE;
1946+
1947+ if(CH(off) == _T('>')) {
1948+ /* Success. */
1949+ *p_contents_beg = beg+1;
1950+ *p_contents_end = off;
1951+ *p_end = off+1;
1952+ return TRUE;
1953+ }
1954+
1955+ off++;
1956+ }
1957+
1958+ return FALSE;
1959+}
1960+
1961+static int
1962+md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1963+ OFF* p_contents_beg, OFF* p_contents_end)
1964+{
1965+ OFF off = beg;
1966+ int parenthesis_level = 0;
1967+
1968+ while(off < max_end) {
1969+ if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1970+ off += 2;
1971+ continue;
1972+ }
1973+
1974+ if(ISWHITESPACE(off) || ISCNTRL(off))
1975+ break;
1976+
1977+ /* Link destination may include balanced pairs of unescaped '(' ')'.
1978+ * Note we limit the maximal nesting level by 32 to protect us from
1979+ * https://github.com/jgm/cmark/issues/214 */
1980+ if(CH(off) == _T('(')) {
1981+ parenthesis_level++;
1982+ if(parenthesis_level > 32)
1983+ return FALSE;
1984+ } else if(CH(off) == _T(')')) {
1985+ if(parenthesis_level == 0)
1986+ break;
1987+ parenthesis_level--;
1988+ }
1989+
1990+ off++;
1991+ }
1992+
1993+ if(parenthesis_level != 0 || off == beg)
1994+ return FALSE;
1995+
1996+ /* Success. */
1997+ *p_contents_beg = beg;
1998+ *p_contents_end = off;
1999+ *p_end = off;
2000+ return TRUE;
2001+}
2002+
2003+static inline int
2004+md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2005+ OFF* p_contents_beg, OFF* p_contents_end)
2006+{
2007+ if(CH(beg) == _T('<'))
2008+ return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2009+ else
2010+ return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2011+}
2012+
2013+static int
2014+md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2015+ OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2016+ OFF* p_contents_beg, OFF* p_contents_end)
2017+{
2018+ OFF off = beg;
2019+ CHAR closer_char;
2020+ int line_index = 0;
2021+
2022+ /* White space with up to one line break. */
2023+ while(off < lines[line_index].end && ISWHITESPACE(off))
2024+ off++;
2025+ if(off >= lines[line_index].end) {
2026+ line_index++;
2027+ if(line_index >= n_lines)
2028+ return FALSE;
2029+ off = lines[line_index].beg;
2030+ }
2031+ if(off == beg)
2032+ return FALSE;
2033+
2034+ *p_beg_line_index = line_index;
2035+
2036+ /* First char determines how to detect end of it. */
2037+ switch(CH(off)) {
2038+ case _T('"'): closer_char = _T('"'); break;
2039+ case _T('\''): closer_char = _T('\''); break;
2040+ case _T('('): closer_char = _T(')'); break;
2041+ default: return FALSE;
2042+ }
2043+ off++;
2044+
2045+ *p_contents_beg = off;
2046+
2047+ while(line_index < n_lines) {
2048+ OFF line_end = lines[line_index].end;
2049+
2050+ while(off < line_end) {
2051+ if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
2052+ off++;
2053+ } else if(CH(off) == closer_char) {
2054+ /* Success. */
2055+ *p_contents_end = off;
2056+ *p_end = off+1;
2057+ *p_end_line_index = line_index;
2058+ return TRUE;
2059+ } else if(closer_char == _T(')') && CH(off) == _T('(')) {
2060+ /* ()-style title cannot contain (unescaped '(')) */
2061+ return FALSE;
2062+ }
2063+
2064+ off++;
2065+ }
2066+
2067+ line_index++;
2068+ }
2069+
2070+ return FALSE;
2071+}
2072+
2073+/* Returns 0 if it is not a reference definition.
2074+ *
2075+ * Returns N > 0 if it is a reference definition. N then corresponds to the
2076+ * number of lines forming it). In this case the definition is stored for
2077+ * resolving any links referring to it.
2078+ *
2079+ * Returns -1 in case of an error (out of memory).
2080+ */
2081+static int
2082+md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2083+{
2084+ OFF label_contents_beg;
2085+ OFF label_contents_end;
2086+ int label_contents_line_index = -1;
2087+ int label_is_multiline = FALSE;
2088+ OFF dest_contents_beg;
2089+ OFF dest_contents_end;
2090+ OFF title_contents_beg;
2091+ OFF title_contents_end;
2092+ int title_contents_line_index;
2093+ int title_is_multiline = FALSE;
2094+ OFF off;
2095+ int line_index = 0;
2096+ int tmp_line_index;
2097+ MD_REF_DEF* def = NULL;
2098+ int ret = 0;
2099+
2100+ /* Link label. */
2101+ if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
2102+ &off, &label_contents_line_index, &line_index,
2103+ &label_contents_beg, &label_contents_end))
2104+ return FALSE;
2105+ label_is_multiline = (label_contents_line_index != line_index);
2106+
2107+ /* Colon. */
2108+ if(off >= lines[line_index].end || CH(off) != _T(':'))
2109+ return FALSE;
2110+ off++;
2111+
2112+ /* Optional white space with up to one line break. */
2113+ while(off < lines[line_index].end && ISWHITESPACE(off))
2114+ off++;
2115+ if(off >= lines[line_index].end) {
2116+ line_index++;
2117+ if(line_index >= n_lines)
2118+ return FALSE;
2119+ off = lines[line_index].beg;
2120+ }
2121+
2122+ /* Link destination. */
2123+ if(!md_is_link_destination(ctx, off, lines[line_index].end,
2124+ &off, &dest_contents_beg, &dest_contents_end))
2125+ return FALSE;
2126+
2127+ /* (Optional) title. Note we interpret it as an title only if nothing
2128+ * more follows on its last line. */
2129+ if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2130+ &off, &title_contents_line_index, &tmp_line_index,
2131+ &title_contents_beg, &title_contents_end)
2132+ && off >= lines[line_index + tmp_line_index].end)
2133+ {
2134+ title_is_multiline = (tmp_line_index != title_contents_line_index);
2135+ title_contents_line_index += line_index;
2136+ line_index += tmp_line_index;
2137+ } else {
2138+ /* Not a title. */
2139+ title_is_multiline = FALSE;
2140+ title_contents_beg = off;
2141+ title_contents_end = off;
2142+ title_contents_line_index = 0;
2143+ }
2144+
2145+ /* Nothing more can follow on the last line. */
2146+ if(off < lines[line_index].end)
2147+ return FALSE;
2148+
2149+ /* So, it _is_ a reference definition. Remember it. */
2150+ if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2151+ MD_REF_DEF* new_defs;
2152+
2153+ ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
2154+ ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
2155+ : 16);
2156+ new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
2157+ if(new_defs == NULL) {
2158+ MD_LOG("realloc() failed.");
2159+ goto abort;
2160+ }
2161+
2162+ ctx->ref_defs = new_defs;
2163+ }
2164+ def = &ctx->ref_defs[ctx->n_ref_defs];
2165+ memset(def, 0, sizeof(MD_REF_DEF));
2166+
2167+ if(label_is_multiline) {
2168+ MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2169+ lines + label_contents_line_index, n_lines - label_contents_line_index,
2170+ _T(' '), &def->label, &def->label_size));
2171+ def->label_needs_free = TRUE;
2172+ } else {
2173+ def->label = (CHAR*) STR(label_contents_beg);
2174+ def->label_size = label_contents_end - label_contents_beg;
2175+ }
2176+
2177+ if(title_is_multiline) {
2178+ MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2179+ lines + title_contents_line_index, n_lines - title_contents_line_index,
2180+ _T('\n'), &def->title, &def->title_size));
2181+ def->title_needs_free = TRUE;
2182+ } else {
2183+ def->title = (CHAR*) STR(title_contents_beg);
2184+ def->title_size = title_contents_end - title_contents_beg;
2185+ }
2186+
2187+ def->dest_beg = dest_contents_beg;
2188+ def->dest_end = dest_contents_end;
2189+
2190+ /* Success. */
2191+ ctx->n_ref_defs++;
2192+ return line_index + 1;
2193+
2194+abort:
2195+ /* Failure. */
2196+ if(def != NULL && def->label_needs_free)
2197+ free(def->label);
2198+ if(def != NULL && def->title_needs_free)
2199+ free(def->title);
2200+ return ret;
2201+}
2202+
2203+static int
2204+md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2205+ OFF beg, OFF end, MD_LINK_ATTR* attr)
2206+{
2207+ const MD_REF_DEF* def;
2208+ const MD_LINE* beg_line;
2209+ const MD_LINE* end_line;
2210+ CHAR* label;
2211+ SZ label_size;
2212+ int ret;
2213+
2214+ MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2215+ MD_ASSERT(CH(end-1) == _T(']'));
2216+
2217+ beg += (CH(beg) == _T('!') ? 2 : 1);
2218+ end--;
2219+
2220+ /* Find lines corresponding to the beg and end positions. */
2221+ MD_ASSERT(lines[0].beg <= beg);
2222+ beg_line = lines;
2223+ while(beg >= beg_line->end)
2224+ beg_line++;
2225+
2226+ MD_ASSERT(end <= lines[n_lines-1].end);
2227+ end_line = beg_line;
2228+ while(end >= end_line->end)
2229+ end_line++;
2230+
2231+ if(beg_line != end_line) {
2232+ MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2233+ n_lines - (beg_line - lines), _T(' '), &label, &label_size));
2234+ } else {
2235+ label = (CHAR*) STR(beg);
2236+ label_size = end - beg;
2237+ }
2238+
2239+ def = md_lookup_ref_def(ctx, label, label_size);
2240+ if(def != NULL) {
2241+ attr->dest_beg = def->dest_beg;
2242+ attr->dest_end = def->dest_end;
2243+ attr->title = def->title;
2244+ attr->title_size = def->title_size;
2245+ attr->title_needs_free = FALSE;
2246+ }
2247+
2248+ if(beg_line != end_line)
2249+ free(label);
2250+
2251+ ret = (def != NULL);
2252+
2253+abort:
2254+ return ret;
2255+}
2256+
2257+static int
2258+md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2259+ OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2260+{
2261+ int line_index = 0;
2262+ int tmp_line_index;
2263+ OFF title_contents_beg;
2264+ OFF title_contents_end;
2265+ int title_contents_line_index;
2266+ int title_is_multiline;
2267+ OFF off = beg;
2268+ int ret = FALSE;
2269+
2270+ while(off >= lines[line_index].end)
2271+ line_index++;
2272+
2273+ MD_ASSERT(CH(off) == _T('('));
2274+ off++;
2275+
2276+ /* Optional white space with up to one line break. */
2277+ while(off < lines[line_index].end && ISWHITESPACE(off))
2278+ off++;
2279+ if(off >= lines[line_index].end && ISNEWLINE(off)) {
2280+ line_index++;
2281+ if(line_index >= n_lines)
2282+ return FALSE;
2283+ off = lines[line_index].beg;
2284+ }
2285+
2286+ /* Link destination may be omitted, but only when not also having a title. */
2287+ if(off < ctx->size && CH(off) == _T(')')) {
2288+ attr->dest_beg = off;
2289+ attr->dest_end = off;
2290+ attr->title = NULL;
2291+ attr->title_size = 0;
2292+ attr->title_needs_free = FALSE;
2293+ off++;
2294+ *p_end = off;
2295+ return TRUE;
2296+ }
2297+
2298+ /* Link destination. */
2299+ if(!md_is_link_destination(ctx, off, lines[line_index].end,
2300+ &off, &attr->dest_beg, &attr->dest_end))
2301+ return FALSE;
2302+
2303+ /* (Optional) title. */
2304+ if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2305+ &off, &title_contents_line_index, &tmp_line_index,
2306+ &title_contents_beg, &title_contents_end))
2307+ {
2308+ title_is_multiline = (tmp_line_index != title_contents_line_index);
2309+ title_contents_line_index += line_index;
2310+ line_index += tmp_line_index;
2311+ } else {
2312+ /* Not a title. */
2313+ title_is_multiline = FALSE;
2314+ title_contents_beg = off;
2315+ title_contents_end = off;
2316+ title_contents_line_index = 0;
2317+ }
2318+
2319+ /* Optional whitespace followed with final ')'. */
2320+ while(off < lines[line_index].end && ISWHITESPACE(off))
2321+ off++;
2322+ if(off >= lines[line_index].end && ISNEWLINE(off)) {
2323+ line_index++;
2324+ if(line_index >= n_lines)
2325+ return FALSE;
2326+ off = lines[line_index].beg;
2327+ }
2328+ if(CH(off) != _T(')'))
2329+ goto abort;
2330+ off++;
2331+
2332+ if(title_contents_beg >= title_contents_end) {
2333+ attr->title = NULL;
2334+ attr->title_size = 0;
2335+ attr->title_needs_free = FALSE;
2336+ } else if(!title_is_multiline) {
2337+ attr->title = (CHAR*) STR(title_contents_beg);
2338+ attr->title_size = title_contents_end - title_contents_beg;
2339+ attr->title_needs_free = FALSE;
2340+ } else {
2341+ MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2342+ lines + title_contents_line_index, n_lines - title_contents_line_index,
2343+ _T('\n'), &attr->title, &attr->title_size));
2344+ attr->title_needs_free = TRUE;
2345+ }
2346+
2347+ *p_end = off;
2348+ ret = TRUE;
2349+
2350+abort:
2351+ return ret;
2352+}
2353+
2354+static void
2355+md_free_ref_defs(MD_CTX* ctx)
2356+{
2357+ int i;
2358+
2359+ for(i = 0; i < ctx->n_ref_defs; i++) {
2360+ MD_REF_DEF* def = &ctx->ref_defs[i];
2361+
2362+ if(def->label_needs_free)
2363+ free(def->label);
2364+ if(def->title_needs_free)
2365+ free(def->title);
2366+ }
2367+
2368+ free(ctx->ref_defs);
2369+}
2370+
2371+
2372+/******************************************
2373+ *** Processing Inlines (a.k.a Spans) ***
2374+ ******************************************/
2375+
2376+/* We process inlines in a few phases:
2377+ *
2378+ * (1) We go through the block text and collect all significant characters
2379+ * which may start/end a span or some other significant position into
2380+ * ctx->marks[]. Core of this is what md_collect_marks() does.
2381+ *
2382+ * We also do some very brief preliminary context-less analysis, whether
2383+ * it might be opener or closer (e.g. of an emphasis span).
2384+ *
2385+ * This speeds the other steps as we do not need to re-iterate over all
2386+ * characters anymore.
2387+ *
2388+ * (2) We analyze each potential mark type, in order of precedence.
2389+ *
2390+ * In each md_analyze_XXX() function, we re-iterate list of the marks,
2391+ * skipping already resolved regions (in preceding precedences) and try to
2392+ * resolve them.
2393+ *
2394+ * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
2395+ * them as resolved.
2396+ *
2397+ * (2.2) For range-type marks, we analyze whether the mark could be closer
2398+ * and, if yes, whether there is some preceding opener it could satisfy.
2399+ *
2400+ * If not we check whether it could be really an opener and if yes, we
2401+ * remember it so subsequent closers may resolve it.
2402+ *
2403+ * (3) Finally, when all marks were analyzed, we render the block contents
2404+ * by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
2405+ * or ::close_span() whenever we reach a resolved mark.
2406+ */
2407+
2408+
/* The mark structure.
 *
 * The member 'ch' identifies the kind of the mark:
 *
 * '\\': Maybe escape sequence.
 * '\0': NULL char.
 *  '*': Maybe (strong) emphasis start/end.
 *  '_': Maybe (strong) emphasis start/end.
 *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
 *  '`': Maybe code span start/end.
 *  '&': Maybe start of entity.
 *  ';': Maybe end of entity.
 *  '<': Maybe start of raw HTML or autolink.
 *  '>': Maybe end of raw HTML or autolink.
 *  '[': Maybe start of link label or link text.
 *  '!': Equivalent of '[' for image.
 *  ']': Maybe end of link label or link text.
 *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
 *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
 *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
 *  '$': Enabled as a mark character only with MD_FLAG_LATEXMATHSPANS.
 *  '|': Enabled as a mark character only with MD_FLAG_TABLES or
 *       MD_FLAG_WIKILINKS; such marks are chained in TABLECELLBOUNDARIES.
 *  'D': Dummy mark, it reserves a space for splitting a previous mark
 *       (e.g. emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * Note that not all instances of these chars in the text imply creation of the
 * structure. Only those which have (or may have, after we see more context)
 * the special meaning.
 *
 * (Keep this struct as small as possible to fit as many of them as possible
 * into a CPU cache line.)
 */
struct MD_MARK_tag {
    /* Range of the input text covered by the mark. For a dummy mark, these
     * leading members may instead be overwritten with a raw pointer, see
     * md_mark_store_ptr() which copies the pointer over the beginning of the
     * structure. */
    OFF beg;
    OFF end;

    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
     * of given type 'ch'.
     *
     * During resolving, we disconnect from the chain and point to the
     * corresponding counterpart so opener points to its closer and vice versa.
     *
     * Both are indexes into ctx->marks[]; -1 means "none".
     */
    int prev;
    int next;
    CHAR ch;                /* Kind of the mark, see the list above. */
    unsigned char flags;    /* Combination of the MD_MARK_xxxx bits below. */
};

/* Mark flags (these apply to ALL mark types). */
#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits).
 * Note the value 0x20 is deliberately reused by three unrelated flags; which
 * one is meant follows from the mark type. */
#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0                 0x40  /* Original mark length % 3 == 0. */
#define MD_MARK_EMPH_MOD3_1                 0x80  /* Original mark length % 3 == 1. */
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)  /* Original mark length % 3 == 2. */
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)  /* Bits holding the modulo. */
#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
2469+
2470+static MD_MARKCHAIN*
2471+md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2472+{
2473+ switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
2474+ case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0;
2475+ case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1;
2476+ case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2;
2477+ case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0;
2478+ case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1;
2479+ case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2;
2480+ default: MD_UNREACHABLE();
2481+ }
2482+ return NULL;
2483+}
2484+
2485+static MD_MARKCHAIN*
2486+md_mark_chain(MD_CTX* ctx, int mark_index)
2487+{
2488+ MD_MARK* mark = &ctx->marks[mark_index];
2489+
2490+ switch(mark->ch) {
2491+ case _T('*'): return md_asterisk_chain(ctx, mark->flags);
2492+ case _T('_'): return &UNDERSCORE_OPENERS;
2493+ case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2494+ case _T('['): return &BRACKET_OPENERS;
2495+ case _T('|'): return &TABLECELLBOUNDARIES;
2496+ default: return NULL;
2497+ }
2498+}
2499+
2500+static MD_MARK*
2501+md_push_mark(MD_CTX* ctx)
2502+{
2503+ if(ctx->n_marks >= ctx->alloc_marks) {
2504+ MD_MARK* new_marks;
2505+
2506+ ctx->alloc_marks = (ctx->alloc_marks > 0
2507+ ? ctx->alloc_marks + ctx->alloc_marks / 2
2508+ : 64);
2509+ new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2510+ if(new_marks == NULL) {
2511+ MD_LOG("realloc() failed.");
2512+ return NULL;
2513+ }
2514+
2515+ ctx->marks = new_marks;
2516+ }
2517+
2518+ return &ctx->marks[ctx->n_marks++];
2519+}
2520+
/* Helper macros for appending a new mark via md_push_mark().
 *
 * Both expect the enclosing function to provide:
 *  -- a variable 'ctx' (the parser context);
 *  -- a variable 'mark' of type MD_MARK*, which receives the new mark;
 *  -- a variable 'ret', which is set to -1 if the mark cannot be allocated;
 *  -- a label 'abort', which is jumped to in that case.
 *
 * PUSH_MARK_() only reserves the mark and leaves it uninitialized.
 */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            mark = md_push_mark(ctx);                                   \
            if(mark == NULL) {                                          \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* PUSH_MARK() reserves the mark and initializes it as a mark of the type
 * ch_ covering the range [beg_, end_) with the given flags, not linked to
 * any other mark (prev and next are -1). */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2540+
2541+
2542+static void
2543+md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2544+{
2545+ if(chain->tail >= 0)
2546+ ctx->marks[chain->tail].next = mark_index;
2547+ else
2548+ chain->head = mark_index;
2549+
2550+ ctx->marks[mark_index].prev = chain->tail;
2551+ ctx->marks[mark_index].next = -1;
2552+ chain->tail = mark_index;
2553+}
2554+
2555+/* Sometimes, we need to store a pointer into the mark. It is quite rare
2556+ * so we do not bother to make MD_MARK use union, and it can only happen
2557+ * for dummy marks. */
2558+static inline void
2559+md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2560+{
2561+ MD_MARK* mark = &ctx->marks[mark_index];
2562+ MD_ASSERT(mark->ch == 'D');
2563+
2564+ /* Check only members beg and end are misused for this. */
2565+ MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2566+ memcpy(mark, &ptr, sizeof(void*));
2567+}
2568+
2569+static inline void*
2570+md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2571+{
2572+ void* ptr;
2573+ MD_MARK* mark = &ctx->marks[mark_index];
2574+ MD_ASSERT(mark->ch == 'D');
2575+ memcpy(&ptr, mark, sizeof(void*));
2576+ return ptr;
2577+}
2578+
2579+static void
2580+md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2581+{
2582+ MD_MARK* opener = &ctx->marks[opener_index];
2583+ MD_MARK* closer = &ctx->marks[closer_index];
2584+
2585+ /* Remove opener from the list of openers. */
2586+ if(chain != NULL) {
2587+ if(opener->prev >= 0)
2588+ ctx->marks[opener->prev].next = opener->next;
2589+ else
2590+ chain->head = opener->next;
2591+
2592+ if(opener->next >= 0)
2593+ ctx->marks[opener->next].prev = opener->prev;
2594+ else
2595+ chain->tail = opener->prev;
2596+ }
2597+
2598+ /* Interconnect opener and closer and mark both as resolved. */
2599+ opener->next = closer_index;
2600+ opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
2601+ closer->prev = opener_index;
2602+ closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
2603+}
2604+
2605+
2606+#define MD_ROLLBACK_ALL 0
2607+#define MD_ROLLBACK_CROSSING 1
2608+
2609+/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
2610+ * resolvings accordingly to these rules:
2611+ *
2612+ * (1) All openers BEFORE the range corresponding to any closer inside the
2613+ * range are un-resolved and they are re-added to their respective chains
2614+ * of unresolved openers. This ensures we can reuse the opener for closers
2615+ * AFTER the range.
2616+ *
2617+ * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2618+ * are discarded.
2619+ *
2620+ * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
2621+ * in (1) are discarded. I.e. pairs of openers and closers which are both
2622+ * inside the range are retained as well as any unpaired marks.
2623+ */
2624+static void
2625+md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2626+{
2627+ int i;
2628+ int mark_index;
2629+
2630+ /* Cut all unresolved openers at the mark index. */
2631+ for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
2632+ MD_MARKCHAIN* chain = &ctx->mark_chains[i];
2633+
2634+ while(chain->tail >= opener_index)
2635+ chain->tail = ctx->marks[chain->tail].prev;
2636+
2637+ if(chain->tail >= 0)
2638+ ctx->marks[chain->tail].next = -1;
2639+ else
2640+ chain->head = -1;
2641+ }
2642+
2643+ /* Go backwards so that unresolved openers are re-added into their
2644+ * respective chains, in the right order. */
2645+ mark_index = closer_index - 1;
2646+ while(mark_index > opener_index) {
2647+ MD_MARK* mark = &ctx->marks[mark_index];
2648+ int mark_flags = mark->flags;
2649+ int discard_flag = (how == MD_ROLLBACK_ALL);
2650+
2651+ if(mark->flags & MD_MARK_CLOSER) {
2652+ int mark_opener_index = mark->prev;
2653+
2654+ /* Undo opener BEFORE the range. */
2655+ if(mark_opener_index < opener_index) {
2656+ MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
2657+ MD_MARKCHAIN* chain;
2658+
2659+ mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
2660+ chain = md_mark_chain(ctx, opener_index);
2661+ if(chain != NULL) {
2662+ md_mark_chain_append(ctx, chain, mark_opener_index);
2663+ discard_flag = 1;
2664+ }
2665+ }
2666+ }
2667+
2668+ /* And reset our flags. */
2669+ if(discard_flag)
2670+ mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
2671+
2672+ /* Jump as far as we can over unresolved or non-interesting marks. */
2673+ switch(how) {
2674+ case MD_ROLLBACK_CROSSING:
2675+ if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) {
2676+ /* If we are closer with opener INSIDE the range, there may
2677+ * not be any other crosser inside the subrange. */
2678+ mark_index = mark->prev;
2679+ break;
2680+ }
2681+ MD_FALLTHROUGH();
2682+ default:
2683+ mark_index--;
2684+ break;
2685+ }
2686+ }
2687+}
2688+
2689+static void
2690+md_build_mark_char_map(MD_CTX* ctx)
2691+{
2692+ memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2693+
2694+ ctx->mark_char_map['\\'] = 1;
2695+ ctx->mark_char_map['*'] = 1;
2696+ ctx->mark_char_map['_'] = 1;
2697+ ctx->mark_char_map['`'] = 1;
2698+ ctx->mark_char_map['&'] = 1;
2699+ ctx->mark_char_map[';'] = 1;
2700+ ctx->mark_char_map['<'] = 1;
2701+ ctx->mark_char_map['>'] = 1;
2702+ ctx->mark_char_map['['] = 1;
2703+ ctx->mark_char_map['!'] = 1;
2704+ ctx->mark_char_map[']'] = 1;
2705+ ctx->mark_char_map['\0'] = 1;
2706+
2707+ if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2708+ ctx->mark_char_map['~'] = 1;
2709+
2710+ if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2711+ ctx->mark_char_map['$'] = 1;
2712+
2713+ if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2714+ ctx->mark_char_map['@'] = 1;
2715+
2716+ if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2717+ ctx->mark_char_map[':'] = 1;
2718+
2719+ if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2720+ ctx->mark_char_map['.'] = 1;
2721+
2722+ if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2723+ ctx->mark_char_map['|'] = 1;
2724+
2725+ if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2726+ int i;
2727+
2728+ for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2729+ if(ISWHITESPACE_(i))
2730+ ctx->mark_char_map[i] = 1;
2731+ }
2732+ }
2733+}
2734+
2735+/* We limit code span marks to lower than 32 backticks. This solves the
2736+ * pathologic case of too many openers, each of different length: Their
2737+ * resolving would be then O(n^2). */
2738+#define CODESPAN_MARK_MAXLEN 32
2739+
2740+static int
2741+md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2742+ OFF* p_opener_beg, OFF* p_opener_end,
2743+ OFF* p_closer_beg, OFF* p_closer_end,
2744+ OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2745+ int* p_reached_paragraph_end)
2746+{
2747+ OFF opener_beg = beg;
2748+ OFF opener_end;
2749+ OFF closer_beg;
2750+ OFF closer_end;
2751+ SZ mark_len;
2752+ OFF line_end;
2753+ int has_space_after_opener = FALSE;
2754+ int has_eol_after_opener = FALSE;
2755+ int has_space_before_closer = FALSE;
2756+ int has_eol_before_closer = FALSE;
2757+ int has_only_space = TRUE;
2758+ int line_index = 0;
2759+
2760+ line_end = lines[0].end;
2761+ opener_end = opener_beg;
2762+ while(opener_end < line_end && CH(opener_end) == _T('`'))
2763+ opener_end++;
2764+ has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
2765+ has_eol_after_opener = (opener_end == line_end);
2766+
2767+ /* The caller needs to know end of the opening mark even if we fail. */
2768+ *p_opener_end = opener_end;
2769+
2770+ mark_len = opener_end - opener_beg;
2771+ if(mark_len > CODESPAN_MARK_MAXLEN)
2772+ return FALSE;
2773+
2774+ /* Check whether we already know there is no closer of this length.
2775+ * If so, re-scan does no sense. This fixes issue #59. */
2776+ if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end ||
2777+ (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end))
2778+ return FALSE;
2779+
2780+ closer_beg = opener_end;
2781+ closer_end = opener_end;
2782+
2783+ /* Find closer mark. */
2784+ while(TRUE) {
2785+ while(closer_beg < line_end && CH(closer_beg) != _T('`')) {
2786+ if(CH(closer_beg) != _T(' '))
2787+ has_only_space = FALSE;
2788+ closer_beg++;
2789+ }
2790+ closer_end = closer_beg;
2791+ while(closer_end < line_end && CH(closer_end) == _T('`'))
2792+ closer_end++;
2793+
2794+ if(closer_end - closer_beg == mark_len) {
2795+ /* Success. */
2796+ has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
2797+ has_eol_before_closer = (closer_beg == lines[line_index].beg);
2798+ break;
2799+ }
2800+
2801+ if(closer_end - closer_beg > 0) {
2802+ /* We have found a back-tick which is not part of the closer. */
2803+ has_only_space = FALSE;
2804+
2805+ /* But if we eventually fail, remember it as a potential closer
2806+ * of its own length for future attempts. This mitigates needs for
2807+ * rescans. */
2808+ if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2809+ if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
2810+ last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
2811+ }
2812+ }
2813+
2814+ if(closer_end >= line_end) {
2815+ line_index++;
2816+ if(line_index >= n_lines) {
2817+ /* Reached end of the paragraph and still nothing. */
2818+ *p_reached_paragraph_end = TRUE;
2819+ return FALSE;
2820+ }
2821+ /* Try on the next line. */
2822+ line_end = lines[line_index].end;
2823+ closer_beg = lines[line_index].beg;
2824+ } else {
2825+ closer_beg = closer_end;
2826+ }
2827+ }
2828+
2829+ /* If there is a space or a new line both after and before the opener
2830+ * (and if the code span is not made of spaces only), consume one initial
2831+ * and one trailing space as part of the marks. */
2832+ if(!has_only_space &&
2833+ (has_space_after_opener || has_eol_after_opener) &&
2834+ (has_space_before_closer || has_eol_before_closer))
2835+ {
2836+ if(has_space_after_opener)
2837+ opener_end++;
2838+ else
2839+ opener_end = lines[1].beg;
2840+
2841+ if(has_space_before_closer)
2842+ closer_beg--;
2843+ else {
2844+ closer_beg = lines[line_index-1].end;
2845+ /* We need to eat the preceding "\r\n" but not any line trailing
2846+ * spaces. */
2847+ while(closer_beg < ctx->size && ISBLANK(closer_beg))
2848+ closer_beg++;
2849+ }
2850+ }
2851+
2852+ *p_opener_beg = opener_beg;
2853+ *p_opener_end = opener_end;
2854+ *p_closer_beg = closer_beg;
2855+ *p_closer_end = closer_end;
2856+ return TRUE;
2857+}
2858+
2859+static int
2860+md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2861+{
2862+ OFF off = beg+1;
2863+
2864+ MD_ASSERT(CH(beg) == _T('<'));
2865+
2866+ /* Check for scheme. */
2867+ if(off >= max_end || !ISASCII(off))
2868+ return FALSE;
2869+ off++;
2870+ while(1) {
2871+ if(off >= max_end)
2872+ return FALSE;
2873+ if(off - beg > 32)
2874+ return FALSE;
2875+ if(CH(off) == _T(':') && off - beg >= 3)
2876+ break;
2877+ if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2878+ return FALSE;
2879+ off++;
2880+ }
2881+
2882+ /* Check the path after the scheme. */
2883+ while(off < max_end && CH(off) != _T('>')) {
2884+ if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2885+ return FALSE;
2886+ off++;
2887+ }
2888+
2889+ if(off >= max_end)
2890+ return FALSE;
2891+
2892+ MD_ASSERT(CH(off) == _T('>'));
2893+ *p_end = off+1;
2894+ return TRUE;
2895+}
2896+
2897+static int
2898+md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2899+{
2900+ OFF off = beg + 1;
2901+ int label_len;
2902+
2903+ MD_ASSERT(CH(beg) == _T('<'));
2904+
2905+ /* The code should correspond to this regexp:
2906+ /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
2907+ @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2908+ (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
2909+ */
2910+
2911+ /* Username (before '@'). */
2912+ while(off < max_end && (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2913+ off++;
2914+ if(off <= beg+1)
2915+ return FALSE;
2916+
2917+ /* '@' */
2918+ if(off >= max_end || CH(off) != _T('@'))
2919+ return FALSE;
2920+ off++;
2921+
2922+ /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
2923+ * characters or '-', but '-' is not allowed as first or last char. */
2924+ label_len = 0;
2925+ while(off < max_end) {
2926+ if(ISALNUM(off))
2927+ label_len++;
2928+ else if(CH(off) == _T('-') && label_len > 0)
2929+ label_len++;
2930+ else if(CH(off) == _T('.') && label_len > 0 && CH(off-1) != _T('-'))
2931+ label_len = 0;
2932+ else
2933+ break;
2934+
2935+ if(label_len > 63)
2936+ return FALSE;
2937+
2938+ off++;
2939+ }
2940+
2941+ if(label_len <= 0 || off >= max_end || CH(off) != _T('>') || CH(off-1) == _T('-'))
2942+ return FALSE;
2943+
2944+ *p_end = off+1;
2945+ return TRUE;
2946+}
2947+
2948+static int
2949+md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2950+{
2951+ if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2952+ *p_missing_mailto = FALSE;
2953+ return TRUE;
2954+ }
2955+
2956+ if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2957+ *p_missing_mailto = TRUE;
2958+ return TRUE;
2959+ }
2960+
2961+ return FALSE;
2962+}
2963+
2964+static int
2965+md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2966+{
2967+ int i;
2968+ int ret = 0;
2969+ MD_MARK* mark;
2970+ OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
2971+ int codespan_scanned_till_paragraph_end = FALSE;
2972+
2973+ for(i = 0; i < n_lines; i++) {
2974+ const MD_LINE* line = &lines[i];
2975+ OFF off = line->beg;
2976+ OFF line_end = line->end;
2977+
2978+ while(TRUE) {
2979+ CHAR ch;
2980+
2981+#ifdef MD4C_USE_UTF16
2982+ /* For UTF-16, mark_char_map[] covers only ASCII. */
2983+ #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
2984+ (ctx->mark_char_map[(unsigned char) CH(off)]))
2985+#else
2986+ /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
2987+ #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
2988+#endif
2989+
2990+ /* Optimization: Use some loop unrolling. */
2991+ while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1)
2992+ && !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3))
2993+ off += 4;
2994+ while(off < line_end && !IS_MARK_CHAR(off+0))
2995+ off++;
2996+
2997+ if(off >= line_end)
2998+ break;
2999+
3000+ ch = CH(off);
3001+
3002+ /* A backslash escape.
3003+ * It can go beyond line->end as it may involve escaped new
3004+ * line to form a hard break. */
3005+ if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
3006+ /* Hard-break cannot be on the last line of the block. */
3007+ if(!ISNEWLINE(off+1) || i+1 < n_lines)
3008+ PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
3009+ off += 2;
3010+ continue;
3011+ }
3012+
3013+ /* A potential (string) emphasis start/end. */
3014+ if(ch == _T('*') || ch == _T('_')) {
3015+ OFF tmp = off+1;
3016+ int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
3017+ int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */
3018+
3019+ while(tmp < line_end && CH(tmp) == ch)
3020+ tmp++;
3021+
3022+ if(off == line->beg || ISUNICODEWHITESPACEBEFORE(off))
3023+ left_level = 0;
3024+ else if(ISUNICODEPUNCTBEFORE(off))
3025+ left_level = 1;
3026+ else
3027+ left_level = 2;
3028+
3029+ if(tmp == line_end || ISUNICODEWHITESPACE(tmp))
3030+ right_level = 0;
3031+ else if(ISUNICODEPUNCT(tmp))
3032+ right_level = 1;
3033+ else
3034+ right_level = 2;
3035+
3036+ /* Intra-word underscore doesn't have special meaning. */
3037+ if(ch == _T('_') && left_level == 2 && right_level == 2) {
3038+ left_level = 0;
3039+ right_level = 0;
3040+ }
3041+
3042+ if(left_level != 0 || right_level != 0) {
3043+ unsigned flags = 0;
3044+
3045+ if(left_level > 0 && left_level >= right_level)
3046+ flags |= MD_MARK_POTENTIAL_CLOSER;
3047+ if(right_level > 0 && right_level >= left_level)
3048+ flags |= MD_MARK_POTENTIAL_OPENER;
3049+ if(left_level == 2 && right_level == 2)
3050+ flags |= MD_MARK_EMPH_INTRAWORD;
3051+
3052+ /* For "the rule of three" we need to remember the original
3053+ * size of the mark (modulo three), before we potentially
3054+ * split the mark when being later resolved partially by some
3055+ * shorter closer. */
3056+ switch((tmp - off) % 3) {
3057+ case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
3058+ case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
3059+ case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
3060+ }
3061+
3062+ PUSH_MARK(ch, off, tmp, flags);
3063+
3064+ /* During resolving, multiple asterisks may have to be
3065+ * split into independent span start/ends. Consider e.g.
3066+ * "**foo* bar*". Therefore we push also some empty dummy
3067+ * marks to have enough space for that. */
3068+ off++;
3069+ while(off < tmp) {
3070+ PUSH_MARK('D', off, off, 0);
3071+ off++;
3072+ }
3073+ continue;
3074+ }
3075+
3076+ off = tmp;
3077+ continue;
3078+ }
3079+
3080+ /* A potential code span start/end. */
3081+ if(ch == _T('`')) {
3082+ OFF opener_beg, opener_end;
3083+ OFF closer_beg, closer_end;
3084+ int is_code_span;
3085+
3086+ is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
3087+ &opener_beg, &opener_end, &closer_beg, &closer_end,
3088+ codespan_last_potential_closers,
3089+ &codespan_scanned_till_paragraph_end);
3090+ if(is_code_span) {
3091+ PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
3092+ PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3093+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3094+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3095+
3096+ off = closer_end;
3097+
3098+ /* Advance the current line accordingly. */
3099+ while(off > line_end) {
3100+ i++;
3101+ line++;
3102+ line_end = line->end;
3103+ }
3104+ continue;
3105+ }
3106+
3107+ off = opener_end;
3108+ continue;
3109+ }
3110+
3111+ /* A potential entity start. */
3112+ if(ch == _T('&')) {
3113+ PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3114+ off++;
3115+ continue;
3116+ }
3117+
3118+ /* A potential entity end. */
3119+ if(ch == _T(';')) {
3120+ /* We surely cannot be entity unless the previous mark is '&'. */
3121+ if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].ch == _T('&'))
3122+ PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3123+
3124+ off++;
3125+ continue;
3126+ }
3127+
3128+ /* A potential autolink or raw HTML start/end. */
3129+ if(ch == _T('<')) {
3130+ int is_autolink;
3131+ OFF autolink_end;
3132+ int missing_mailto;
3133+
3134+ if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3135+ int is_html;
3136+ OFF html_end;
3137+
3138+ /* Given the nature of the raw HTML, we have to recognize
3139+ * it here. Doing so later in md_analyze_lt_gt() could
3140+ * open can of worms of quadratic complexity. */
3141+ is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
3142+ lines[n_lines-1].end, &html_end);
3143+ if(is_html) {
3144+ PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
3145+ PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3146+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3147+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3148+ off = html_end;
3149+
3150+ /* Advance the current line accordingly. */
3151+ while(off > line_end) {
3152+ i++;
3153+ line++;
3154+ line_end = line->end;
3155+ }
3156+ continue;
3157+ }
3158+ }
3159+
3160+ is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
3161+ &autolink_end, &missing_mailto);
3162+ if(is_autolink) {
3163+ PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
3164+ MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3165+ PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
3166+ MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3167+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3168+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3169+ off = autolink_end;
3170+ continue;
3171+ }
3172+
3173+ off++;
3174+ continue;
3175+ }
3176+
3177+ /* A potential link or its part. */
3178+ if(ch == _T('[') || (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
3179+ OFF tmp = (ch == _T('[') ? off+1 : off+2);
3180+ PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3181+ off = tmp;
3182+ /* Two dummies to make enough place for data we need if it is
3183+ * a link. */
3184+ PUSH_MARK('D', off, off, 0);
3185+ PUSH_MARK('D', off, off, 0);
3186+ continue;
3187+ }
3188+ if(ch == _T(']')) {
3189+ PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3190+ off++;
3191+ continue;
3192+ }
3193+
3194+ /* A potential permissive e-mail autolink. */
3195+ if(ch == _T('@')) {
3196+ if(line->beg + 1 <= off && ISALNUM(off-1) &&
3197+ off + 3 < line->end && ISALNUM(off+1))
3198+ {
3199+ PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3200+ /* Push a dummy as a reserve for a closer. */
3201+ PUSH_MARK('D', off, off, 0);
3202+ }
3203+
3204+ off++;
3205+ continue;
3206+ }
3207+
3208+ /* A potential permissive URL autolink. */
3209+ if(ch == _T(':')) {
3210+ static struct {
3211+ const CHAR* scheme;
3212+ SZ scheme_size;
3213+ const CHAR* suffix;
3214+ SZ suffix_size;
3215+ } scheme_map[] = {
3216+ /* In the order from the most frequently used, arguably. */
3217+ { _T("http"), 4, _T("//"), 2 },
3218+ { _T("https"), 5, _T("//"), 2 },
3219+ { _T("ftp"), 3, _T("//"), 2 }
3220+ };
3221+ int scheme_index;
3222+
3223+ for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3224+ const CHAR* scheme = scheme_map[scheme_index].scheme;
3225+ const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3226+ const CHAR* suffix = scheme_map[scheme_index].suffix;
3227+ const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3228+
3229+ if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), scheme, scheme_size) &&
3230+ (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~(["))) &&
3231+ off + 1 + suffix_size < line->end && md_ascii_eq(STR(off+1), suffix, suffix_size))
3232+ {
3233+ PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
3234+ /* Push a dummy as a reserve for a closer. */
3235+ PUSH_MARK('D', off, off, 0);
3236+ off += 1 + suffix_size;
3237+ break;
3238+ }
3239+ }
3240+
3241+ off++;
3242+ continue;
3243+ }
3244+
3245+ /* A potential permissive WWW autolink. */
3246+ if(ch == _T('.')) {
3247+ if(line->beg + 3 <= off && md_ascii_eq(STR(off-3), _T("www"), 3) &&
3248+ (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~(["))) &&
3249+ off + 1 < line_end)
3250+ {
3251+ PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
3252+ /* Push a dummy as a reserve for a closer. */
3253+ PUSH_MARK('D', off, off, 0);
3254+ off++;
3255+ continue;
3256+ }
3257+
3258+ off++;
3259+ continue;
3260+ }
3261+
3262+ /* A potential table cell boundary or wiki link label delimiter. */
3263+ if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
3264+ PUSH_MARK(ch, off, off+1, 0);
3265+ off++;
3266+ continue;
3267+ }
3268+
3269+ /* A potential strikethrough start/end. */
3270+ if(ch == _T('~')) {
3271+ OFF tmp = off+1;
3272+
3273+ while(tmp < line_end && CH(tmp) == _T('~'))
3274+ tmp++;
3275+
3276+ if(tmp - off < 3) {
3277+ unsigned flags = 0;
3278+
3279+ if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
3280+ flags |= MD_MARK_POTENTIAL_OPENER;
3281+ if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
3282+ flags |= MD_MARK_POTENTIAL_CLOSER;
3283+ if(flags != 0)
3284+ PUSH_MARK(ch, off, tmp, flags);
3285+ }
3286+
3287+ off = tmp;
3288+ continue;
3289+ }
3290+
3291+ /* A potential equation start/end */
3292+ if(ch == _T('$')) {
3293+ /* We can have at most two consecutive $ signs,
3294+ * where two dollar signs signify a display equation. */
3295+ OFF tmp = off+1;
3296+
3297+ while(tmp < line_end && CH(tmp) == _T('$'))
3298+ tmp++;
3299+
3300+ if (tmp - off <= 2)
3301+ PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
3302+ off = tmp;
3303+ continue;
3304+ }
3305+
3306+ /* Turn non-trivial whitespace into single space. */
3307+ if(ISWHITESPACE_(ch)) {
3308+ OFF tmp = off+1;
3309+
3310+ while(tmp < line_end && ISWHITESPACE(tmp))
3311+ tmp++;
3312+
3313+ if(tmp - off > 1 || ch != _T(' '))
3314+ PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3315+
3316+ off = tmp;
3317+ continue;
3318+ }
3319+
3320+ /* NULL character. */
3321+ if(ch == _T('\0')) {
3322+ PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
3323+ off++;
3324+ continue;
3325+ }
3326+
3327+ off++;
3328+ }
3329+ }
3330+
3331+ /* Add a dummy mark at the end of the mark vector to simplify
3332+ * process_inlines(). */
3333+ PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);
3334+
3335+abort:
3336+ return ret;
3337+}
3338+
3339+static void
3340+md_analyze_bracket(MD_CTX* ctx, int mark_index)
3341+{
3342+ /* We cannot really resolve links here as for that we would need
3343+ * more context. E.g. a following pair of brackets (reference link),
3344+ * or enclosing pair of brackets (if the inner is the link, the outer
3345+ * one cannot be.)
3346+ *
3347+ * Therefore we here only construct a list of resolved '[' ']' pairs
3348+ * ordered by position of the closer. This allows ur to analyze what is
3349+ * or is not link in the right order, from inside to outside in case
3350+ * of nested brackets.
3351+ *
3352+ * The resolving itself is deferred into md_resolve_links().
3353+ */
3354+
3355+ MD_MARK* mark = &ctx->marks[mark_index];
3356+
3357+ if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3358+ md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
3359+ return;
3360+ }
3361+
3362+ if(BRACKET_OPENERS.tail >= 0) {
3363+ /* Pop the opener from the chain. */
3364+ int opener_index = BRACKET_OPENERS.tail;
3365+ MD_MARK* opener = &ctx->marks[opener_index];
3366+ if(opener->prev >= 0)
3367+ ctx->marks[opener->prev].next = -1;
3368+ else
3369+ BRACKET_OPENERS.head = -1;
3370+ BRACKET_OPENERS.tail = opener->prev;
3371+
3372+ /* Interconnect the opener and closer. */
3373+ opener->next = mark_index;
3374+ mark->prev = opener_index;
3375+
3376+ /* Add the pair into chain of potential links for md_resolve_links().
3377+ * Note we misuse opener->prev for this as opener->next points to its
3378+ * closer. */
3379+ if(ctx->unresolved_link_tail >= 0)
3380+ ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3381+ else
3382+ ctx->unresolved_link_head = opener_index;
3383+ ctx->unresolved_link_tail = opener_index;
3384+ opener->prev = -1;
3385+ }
3386+}
3387+
3388+/* Forward declaration; md_resolve_links() below calls this function before its definition appears in the file. */
3389+static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3390+ int mark_beg, int mark_end);
3391+
/* Walk the list of bracket pairs built by md_analyze_bracket() (it starts at
 * ctx->unresolved_link_head and is threaded through opener->prev) and decide,
 * pair by pair, whether the pair forms a wiki link, a reference link or an
 * inline link/image.
 *
 * For every recognized link the opener and closer marks are flagged as
 * resolved. For non-wiki links the destination and the title are stored in
 * the two dummy 'D' marks which follow the opener. The marks inside a
 * resolved link are then analyzed with md_analyze_link_contents().
 *
 * Returns 0 on success, or -1 if md_is_link_reference() or
 * md_is_inline_link_spec() reports a negative result.
 */
3392+static int
3393+md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3394+{
3395+ int opener_index = ctx->unresolved_link_head;
3396+ OFF last_link_beg = 0; /* Extent of the most recently resolved link ... */
3397+ OFF last_link_end = 0;
3398+ OFF last_img_beg = 0; /* ... and of the most recently resolved image. */
3399+ OFF last_img_end = 0;
3400+
3401+ while(opener_index >= 0) {
3402+ MD_MARK* opener = &ctx->marks[opener_index];
3403+ int closer_index = opener->next;
3404+ MD_MARK* closer = &ctx->marks[closer_index];
3405+ int next_index = opener->prev; /* The candidate list is linked through opener->prev. */
3406+ MD_MARK* next_opener;
3407+ MD_MARK* next_closer;
3408+ MD_LINK_ATTR attr;
3409+ int is_link = FALSE;
3410+
3411+ if(next_index >= 0) {
3412+ next_opener = &ctx->marks[next_index];
3413+ next_closer = &ctx->marks[next_opener->next];
3414+ } else {
3415+ next_opener = NULL;
3416+ next_closer = NULL;
3417+ }
3418+
3419+ /* If nested ("[ [ ] ]"), we need to make sure that:
3420+ * - The outer does not end inside of (...) belonging to the inner.
3421+ * - The outer cannot be link if the inner is link (i.e. not image).
3422+ *
3423+ * (Note we here analyze from inner to outer as the marks are ordered
3424+ * by closer->beg.)
3425+ */
3426+ if((opener->beg < last_link_beg && closer->end < last_link_end) ||
3427+ (opener->beg < last_img_beg && closer->end < last_img_end) ||
3428+ (opener->beg < last_link_end && opener->ch == '['))
3429+ {
3430+ opener_index = next_index;
3431+ continue;
3432+ }
3433+
3434+ /* Recognize and resolve wiki links.
3435+ * Wiki links may be '[[destination]]' or '[[destination|label]]'.
3436+ */
3437+ if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3438+ (opener->end - opener->beg == 1) && /* not image */
3439+ next_opener != NULL && /* double '[' opener */
3440+ next_opener->ch == '[' &&
3441+ (next_opener->beg == opener->beg - 1) &&
3442+ (next_opener->end - next_opener->beg == 1) &&
3443+ next_closer != NULL && /* double ']' closer */
3444+ next_closer->ch == ']' &&
3445+ (next_closer->beg == closer->beg + 1) &&
3446+ (next_closer->end - next_closer->beg == 1))
3447+ {
3448+ MD_MARK* delim = NULL;
3449+ int delim_index;
3450+ OFF dest_beg, dest_end;
3451+
3452+ is_link = TRUE;
3453+
3454+ /* We don't allow destination to be longer than 100 characters.
3455+ * Let's scan to see whether there is '|'. (If not then the whole
3456+ * wiki-link has to be below the 100 characters.) */
3457+ delim_index = opener_index + 1;
3458+ while(delim_index < closer_index) {
3459+ MD_MARK* m = &ctx->marks[delim_index];
3460+ if(m->ch == '|') {
3461+ delim = m;
3462+ break;
3463+ }
3464+ if(m->ch != 'D' && m->beg - opener->end > 100)
3465+ break;
3466+ delim_index++;
3467+ }
3468+ dest_beg = opener->end;
3469+ dest_end = (delim != NULL) ? delim->beg : closer->beg;
3470+ if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
3471+ is_link = FALSE;
3472+
3473+ /* There may not be any new line in the destination. */
3474+ if(is_link) {
3475+ OFF off;
3476+ for(off = dest_beg; off < dest_end; off++) {
3477+ if(ISNEWLINE(off)) {
3478+ is_link = FALSE;
3479+ break;
3480+ }
3481+ }
3482+ }
3483+
3484+ if(is_link) {
3485+ if(delim != NULL) {
3486+ if(delim->end < closer->beg) {
3487+ opener->end = delim->beg;
3488+ } else {
3489+ /* The pipe is just before the closer: [[foo|]] */
3490+ closer->beg = delim->beg;
3491+ delim = NULL;
3492+ }
3493+ }
3494+
3495+ opener->beg = next_opener->beg;
3496+ opener->next = closer_index;
3497+ opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3498+
3499+ closer->end = next_closer->end;
3500+ closer->prev = opener_index;
3501+ closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3502+
3503+ last_link_beg = opener->beg;
3504+ last_link_end = closer->end;
3505+
3506+ if(delim != NULL) {
3507+ delim->flags |= MD_MARK_RESOLVED;
3508+ md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3509+ md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3510+ } else {
3511+ md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3512+ }
3513+
3514+ opener_index = next_opener->prev; /* Skip the outer bracket pair; the wiki link has absorbed its extent. */
3515+ continue;
3516+ }
3517+ }
3518+
3519+ if(next_opener != NULL && next_opener->beg == closer->end) {
3520+ if(next_closer->beg > closer->end + 1) {
3521+ /* Might be full reference link: "[text][label]". */
3522+ is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3523+ } else {
3524+ /* The following bracket pair is empty, so this might be what CommonMark calls a collapsed reference link: "[label][]". */
3525+ is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3526+ }
3527+
3528+ if(is_link < 0)
3529+ return -1;
3530+
3531+ if(is_link) {
3532+ /* Eat the 2nd "[...]". */
3533+ closer->end = next_closer->end;
3534+
3535+ /* Do not analyze the label as a standalone link in the next
3536+ * iteration. */
3537+ next_index = ctx->marks[next_index].prev;
3538+ }
3539+ } else {
3540+ if(closer->end < ctx->size && CH(closer->end) == _T('(')) {
3541+ /* Might be inline link. */
3542+ OFF inline_link_end = UINT_MAX;
3543+
3544+ is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3545+ if(is_link < 0)
3546+ return -1;
3547+
3548+ /* Check the closing ')' is not inside an already resolved range
3549+ * (i.e. a range with a higher priority), e.g. a code span. */
3550+ if(is_link) {
3551+ int i = closer_index + 1;
3552+
3553+ while(i < ctx->n_marks) {
3554+ MD_MARK* mark = &ctx->marks[i];
3555+
3556+ if(mark->beg >= inline_link_end)
3557+ break;
3558+ if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
3559+ if(ctx->marks[mark->next].beg >= inline_link_end) {
3560+ /* Cancel the link status. */
3561+ if(attr.title_needs_free)
3562+ free(attr.title);
3563+ is_link = FALSE;
3564+ break;
3565+ }
3566+
3567+ i = mark->next + 1;
3568+ } else {
3569+ i++;
3570+ }
3571+ }
3572+ }
3573+
3574+ if(is_link) {
3575+ /* Eat the "(...)" */
3576+ closer->end = inline_link_end;
3577+ }
3578+ }
3579+
3580+ if(!is_link) {
3581+ /* No second bracket pair and no inline "(...)": might be what CommonMark calls a shortcut reference link, "[label]". */
3582+ is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3583+ if(is_link < 0)
3584+ return -1;
3585+ }
3586+ }
3587+
3588+ if(is_link) {
3589+ /* Resolve the brackets as a link. */
3590+ opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3591+ closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3592+
3593+ /* If it is a link, we store the destination and title in the two
3594+ * dummy marks after the opener. */
3595+ MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
3596+ ctx->marks[opener_index+1].beg = attr.dest_beg;
3597+ ctx->marks[opener_index+1].end = attr.dest_end;
3598+
3599+ MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
3600+ md_mark_store_ptr(ctx, opener_index+2, attr.title);
3601+ /* The title might or might not have been allocated for us. */
3602+ if(attr.title_needs_free)
3603+ md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
3604+ ctx->marks[opener_index+2].prev = attr.title_size; /* The 'prev' member of the 2nd dummy is reused to carry the title size. */
3605+
3606+ if(opener->ch == '[') {
3607+ last_link_beg = opener->beg;
3608+ last_link_end = closer->end;
3609+ } else {
3610+ last_img_beg = opener->beg;
3611+ last_img_end = closer->end;
3612+ }
3613+
3614+ md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3615+ }
3616+
3617+ opener_index = next_index;
3618+ }
3619+
3620+ return 0;
3621+}
3622+
3623+/* Analyze whether the mark '&' starts a HTML entity.
3624+ * If so, update its flags as well as flags of corresponding closer ';'. */
3625+static void
3626+md_analyze_entity(MD_CTX* ctx, int mark_index)
3627+{
3628+ MD_MARK* opener = &ctx->marks[mark_index];
3629+ MD_MARK* closer;
3630+ OFF off;
3631+
3632+ /* Cannot be entity if there is no closer as the next mark.
3633+ * (Any other mark between would mean strange character which cannot be
3634+ * part of the entity.
3635+ *
3636+ * So we can do all the work on '&' and do not call this later for the
3637+ * closing mark ';'.
3638+ */
3639+ if(mark_index + 1 >= ctx->n_marks)
3640+ return;
3641+ closer = &ctx->marks[mark_index+1];
3642+ if(closer->ch != ';')
3643+ return;
3644+
3645+ if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3646+ MD_ASSERT(off == closer->end);
3647+
3648+ md_resolve_range(ctx, NULL, mark_index, mark_index+1);
3649+ opener->end = closer->end;
3650+ }
3651+}
3652+
3653+static void
3654+md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3655+{
3656+ MD_MARK* mark = &ctx->marks[mark_index];
3657+ mark->flags |= MD_MARK_RESOLVED;
3658+
3659+ md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
3660+ ctx->n_table_cell_boundaries++;
3661+}
3662+
3663+/* Split a longer mark into two. The new mark takes the given count of
3664+ * characters. May only be called if an adequate number of dummy 'D' marks
3665+ * follows.
3666+ */
3667+static int
3668+md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3669+{
3670+ MD_MARK* mark = &ctx->marks[mark_index];
3671+ int new_mark_index = mark_index + (mark->end - mark->beg - n);
3672+ MD_MARK* dummy = &ctx->marks[new_mark_index];
3673+
3674+ MD_ASSERT(mark->end - mark->beg > n);
3675+ MD_ASSERT(dummy->ch == 'D');
3676+
3677+ memcpy(dummy, mark, sizeof(MD_MARK));
3678+ mark->end -= n;
3679+ dummy->beg = mark->end;
3680+
3681+ return new_mark_index;
3682+}
3683+
3684+static void
3685+md_analyze_emph(MD_CTX* ctx, int mark_index)
3686+{
3687+ MD_MARK* mark = &ctx->marks[mark_index];
3688+ MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3689+
3690+ /* If we can be a closer, try to resolve with the preceding opener. */
3691+ if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3692+ MD_MARK* opener = NULL;
3693+ int opener_index = 0;
3694+
3695+ if(mark->ch == _T('*')) {
3696+ MD_MARKCHAIN* opener_chains[6];
3697+ int i, n_opener_chains;
3698+ unsigned flags = mark->flags;
3699+
3700+ /* Apply the "rule of three". */
3701+ n_opener_chains = 0;
3702+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
3703+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3704+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
3705+ if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3706+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
3707+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
3708+ if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3709+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
3710+ if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3711+ opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
3712+
3713+ /* Opener is the most recent mark from the allowed chains. */
3714+ for(i = 0; i < n_opener_chains; i++) {
3715+ if(opener_chains[i]->tail >= 0) {
3716+ int tmp_index = opener_chains[i]->tail;
3717+ MD_MARK* tmp_mark = &ctx->marks[tmp_index];
3718+ if(opener == NULL || tmp_mark->end > opener->end) {
3719+ opener_index = tmp_index;
3720+ opener = tmp_mark;
3721+ }
3722+ }
3723+ }
3724+ } else {
3725+ /* Simple emph. mark */
3726+ if(chain->tail >= 0) {
3727+ opener_index = chain->tail;
3728+ opener = &ctx->marks[opener_index];
3729+ }
3730+ }
3731+
3732+ /* Resolve, if we have found matching opener. */
3733+ if(opener != NULL) {
3734+ SZ opener_size = opener->end - opener->beg;
3735+ SZ closer_size = mark->end - mark->beg;
3736+ MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
3737+
3738+ if(opener_size > closer_size) {
3739+ opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
3740+ md_mark_chain_append(ctx, opener_chain, opener_index);
3741+ } else if(opener_size < closer_size) {
3742+ md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
3743+ }
3744+
3745+ md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3746+ md_resolve_range(ctx, opener_chain, opener_index, mark_index);
3747+ return;
3748+ }
3749+ }
3750+
3751+ /* If we could not resolve as closer, we may be yet be an opener. */
3752+ if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3753+ md_mark_chain_append(ctx, chain, mark_index);
3754+}
3755+
3756+static void
3757+md_analyze_tilde(MD_CTX* ctx, int mark_index)
3758+{
3759+ MD_MARK* mark = &ctx->marks[mark_index];
3760+ MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3761+
3762+ /* We attempt to be Github Flavored Markdown compatible here. GFM accepts
3763+ * only tildes sequences of length 1 and 2, and the length of the opener
3764+ * and closer has to match. */
3765+
3766+ if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) {
3767+ int opener_index = chain->head;
3768+
3769+ md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3770+ md_resolve_range(ctx, chain, opener_index, mark_index);
3771+ return;
3772+ }
3773+
3774+ if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3775+ md_mark_chain_append(ctx, chain, mark_index);
3776+}
3777+
3778+static void
3779+md_analyze_dollar(MD_CTX* ctx, int mark_index)
3780+{
3781+ /* This should mimic the way inline equations work in LaTeX, so there
3782+ * can only ever be one item in the chain (i.e. the dollars can't be
3783+ * nested). This is basically the same as the md_analyze_tilde function,
3784+ * except that we require matching openers and closers to be of the same
3785+ * length.
3786+ *
3787+ * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
3788+ if(DOLLAR_OPENERS.head >= 0) {
3789+ /* If the potential closer has a non-matching number of $, discard */
3790+ MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
3791+ MD_MARK* close = &ctx->marks[mark_index];
3792+
3793+ int opener_index = DOLLAR_OPENERS.head;
3794+ md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
3795+ if (open->end - open->beg == close->end - close->beg) {
3796+ /* We are the matching closer */
3797+ md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
3798+ } else {
3799+ /* We don't match the opener, so discard old opener and insert as opener */
3800+ md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3801+ }
3802+ } else {
3803+ /* No unmatched openers, so we are opener */
3804+ md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3805+ }
3806+}
3807+
/* Try to turn the potential opener of a permissive URL or WWW autolink
 * (the ':' or '.' mark at mark_index) into a resolved autolink.
 *
 * Starting at opener->end, the function scans a domain (alphanumerics, '-',
 * '.' and '_'), then a path which runs up to the next resolved mark, '<',
 * whitespace or new line, and finally it trims trailing punctuation.
 *
 * On success the opener is collapsed to zero length, the dummy 'D' mark which
 * follows it becomes a zero-length closer at the end of the URL, and the pair
 * is resolved. If the domain check fails, the function returns without
 * modifying any mark.
 */
3808+static void
3809+md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
3810+{
3811+ MD_MARK* opener = &ctx->marks[mark_index];
3812+ int closer_index = mark_index + 1;
3813+ MD_MARK* closer = &ctx->marks[closer_index];
3814+ MD_MARK* next_resolved_mark;
3815+ OFF off = opener->end;
3816+ int n_dots = FALSE; /* NOTE(review): used as a counter below; FALSE is presumably 0 -- confirm. */
3817+ int has_underscore_in_last_seg = FALSE;
3818+ int has_underscore_in_next_to_last_seg = FALSE;
3819+ int n_opened_parenthesis = 0;
3820+ int n_excess_parenthesis = 0;
3821+
3822+ /* Check for domain. */
3823+ while(off < ctx->size) {
3824+ if(ISALNUM(off) || CH(off) == _T('-')) {
3825+ off++;
3826+ } else if(CH(off) == _T('.')) {
3827+ /* We must see at least one period. */
3828+ n_dots++;
3829+ has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
3830+ has_underscore_in_last_seg = FALSE;
3831+ off++;
3832+ } else if(CH(off) == _T('_')) {
3833+ /* No underscore may be present in the last two domain segments. */
3834+ has_underscore_in_last_seg = TRUE;
3835+ off++;
3836+ } else {
3837+ break;
3838+ }
3839+ }
3840+ if(off > opener->end && CH(off-1) == _T('.')) { /* A trailing period is not part of the domain. */
3841+ off--;
3842+ n_dots--;
3843+ }
3844+ if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
3845+ return;
3846+
3847+ /* Check for path. */
/* The search below has no explicit bound: it relies on a resolved mark
 * existing further in ctx->marks (md_collect_marks() appends a resolved
 * dummy mark at the very end of the mark vector). */
3848+ next_resolved_mark = closer + 1;
3849+ while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED))
3850+ next_resolved_mark++;
3851+ while(off < next_resolved_mark->beg && CH(off) != _T('<') && !ISWHITESPACE(off) && !ISNEWLINE(off)) {
3852+ /* Parenthesis must be balanced. */
3853+ if(CH(off) == _T('(')) {
3854+ n_opened_parenthesis++;
3855+ } else if(CH(off) == _T(')')) {
3856+ if(n_opened_parenthesis > 0)
3857+ n_opened_parenthesis--;
3858+ else
3859+ n_excess_parenthesis++;
3860+ }
3861+
3862+ off++;
3863+ }
3864+
3865+ /* Trim a trailing punctuation from the end. */
/* NOTE(review): the trimming is not bounded by opener->end; it stops only at
 * the first character which is not in the trimmed set -- confirm this cannot
 * walk back into the scheme/"www." prefix for inputs such as "www....". */
3866+ while(TRUE) {
3867+ if(ISANYOF(off-1, _T("?!.,:*_~"))) {
3868+ off--;
3869+ } else if(CH(off-1) == ')' && n_excess_parenthesis > 0) {
3870+ /* Unmatched ')' can be in an interior of the path but not at the
3871+ * end of it, so the auto-link may be safely nested in a parenthesis
3872+ * pair. */
3873+ off--;
3874+ n_excess_parenthesis--;
3875+ } else {
3876+ break;
3877+ }
3878+ }
3879+
3880+ /* Ok. Let's call it an auto-link. Adapt opener and create closer to zero
3881+ * length so all the contents becomes the link text. */
3882+ MD_ASSERT(closer->ch == 'D');
3883+ opener->end = opener->beg;
3884+ closer->ch = opener->ch;
3885+ closer->beg = off;
3886+ closer->end = off;
3887+ md_resolve_range(ctx, NULL, mark_index, closer_index);
3888+}
3889+
3890+/* The permissive autolinks do not have to be enclosed in '<' '>' but we
3891+ * instead impose stricter rules what is understood as an e-mail address
3892+ * here. Actually any non-alphanumeric characters with exception of '.'
3893+ * are prohibited both in username and after '@'. */
3894+static void
3895+md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
3896+{
3897+ MD_MARK* opener = &ctx->marks[mark_index];
3898+ int closer_index;
3899+ MD_MARK* closer;
3900+ OFF beg = opener->beg;
3901+ OFF end = opener->end;
3902+ int dot_count = 0;
3903+
3904+ MD_ASSERT(CH(beg) == _T('@'));
3905+
3906+ /* Scan for name before '@'. */
3907+ while(beg > 0 && (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+"))))
3908+ beg--;
3909+
3910+ /* Scan for domain after '@'. */
3911+ while(end < ctx->size && (ISALNUM(end) || ISANYOF(end, _T(".-_")))) {
3912+ if(CH(end) == _T('.'))
3913+ dot_count++;
3914+ end++;
3915+ }
3916+ if(CH(end-1) == _T('.')) { /* Final '.' not part of it. */
3917+ dot_count--;
3918+ end--;
3919+ }
3920+ else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */
3921+ return;
3922+ if(CH(end-1) == _T('@') || dot_count == 0)
3923+ return;
3924+
3925+ /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
3926+ * length so all the contents becomes the link text. */
3927+ closer_index = mark_index + 1;
3928+ closer = &ctx->marks[closer_index];
3929+ MD_ASSERT(closer->ch == 'D');
3930+
3931+ opener->beg = beg;
3932+ opener->end = beg;
3933+ closer->ch = opener->ch;
3934+ closer->beg = end;
3935+ closer->end = end;
3936+ md_resolve_range(ctx, NULL, mark_index, closer_index);
3937+}
3938+
3939+static inline void
3940+md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3941+ int mark_beg, int mark_end, const CHAR* mark_chars)
3942+{
3943+ int i = mark_beg;
3944+ MD_UNUSED(lines);
3945+ MD_UNUSED(n_lines);
3946+
3947+ while(i < mark_end) {
3948+ MD_MARK* mark = &ctx->marks[i];
3949+
3950+ /* Skip resolved spans. */
3951+ if(mark->flags & MD_MARK_RESOLVED) {
3952+ if(mark->flags & MD_MARK_OPENER) {
3953+ MD_ASSERT(i < mark->next);
3954+ i = mark->next + 1;
3955+ } else {
3956+ i++;
3957+ }
3958+ continue;
3959+ }
3960+
3961+ /* Skip marks we do not want to deal with. */
3962+ if(!ISANYOF_(mark->ch, mark_chars)) {
3963+ i++;
3964+ continue;
3965+ }
3966+
3967+ /* Analyze the mark. */
3968+ switch(mark->ch) {
3969+ case '[': /* Pass through. */
3970+ case '!': /* Pass through. */
3971+ case ']': md_analyze_bracket(ctx, i); break;
3972+ case '&': md_analyze_entity(ctx, i); break;
3973+ case '|': md_analyze_table_cell_boundary(ctx, i); break;
3974+ case '_': /* Pass through. */
3975+ case '*': md_analyze_emph(ctx, i); break;
3976+ case '~': md_analyze_tilde(ctx, i); break;
3977+ case '$': md_analyze_dollar(ctx, i); break;
3978+ case '.': /* Pass through. */
3979+ case ':': md_analyze_permissive_url_autolink(ctx, i); break;
3980+ case '@': md_analyze_permissive_email_autolink(ctx, i); break;
3981+ }
3982+
3983+ i++;
3984+ }
3985+}
3986+
3987+/* Analyze marks (build ctx->marks). */
3988+static int
3989+md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
3990+{
3991+ int ret;
3992+
3993+ /* Reset the previously collected stack of marks. */
3994+ ctx->n_marks = 0;
3995+
3996+ /* Collect all marks. */
3997+ MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
3998+
3999+ /* We analyze marks in few groups to handle their precedence. */
4000+ /* (1) Entities; code spans; autolinks; raw HTML. */
4001+ md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("&"));
4002+
4003+ /* (2) Links. */
4004+ md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"));
4005+ MD_CHECK(md_resolve_links(ctx, lines, n_lines));
4006+ BRACKET_OPENERS.head = -1;
4007+ BRACKET_OPENERS.tail = -1;
4008+ ctx->unresolved_link_head = -1;
4009+ ctx->unresolved_link_tail = -1;
4010+
4011+ if(table_mode) {
4012+ /* (3) Analyze table cell boundaries.
4013+ * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
4014+ * not after, because caller may need it. */
4015+ MD_ASSERT(n_lines == 1);
4016+ TABLECELLBOUNDARIES.head = -1;
4017+ TABLECELLBOUNDARIES.tail = -1;
4018+ ctx->n_table_cell_boundaries = 0;
4019+ md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"));
4020+ return ret;
4021+ }
4022+
4023+ /* (4) Emphasis and strong emphasis; permissive autolinks. */
4024+ md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
4025+
4026+abort:
4027+ return ret;
4028+}
4029+
4030+static void
4031+md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
4032+ int mark_beg, int mark_end)
4033+{
4034+ int i;
4035+
4036+ md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
4037+
4038+ for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
4039+ ctx->mark_chains[i].head = -1;
4040+ ctx->mark_chains[i].tail = -1;
4041+ }
4042+}
4043+
4044+static int
4045+md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4046+ const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
4047+ const CHAR* title, SZ title_size)
4048+{
4049+ MD_ATTRIBUTE_BUILD href_build = { 0 };
4050+ MD_ATTRIBUTE_BUILD title_build = { 0 };
4051+ MD_SPAN_A_DETAIL det;
4052+ int ret = 0;
4053+
4054+ /* Note we here rely on fact that MD_SPAN_A_DETAIL and
4055+ * MD_SPAN_IMG_DETAIL are binary-compatible. */
4056+ memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
4057+ MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4058+ (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
4059+ &det.href, &href_build));
4060+ MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
4061+
4062+ if(enter)
4063+ MD_ENTER_SPAN(type, &det);
4064+ else
4065+ MD_LEAVE_SPAN(type, &det);
4066+
4067+abort:
4068+ md_free_attribute(ctx, &href_build);
4069+ md_free_attribute(ctx, &title_build);
4070+ return ret;
4071+}
4072+
4073+static int
4074+md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4075+{
4076+ MD_ATTRIBUTE_BUILD target_build = { 0 };
4077+ MD_SPAN_WIKILINK_DETAIL det;
4078+ int ret = 0;
4079+
4080+ memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
4081+ MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
4082+
4083+ if (enter)
4084+ MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
4085+ else
4086+ MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
4087+
4088+abort:
4089+ md_free_attribute(ctx, &target_build);
4090+ return ret;
4091+}
4092+
4093+
4094+/* Render the output, accordingly to the analyzed ctx->marks. */
4095+static int
4096+md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4097+{
4098+ MD_TEXTTYPE text_type;
4099+ const MD_LINE* line = lines;
4100+ MD_MARK* prev_mark = NULL;
4101+ MD_MARK* mark;
4102+ OFF off = lines[0].beg;
4103+ OFF end = lines[n_lines-1].end;
4104+ int enforce_hardbreak = 0;
4105+ int ret = 0;
4106+
4107+ /* Find first resolved mark. Note there is always at least one resolved
4108+ * mark, the dummy last one after the end of the latest line we actually
4109+ * never really reach. This saves us of a lot of special checks and cases
4110+ * in this function. */
4111+ mark = ctx->marks;
4112+ while(!(mark->flags & MD_MARK_RESOLVED))
4113+ mark++;
4114+
4115+ text_type = MD_TEXT_NORMAL;
4116+
4117+ while(1) {
4118+ /* Process the text up to the next mark or end-of-line. */
4119+ OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
4120+ if(tmp > off) {
4121+ MD_TEXT(text_type, STR(off), tmp - off);
4122+ off = tmp;
4123+ }
4124+
4125+ /* If reached the mark, process it and move to next one. */
4126+ if(off >= mark->beg) {
4127+ switch(mark->ch) {
4128+ case '\\': /* Backslash escape. */
4129+ if(ISNEWLINE(mark->beg+1))
4130+ enforce_hardbreak = 1;
4131+ else
4132+ MD_TEXT(text_type, STR(mark->beg+1), 1);
4133+ break;
4134+
4135+ case ' ': /* Non-trivial space. */
4136+ MD_TEXT(text_type, _T(" "), 1);
4137+ break;
4138+
4139+ case '`': /* Code span. */
4140+ if(mark->flags & MD_MARK_OPENER) {
4141+ MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
4142+ text_type = MD_TEXT_CODE;
4143+ } else {
4144+ MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
4145+ text_type = MD_TEXT_NORMAL;
4146+ }
4147+ break;
4148+
4149+ case '_': /* Underline (or emphasis if we fall through). */
4150+ if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
4151+ if(mark->flags & MD_MARK_OPENER) {
4152+ while(off < mark->end) {
4153+ MD_ENTER_SPAN(MD_SPAN_U, NULL);
4154+ off++;
4155+ }
4156+ } else {
4157+ while(off < mark->end) {
4158+ MD_LEAVE_SPAN(MD_SPAN_U, NULL);
4159+ off++;
4160+ }
4161+ }
4162+ break;
4163+ }
4164+ MD_FALLTHROUGH();
4165+
4166+ case '*': /* Emphasis, strong emphasis. */
4167+ if(mark->flags & MD_MARK_OPENER) {
4168+ if((mark->end - off) % 2) {
4169+ MD_ENTER_SPAN(MD_SPAN_EM, NULL);
4170+ off++;
4171+ }
4172+ while(off + 1 < mark->end) {
4173+ MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
4174+ off += 2;
4175+ }
4176+ } else {
4177+ while(off + 1 < mark->end) {
4178+ MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
4179+ off += 2;
4180+ }
4181+ if((mark->end - off) % 2) {
4182+ MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
4183+ off++;
4184+ }
4185+ }
4186+ break;
4187+
4188+ case '~':
4189+ if(mark->flags & MD_MARK_OPENER)
4190+ MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
4191+ else
4192+ MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
4193+ break;
4194+
4195+ case '$':
4196+ if(mark->flags & MD_MARK_OPENER) {
4197+ MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4198+ text_type = MD_TEXT_LATEXMATH;
4199+ } else {
4200+ MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4201+ text_type = MD_TEXT_NORMAL;
4202+ }
4203+ break;
4204+
4205+ case '[': /* Link, wiki link, image. */
4206+ case '!':
4207+ case ']':
4208+ {
4209+ const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
4210+ const MD_MARK* closer = &ctx->marks[opener->next];
4211+ const MD_MARK* dest_mark;
4212+ const MD_MARK* title_mark;
4213+
4214+ if ((opener->ch == '[' && closer->ch == ']') &&
4215+ opener->end - opener->beg >= 2 &&
4216+ closer->end - closer->beg >= 2)
4217+ {
4218+ int has_label = (opener->end - opener->beg > 2);
4219+ SZ target_sz;
4220+
4221+ if(has_label)
4222+ target_sz = opener->end - (opener->beg+2);
4223+ else
4224+ target_sz = closer->beg - opener->end;
4225+
4226+ MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
4227+ has_label ? STR(opener->beg+2) : STR(opener->end),
4228+ target_sz));
4229+
4230+ break;
4231+ }
4232+
4233+ dest_mark = opener+1;
4234+ MD_ASSERT(dest_mark->ch == 'D');
4235+ title_mark = opener+2;
4236+ MD_ASSERT(title_mark->ch == 'D');
4237+
4238+ MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
4239+ (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
4240+ STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
4241+ md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));
4242+
4243+ /* link/image closer may span multiple lines. */
4244+ if(mark->ch == ']') {
4245+ while(mark->end > line->end)
4246+ line++;
4247+ }
4248+
4249+ break;
4250+ }
4251+
4252+ case '<':
4253+ case '>': /* Autolink or raw HTML. */
4254+ if(!(mark->flags & MD_MARK_AUTOLINK)) {
4255+ /* Raw HTML. */
4256+ if(mark->flags & MD_MARK_OPENER)
4257+ text_type = MD_TEXT_HTML;
4258+ else
4259+ text_type = MD_TEXT_NORMAL;
4260+ break;
4261+ }
4262+ /* Pass through, if auto-link. */
4263+ MD_FALLTHROUGH();
4264+
4265+ case '@': /* Permissive e-mail autolink. */
4266+ case ':': /* Permissive URL autolink. */
4267+ case '.': /* Permissive WWW autolink. */
4268+ {
4269+ MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
4270+ MD_MARK* closer = &ctx->marks[opener->next];
4271+ const CHAR* dest = STR(opener->end);
4272+ SZ dest_size = closer->beg - opener->end;
4273+
4274+ /* For permissive auto-links we do not know closer mark
4275+ * position at the time of md_collect_marks(), therefore
4276+ * it can be out-of-order in ctx->marks[].
4277+ *
4278+ * With this flag, we make sure that we output the closer
4279+ * only if we processed the opener. */
4280+ if(mark->flags & MD_MARK_OPENER)
4281+ closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;
4282+
4283+ if(opener->ch == '@' || opener->ch == '.') {
4284+ dest_size += 7;
4285+ MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
4286+ memcpy(ctx->buffer,
4287+ (opener->ch == '@' ? _T("mailto:") : _T("http://")),
4288+ 7 * sizeof(CHAR));
4289+ memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
4290+ dest = ctx->buffer;
4291+ }
4292+
4293+ if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
4294+ MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
4295+ MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
4296+ break;
4297+ }
4298+
4299+ case '&': /* Entity. */
4300+ MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
4301+ break;
4302+
4303+ case '\0':
4304+ MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
4305+ break;
4306+
4307+ case 127:
4308+ goto abort;
4309+ }
4310+
4311+ off = mark->end;
4312+
4313+ /* Move to next resolved mark. */
4314+ prev_mark = mark;
4315+ mark++;
4316+ while(!(mark->flags & MD_MARK_RESOLVED) || mark->beg < off)
4317+ mark++;
4318+ }
4319+
4320+ /* If reached end of line, move to next one. */
4321+ if(off >= line->end) {
4322+ /* If it is the last line, we are done. */
4323+ if(off >= end)
4324+ break;
4325+
4326+ if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
4327+ OFF tmp;
4328+
4329+ MD_ASSERT(prev_mark != NULL);
4330+ MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$') && (prev_mark->flags & MD_MARK_OPENER));
4331+ MD_ASSERT(ISANYOF2_(mark->ch, '`', '$') && (mark->flags & MD_MARK_CLOSER));
4332+
4333+ /* Inside a code span, trailing line whitespace has to be
4334+ * outputted. */
4335+ tmp = off;
4336+ while(off < ctx->size && ISBLANK(off))
4337+ off++;
4338+ if(off > tmp)
4339+ MD_TEXT(text_type, STR(tmp), off-tmp);
4340+
4341+ /* and new lines are transformed into single spaces. */
4342+ if(prev_mark->end < off && off < mark->beg)
4343+ MD_TEXT(text_type, _T(" "), 1);
4344+ } else if(text_type == MD_TEXT_HTML) {
4345+ /* Inside raw HTML, we output the new line verbatim, including
4346+ * any trailing spaces. */
4347+ OFF tmp = off;
4348+
4349+ while(tmp < end && ISBLANK(tmp))
4350+ tmp++;
4351+ if(tmp > off)
4352+ MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
4353+ MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
4354+ } else {
4355+ /* Output soft or hard line break. */
4356+ MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
4357+
4358+ if(text_type == MD_TEXT_NORMAL) {
4359+ if(enforce_hardbreak)
4360+ break_type = MD_TEXT_BR;
4361+ else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
4362+ break_type = MD_TEXT_BR;
4363+ }
4364+
4365+ MD_TEXT(break_type, _T("\n"), 1);
4366+ }
4367+
4368+ /* Move to the next line. */
4369+ line++;
4370+ off = line->beg;
4371+
4372+ enforce_hardbreak = 0;
4373+ }
4374+ }
4375+
4376+abort:
4377+ return ret;
4378+}
4379+
4380+
4381+/***************************
4382+ *** Processing Tables ***
4383+ ***************************/
4384+
/* Derive the alignment of each table column from the table underline line.
 *
 * Scans the underline occupying [beg, end) and, for each of the n_align
 * columns, stores one MD_ALIGN value into align[]: a ':' directly before the
 * run of dashes sets bit 0 (left), a ':' directly after it sets bit 1
 * (right); both bits select center and neither selects the default.
 *
 * NOTE(review): the search for the first '-' of a column is not bounded by
 * `end`; presumably the caller only passes a column count matching the
 * number of dash runs in the line -- confirm against md_is_table_underline().
 */
4385+ static void
4386+ md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4387+ {
4388+ static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4389+ OFF off = beg;
4390+
4391+ while(n_align > 0) {
4392+ int index = 0; /* index into align_map[] */
4393+
/* Skip anything (pipes, blanks, a leading ':') up to the column's dashes. */
4394+ while(CH(off) != _T('-'))
4395+ off++;
/* Bit 0: colon right before the dashes. */
4396+ if(off > beg && CH(off-1) == _T(':'))
4397+ index |= 1;
4398+ while(off < end && CH(off) == _T('-'))
4399+ off++;
/* Bit 1: colon right after the dashes. */
4400+ if(off < end && CH(off) == _T(':'))
4401+ index |= 2;
4402+
4403+ *align = align_map[index];
4404+ align++;
4405+ n_align--;
4406+ }
4407+
4408+ }
4409+
4410+ /* Forward declaration: md_process_table_cell() below calls this function before its definition appears. */
4411+ static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
4412+
/* Emit one table cell.
 *
 * Whitespace is trimmed from both ends of [beg, end); the remaining text is
 * then processed as a single line of normal inline content, bracketed by
 * MD_ENTER_BLOCK/MD_LEAVE_BLOCK for cell_type with the alignment passed in
 * the cell detail.
 *
 * Returns the value left in `ret` (0 unless one of the macros reports a
 * failure; the macros are defined outside this part of the file and
 * presumably jump to `abort` on error).
 */
4413+ static int
4414+ md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4415+ {
4416+ MD_LINE line;
4417+ MD_BLOCK_TD_DETAIL det;
4418+ int ret = 0;
4419+
/* Trim leading and trailing whitespace of the cell. */
4420+ while(beg < end && ISWHITESPACE(beg))
4421+ beg++;
4422+ while(end > beg && ISWHITESPACE(end-1))
4423+ end--;
4424+
4425+ det.align = align;
4426+ line.beg = beg;
4427+ line.end = end;
4428+
4429+ MD_ENTER_BLOCK(cell_type, &det);
4430+ MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
4431+ MD_LEAVE_BLOCK(cell_type, &det);
4432+
4433+abort:
4434+ return ret;
4435+ }
4436+
/* Emit one table row occupying [beg, end).
 *
 * The line is analyzed in table mode to find the cell boundaries, the
 * boundaries are copied into a heap buffer, and then exactly col_count cells
 * of type cell_type are emitted inside an MD_BLOCK_TR block: surplus cells
 * on the line are ignored and missing ones are emitted as empty cells.
 * align[] supplies the alignment of each column.
 *
 * Returns 0 on success; -1 if the boundary buffer cannot be allocated, or
 * whatever a failing MD_CHECK/MD_ENTER_BLOCK/MD_LEAVE_BLOCK stored in `ret`.
 * The buffer and any pointers on PTR_CHAIN are released on every path.
 */
4437+ static int
4438+ md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4439+ const MD_ALIGN* align, int col_count)
4440+ {
4441+ MD_LINE line;
4442+ OFF* pipe_offs = NULL;
4443+ int i, j, k, n;
4444+ int ret = 0;
4445+
4446+ line.beg = beg;
4447+ line.end = end;
4448+
4449+ /* Break the line into table cells by identifying the pipe characters which
4450+ * form the cell boundaries. */
4451+ MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
4452+
4453+ /* We have to remember the cell boundaries in a local buffer because
4454+ * ctx->marks[] shall be reused during cell contents processing. */
/* Two extra slots: one for the row start and one for the end sentinel. */
4455+ n = ctx->n_table_cell_boundaries + 2;
4456+ pipe_offs = (OFF*) malloc(n * sizeof(OFF));
4457+ if(pipe_offs == NULL) {
4458+ MD_LOG("malloc() failed.");
4459+ ret = -1;
4460+ goto abort;
4461+ }
4462+ j = 0;
4463+ pipe_offs[j++] = beg;
4464+ for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) {
4465+ MD_MARK* mark = &ctx->marks[i];
4466+ pipe_offs[j++] = mark->end;
4467+ }
/* Sentinel is end+1 so that "pipe_offs[i+1]-1" below yields `end` for the
 * last cell, just as it yields the pipe position for the inner cells. */
4468+ pipe_offs[j++] = end+1;
4469+
4470+ /* Process cells. */
4471+ MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4472+ k = 0;
4473+ for(i = 0; i < j-1 && k < col_count; i++) {
/* Skip empty ranges (e.g. nothing between the row start and a leading pipe). */
4474+ if(pipe_offs[i] < pipe_offs[i+1]-1)
4475+ MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
4476+ }
4477+ /* Make sure we call enough table cells even if the current table contains
4478+ * too few of them. */
4479+ while(k < col_count)
4480+ MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
4481+ MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4482+
4483+abort:
4484+ free(pipe_offs);
4485+
4486+ /* Free any temporary memory blocks stored within some dummy marks. */
4487+ for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4488+ free(md_mark_get_ptr(ctx, i));
4489+ PTR_CHAIN.head = -1;
4490+ PTR_CHAIN.tail = -1;
4491+
4492+ return ret;
4493+ }
4494+
/* Emit the contents of a table block.
 *
 * lines[0] is the header row, lines[1] the underline (used only to compute
 * the column alignments) and lines[2..n_lines-1] the body rows. The header
 * row is emitted inside MD_BLOCK_THEAD with MD_BLOCK_TH cells; body rows, if
 * any, inside MD_BLOCK_TBODY with MD_BLOCK_TD cells. Every row is emitted
 * with col_count cells.
 *
 * Returns 0 on success; -1 if the alignment array cannot be allocated, or
 * the value a failing step stored in `ret`.
 */
4495+ static int
4496+ md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
4497+ {
4498+ MD_ALIGN* align;
4499+ int i;
4500+ int ret = 0;
4501+
4502+ /* At least two lines have to be present: The column headers and the line
4503+ * with the underlines. */
4504+ MD_ASSERT(n_lines >= 2);
4505+
4506+ align = malloc(col_count * sizeof(MD_ALIGN));
4507+ if(align == NULL) {
4508+ MD_LOG("malloc() failed.");
4509+ ret = -1;
4510+ goto abort;
4511+ }
4512+
4513+ md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);
4514+
4515+ MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4516+ MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4517+ lines[0].beg, lines[0].end, align, col_count));
4518+ MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4519+
/* The body section is emitted only when there is at least one body row. */
4520+ if(n_lines > 2) {
4521+ MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4522+ for(i = 2; i < n_lines; i++) {
4523+ MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4524+ lines[i].beg, lines[i].end, align, col_count));
4525+ }
4526+ MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4527+ }
4528+
4529+abort:
/* free(NULL) is a no-op, so this is safe on the allocation-failure path too. */
4530+ free(align);
4531+ return ret;
4532+ }
4533+
4534+
4535+/**************************
4536+ *** Processing Block ***
4537+ **************************/
4538+
/* Bit flags stored in the `flags` member of struct MD_BLOCK_tag. */
4539+#define MD_BLOCK_CONTAINER_OPENER 0x01 /* Record opens a container block. */
4540+#define MD_BLOCK_CONTAINER_CLOSER 0x02 /* Record closes a container block. */
4541+#define MD_BLOCK_CONTAINER (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER) /* Mask: record is a container opener and/or closer. */
4542+#define MD_BLOCK_LOOSE_LIST 0x04 /* List is loose, i.e. reported as not tight. */
4543+#define MD_BLOCK_SETEXT_HEADER 0x08 /* MD_BLOCK_H coming from a Setext header; its last line is the underline. */
4544+
/* Record describing one block inside ctx->block_bytes.
 *
 * For leaf blocks the record is immediately followed by its n_lines line
 * records (the code reads them via "(block + 1)"): MD_VERBATIMLINE for
 * MD_BLOCK_CODE and MD_BLOCK_HTML, MD_LINE for all other leaf blocks.
 * Container records (flags & MD_BLOCK_CONTAINER) carry no lines.
 */
4545+ struct MD_BLOCK_tag {
4546+ MD_BLOCKTYPE type : 8;
/* Combination of the MD_BLOCK_xxxx flag bits defined above. */
4547+ unsigned flags : 8;
4548+
4549+ /* MD_BLOCK_H: Header level (1 - 6)
4550+ * MD_BLOCK_CODE: Non-zero if fenced, zero if indented.
4551+ * MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' ').
4552+ * MD_BLOCK_TABLE: Column count (as determined by the table underline).
 * MD_BLOCK_UL: List mark character.
 * MD_BLOCK_OL: List mark delimiter character.
4553+ */
4554+ unsigned data : 16;
4555+
4556+ /* Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
4557+ * MD_BLOCK_LI: Task mark offset in the input doc.
4558+ * MD_BLOCK_OL: Start item number.
4559+ */
4560+ unsigned n_lines;
4561+ };
4562+
/* Per-container state kept in ctx->containers[].
 *
 * Within this part of the file only `is_loose` is used:
 * md_process_all_blocks() stores the looseness of each open list (and TRUE
 * for block quotes) and md_process_leaf_block() reads it to decide whether
 * paragraphs are reported as blocks.
 *
 * NOTE(review): the remaining members are used by container detection code
 * that is not visible here; the per-member notes below are inferred from
 * the member names only -- confirm against that code.
 */
4563+ struct MD_CONTAINER_tag {
4564+ CHAR ch; /* presumably the character introducing the container -- confirm */
4565+ unsigned is_loose : 8; /* non-zero if paragraphs inside are reported as blocks */
4566+ unsigned is_task : 8; /* presumably set for task list items -- confirm */
4567+ unsigned start; /* presumably the start number of an ordered list -- confirm */
4568+ unsigned mark_indent;
4569+ unsigned contents_indent;
4570+ OFF block_byte_off;
4571+ OFF task_mark_off;
4572+ };
4573+
4574+
/* Process the inline contents of a normal (non-verbatim) leaf block.
 *
 * The n_lines lines are first analyzed for inline marks (not in table mode)
 * and then the resolved inlines are emitted. Any temporary allocations
 * linked on PTR_CHAIN are freed and the chain is reset on every path.
 *
 * Returns 0 on success, or the negative value a failing step stored in
 * `ret`.
 */
4575+ static int
4576+ md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4577+ {
4578+ int i;
/* Initialized like in every sibling function, so the returned value never
 * depends on how the MD_CHECK macro happens to assign it. */
4579+ int ret = 0;
4580+
4581+ MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4582+ MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4583+
4584+abort:
4585+ /* Free any temporary memory blocks stored within some dummy marks. */
4586+ for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4587+ free(md_mark_get_ptr(ctx, i));
4588+ PTR_CHAIN.head = -1;
4589+ PTR_CHAIN.tail = -1;
4590+
4591+ return ret;
4592+ }
4593+
/* Emit the lines of a verbatim block (code or raw HTML) as text of the given
 * text_type.
 *
 * For each line this outputs, in order: the line's indentation as space
 * characters (in pieces of at most indent_chunk_size characters taken from
 * indent_chunk_str), the line text [beg, end) via MD_TEXT_INSECURE, and a
 * terminating "\n".
 *
 * Returns 0 on success, or the value a failing MD_TEXT stored in `ret`.
 */
4594+ static int
4595+ md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
4596+ {
4597+ static const CHAR indent_chunk_str[] = _T(" ");
/* Length of the chunk string without its terminating zero. */
4598+ static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
4599+
4600+ int i;
4601+ int ret = 0;
4602+
4603+ for(i = 0; i < n_lines; i++) {
4604+ const MD_VERBATIMLINE* line = &lines[i];
4605+ int indent = line->indent;
4606+
4607+ MD_ASSERT(indent >= 0);
4608+
4609+ /* Output code indentation. */
4610+ while(indent > (int) indent_chunk_size) {
4611+ MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4612+ indent -= indent_chunk_size;
4613+ }
/* Remainder: between 1 and indent_chunk_size characters, if any. */
4614+ if(indent > 0)
4615+ MD_TEXT(text_type, indent_chunk_str, indent);
4616+
4617+ /* Output the code line itself. */
4618+ MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4619+
4620+ /* Enforce end-of-line. */
4621+ MD_TEXT(text_type, _T("\n"), 1);
4622+ }
4623+
4624+abort:
4625+ return ret;
4626+ }
4627+
/* Emit the text of a code block as MD_TEXT_CODE.
 *
 * For a fenced block the first line (the opening fence) is dropped; for an
 * indented block empty lines (beg == end) at the start and at the end are
 * dropped. Nothing is emitted if no line remains.
 *
 * Returns 0 if there is nothing to emit, otherwise the result of
 * md_process_verbatim_block_contents().
 */
4628+ static int
4629+ md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
4630+ {
4631+ if(is_fenced) {
4632+ /* Skip the first line in case of fenced code: It is the fence.
4633+ * (Only the starting fence is present due to logic in md_analyze_line().) */
4634+ lines++;
4635+ n_lines--;
4636+ } else {
4637+ /* Ignore blank lines at start/end of indented code block. */
4638+ while(n_lines > 0 && lines[0].beg == lines[0].end) {
4639+ lines++;
4640+ n_lines--;
4641+ }
4642+ while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end) {
4643+ n_lines--;
4644+ }
4645+ }
4646+
4647+ if(n_lines == 0)
4648+ return 0;
4649+
4650+ return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4651+ }
4652+
/* Fill the detail structure of a fenced code block from its fence line.
 *
 * The fence line is the first verbatim line stored after the block record.
 * After skipping the run of fence characters and the spaces following it,
 * and trimming trailing spaces, the rest of the line becomes det->info; its
 * first whitespace-delimited word becomes det->lang. det->fence_char is set
 * to the fence character.
 *
 * info_build and lang_build receive the storage backing the two attributes;
 * the caller releases them with md_free_attribute().
 *
 * Returns 0 on success, or the value a failing md_build_attribute() call
 * stored in `ret`.
 */
4653+ static int
4654+ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4655+ MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4656+ {
4657+ const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
4658+ OFF beg = fence_line->beg;
4659+ OFF end = fence_line->end;
4660+ OFF lang_end;
4661+ CHAR fence_ch = CH(fence_line->beg);
4662+ int ret = 0;
4663+
4664+ /* Skip the fence itself. */
4665+ while(beg < ctx->size && CH(beg) == fence_ch)
4666+ beg++;
4667+ /* Trim initial spaces. */
4668+ while(beg < ctx->size && CH(beg) == _T(' '))
4669+ beg++;
4670+
4671+ /* Trim trailing spaces. */
4672+ while(end > beg && CH(end-1) == _T(' '))
4673+ end--;
4674+
4675+ /* Build info string attribute. */
4676+ MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
4677+
4678+ /* Build language attribute: the info string up to its first whitespace. */
4679+ lang_end = beg;
4680+ while(lang_end < end && !ISWHITESPACE(lang_end))
4681+ lang_end++;
4682+ MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
4683+
4684+ det->fence_char = fence_ch;
4685+
4686+abort:
4687+ return ret;
4688+ }
4689+
/* Report one leaf block to the renderer and process its contents.
 *
 * The block detail is prepared according to the block type (header level,
 * fenced code info/lang, table dimensions), the block is entered, its lines
 * (stored right after the block record) are processed by the routine
 * matching the block type, and the block is left again.
 *
 * A paragraph whose innermost container is a tight list is not wrapped in
 * enter/leave notifications; only its contents are processed.
 *
 * Returns 0 on success, or the value a failing step stored in `ret`.
 */
4690+ static int
4691+ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4692+ {
4693+ union {
4694+ MD_BLOCK_H_DETAIL header;
4695+ MD_BLOCK_CODE_DETAIL code;
4696+ MD_BLOCK_TABLE_DETAIL table;
4697+ } det;
4698+ MD_ATTRIBUTE_BUILD info_build;
4699+ MD_ATTRIBUTE_BUILD lang_build;
4700+ int is_in_tight_list;
4701+ int clean_fence_code_detail = FALSE;
4702+ int ret = 0;
4703+ memset(&det, 0, sizeof(det));
/* Both builds are released at `abort` once clean_fence_code_detail is set,
 * even if md_setup_fenced_code_detail() failed before filling the second
 * one. Zero them so the cleanup never reads uninitialized memory.
 * NOTE(review): assumes md_free_attribute() treats an all-zero
 * MD_ATTRIBUTE_BUILD as empty -- confirm. */
4704+ memset(&info_build, 0, sizeof(info_build));
4705+ memset(&lang_build, 0, sizeof(lang_build));
4706+ if(ctx->n_containers == 0)
4707+ is_in_tight_list = FALSE;
4708+ else
4709+ is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;
4710+
4711+ switch(block->type) {
4712+ case MD_BLOCK_H:
4713+ det.header.level = block->data;
4714+ break;
4715+
4716+ case MD_BLOCK_CODE:
4717+ /* For fenced code block, we may need to set the info string. */
4718+ if(block->data != 0) {
4719+ memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
4720+ clean_fence_code_detail = TRUE;
4721+ MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4722+ }
4723+ break;
4724+
4725+ case MD_BLOCK_TABLE:
4726+ det.table.col_count = block->data;
4727+ det.table.head_row_count = 1;
/* All lines but the header row and the underline are body rows. */
4728+ det.table.body_row_count = block->n_lines - 2;
4729+ break;
4730+
4731+ default:
4732+ /* Noop. */
4733+ break;
4734+ }
4735+
4736+ if(!is_in_tight_list || block->type != MD_BLOCK_P)
4737+ MD_ENTER_BLOCK(block->type, (void*) &det);
4738+
4739+ /* Process the block contents according to its type. */
4740+ switch(block->type) {
4741+ case MD_BLOCK_HR:
4742+ /* noop */
4743+ break;
4744+
4745+ case MD_BLOCK_CODE:
4746+ MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
4747+ (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4748+ break;
4749+
4750+ case MD_BLOCK_HTML:
4751+ MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4752+ (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4753+ break;
4754+
4755+ case MD_BLOCK_TABLE:
4756+ MD_CHECK(md_process_table_block_contents(ctx, block->data,
4757+ (const MD_LINE*)(block + 1), block->n_lines));
4758+ break;
4759+
4760+ default:
4761+ MD_CHECK(md_process_normal_block_contents(ctx,
4762+ (const MD_LINE*)(block + 1), block->n_lines));
4763+ break;
4764+ }
4765+
4766+ if(!is_in_tight_list || block->type != MD_BLOCK_P)
4767+ MD_LEAVE_BLOCK(block->type, (void*) &det);
4768+
4769+abort:
4770+ if(clean_fence_code_detail) {
4771+ md_free_attribute(ctx, &info_build);
4772+ md_free_attribute(ctx, &lang_build);
4773+ }
4774+ return ret;
4775+ }
4776+
/* Walk all block records accumulated in ctx->block_bytes and report them.
 *
 * Container records produce leave and/or enter notifications (a record
 * carrying both flags is left first and then entered); leaf records are
 * handed to md_process_leaf_block(). While walking, ctx->containers[] is
 * reused as a stack recording whether each open list is loose; block quotes
 * are pushed as loose.
 *
 * On success the block buffer is marked empty (ctx->n_block_bytes = 0).
 * Returns 0 on success, or the value a failing step stored in `ret`.
 */
4777+ static int
4778+ md_process_all_blocks(MD_CTX* ctx)
4779+ {
4780+ int byte_off = 0;
4781+ int ret = 0;
4782+
4783+ /* ctx->containers now is not needed for detection of lists and list items
4784+ * so we reuse it for tracking what lists are loose or tight. We rely
4785+ * on the fact the vector is large enough to hold the deepest nesting
4786+ * level of lists. */
4787+ ctx->n_containers = 0;
4788+
4789+ while(byte_off < ctx->n_block_bytes) {
4790+ MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
/* Detail passed with container notifications; it is filled only for the
 * UL, OL and LI block types below. */
4791+ union {
4792+ MD_BLOCK_UL_DETAIL ul;
4793+ MD_BLOCK_OL_DETAIL ol;
4794+ MD_BLOCK_LI_DETAIL li;
4795+ } det;
4796+
4797+ switch(block->type) {
4798+ case MD_BLOCK_UL:
4799+ det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4800+ det.ul.mark = (CHAR) block->data;
4801+ break;
4802+
4803+ case MD_BLOCK_OL:
/* For MD_BLOCK_OL records n_lines holds the start item number. */
4804+ det.ol.start = block->n_lines;
4805+ det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4806+ det.ol.mark_delimiter = (CHAR) block->data;
4807+ break;
4808+
4809+ case MD_BLOCK_LI:
4810+ det.li.is_task = (block->data != 0);
4811+ det.li.task_mark = (CHAR) block->data;
/* For MD_BLOCK_LI records n_lines holds the task mark offset. */
4812+ det.li.task_mark_offset = (OFF) block->n_lines;
4813+ break;
4814+
4815+ default:
4816+ /* noop */
4817+ break;
4818+ }
4819+
4820+ if(block->flags & MD_BLOCK_CONTAINER) {
4821+ if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4822+ MD_LEAVE_BLOCK(block->type, &det);
4823+
/* Pop the looseness entry pushed when the list or quote was opened. */
4824+ if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
4825+ ctx->n_containers--;
4826+ }
4827+
4828+ if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4829+ MD_ENTER_BLOCK(block->type, &det);
4830+
4831+ if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
4832+ ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4833+ ctx->n_containers++;
4834+ } else if(block->type == MD_BLOCK_QUOTE) {
4835+ /* This causes that any text in a block quote, even if
4836+ * nested inside a tight list item, is wrapped with
4837+ * <p>...</p>. */
4838+ ctx->containers[ctx->n_containers].is_loose = TRUE;
4839+ ctx->n_containers++;
4840+ }
4841+ }
4842+ } else {
4843+ MD_CHECK(md_process_leaf_block(ctx, block));
4844+
/* Skip the line records stored right after the leaf block record. */
4845+ if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
4846+ byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4847+ else
4848+ byte_off += block->n_lines * sizeof(MD_LINE);
4849+ }
4850+
/* Skip the block record itself. */
4851+ byte_off += sizeof(MD_BLOCK);
4852+ }
4853+
4854+ ctx->n_block_bytes = 0;
4855+
4856+abort:
4857+ return ret;
4858+ }
4859+
4860+
4861+/************************************
4862+ *** Grouping Lines into Blocks ***
4863+ ************************************/
4864+
/* Reserve n_bytes at the end of ctx->block_bytes and return a pointer to
 * the reserved space, or NULL if the buffer could not be grown.
 *
 * When the buffer is full it grows to 512 bytes initially and by one half
 * of its capacity afterwards. Growing may move the buffer, so
 * ctx->current_block is re-based onto the new memory.
 *
 * On failure the context is left untouched: the old buffer stays valid and
 * ctx->alloc_block_bytes still describes it.
 */
4865+ static void*
4866+ md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4867+ {
4868+ void* ptr;
4869+
4870+ if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4871+ void* new_block_bytes;
4872+ int new_alloc = (ctx->alloc_block_bytes > 0 ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2 : 512);
4873+ OFF off_current_block = 0;
4874+
4875+ /* Remember where ->current_block sits while the old buffer is still valid. */
4876+ if(ctx->current_block != NULL)
4877+ off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes);
4878+
4879+ new_block_bytes = realloc(ctx->block_bytes, new_alloc);
4880+ if(new_block_bytes == NULL) {
4881+ MD_LOG("realloc() failed.");
4882+ return NULL;
4883+ }
4884+
4885+ /* Commit the new buffer and its capacity only after realloc() succeeded. */
4886+ if(ctx->current_block != NULL)
4887+ ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
4888+ ctx->block_bytes = new_block_bytes;
4889+ ctx->alloc_block_bytes = new_alloc;
4890+ }
4891+ ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4892+ ctx->n_block_bytes += n_bytes;
4893+ return ptr;
4894+ }
4895+
/* Start a new leaf block for the given analyzed line.
 *
 * Appends a fresh block record to the block buffer, derives its type from
 * the line type, copies line->data into block->data and makes it
 * ctx->current_block. The record starts with no flags and no lines; lines
 * are added with md_add_line_into_current_block().
 *
 * Must only be called when no block is being built. Line types that cannot
 * start a block (blank line, Setext underline, table underline) are treated
 * as unreachable.
 *
 * Returns 0 on success, -1 if the block buffer could not be grown.
 */
4896+ static int
4897+ md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4898+ {
4899+ MD_BLOCK* block;
4900+
4901+ MD_ASSERT(ctx->current_block == NULL);
4902+
4903+ block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4904+ if(block == NULL)
4905+ return -1;
4906+
4907+ switch(line->type) {
4908+ case MD_LINE_HR:
4909+ block->type = MD_BLOCK_HR;
4910+ break;
4911+
4912+ case MD_LINE_ATXHEADER:
4913+ case MD_LINE_SETEXTHEADER:
4914+ block->type = MD_BLOCK_H;
4915+ break;
4916+
4917+ case MD_LINE_FENCEDCODE:
4918+ case MD_LINE_INDENTEDCODE:
4919+ block->type = MD_BLOCK_CODE;
4920+ break;
4921+
4922+ case MD_LINE_TEXT:
4923+ block->type = MD_BLOCK_P;
4924+ break;
4925+
4926+ case MD_LINE_HTML:
4927+ block->type = MD_BLOCK_HTML;
4928+ break;
4929+
4930+ case MD_LINE_BLANK:
4931+ case MD_LINE_SETEXTUNDERLINE:
4932+ case MD_LINE_TABLEUNDERLINE:
4933+ default:
4934+ MD_UNREACHABLE();
4935+ break;
4936+ }
4937+
4938+ block->flags = 0;
4939+ block->data = line->data;
4940+ block->n_lines = 0;
4941+
4942+ ctx->current_block = block;
4943+ return 0;
4944+ }
4945+
4946+ /* Eat from start of current (textual) block any reference definitions and
4947+ * remember them so we can resolve any links referring to them.
4948+ *
4949+ * (Reference definitions can only be at start of it as they cannot break
4950+ * a paragraph.)
 *
 * The consumed lines are removed from the block; if all lines of the block
 * are consumed, the whole block record is removed from the block buffer and
 * ctx->current_block is reset to NULL.
 *
 * Returns 0 on success, -1 if md_is_link_reference_definition() reports
 * a failure (negative result).
4951+ */
4952+ static int
4953+ md_consume_link_reference_definitions(MD_CTX* ctx)
4954+ {
4955+ MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4956+ int n_lines = ctx->current_block->n_lines;
4957+ int n = 0;
4958+
4959+ /* Compute how many lines at the start of the block form one or more
4960+ * reference definitions. */
4961+ while(n < n_lines) {
4962+ int n_link_ref_lines;
4963+
4964+ n_link_ref_lines = md_is_link_reference_definition(ctx,
4965+ lines + n, n_lines - n);
4966+ /* Not a reference definition? */
4967+ if(n_link_ref_lines == 0)
4968+ break;
4969+
4970+ /* We fail if it is the ref. def. but it could not be stored due
4971+ * a memory allocation error. */
4972+ if(n_link_ref_lines < 0)
4973+ return -1;
4974+
4975+ n += n_link_ref_lines;
4976+ }
4977+
4978+ /* If there was at least one reference definition, we need to remove
4979+ * its lines from the block, or perhaps even the whole block. */
4980+ if(n > 0) {
4981+ if(n == n_lines) {
4982+ /* Remove complete block. */
4983+ ctx->n_block_bytes -= n * sizeof(MD_LINE);
4984+ ctx->n_block_bytes -= sizeof(MD_BLOCK);
4985+ ctx->current_block = NULL;
4986+ } else {
4987+ /* Remove just some initial lines from the block. */
/* memmove() because source and destination ranges may overlap. */
4988+ memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
4989+ ctx->current_block->n_lines -= n;
4990+ ctx->n_block_bytes -= n * sizeof(MD_LINE);
4991+ }
4992+ }
4993+
4994+ return 0;
4995+ }
4996+
/* Finish the block currently being built, if any.
 *
 * For paragraphs and Setext headers starting with '[', leading link
 * reference definitions are consumed first; this may remove the block
 * completely. For a Setext header the trailing underline line is then
 * dropped. If only the underline is left, the block is turned into
 * a paragraph and is kept open (ctx->current_block stays set) so that
 * following lines can continue it.
 *
 * Returns 0 on success, or the negative value stored in `ret` when
 * consuming the reference definitions fails.
 */
4997+ static int
4998+ md_end_current_block(MD_CTX* ctx)
4999+ {
5000+ int ret = 0;
5001+
5002+ if(ctx->current_block == NULL)
5003+ return ret;
5004+
5005+ /* Check whether there is a reference definition. (We do this here instead
5006+ * of in md_analyze_line() because reference definition can take multiple
5007+ * lines.) */
5008+ if(ctx->current_block->type == MD_BLOCK_P ||
5009+ (ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
5010+ {
5011+ MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
/* Cheap pre-check: only a block starting with '[' is examined further. */
5012+ if(CH(lines[0].beg) == _T('[')) {
5013+ MD_CHECK(md_consume_link_reference_definitions(ctx));
/* The whole block consisted of reference definitions; nothing is left. */
5014+ if(ctx->current_block == NULL)
5015+ return ret;
5016+ }
5017+ }
5018+
5019+ if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
5020+ int n_lines = ctx->current_block->n_lines;
5021+
5022+ if(n_lines > 1) {
5023+ /* Get rid of the underline. */
5024+ ctx->current_block->n_lines--;
5025+ ctx->n_block_bytes -= sizeof(MD_LINE);
5026+ } else {
5027+ /* Only the underline has left after eating the ref. defs.
5028+ * Keep the line as beginning of a new ordinary paragraph. */
5029+ ctx->current_block->type = MD_BLOCK_P;
5030+ return 0;
5031+ }
5032+ }
5033+
5034+ /* Mark we are not building any block anymore. */
5035+ ctx->current_block = NULL;
5036+
5037+abort:
5038+ return ret;
5039+ }
5040+
/* Append the analyzed line to the block currently being built.
 *
 * Code and raw HTML blocks store an MD_VERBATIMLINE (indent, beg, end); all
 * other blocks store an MD_LINE (beg, end). The line record is placed right
 * after the previously stored bytes of the block buffer and the block's
 * line count is incremented.
 *
 * Returns 0 on success, -1 if the block buffer could not be grown.
 */
5041+ static int
5042+ md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5043+ {
5044+ MD_ASSERT(ctx->current_block != NULL);
5045+
5046+ if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
5047+ MD_VERBATIMLINE* line;
5048+
5049+ line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
5050+ if(line == NULL)
5051+ return -1;
5052+
5053+ line->indent = analysis->indent;
5054+ line->beg = analysis->beg;
5055+ line->end = analysis->end;
5056+ } else {
5057+ MD_LINE* line;
5058+
5059+ line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
5060+ if(line == NULL)
5061+ return -1;
5062+
5063+ line->beg = analysis->beg;
5064+ line->end = analysis->end;
5065+ }
5066+ ctx->current_block->n_lines++;
5067+
5068+ return 0;
5069+ }
5070+
/* Append a container record (opener and/or closer) to the block buffer.
 *
 * Any leaf block being built is finished first. The record gets the given
 * type, flags and data; `start` is stored in the record's n_lines member,
 * which container records use for other purposes than a line count (see
 * struct MD_BLOCK_tag).
 *
 * Returns 0 on success, -1 if the block buffer could not be grown, or the
 * negative value stored in `ret` when finishing the current block fails.
 */
5071+ static int
5072+ md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5073+ unsigned data, unsigned flags)
5074+ {
5075+ MD_BLOCK* block;
5076+ int ret = 0;
5077+
5078+ MD_CHECK(md_end_current_block(ctx));
5079+
5080+ block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
5081+ if(block == NULL)
5082+ return -1;
5083+
5084+ block->type = type;
5085+ block->flags = flags;
5086+ block->data = data;
5087+ block->n_lines = start;
5088+
5089+abort:
5090+ return ret;
5091+ }
5092+
5093+
5094+
5095+/***********************
5096+ *** Line Analysis ***
5097+ ***********************/
5098+
/* Check whether the line starting at beg is a horizontal rule.
 *
 * The character at beg is taken as the rule character. The line qualifies
 * if it contains at least three rule characters, optionally mixed with
 * spaces and tabs, and nothing else up to the end of line or end of input.
 *
 * On success returns TRUE and stores the offset just past the rule in
 * *p_end. On failure returns FALSE and stores the offset where scanning
 * stopped in *p_killer; *p_end is left untouched.
 */
5099+ static int
5100+ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5101+ {
5102+ OFF off = beg + 1;
/* The rule character at beg is already counted. */
5103+ int n = 1;
5104+
5105+ while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
5106+ if(CH(off) == CH(beg))
5107+ n++;
5108+ off++;
5109+ }
5110+
5111+ if(n < 3) {
5112+ *p_killer = off;
5113+ return FALSE;
5114+ }
5115+
5116+ /* Nothing else can be present on the line. */
5117+ if(off < ctx->size && !ISNEWLINE(off)) {
5118+ *p_killer = off;
5119+ return FALSE;
5120+ }
5121+
5122+ *p_end = off;
5123+ return TRUE;
5124+ }
5125+
/* Check whether the line starting at beg is an ATX header ("# Title").
 *
 * Counts the run of '#' characters starting at beg (the character at beg
 * itself is counted without being tested, so the caller presumably has
 * already seen a '#' there -- confirm). More than six means no header.
 * Unless MD_FLAG_PERMISSIVEATXHEADERS is set, the run must be followed by
 * a space, a tab, a newline or the end of input.
 *
 * On success returns TRUE and stores the offset just past the spaces
 * following the '#' run into both *p_beg and *p_end. *p_level receives the
 * number of '#' characters whenever that number is at most six, even if the
 * function then returns FALSE.
 */
5126+ static int
5127+ md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5128+ {
5129+ int n;
5130+ OFF off = beg + 1;
5131+
/* Stop after at most seven characters: that is enough to detect "too many". */
5132+ while(off < ctx->size && CH(off) == _T('#') && off - beg < 7)
5133+ off++;
5134+ n = off - beg;
5135+
5136+ if(n > 6)
5137+ return FALSE;
5138+ *p_level = n;
5139+
5140+ if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
5141+ CH(off) != _T(' ') && CH(off) != _T('\t') && !ISNEWLINE(off))
5142+ return FALSE;
5143+
5144+ while(off < ctx->size && CH(off) == _T(' '))
5145+ off++;
5146+ *p_beg = off;
5147+ *p_end = off;
5148+ return TRUE;
5149+ }
5150+
/* Check whether the line starting at beg is a Setext header underline.
 *
 * The line must consist of a run of the character found at beg, optionally
 * followed by spaces, and nothing else up to the end of line or end of
 * input.
 *
 * On success returns TRUE, stores the offset where scanning stopped in
 * *p_end and the header level in *p_level: 1 if the character at beg is
 * '=', 2 otherwise. On failure returns FALSE and leaves both outputs
 * untouched.
 */
5151+ static int
5152+ md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5153+ {
5154+ OFF off = beg + 1;
5155+
5156+ while(off < ctx->size && CH(off) == CH(beg))
5157+ off++;
5158+
5159+ /* Optionally, space(s) can follow. */
5160+ while(off < ctx->size && CH(off) == _T(' '))
5161+ off++;
5162+
5163+ /* But nothing more is allowed on the line. */
5164+ if(off < ctx->size && !ISNEWLINE(off))
5165+ return FALSE;
5166+
5167+ *p_level = (CH(beg) == _T('=') ? 1 : 2);
5168+ *p_end = off;
5169+ return TRUE;
5170+ }
5171+
/* Check whether the line starting at beg is a table underline, e.g.
 * "|:---|---:|".
 *
 * The line is a sequence of cell underlines, each made of an optional ':',
 * a run of '-' and an optional ':' (at least three characters in total),
 * separated by '|' with optional whitespace around. A leading pipe and
 * a trailing pipe are optional, but the line must contain at least one pipe.
 *
 * On success returns TRUE, stores the offset where scanning stopped in
 * *p_end and the number of cell underlines in *p_col_count. On failure
 * returns FALSE and leaves both outputs untouched.
 */
5172+ static int
5173+ md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5174+ {
5175+ OFF off = beg;
5176+ int found_pipe = FALSE;
5177+ unsigned col_count = 0;
5178+
/* Optional leading pipe. */
5179+ if(off < ctx->size && CH(off) == _T('|')) {
5180+ found_pipe = TRUE;
5181+ off++;
5182+ while(off < ctx->size && ISWHITESPACE(off))
5183+ off++;
5184+ }
5185+
5186+ while(1) {
5187+ OFF cell_beg;
5188+ int delimited = FALSE;
5189+
5190+ /* Cell underline ("-----", ":----", "----:" or ":----:") */
5191+ cell_beg = off;
5192+ if(off < ctx->size && CH(off) == _T(':'))
5193+ off++;
5194+ while(off < ctx->size && CH(off) == _T('-'))
5195+ off++;
5196+ if(off < ctx->size && CH(off) == _T(':'))
5197+ off++;
/* With at most two colons, three characters imply at least one dash. */
5198+ if(off - cell_beg < 3)
5199+ return FALSE;
5200+
5201+ col_count++;
5202+
5203+ /* Pipe delimiter (optional at the end of line). */
5204+ while(off < ctx->size && ISWHITESPACE(off))
5205+ off++;
5206+ if(off < ctx->size && CH(off) == _T('|')) {
5207+ delimited = TRUE;
5208+ found_pipe = TRUE;
5209+ off++;
5210+ while(off < ctx->size && ISWHITESPACE(off))
5211+ off++;
5212+ }
5213+
5214+ /* Success, if we reach end of line. */
5215+ if(off >= ctx->size || ISNEWLINE(off))
5216+ break;
5217+
/* More text follows, so the cell just read must have ended with a pipe. */
5218+ if(!delimited)
5219+ return FALSE;
5220+ }
5221+
5222+ if(!found_pipe)
5223+ return FALSE;
5224+
5225+ *p_end = off;
5226+ *p_col_count = col_count;
5227+ return TRUE;
5228+ }
5229+
/* Check whether the line starting at beg opens a fenced code block.
 *
 * The fence is the run of the character found at beg and must be at least
 * three characters long. It may be followed by spaces and an info string
 * reaching to the end of the line; for a backtick fence the info string
 * must not contain a backtick.
 *
 * On success returns TRUE and stores the end-of-line offset in *p_end.
 * The fence length is stored in ctx->code_fence_length as soon as the
 * length check passes, i.e. even when the function later returns FALSE
 * because of a backtick in the info string.
 */
5230+ static int
5231+ md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5232+ {
5233+ OFF off = beg;
5234+
5235+ while(off < ctx->size && CH(off) == CH(beg))
5236+ off++;
5237+
5238+ /* Fence must have at least three characters. */
5239+ if(off - beg < 3)
5240+ return FALSE;
5241+
5242+ ctx->code_fence_length = off - beg;
5243+
5244+ /* Optionally, space(s) can follow. */
5245+ while(off < ctx->size && CH(off) == _T(' '))
5246+ off++;
5247+
5248+ /* Optionally, an info string can follow. */
5249+ while(off < ctx->size && !ISNEWLINE(off)) {
5250+ /* Backtick-based fence must not contain '`' in the info string. */
5251+ if(CH(beg) == _T('`') && CH(off) == _T('`'))
5252+ return FALSE;
5253+ off++;
5254+ }
5255+
5256+ *p_end = off;
5257+ return TRUE;
5258+ }
5259+
/* Check whether the line starting at beg closes the current fenced code
 * block.
 *
 * The line must start with a run of `ch` at least ctx->code_fence_length
 * characters long, optionally followed by spaces, with nothing else up to
 * the end of line or end of input.
 *
 * Returns TRUE or FALSE. *p_end is set in both cases to the offset where
 * scanning stopped.
 */
5260+ static int
5261+ md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5262+ {
5263+ OFF off = beg;
5264+ int ret = FALSE;
5265+
5266+ /* Closing fence must have at least the same length and use same char as
5267+ * opening one. */
5268+ while(off < ctx->size && CH(off) == ch)
5269+ off++;
5270+ if(off - beg < ctx->code_fence_length)
5271+ goto out;
5272+
5273+ /* Optionally, space(s) can follow */
5274+ while(off < ctx->size && CH(off) == _T(' '))
5275+ off++;
5276+
5277+ /* But nothing more is allowed on the line. */
5278+ if(off < ctx->size && !ISNEWLINE(off))
5279+ goto out;
5280+
5281+ ret = TRUE;
5282+
5283+out:
5284+ /* Note we set *p_end even on failure: If we are not closing fence, caller
5285+ * would eat the line anyway without any parsing. */
5286+ *p_end = off;
5287+ return ret;
5288+ }
5289+
5290+/* Returns type of the raw HTML block, or FALSE if it is not HTML block.
5291+ * (Refer to CommonMark specification for details about the types.)
5292+ */
5293+static int
5294+md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5295+{
5296+ typedef struct TAG_tag TAG;
5297+ struct TAG_tag {
5298+ const CHAR* name;
5299+ unsigned len : 8;
5300+ };
5301+
5302+ /* Type 6 is started by a long list of allowed tags. We use two-level
5303+ * tree to speed-up the search. */
5304+#ifdef X
5305+ #undef X
5306+#endif
5307+#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5308+#define Xend { NULL, 0 }
5309+ static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
5310+
5311+ static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5312+ static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5313+ static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5314+ static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5315+ X("div"), X("dl"), X("dt"), Xend };
5316+ static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5317+ X("form"), X("frame"), X("frameset"), Xend };
5318+ static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
5319+ static const TAG i6[] = { X("iframe"), Xend };
5320+ static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5321+ static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5322+ static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5323+ static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5324+ static const TAG p6[] = { X("p"), X("param"), Xend };
5325+ static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
5326+ static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5327+ X("thead"), X("title"), X("tr"), X("track"), Xend };
5328+ static const TAG u6[] = { X("ul"), Xend };
5329+ static const TAG xx[] = { Xend };
5330+#undef X
5331+
5332+ static const TAG* map6[26] = {
5333+ a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5334+ n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5335+ };
5336+ OFF off = beg + 1;
5337+ int i;
5338+
5339+ /* Check for type 1: <script, <pre, or <style */
5340+ for(i = 0; t1[i].name != NULL; i++) {
5341+ if(off + t1[i].len <= ctx->size) {
5342+ if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
5343+ return 1;
5344+ }
5345+ }
5346+
5347+ /* Check for type 2: <!-- */
5348+ if(off + 3 < ctx->size && CH(off) == _T('!') && CH(off+1) == _T('-') && CH(off+2) == _T('-'))
5349+ return 2;
5350+
5351+ /* Check for type 3: <? */
5352+ if(off < ctx->size && CH(off) == _T('?'))
5353+ return 3;
5354+
5355+ /* Check for type 4 or 5: <! */
5356+ if(off < ctx->size && CH(off) == _T('!')) {
5357+ /* Check for type 4: <! followed by uppercase letter. */
5358+ if(off + 1 < ctx->size && ISUPPER(off+1))
5359+ return 4;
5360+
5361+ /* Check for type 5: <![CDATA[ */
5362+ if(off + 8 < ctx->size) {
5363+ if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
5364+ return 5;
5365+ }
5366+ }
5367+
5368+ /* Check for type 6: Many possible starting tags listed above. */
5369+ if(off + 1 < ctx->size && (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
5370+ int slot;
5371+ const TAG* tags;
5372+
5373+ if(CH(off) == _T('/'))
5374+ off++;
5375+
5376+ slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
5377+ tags = map6[slot];
5378+
5379+ for(i = 0; tags[i].name != NULL; i++) {
5380+ if(off + tags[i].len <= ctx->size) {
5381+ if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
5382+ OFF tmp = off + tags[i].len;
5383+ if(tmp >= ctx->size)
5384+ return 6;
5385+ if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
5386+ return 6;
5387+ if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
5388+ return 6;
5389+ break;
5390+ }
5391+ }
5392+ }
5393+ }
5394+
5395+ /* Check for type 7: any COMPLETE other opening or closing tag. */
5396+ if(off + 1 < ctx->size) {
5397+ OFF end;
5398+
5399+ if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
5400+ /* Only optional whitespace and new line may follow. */
5401+ while(end < ctx->size && ISWHITESPACE(end))
5402+ end++;
5403+ if(end >= ctx->size || ISNEWLINE(end))
5404+ return 7;
5405+ }
5406+ }
5407+
5408+ return FALSE;
5409+}
5410+
5411+/* Case sensitive check whether there is a substring 'what' between 'beg'
5412+ * and end of line. */
5413+static int
5414+md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5415+{
5416+ OFF i;
5417+ for(i = beg; i + what_len < ctx->size; i++) {
5418+ if(ISNEWLINE(i))
5419+ break;
5420+ if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
5421+ *p_end = i + what_len;
5422+ return TRUE;
5423+ }
5424+ }
5425+
5426+ *p_end = i;
5427+ return FALSE;
5428+}
5429+
5430+/* Returns type of HTML block end condition or FALSE if not an end condition.
5431+ *
5432+ * Note it fills p_end even when it is not end condition as the caller
5433+ * does not need to analyze contents of a raw HTML block.
5434+ */
5435+static int
5436+md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5437+{
5438+ switch(ctx->html_block_type) {
5439+ case 1:
5440+ {
5441+ OFF off = beg;
5442+
5443+ while(off < ctx->size && !ISNEWLINE(off)) {
5444+ if(CH(off) == _T('<')) {
5445+ if(md_ascii_case_eq(STR(off), _T("</script>"), 9)) {
5446+ *p_end = off + 9;
5447+ return TRUE;
5448+ }
5449+
5450+ if(md_ascii_case_eq(STR(off), _T("</style>"), 8)) {
5451+ *p_end = off + 8;
5452+ return TRUE;
5453+ }
5454+
5455+ if(md_ascii_case_eq(STR(off), _T("</pre>"), 6)) {
5456+ *p_end = off + 6;
5457+ return TRUE;
5458+ }
5459+ }
5460+
5461+ off++;
5462+ }
5463+ *p_end = off;
5464+ return FALSE;
5465+ }
5466+
5467+ case 2:
5468+ return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
5469+
5470+ case 3:
5471+ return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
5472+
5473+ case 4:
5474+ return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
5475+
5476+ case 5:
5477+ return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
5478+
5479+ case 6: /* Pass through */
5480+ case 7:
5481+ *p_end = beg;
5482+ return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
5483+
5484+ default:
5485+ MD_UNREACHABLE();
5486+ }
5487+ return FALSE;
5488+}
5489+
5490+
5491+static int
5492+md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5493+{
5494+ /* Block quote has no "items" like lists. */
5495+ if(container->ch == _T('>'))
5496+ return FALSE;
5497+
5498+ if(container->ch != pivot->ch)
5499+ return FALSE;
5500+ if(container->mark_indent > pivot->contents_indent)
5501+ return FALSE;
5502+
5503+ return TRUE;
5504+}
5505+
5506+static int
5507+md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5508+{
5509+ if(ctx->n_containers >= ctx->alloc_containers) {
5510+ MD_CONTAINER* new_containers;
5511+
5512+ ctx->alloc_containers = (ctx->alloc_containers > 0
5513+ ? ctx->alloc_containers + ctx->alloc_containers / 2
5514+ : 16);
5515+ new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5516+ if(new_containers == NULL) {
5517+ MD_LOG("realloc() failed.");
5518+ return -1;
5519+ }
5520+
5521+ ctx->containers = new_containers;
5522+ }
5523+
5524+ memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5525+ return 0;
5526+}
5527+
5528+static int
5529+md_enter_child_containers(MD_CTX* ctx, int n_children, unsigned data)
5530+{
5531+ int i;
5532+ int ret = 0;
5533+
5534+ for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5535+ MD_CONTAINER* c = &ctx->containers[i];
5536+ int is_ordered_list = FALSE;
5537+
5538+ switch(c->ch) {
5539+ case _T(')'):
5540+ case _T('.'):
5541+ is_ordered_list = TRUE;
5542+ MD_FALLTHROUGH();
5543+
5544+ case _T('-'):
5545+ case _T('+'):
5546+ case _T('*'):
5547+ /* Remember offset in ctx->block_bytes so we can revisit the
5548+ * block if we detect it is a loose list. */
5549+ md_end_current_block(ctx);
5550+ c->block_byte_off = ctx->n_block_bytes;
5551+
5552+ MD_CHECK(md_push_container_bytes(ctx,
5553+ (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5554+ c->start, data, MD_BLOCK_CONTAINER_OPENER));
5555+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5556+ c->task_mark_off,
5557+ (c->is_task ? CH(c->task_mark_off) : 0),
5558+ MD_BLOCK_CONTAINER_OPENER));
5559+ break;
5560+
5561+ case _T('>'):
5562+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
5563+ break;
5564+
5565+ default:
5566+ MD_UNREACHABLE();
5567+ break;
5568+ }
5569+ }
5570+
5571+abort:
5572+ return ret;
5573+}
5574+
5575+static int
5576+md_leave_child_containers(MD_CTX* ctx, int n_keep)
5577+{
5578+ int ret = 0;
5579+
5580+ while(ctx->n_containers > n_keep) {
5581+ MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
5582+ int is_ordered_list = FALSE;
5583+
5584+ switch(c->ch) {
5585+ case _T(')'):
5586+ case _T('.'):
5587+ is_ordered_list = TRUE;
5588+ MD_FALLTHROUGH();
5589+
5590+ case _T('-'):
5591+ case _T('+'):
5592+ case _T('*'):
5593+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5594+ c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
5595+ MD_BLOCK_CONTAINER_CLOSER));
5596+ MD_CHECK(md_push_container_bytes(ctx,
5597+ (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
5598+ c->ch, MD_BLOCK_CONTAINER_CLOSER));
5599+ break;
5600+
5601+ case _T('>'):
5602+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
5603+ 0, MD_BLOCK_CONTAINER_CLOSER));
5604+ break;
5605+
5606+ default:
5607+ MD_UNREACHABLE();
5608+ break;
5609+ }
5610+
5611+ ctx->n_containers--;
5612+ }
5613+
5614+abort:
5615+ return ret;
5616+}
5617+
5618+static int
5619+md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5620+{
5621+ OFF off = beg;
5622+ OFF max_end;
5623+
5624+ if(off >= ctx->size || indent >= ctx->code_indent_offset)
5625+ return FALSE;
5626+
5627+ /* Check for block quote mark. */
5628+ if(CH(off) == _T('>')) {
5629+ off++;
5630+ p_container->ch = _T('>');
5631+ p_container->is_loose = FALSE;
5632+ p_container->is_task = FALSE;
5633+ p_container->mark_indent = indent;
5634+ p_container->contents_indent = indent + 1;
5635+ *p_end = off;
5636+ return TRUE;
5637+ }
5638+
5639+ /* Check for list item bullet mark. */
5640+ if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
5641+ p_container->ch = CH(off);
5642+ p_container->is_loose = FALSE;
5643+ p_container->is_task = FALSE;
5644+ p_container->mark_indent = indent;
5645+ p_container->contents_indent = indent + 1;
5646+ *p_end = off+1;
5647+ return TRUE;
5648+ }
5649+
5650+ /* Check for ordered list item marks. */
5651+ max_end = off + 9;
5652+ if(max_end > ctx->size)
5653+ max_end = ctx->size;
5654+ p_container->start = 0;
5655+ while(off < max_end && ISDIGIT(off)) {
5656+ p_container->start = p_container->start * 10 + CH(off) - _T('0');
5657+ off++;
5658+ }
5659+ if(off > beg &&
5660+ (CH(off) == _T('.') || CH(off) == _T(')')) &&
5661+ (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
5662+ {
5663+ p_container->ch = CH(off);
5664+ p_container->is_loose = FALSE;
5665+ p_container->is_task = FALSE;
5666+ p_container->mark_indent = indent;
5667+ p_container->contents_indent = indent + off - beg + 1;
5668+ *p_end = off+1;
5669+ return TRUE;
5670+ }
5671+
5672+ return FALSE;
5673+}
5674+
5675+static unsigned
5676+md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5677+{
5678+ OFF off = beg;
5679+ unsigned indent = total_indent;
5680+
5681+ while(off < ctx->size && ISBLANK(off)) {
5682+ if(CH(off) == _T('\t'))
5683+ indent = (indent + 4) & ~3;
5684+ else
5685+ indent++;
5686+ off++;
5687+ }
5688+
5689+ *p_end = off;
5690+ return indent - total_indent;
5691+}
5692+
5693+static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
5694+
5695+/* Analyze type of the line and find some its properties. This serves as a
5696+ * main input for determining type and boundaries of a block. */
5697+static int
5698+md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5699+ const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5700+{
5701+ unsigned total_indent = 0;
5702+ int n_parents = 0;
5703+ int n_brothers = 0;
5704+ int n_children = 0;
5705+ MD_CONTAINER container = { 0 };
5706+ int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5707+ OFF off = beg;
5708+ OFF hr_killer = 0;
5709+ int ret = 0;
5710+
5711+ line->indent = md_line_indentation(ctx, total_indent, off, &off);
5712+ total_indent += line->indent;
5713+ line->beg = off;
5714+
5715+ /* Given the indentation and block quote marks '>', determine how many of
5716+ * the current containers are our parents. */
5717+ while(n_parents < ctx->n_containers) {
5718+ MD_CONTAINER* c = &ctx->containers[n_parents];
5719+
5720+ if(c->ch == _T('>') && line->indent < ctx->code_indent_offset &&
5721+ off < ctx->size && CH(off) == _T('>'))
5722+ {
5723+ /* Block quote mark. */
5724+ off++;
5725+ total_indent++;
5726+ line->indent = md_line_indentation(ctx, total_indent, off, &off);
5727+ total_indent += line->indent;
5728+
5729+ /* The optional 1st space after '>' is part of the block quote mark. */
5730+ if(line->indent > 0)
5731+ line->indent--;
5732+
5733+ line->beg = off;
5734+
5735+ } else if(c->ch != _T('>') && line->indent >= c->contents_indent) {
5736+ /* List. */
5737+ line->indent -= c->contents_indent;
5738+ } else {
5739+ break;
5740+ }
5741+
5742+ n_parents++;
5743+ }
5744+
5745+ if(off >= ctx->size || ISNEWLINE(off)) {
5746+ /* Blank line does not need any real indentation to be nested inside
5747+ * a list. */
5748+ if(n_brothers + n_children == 0) {
5749+ while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>'))
5750+ n_parents++;
5751+ }
5752+ }
5753+
5754+ while(TRUE) {
5755+ /* Check whether we are fenced code continuation. */
5756+ if(pivot_line->type == MD_LINE_FENCEDCODE) {
5757+ line->beg = off;
5758+
5759+ /* We are another MD_LINE_FENCEDCODE unless we are closing fence
5760+ * which we transform into MD_LINE_BLANK. */
5761+ if(line->indent < ctx->code_indent_offset) {
5762+ if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5763+ line->type = MD_LINE_BLANK;
5764+ ctx->last_line_has_list_loosening_effect = FALSE;
5765+ break;
5766+ }
5767+ }
5768+
5769+ /* Change indentation accordingly to the initial code fence. */
5770+ if(n_parents == ctx->n_containers) {
5771+ if(line->indent > pivot_line->indent)
5772+ line->indent -= pivot_line->indent;
5773+ else
5774+ line->indent = 0;
5775+
5776+ line->type = MD_LINE_FENCEDCODE;
5777+ break;
5778+ }
5779+ }
5780+
5781+ /* Check whether we are HTML block continuation. */
5782+ if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > 0) {
5783+ if(n_parents < ctx->n_containers) {
5784+ /* HTML block is implicitly ended if the enclosing container
5785+ * block ends. */
5786+ ctx->html_block_type = 0;
5787+ } else {
5788+ int html_block_type;
5789+
5790+ html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5791+ if(html_block_type > 0) {
5792+ MD_ASSERT(html_block_type == ctx->html_block_type);
5793+
5794+ /* Make sure this is the last line of the block. */
5795+ ctx->html_block_type = 0;
5796+
5797+ /* Some end conditions serve as blank lines at the same time. */
5798+ if(html_block_type == 6 || html_block_type == 7) {
5799+ line->type = MD_LINE_BLANK;
5800+ line->indent = 0;
5801+ break;
5802+ }
5803+ }
5804+
5805+ line->type = MD_LINE_HTML;
5806+ n_parents = ctx->n_containers;
5807+ break;
5808+ }
5809+ }
5810+
5811+ /* Check for blank line. */
5812+ if(off >= ctx->size || ISNEWLINE(off)) {
5813+ if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5814+ line->type = MD_LINE_INDENTEDCODE;
5815+ if(line->indent > ctx->code_indent_offset)
5816+ line->indent -= ctx->code_indent_offset;
5817+ else
5818+ line->indent = 0;
5819+ ctx->last_line_has_list_loosening_effect = FALSE;
5820+ } else {
5821+ line->type = MD_LINE_BLANK;
5822+ ctx->last_line_has_list_loosening_effect = (n_parents > 0 &&
5823+ n_brothers + n_children == 0 &&
5824+ ctx->containers[n_parents-1].ch != _T('>'));
5825+
5826+ #if 1
5827+ /* See https://github.com/mity/md4c/issues/6
5828+ *
5829+ * This ugly checking tests we are in (yet empty) list item but
5830+ * not its very first line (i.e. not the line with the list
5831+ * item mark).
5832+ *
5833+ * If we are such a blank line, then any following non-blank
5834+ * line which would be part of the list item actually has to
5835+ * end the list because according to the specification, "a list
5836+ * item can begin with at most one blank line."
5837+ */
5838+ if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
5839+ n_brothers + n_children == 0 && ctx->current_block == NULL &&
5840+ ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5841+ {
5842+ MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5843+ if(top_block->type == MD_BLOCK_LI)
5844+ ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5845+ }
5846+ #endif
5847+ }
5848+ break;
5849+ } else {
5850+ #if 1
5851+ /* This is the 2nd half of the hack. If the flag is set (i.e. there
5852+ * was a 2nd blank line at the beginning of the list item) and if
5853+ * we would otherwise still belong to the list item, we enforce
5854+ * the end of the list. */
5855+ ctx->last_line_has_list_loosening_effect = FALSE;
5856+ if(ctx->last_list_item_starts_with_two_blank_lines) {
5857+ if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
5858+ n_brothers + n_children == 0 && ctx->current_block == NULL &&
5859+ ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5860+ {
5861+ MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5862+ if(top_block->type == MD_BLOCK_LI)
5863+ n_parents--;
5864+ }
5865+
5866+ ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5867+ }
5868+ #endif
5869+ }
5870+
5871+ /* Check whether we are Setext underline. */
5872+ if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
5873+ && (CH(off) == _T('=') || CH(off) == _T('-'))
5874+ && (n_parents == ctx->n_containers))
5875+ {
5876+ unsigned level;
5877+
5878+ if(md_is_setext_underline(ctx, off, &off, &level)) {
5879+ line->type = MD_LINE_SETEXTUNDERLINE;
5880+ line->data = level;
5881+ break;
5882+ }
5883+ }
5884+
5885+ /* Check for thematic break line. */
5886+ if(line->indent < ctx->code_indent_offset && ISANYOF(off, _T("-_*")) && off >= hr_killer) {
5887+ if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5888+ line->type = MD_LINE_HR;
5889+ break;
5890+ }
5891+ }
5892+
5893+ /* Check for "brother" container. I.e. whether we are another list item
5894+ * in already started list. */
5895+ if(n_parents < ctx->n_containers && n_brothers + n_children == 0) {
5896+ OFF tmp;
5897+
5898+ if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) &&
5899+ md_is_container_compatible(&ctx->containers[n_parents], &container))
5900+ {
5901+ pivot_line = &md_dummy_blank_line;
5902+
5903+ off = tmp;
5904+
5905+ total_indent += container.contents_indent - container.mark_indent;
5906+ line->indent = md_line_indentation(ctx, total_indent, off, &off);
5907+ total_indent += line->indent;
5908+ line->beg = off;
5909+
5910+ /* Some of the following whitespace actually still belongs to the mark. */
5911+ if(off >= ctx->size || ISNEWLINE(off)) {
5912+ container.contents_indent++;
5913+ } else if(line->indent <= ctx->code_indent_offset) {
5914+ container.contents_indent += line->indent;
5915+ line->indent = 0;
5916+ } else {
5917+ container.contents_indent += 1;
5918+ line->indent--;
5919+ }
5920+
5921+ ctx->containers[n_parents].mark_indent = container.mark_indent;
5922+ ctx->containers[n_parents].contents_indent = container.contents_indent;
5923+
5924+ n_brothers++;
5925+ continue;
5926+ }
5927+ }
5928+
5929+ /* Check for indented code.
5930+ * Note indented code block cannot interrupt a paragraph. */
5931+ if(line->indent >= ctx->code_indent_offset &&
5932+ (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE))
5933+ {
5934+ line->type = MD_LINE_INDENTEDCODE;
5935+ MD_ASSERT(line->indent >= ctx->code_indent_offset);
5936+ line->indent -= ctx->code_indent_offset;
5937+ line->data = 0;
5938+ break;
5939+ }
5940+
5941+ /* Check for start of a new container block. */
5942+ if(line->indent < ctx->code_indent_offset &&
5943+ md_is_container_mark(ctx, line->indent, off, &off, &container))
5944+ {
5945+ if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5946+ (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>'))
5947+ {
5948+ /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
5949+ } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5950+ (container.ch == _T('.') || container.ch == _T(')')) && container.start != 1)
5951+ {
5952+ /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
5953+ } else {
5954+ total_indent += container.contents_indent - container.mark_indent;
5955+ line->indent = md_line_indentation(ctx, total_indent, off, &off);
5956+ total_indent += line->indent;
5957+
5958+ line->beg = off;
5959+ line->data = container.ch;
5960+
5961+ /* Some of the following whitespace actually still belongs to the mark. */
5962+ if(off >= ctx->size || ISNEWLINE(off)) {
5963+ container.contents_indent++;
5964+ } else if(line->indent <= ctx->code_indent_offset) {
5965+ container.contents_indent += line->indent;
5966+ line->indent = 0;
5967+ } else {
5968+ container.contents_indent += 1;
5969+ line->indent--;
5970+ }
5971+
5972+ if(n_brothers + n_children == 0)
5973+ pivot_line = &md_dummy_blank_line;
5974+
5975+ if(n_children == 0)
5976+ MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5977+
5978+ n_children++;
5979+ MD_CHECK(md_push_container(ctx, &container));
5980+ continue;
5981+ }
5982+ }
5983+
5984+ /* Check whether we are table continuation. */
5985+ if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
5986+ line->type = MD_LINE_TABLE;
5987+ break;
5988+ }
5989+
5990+ /* Check for ATX header. */
5991+ if(line->indent < ctx->code_indent_offset && CH(off) == _T('#')) {
5992+ unsigned level;
5993+
5994+ if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
5995+ line->type = MD_LINE_ATXHEADER;
5996+ line->data = level;
5997+ break;
5998+ }
5999+ }
6000+
6001+ /* Check whether we are starting code fence. */
6002+ if(CH(off) == _T('`') || CH(off) == _T('~')) {
6003+ if(md_is_opening_code_fence(ctx, off, &off)) {
6004+ line->type = MD_LINE_FENCEDCODE;
6005+ line->data = 1;
6006+ break;
6007+ }
6008+ }
6009+
6010+ /* Check for start of raw HTML block. */
6011+ if(CH(off) == _T('<') && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
6012+ {
6013+ ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
6014+
6015+ /* HTML block type 7 cannot interrupt paragraph. */
6016+ if(ctx->html_block_type == 7 && pivot_line->type == MD_LINE_TEXT)
6017+ ctx->html_block_type = 0;
6018+
6019+ if(ctx->html_block_type > 0) {
6020+ /* The line itself also may immediately close the block. */
6021+ if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
6022+ /* Make sure this is the last line of the block. */
6023+ ctx->html_block_type = 0;
6024+ }
6025+
6026+ line->type = MD_LINE_HTML;
6027+ break;
6028+ }
6029+ }
6030+
6031+ /* Check for table underline. */
6032+ if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT &&
6033+ (CH(off) == _T('|') || CH(off) == _T('-') || CH(off) == _T(':')) &&
6034+ n_parents == ctx->n_containers)
6035+ {
6036+ unsigned col_count;
6037+
6038+ if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 &&
6039+ md_is_table_underline(ctx, off, &off, &col_count))
6040+ {
6041+ line->data = col_count;
6042+ line->type = MD_LINE_TABLEUNDERLINE;
6043+ break;
6044+ }
6045+ }
6046+
6047+ /* By default, we are normal text line. */
6048+ line->type = MD_LINE_TEXT;
6049+ if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) {
6050+ /* Lazy continuation. */
6051+ n_parents = ctx->n_containers;
6052+ }
6053+
6054+ /* Check for task mark. */
6055+ if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 &&
6056+ ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
6057+ {
6058+ OFF tmp = off;
6059+
6060+ while(tmp < ctx->size && tmp < off + 3 && ISBLANK(tmp))
6061+ tmp++;
6062+ if(tmp + 2 < ctx->size && CH(tmp) == _T('[') &&
6063+ ISANYOF(tmp+1, _T("xX ")) && CH(tmp+2) == _T(']') &&
6064+ (tmp + 3 == ctx->size || ISBLANK(tmp+3) || ISNEWLINE(tmp+3)))
6065+ {
6066+ MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
6067+ task_container->is_task = TRUE;
6068+ task_container->task_mark_off = tmp + 1;
6069+ off = tmp + 3;
6070+ while(ISWHITESPACE(off))
6071+ off++;
6072+ line->beg = off;
6073+ }
6074+ }
6075+
6076+ break;
6077+ }
6078+
6079+ /* Scan for end of the line.
6080+ *
6081+ * Note this is quite a bottleneck of the parsing as we here iterate almost
6082+ * over compete document.
6083+ */
6084+#if defined __linux__ && !defined MD4C_USE_UTF16
6085+ /* Recent glibc versions have superbly optimized strcspn(), even using
6086+ * vectorization if available. */
6087+ if(ctx->doc_ends_with_newline && off < ctx->size) {
6088+ while(TRUE) {
6089+ off += (OFF) strcspn(STR(off), "\r\n");
6090+
6091+ /* strcspn() can stop on zero terminator; but that can appear
6092+ * anywhere in the Markfown input... */
6093+ if(CH(off) == _T('\0'))
6094+ off++;
6095+ else
6096+ break;
6097+ }
6098+ } else
6099+#endif
6100+ {
6101+ /* Optimization: Use some loop unrolling. */
6102+ while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1)
6103+ && !ISNEWLINE(off+2) && !ISNEWLINE(off+3))
6104+ off += 4;
6105+ while(off < ctx->size && !ISNEWLINE(off))
6106+ off++;
6107+ }
6108+
6109+ /* Set end of the line. */
6110+ line->end = off;
6111+
6112+ /* But for ATX header, we should exclude the optional trailing mark. */
6113+ if(line->type == MD_LINE_ATXHEADER) {
6114+ OFF tmp = line->end;
6115+ while(tmp > line->beg && CH(tmp-1) == _T(' '))
6116+ tmp--;
6117+ while(tmp > line->beg && CH(tmp-1) == _T('#'))
6118+ tmp--;
6119+ if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6120+ line->end = tmp;
6121+ }
6122+
6123+ /* Trim trailing spaces. */
6124+ if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) {
6125+ while(line->end > line->beg && CH(line->end-1) == _T(' '))
6126+ line->end--;
6127+ }
6128+
6129+ /* Eat also the new line. */
6130+ if(off < ctx->size && CH(off) == _T('\r'))
6131+ off++;
6132+ if(off < ctx->size && CH(off) == _T('\n'))
6133+ off++;
6134+
6135+ *p_end = off;
6136+
6137+ /* If we belong to a list after seeing a blank line, the list is loose. */
6138+ if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) {
6139+ MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
6140+ if(c->ch != _T('>')) {
6141+ MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
6142+ block->flags |= MD_BLOCK_LOOSE_LIST;
6143+ }
6144+ }
6145+
6146+ /* Leave any containers we are not part of anymore. */
6147+ if(n_children == 0 && n_parents + n_brothers < ctx->n_containers)
6148+ MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6149+
6150+ /* Enter any container we found a mark for. */
6151+ if(n_brothers > 0) {
6152+ MD_ASSERT(n_brothers == 1);
6153+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6154+ ctx->containers[n_parents].task_mark_off,
6155+ (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
6156+ MD_BLOCK_CONTAINER_CLOSER));
6157+ MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6158+ container.task_mark_off,
6159+ (container.is_task ? CH(container.task_mark_off) : 0),
6160+ MD_BLOCK_CONTAINER_OPENER));
6161+ ctx->containers[n_parents].is_task = container.is_task;
6162+ ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6163+ }
6164+
6165+ if(n_children > 0)
6166+ MD_CHECK(md_enter_child_containers(ctx, n_children, line->data));
6167+
6168+abort:
6169+ return ret;
6170+}
6171+
6172+static int
6173+md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6174+{
6175+ const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6176+ int ret = 0;
6177+
6178+ /* Blank line ends current leaf block. */
6179+ if(line->type == MD_LINE_BLANK) {
6180+ MD_CHECK(md_end_current_block(ctx));
6181+ *p_pivot_line = &md_dummy_blank_line;
6182+ return 0;
6183+ }
6184+
6185+ /* Some line types form block on their own. */
6186+ if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
6187+ MD_CHECK(md_end_current_block(ctx));
6188+
6189+ /* Add our single-line block. */
6190+ MD_CHECK(md_start_new_block(ctx, line));
6191+ MD_CHECK(md_add_line_into_current_block(ctx, line));
6192+ MD_CHECK(md_end_current_block(ctx));
6193+ *p_pivot_line = &md_dummy_blank_line;
6194+ return 0;
6195+ }
6196+
6197+ /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
6198+ if(line->type == MD_LINE_SETEXTUNDERLINE) {
6199+ MD_ASSERT(ctx->current_block != NULL);
6200+ ctx->current_block->type = MD_BLOCK_H;
6201+ ctx->current_block->data = line->data;
6202+ ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
6203+ MD_CHECK(md_add_line_into_current_block(ctx, line));
6204+ MD_CHECK(md_end_current_block(ctx));
6205+ if(ctx->current_block == NULL) {
6206+ *p_pivot_line = &md_dummy_blank_line;
6207+ } else {
6208+ /* This happens if we have consumed all the body as link ref. defs.
6209+ * and downgraded the underline into start of a new paragraph block. */
6210+ line->type = MD_LINE_TEXT;
6211+ *p_pivot_line = line;
6212+ }
6213+ return 0;
6214+ }
6215+
6216+ /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
6217+ if(line->type == MD_LINE_TABLEUNDERLINE) {
6218+ MD_ASSERT(ctx->current_block != NULL);
6219+ MD_ASSERT(ctx->current_block->n_lines == 1);
6220+ ctx->current_block->type = MD_BLOCK_TABLE;
6221+ ctx->current_block->data = line->data;
6222+ MD_ASSERT(pivot_line != &md_dummy_blank_line);
6223+ ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6224+ MD_CHECK(md_add_line_into_current_block(ctx, line));
6225+ return 0;
6226+ }
6227+
6228+ /* The current block also ends if the line has different type. */
6229+ if(line->type != pivot_line->type)
6230+ MD_CHECK(md_end_current_block(ctx));
6231+
6232+ /* The current line may start a new block. */
6233+ if(ctx->current_block == NULL) {
6234+ MD_CHECK(md_start_new_block(ctx, line));
6235+ *p_pivot_line = line;
6236+ }
6237+
6238+ /* In all other cases the line is just a continuation of the current block. */
6239+ MD_CHECK(md_add_line_into_current_block(ctx, line));
6240+
6241+abort:
6242+ return ret;
6243+}
6244+
6245+static int
6246+md_process_doc(MD_CTX *ctx)
6247+{
6248+ const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6249+ MD_LINE_ANALYSIS line_buf[2];
6250+ MD_LINE_ANALYSIS* line = &line_buf[0];
6251+ OFF off = 0;
6252+ int ret = 0;
6253+
6254+ MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
6255+
6256+ while(off < ctx->size) {
6257+ if(line == pivot_line)
6258+ line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
6259+
6260+ MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6261+ MD_CHECK(md_process_line(ctx, &pivot_line, line));
6262+ }
6263+
6264+ md_end_current_block(ctx);
6265+
6266+ MD_CHECK(md_build_ref_def_hashtable(ctx));
6267+
6268+ /* Process all blocks. */
6269+ MD_CHECK(md_leave_child_containers(ctx, 0));
6270+ MD_CHECK(md_process_all_blocks(ctx));
6271+
6272+ MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
6273+
6274+abort:
6275+
6276+#if 0
6277+ /* Output some memory consumption statistics. */
6278+ {
6279+ char buffer[256];
6280+ sprintf(buffer, "Alloced %u bytes for block buffer.",
6281+ (unsigned)(ctx->alloc_block_bytes));
6282+ MD_LOG(buffer);
6283+
6284+ sprintf(buffer, "Alloced %u bytes for containers buffer.",
6285+ (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6286+ MD_LOG(buffer);
6287+
6288+ sprintf(buffer, "Alloced %u bytes for marks buffer.",
6289+ (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6290+ MD_LOG(buffer);
6291+
6292+ sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6293+ (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6294+ MD_LOG(buffer);
6295+ }
6296+#endif
6297+
6298+ return ret;
6299+}
6300+
6301+
6302+/********************
6303+ *** Public API ***
6304+ ********************/
6305+
6306+int
6307+md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6308+{
6309+ MD_CTX ctx;
6310+ int i;
6311+ int ret;
6312+
6313+ if(parser->abi_version != 0) {
6314+ if(parser->debug_log != NULL)
6315+ parser->debug_log("Unsupported abi_version.", userdata);
6316+ return -1;
6317+ }
6318+
6319+ /* Setup context structure. */
6320+ memset(&ctx, 0, sizeof(MD_CTX));
6321+ ctx.text = text;
6322+ ctx.size = size;
6323+ memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
6324+ ctx.userdata = userdata;
6325+ ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
6326+ md_build_mark_char_map(&ctx);
6327+ ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
6328+
6329+ /* Reset all unresolved opener mark chains. */
6330+ for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
6331+ ctx.mark_chains[i].head = -1;
6332+ ctx.mark_chains[i].tail = -1;
6333+ }
6334+ ctx.unresolved_link_head = -1;
6335+ ctx.unresolved_link_tail = -1;
6336+
6337+ /* All the work. */
6338+ ret = md_process_doc(&ctx);
6339+
6340+ /* Clean-up. */
6341+ md_free_ref_defs(&ctx);
6342+ md_free_ref_def_hashtable(&ctx);
6343+ free(ctx.buffer);
6344+ free(ctx.marks);
6345+ free(ctx.block_bytes);
6346+ free(ctx.containers);
6347+
6348+ return ret;
6349+}
A · md4c.h
+405, -0 1@@ -0,0 +1,405 @@
2+/*
3+ * MD4C: Markdown parser for C
4+ * (http://github.com/mity/md4c)
5+ *
6+ * Copyright (c) 2016-2020 Martin Mitas
7+ *
8+ * Permission is hereby granted, free of charge, to any person obtaining a
9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#ifndef MD4C_H
28+#define MD4C_H
29+
30+#ifdef __cplusplus
31+ extern "C" {
32+#endif
33+
34+#if defined MD4C_USE_UTF16
35+ /* Magic to support UTF-16. Note that in order to use it, you have to define
36+ * the macro MD4C_USE_UTF16 both when building MD4C as well as when
37+ * including this header in your code. */
38+ #ifdef _WIN32
39+ #include <windows.h>
40+ typedef WCHAR MD_CHAR;
41+ #else
42+ #error MD4C_USE_UTF16 is only supported on Windows.
43+ #endif
44+#else
45+ typedef char MD_CHAR;
46+#endif
47+
48+typedef unsigned MD_SIZE;
49+typedef unsigned MD_OFFSET;
50+
51+
52+/* Block represents a part of document hierarchy structure like a paragraph
53+ * or list item.
54+ */
55+typedef enum MD_BLOCKTYPE {
56+ /* <body>...</body> */
57+ MD_BLOCK_DOC = 0,
58+
59+ /* <blockquote>...</blockquote> */
60+ MD_BLOCK_QUOTE,
61+
62+ /* <ul>...</ul>
63+ * Detail: Structure MD_BLOCK_UL_DETAIL. */
64+ MD_BLOCK_UL,
65+
66+ /* <ol>...</ol>
67+ * Detail: Structure MD_BLOCK_OL_DETAIL. */
68+ MD_BLOCK_OL,
69+
70+ /* <li>...</li>
71+ * Detail: Structure MD_BLOCK_LI_DETAIL. */
72+ MD_BLOCK_LI,
73+
74+ /* <hr> */
75+ MD_BLOCK_HR,
76+
77+ /* <h1>...</h1> (for levels up to 6)
78+ * Detail: Structure MD_BLOCK_H_DETAIL. */
79+ MD_BLOCK_H,
80+
81+ /* <pre><code>...</code></pre>
82+ * Note the text lines within code blocks are terminated with '\n'
83+ * instead of explicit MD_TEXT_BR. */
84+ MD_BLOCK_CODE,
85+
86+ /* Raw HTML block. This itself does not correspond to any particular HTML
87+ * tag. The contents of it _is_ raw HTML source intended to be put
88+ * in verbatim form to the HTML output. */
89+ MD_BLOCK_HTML,
90+
91+ /* <p>...</p> */
92+ MD_BLOCK_P,
93+
94+ /* <table>...</table> and its contents.
95+ * Detail: Structure MD_BLOCK_TABLE_DETAIL (for MD_BLOCK_TABLE),
96+ * structure MD_BLOCK_TD_DETAIL (for MD_BLOCK_TH and MD_BLOCK_TD)
97+ * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
98+ MD_BLOCK_TABLE,
99+ MD_BLOCK_THEAD,
100+ MD_BLOCK_TBODY,
101+ MD_BLOCK_TR,
102+ MD_BLOCK_TH,
103+ MD_BLOCK_TD
104+} MD_BLOCKTYPE;
105+
106+/* Span represents an in-line piece of a document which should be rendered with
107+ * the same font, color and other attributes. A sequence of spans forms a block
108+ * like paragraph or list item. */
109+typedef enum MD_SPANTYPE {
110+ /* <em>...</em> */
111+ MD_SPAN_EM,
112+
113+ /* <strong>...</strong> */
114+ MD_SPAN_STRONG,
115+
116+ /* <a href="xxx">...</a>
117+ * Detail: Structure MD_SPAN_A_DETAIL. */
118+ MD_SPAN_A,
119+
120+ /* <img src="xxx">...</img>
121+ * Detail: Structure MD_SPAN_IMG_DETAIL.
122+ * Note: Image text can contain nested spans and even nested images.
123+ * If rendered into ALT attribute of HTML <IMG> tag, it is the responsibility
124+ * of the parser to deal with it.
125+ */
126+ MD_SPAN_IMG,
127+
128+ /* <code>...</code> */
129+ MD_SPAN_CODE,
130+
131+ /* <del>...</del>
132+ * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
133+ */
134+ MD_SPAN_DEL,
135+
136+ /* For recognizing inline ($) and display ($$) equations
137+ * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
138+ */
139+ MD_SPAN_LATEXMATH,
140+ MD_SPAN_LATEXMATH_DISPLAY,
141+
142+ /* Wiki links
143+ * Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
144+ */
145+ MD_SPAN_WIKILINK,
146+
147+ /* <u>...</u>
148+ * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
149+ MD_SPAN_U
150+} MD_SPANTYPE;
151+
152+/* Text is the actual textual contents of span. */
153+typedef enum MD_TEXTTYPE {
154+ /* Normal text. */
155+ MD_TEXT_NORMAL = 0,
156+
157+ /* NULL character. CommonMark requires replacing NULL character with
158+ * the replacement char U+FFFD, so this allows caller to do that easily. */
159+ MD_TEXT_NULLCHAR,
160+
161+ /* Line breaks.
162+ * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
163+ * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
164+ MD_TEXT_BR, /* <br> (hard break) */
165+ MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */
166+
167+ /* Entity.
168+ * (a) Named entity, e.g. &nbsp;
169+ * (Note MD4C does not have a list of known entities.
170+ * Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
171+ * treated as a named entity.)
172+ * (b) Numerical entity, e.g. &#1234;
173+ * (c) Hexadecimal entity, e.g. &#x12AB;
174+ *
175+ * As MD4C is mostly encoding agnostic, application gets the verbatim
176+ * entity text into the MD_PARSER::text_callback(). */
177+ MD_TEXT_ENTITY,
178+
179+ /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
180+ * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
181+ * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
182+ * kind of text. */
183+ MD_TEXT_CODE,
184+
185+ /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
186+ * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
187+ * The text contains verbatim '\n' for the new lines. */
188+ MD_TEXT_HTML,
189+
190+ /* Text is inside an equation. This is processed the same way as inlined code
191+ * spans (`code`). */
192+ MD_TEXT_LATEXMATH
193+} MD_TEXTTYPE;
194+
195+
196+/* Alignment enumeration. */
197+typedef enum MD_ALIGN {
198+ MD_ALIGN_DEFAULT = 0, /* When unspecified. */
199+ MD_ALIGN_LEFT,
200+ MD_ALIGN_CENTER,
201+ MD_ALIGN_RIGHT
202+} MD_ALIGN;
203+
204+
205+/* String attribute.
206+ *
207+ * This wraps strings which are outside of a normal text flow and which are
208+ * propagated within various detailed structures, but which still may contain
209+ * string portions of different types like e.g. entities.
210+ *
211+ * So, for example, let's consider this image:
212+ *
213+ * ![image alt text](http://example.org/image.png 'foo &quot; bar')
214+ *
215+ * The image alt text is propagated as a normal text via the MD_PARSER::text()
216+ * callback. However, the image title ('foo &quot; bar') is propagated as
217+ * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
218+ *
219+ * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
220+ * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
221+ * -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
222+ * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
223+ * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
224+ *
225+ * Note that these invariants are always guaranteed:
226+ * -- substr_offsets[0] == 0
227+ * -- substr_offsets[LAST+1] == size
228+ * -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
229+ * substrings can appear. This could change only if the specification
230+ * changes.
231+ */
232+typedef struct MD_ATTRIBUTE {
233+ const MD_CHAR* text;
234+ MD_SIZE size;
235+ const MD_TEXTTYPE* substr_types;
236+ const MD_OFFSET* substr_offsets;
237+} MD_ATTRIBUTE;
238+
239+
240+/* Detailed info for MD_BLOCK_UL. */
241+typedef struct MD_BLOCK_UL_DETAIL {
242+ int is_tight; /* Non-zero if tight list, zero if loose. */
243+ MD_CHAR mark; /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
244+} MD_BLOCK_UL_DETAIL;
245+
246+/* Detailed info for MD_BLOCK_OL. */
247+typedef struct MD_BLOCK_OL_DETAIL {
248+ unsigned start; /* Start index of the ordered list. */
249+ int is_tight; /* Non-zero if tight list, zero if loose. */
250+ MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
251+} MD_BLOCK_OL_DETAIL;
252+
253+/* Detailed info for MD_BLOCK_LI. */
254+typedef struct MD_BLOCK_LI_DETAIL {
255+ int is_task; /* Can be non-zero only with MD_FLAG_TASKLISTS */
256+ MD_CHAR task_mark; /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
257+ MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
258+} MD_BLOCK_LI_DETAIL;
259+
260+/* Detailed info for MD_BLOCK_H. */
261+typedef struct MD_BLOCK_H_DETAIL {
262+ unsigned level; /* Header level (1 - 6) */
263+} MD_BLOCK_H_DETAIL;
264+
265+/* Detailed info for MD_BLOCK_CODE. */
266+typedef struct MD_BLOCK_CODE_DETAIL {
267+ MD_ATTRIBUTE info;
268+ MD_ATTRIBUTE lang;
269+ MD_CHAR fence_char; /* The character used for fenced code block; or zero for indented code block. */
270+} MD_BLOCK_CODE_DETAIL;
271+
272+/* Detailed info for MD_BLOCK_TABLE. */
273+typedef struct MD_BLOCK_TABLE_DETAIL {
274+ unsigned col_count; /* Count of columns in the table. */
275+ unsigned head_row_count; /* Count of rows in the table header (currently always 1) */
276+ unsigned body_row_count; /* Count of rows in the table body */
277+} MD_BLOCK_TABLE_DETAIL;
278+
279+/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
280+typedef struct MD_BLOCK_TD_DETAIL {
281+ MD_ALIGN align;
282+} MD_BLOCK_TD_DETAIL;
283+
284+/* Detailed info for MD_SPAN_A. */
285+typedef struct MD_SPAN_A_DETAIL {
286+ MD_ATTRIBUTE href;
287+ MD_ATTRIBUTE title;
288+} MD_SPAN_A_DETAIL;
289+
290+/* Detailed info for MD_SPAN_IMG. */
291+typedef struct MD_SPAN_IMG_DETAIL {
292+ MD_ATTRIBUTE src;
293+ MD_ATTRIBUTE title;
294+} MD_SPAN_IMG_DETAIL;
295+
296+/* Detailed info for MD_SPAN_WIKILINK. */
297+typedef struct MD_SPAN_WIKILINK {
298+ MD_ATTRIBUTE target;
299+} MD_SPAN_WIKILINK_DETAIL;
300+
301+/* Flags specifying extensions/deviations from CommonMark specification.
302+ *
303+ * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
304+ * The following flags may allow some extensions or deviations from it.
305+ */
306+#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
307+#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */
308+#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */
309+#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
310+#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */
311+#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */
312+#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */
313+#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */
314+#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */
315+#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
316+#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
317+#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
318+#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
319+#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
320+
321+#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
322+#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
323+
324+/* Convenient sets of flags corresponding to well-known Markdown dialects.
325+ *
326+ * Note we may only support subset of features of the referred dialect.
327+ * The constant just enables those extensions which bring us as close as
328+ * possible given what features we implement.
329+ *
330+ * ABI compatibility note: Meaning of these can change in time as new
331+ * extensions, bringing the dialect closer to the original, are implemented.
332+ */
333+#define MD_DIALECT_COMMONMARK 0
334+#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
335+
336+/* Parser structure.
337+ */
338+typedef struct MD_PARSER {
339+ /* Reserved. Set to zero.
340+ */
341+ unsigned abi_version;
342+
343+ /* Dialect options. Bitmask of MD_FLAG_xxxx values.
344+ */
345+ unsigned flags;
346+
347+ /* Caller-provided rendering callbacks.
348+ *
349+ * For some block/span types, more detailed information is provided in a
350+ * type-specific structure pointed by the argument 'detail'.
351+ *
352+ * The last argument of all callbacks, 'userdata', is just propagated from
353+ * md_parse() and is available for any use by the application.
354+ *
355+ * Note any strings provided to the callbacks as their arguments or as
356+ * members of any detail structure are generally not zero-terminated.
357+ * Application has to take the respective size information into account.
358+ *
359+ * Any rendering callback may abort further parsing of the document by
360+ * returning non-zero.
361+ */
362+ int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
363+ int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
364+
365+ int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
366+ int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
367+
368+ int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
369+
370+ /* Debug callback. Optional (may be NULL).
371+ *
372+ * If provided and something goes wrong, this function gets called.
373+ * This is intended for debugging and problem diagnosis for developers;
374+ * it is not intended to provide any errors suitable for displaying to an
375+ * end user.
376+ */
377+ void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
378+
379+ /* Reserved. Set to NULL.
380+ */
381+ void (*syntax)(void);
382+} MD_PARSER;
383+
384+
385+/* For backward compatibility. Do not use in new code.
386+ */
387+typedef MD_PARSER MD_RENDERER;
388+
389+
390+/* Parse the Markdown document stored in the string 'text' of size 'size'.
391+ * The parser provides callbacks to be called during the parsing so the
392+ * caller can render the document on the screen or convert the Markdown
393+ * to another format.
394+ *
395+ * Zero is returned on success. If a runtime error occurs (e.g. a memory
396+ * allocation fails), -1 is returned. If the processing is aborted due to any
397+ * callback returning non-zero, the return value of the callback is returned.
398+ */
399+int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
400+
401+
402+#ifdef __cplusplus
403+ } /* extern "C" { */
404+#endif
405+
406+#endif /* MD4C_H */
A · md4c.o
+0, -0
M · stagit
+0, -0
M · stagit-index
+0, -0
M · stagit-index.c
+1, -11@@ -149,7 +149,7 @@ writelog(FILE *fp)
2
3 fputs("<tr><td><a href=\"", fp);
4 percentencode(fp, stripped_name, strlen(stripped_name));
5- fputs("/log.html\">", fp);
6+ fputs("/file/README.md.html\">", fp);
7 xmlencode(fp, stripped_name, strlen(stripped_name));
8 fputs("</a></td><td>", fp);
9 xmlencode(fp, description, strlen(description));
M · stagit-index.o
+0, -0
M · stagit.c
+56, -2 1@@ -14,6 +14,8 @@
2
3 #include <git2.h>
4
5+#include "md4c-html.h"
6+
7 #include "compat.h"
8
9 #define LEN(s) (sizeof(s)/sizeof(*s))
10@@ -558,6 +560,38 @@ writefooter(FILE *fp)
11 fputs("</div>\n</div>\n</div>\n</body>\n</html>\n", fp);
12 }
13
/*
 * Output callback handed to md_html(): append one chunk of rendered HTML to
 * the stdio stream passed as fp.
 *
 * output is not NUL-terminated; exactly len bytes are written. fwrite() is
 * used rather than fprintf("%.*s") because the precision argument of the
 * latter must be an int (len is unsigned) and it would stop at an embedded
 * NUL byte. Write errors are left on the stream for the caller to detect
 * with ferror().
 */
void
processmd(const char* output, unsigned int len, void *fp)
{
	fwrite(output, 1, len, (FILE *)fp);
}
19+
20+size_t
21+writeblobmd(FILE *fp, const git_blob *blob)
22+{
23+ size_t n = 0, i, len, prev, ret;
24+ const char *s = git_blob_rawcontent(blob);
25+ len = git_blob_rawsize(blob);
26+ fputs("<div id=\"md\">\n", fp);
27+ /* Counting lines in the file*/
28+ if (len > 0) {
29+ for (i = 0, prev = 0; i < len; i++) {
30+ if (s[i] != '\n')
31+ continue;
32+ n++;
33+ prev = i + 1;
34+ }
35+ if ((len - prev) > 0) {
36+ n++;
37+ }
38+ ret = md_html(s, len, processmd, fp, MD_FLAG_TABLES | MD_FLAG_TASKLISTS |
39+ MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS, 0);
40+ }
41+
42+ fputs("</div>\n", fp);
43+ return n;
44+}
45+
46 size_t
47 writeblobhtml(FILE *fp, const git_blob *blob)
48 {
49@@ -949,6 +983,18 @@ writeatom(FILE *fp, int all)
50 return 0;
51 }
52
/*
 * Report whether filename ends with the Markdown extension ".md".
 *
 * The comparison is case-sensitive. Names shorter than the extension are
 * rejected before any indexing, so no byte in front of the string is read.
 *
 * Returns 1 if the name ends in ".md", 0 otherwise.
 */
int
file_is_md(const char *filename)
{
	static const char ext[] = ".md";
	const size_t extlen = sizeof(ext) - 1;
	const size_t len = strlen(filename);

	if (len < extlen)
		return 0;
	return strcmp(filename + (len - extlen), ext) == 0;
}
64+
65 size_t
66 writeblob(git_object *obj, const char *fpath, const char *filename, size_t filesize)
67 {
68@@ -975,10 +1021,18 @@ writeblob(git_object *obj, const char *fpath, const char *filename, size_t files
69 fputs("<p> ", fp);
70 xmlencode(fp, filename, strlen(filename));
71 fprintf(fp, " (%zuB)", filesize);
72- if (git_blob_is_binary((git_blob *)obj))
73+
74+ if (git_blob_is_binary((git_blob *)obj)) {
75 fputs("<p>Binary file.</p>\n", fp);
76- else
77+ } else if (file_is_md(filename)) {
78+ lc = writeblobmd(fp, (git_blob *)obj);
79+ if (ferror(fp))
80+ err(1, "md parse fail");
81+ } else {
82 lc = writeblobhtml(fp, (git_blob *)obj);
83+ if (ferror(fp))
84+ err(1, "fwrite");
85+ }
86
87 writefooter(fp);
88 checkfileerror(fp, fpath, 'w');
M · stagit.o
+0, -0