stagit | Arjun

Added markdown rendering and updated index link to README

Arjun Choudhary contact@arjunchoudhary.com

2022-06-05 20:28:42 +0530 IST commit: 4c0ee0b parent: 0e83f3f

16 files changed, 9691 insertions(+), 5 deletions(-)
M	Makefile	+8	-2
A	entity.c	+2190	-0
A	entity.h	+42	-0
A	entity.o	+0	-0
A	md4c-html.c	+573	-0
A	md4c-html.h	+68	-0
A	md4c-html.o	+0	-0
A	md4c.c	+6348	-0
A	md4c.h	+405	-0
A	md4c.o	+0	-0
M	stagit	+0	-0
M	stagit-index	+0	-0
M	stagit-index.c	+1	-1
M	stagit-index.o	+0	-0
M	stagit.c	+56	-2
M	stagit.o	+0	-0

M · Makefile +8, -2

 1@@ -22,7 +22,10 @@ SRC = \
 2 COMPATSRC = \
 3 	reallocarray.c\
 4 	strlcat.c\
 5-	strlcpy.c
 6+	strlcpy.c\
 7+	entity.c\
 8+	md4c.c\
 9+	md4c-html.c
10 BIN = \
11 	stagit\
12 	stagit-index
13@@ -37,7 +40,10 @@ HDR = compat.h
14 COMPATOBJ = \
15 	reallocarray.o\
16 	strlcat.o\
17-	strlcpy.o
18+	strlcpy.o\
19+	entity.o\
20+	md4c.o\
21+	md4c-html.o
22 
23 OBJ = ${SRC:.c=.o} ${COMPATOBJ}
24

A · entity.c +2190, -0

   1@@ -0,0 +1,2190 @@
   2+/*
   3+ * MD4C: Markdown parser for C
   4+ * (http://github.com/mity/md4c)
   5+ *
   6+ * Copyright (c) 2016-2017 Martin Mitas
   7+ *
   8+ * Permission is hereby granted, free of charge, to any person obtaining a
   9+ * copy of this software and associated documentation files (the "Software"),
  10+ * to deal in the Software without restriction, including without limitation
  11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12+ * and/or sell copies of the Software, and to permit persons to whom the
  13+ * Software is furnished to do so, subject to the following conditions:
  14+ *
  15+ * The above copyright notice and this permission notice shall be included in
  16+ * all copies or substantial portions of the Software.
  17+ *
  18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  24+ * IN THE SOFTWARE.
  25+ */
  26+
  27+#include "entity.h"
  28+#include <string.h>
  29+
  30+
  31+/* The table is generated from https://html.spec.whatwg.org/entities.json */
  32+static const struct entity entity_table[] = {
  33+    { "&AElig;", { 198, 0 } },
  34+    { "&AMP;", { 38, 0 } },
  35+    { "&Aacute;", { 193, 0 } },
  36+    { "&Abreve;", { 258, 0 } },
  37+    { "&Acirc;", { 194, 0 } },
  38+    { "&Acy;", { 1040, 0 } },
  39+    { "&Afr;", { 120068, 0 } },
  40+    { "&Agrave;", { 192, 0 } },
  41+    { "&Alpha;", { 913, 0 } },
  42+    { "&Amacr;", { 256, 0 } },
  43+    { "&And;", { 10835, 0 } },
  44+    { "&Aogon;", { 260, 0 } },
  45+    { "&Aopf;", { 120120, 0 } },
  46+    { "&ApplyFunction;", { 8289, 0 } },
  47+    { "&Aring;", { 197, 0 } },
  48+    { "&Ascr;", { 119964, 0 } },
  49+    { "&Assign;", { 8788, 0 } },
  50+    { "&Atilde;", { 195, 0 } },
  51+    { "&Auml;", { 196, 0 } },
  52+    { "&Backslash;", { 8726, 0 } },
  53+    { "&Barv;", { 10983, 0 } },
  54+    { "&Barwed;", { 8966, 0 } },
  55+    { "&Bcy;", { 1041, 0 } },
  56+    { "&Because;", { 8757, 0 } },
  57+    { "&Bernoullis;", { 8492, 0 } },
  58+    { "&Beta;", { 914, 0 } },
  59+    { "&Bfr;", { 120069, 0 } },
  60+    { "&Bopf;", { 120121, 0 } },
  61+    { "&Breve;", { 728, 0 } },
  62+    { "&Bscr;", { 8492, 0 } },
  63+    { "&Bumpeq;", { 8782, 0 } },
  64+    { "&CHcy;", { 1063, 0 } },
  65+    { "&COPY;", { 169, 0 } },
  66+    { "&Cacute;", { 262, 0 } },
  67+    { "&Cap;", { 8914, 0 } },
  68+    { "&CapitalDifferentialD;", { 8517, 0 } },
  69+    { "&Cayleys;", { 8493, 0 } },
  70+    { "&Ccaron;", { 268, 0 } },
  71+    { "&Ccedil;", { 199, 0 } },
  72+    { "&Ccirc;", { 264, 0 } },
  73+    { "&Cconint;", { 8752, 0 } },
  74+    { "&Cdot;", { 266, 0 } },
  75+    { "&Cedilla;", { 184, 0 } },
  76+    { "&CenterDot;", { 183, 0 } },
  77+    { "&Cfr;", { 8493, 0 } },
  78+    { "&Chi;", { 935, 0 } },
  79+    { "&CircleDot;", { 8857, 0 } },
  80+    { "&CircleMinus;", { 8854, 0 } },
  81+    { "&CirclePlus;", { 8853, 0 } },
  82+    { "&CircleTimes;", { 8855, 0 } },
  83+    { "&ClockwiseContourIntegral;", { 8754, 0 } },
  84+    { "&CloseCurlyDoubleQuote;", { 8221, 0 } },
  85+    { "&CloseCurlyQuote;", { 8217, 0 } },
  86+    { "&Colon;", { 8759, 0 } },
  87+    { "&Colone;", { 10868, 0 } },
  88+    { "&Congruent;", { 8801, 0 } },
  89+    { "&Conint;", { 8751, 0 } },
  90+    { "&ContourIntegral;", { 8750, 0 } },
  91+    { "&Copf;", { 8450, 0 } },
  92+    { "&Coproduct;", { 8720, 0 } },
  93+    { "&CounterClockwiseContourIntegral;", { 8755, 0 } },
  94+    { "&Cross;", { 10799, 0 } },
  95+    { "&Cscr;", { 119966, 0 } },
  96+    { "&Cup;", { 8915, 0 } },
  97+    { "&CupCap;", { 8781, 0 } },
  98+    { "&DD;", { 8517, 0 } },
  99+    { "&DDotrahd;", { 10513, 0 } },
 100+    { "&DJcy;", { 1026, 0 } },
 101+    { "&DScy;", { 1029, 0 } },
 102+    { "&DZcy;", { 1039, 0 } },
 103+    { "&Dagger;", { 8225, 0 } },
 104+    { "&Darr;", { 8609, 0 } },
 105+    { "&Dashv;", { 10980, 0 } },
 106+    { "&Dcaron;", { 270, 0 } },
 107+    { "&Dcy;", { 1044, 0 } },
 108+    { "&Del;", { 8711, 0 } },
 109+    { "&Delta;", { 916, 0 } },
 110+    { "&Dfr;", { 120071, 0 } },
 111+    { "&DiacriticalAcute;", { 180, 0 } },
 112+    { "&DiacriticalDot;", { 729, 0 } },
 113+    { "&DiacriticalDoubleAcute;", { 733, 0 } },
 114+    { "&DiacriticalGrave;", { 96, 0 } },
 115+    { "&DiacriticalTilde;", { 732, 0 } },
 116+    { "&Diamond;", { 8900, 0 } },
 117+    { "&DifferentialD;", { 8518, 0 } },
 118+    { "&Dopf;", { 120123, 0 } },
 119+    { "&Dot;", { 168, 0 } },
 120+    { "&DotDot;", { 8412, 0 } },
 121+    { "&DotEqual;", { 8784, 0 } },
 122+    { "&DoubleContourIntegral;", { 8751, 0 } },
 123+    { "&DoubleDot;", { 168, 0 } },
 124+    { "&DoubleDownArrow;", { 8659, 0 } },
 125+    { "&DoubleLeftArrow;", { 8656, 0 } },
 126+    { "&DoubleLeftRightArrow;", { 8660, 0 } },
 127+    { "&DoubleLeftTee;", { 10980, 0 } },
 128+    { "&DoubleLongLeftArrow;", { 10232, 0 } },
 129+    { "&DoubleLongLeftRightArrow;", { 10234, 0 } },
 130+    { "&DoubleLongRightArrow;", { 10233, 0 } },
 131+    { "&DoubleRightArrow;", { 8658, 0 } },
 132+    { "&DoubleRightTee;", { 8872, 0 } },
 133+    { "&DoubleUpArrow;", { 8657, 0 } },
 134+    { "&DoubleUpDownArrow;", { 8661, 0 } },
 135+    { "&DoubleVerticalBar;", { 8741, 0 } },
 136+    { "&DownArrow;", { 8595, 0 } },
 137+    { "&DownArrowBar;", { 10515, 0 } },
 138+    { "&DownArrowUpArrow;", { 8693, 0 } },
 139+    { "&DownBreve;", { 785, 0 } },
 140+    { "&DownLeftRightVector;", { 10576, 0 } },
 141+    { "&DownLeftTeeVector;", { 10590, 0 } },
 142+    { "&DownLeftVector;", { 8637, 0 } },
 143+    { "&DownLeftVectorBar;", { 10582, 0 } },
 144+    { "&DownRightTeeVector;", { 10591, 0 } },
 145+    { "&DownRightVector;", { 8641, 0 } },
 146+    { "&DownRightVectorBar;", { 10583, 0 } },
 147+    { "&DownTee;", { 8868, 0 } },
 148+    { "&DownTeeArrow;", { 8615, 0 } },
 149+    { "&Downarrow;", { 8659, 0 } },
 150+    { "&Dscr;", { 119967, 0 } },
 151+    { "&Dstrok;", { 272, 0 } },
 152+    { "&ENG;", { 330, 0 } },
 153+    { "&ETH;", { 208, 0 } },
 154+    { "&Eacute;", { 201, 0 } },
 155+    { "&Ecaron;", { 282, 0 } },
 156+    { "&Ecirc;", { 202, 0 } },
 157+    { "&Ecy;", { 1069, 0 } },
 158+    { "&Edot;", { 278, 0 } },
 159+    { "&Efr;", { 120072, 0 } },
 160+    { "&Egrave;", { 200, 0 } },
 161+    { "&Element;", { 8712, 0 } },
 162+    { "&Emacr;", { 274, 0 } },
 163+    { "&EmptySmallSquare;", { 9723, 0 } },
 164+    { "&EmptyVerySmallSquare;", { 9643, 0 } },
 165+    { "&Eogon;", { 280, 0 } },
 166+    { "&Eopf;", { 120124, 0 } },
 167+    { "&Epsilon;", { 917, 0 } },
 168+    { "&Equal;", { 10869, 0 } },
 169+    { "&EqualTilde;", { 8770, 0 } },
 170+    { "&Equilibrium;", { 8652, 0 } },
 171+    { "&Escr;", { 8496, 0 } },
 172+    { "&Esim;", { 10867, 0 } },
 173+    { "&Eta;", { 919, 0 } },
 174+    { "&Euml;", { 203, 0 } },
 175+    { "&Exists;", { 8707, 0 } },
 176+    { "&ExponentialE;", { 8519, 0 } },
 177+    { "&Fcy;", { 1060, 0 } },
 178+    { "&Ffr;", { 120073, 0 } },
 179+    { "&FilledSmallSquare;", { 9724, 0 } },
 180+    { "&FilledVerySmallSquare;", { 9642, 0 } },
 181+    { "&Fopf;", { 120125, 0 } },
 182+    { "&ForAll;", { 8704, 0 } },
 183+    { "&Fouriertrf;", { 8497, 0 } },
 184+    { "&Fscr;", { 8497, 0 } },
 185+    { "&GJcy;", { 1027, 0 } },
 186+    { "&GT;", { 62, 0 } },
 187+    { "&Gamma;", { 915, 0 } },
 188+    { "&Gammad;", { 988, 0 } },
 189+    { "&Gbreve;", { 286, 0 } },
 190+    { "&Gcedil;", { 290, 0 } },
 191+    { "&Gcirc;", { 284, 0 } },
 192+    { "&Gcy;", { 1043, 0 } },
 193+    { "&Gdot;", { 288, 0 } },
 194+    { "&Gfr;", { 120074, 0 } },
 195+    { "&Gg;", { 8921, 0 } },
 196+    { "&Gopf;", { 120126, 0 } },
 197+    { "&GreaterEqual;", { 8805, 0 } },
 198+    { "&GreaterEqualLess;", { 8923, 0 } },
 199+    { "&GreaterFullEqual;", { 8807, 0 } },
 200+    { "&GreaterGreater;", { 10914, 0 } },
 201+    { "&GreaterLess;", { 8823, 0 } },
 202+    { "&GreaterSlantEqual;", { 10878, 0 } },
 203+    { "&GreaterTilde;", { 8819, 0 } },
 204+    { "&Gscr;", { 119970, 0 } },
 205+    { "&Gt;", { 8811, 0 } },
 206+    { "&HARDcy;", { 1066, 0 } },
 207+    { "&Hacek;", { 711, 0 } },
 208+    { "&Hat;", { 94, 0 } },
 209+    { "&Hcirc;", { 292, 0 } },
 210+    { "&Hfr;", { 8460, 0 } },
 211+    { "&HilbertSpace;", { 8459, 0 } },
 212+    { "&Hopf;", { 8461, 0 } },
 213+    { "&HorizontalLine;", { 9472, 0 } },
 214+    { "&Hscr;", { 8459, 0 } },
 215+    { "&Hstrok;", { 294, 0 } },
 216+    { "&HumpDownHump;", { 8782, 0 } },
 217+    { "&HumpEqual;", { 8783, 0 } },
 218+    { "&IEcy;", { 1045, 0 } },
 219+    { "&IJlig;", { 306, 0 } },
 220+    { "&IOcy;", { 1025, 0 } },
 221+    { "&Iacute;", { 205, 0 } },
 222+    { "&Icirc;", { 206, 0 } },
 223+    { "&Icy;", { 1048, 0 } },
 224+    { "&Idot;", { 304, 0 } },
 225+    { "&Ifr;", { 8465, 0 } },
 226+    { "&Igrave;", { 204, 0 } },
 227+    { "&Im;", { 8465, 0 } },
 228+    { "&Imacr;", { 298, 0 } },
 229+    { "&ImaginaryI;", { 8520, 0 } },
 230+    { "&Implies;", { 8658, 0 } },
 231+    { "&Int;", { 8748, 0 } },
 232+    { "&Integral;", { 8747, 0 } },
 233+    { "&Intersection;", { 8898, 0 } },
 234+    { "&InvisibleComma;", { 8291, 0 } },
 235+    { "&InvisibleTimes;", { 8290, 0 } },
 236+    { "&Iogon;", { 302, 0 } },
 237+    { "&Iopf;", { 120128, 0 } },
 238+    { "&Iota;", { 921, 0 } },
 239+    { "&Iscr;", { 8464, 0 } },
 240+    { "&Itilde;", { 296, 0 } },
 241+    { "&Iukcy;", { 1030, 0 } },
 242+    { "&Iuml;", { 207, 0 } },
 243+    { "&Jcirc;", { 308, 0 } },
 244+    { "&Jcy;", { 1049, 0 } },
 245+    { "&Jfr;", { 120077, 0 } },
 246+    { "&Jopf;", { 120129, 0 } },
 247+    { "&Jscr;", { 119973, 0 } },
 248+    { "&Jsercy;", { 1032, 0 } },
 249+    { "&Jukcy;", { 1028, 0 } },
 250+    { "&KHcy;", { 1061, 0 } },
 251+    { "&KJcy;", { 1036, 0 } },
 252+    { "&Kappa;", { 922, 0 } },
 253+    { "&Kcedil;", { 310, 0 } },
 254+    { "&Kcy;", { 1050, 0 } },
 255+    { "&Kfr;", { 120078, 0 } },
 256+    { "&Kopf;", { 120130, 0 } },
 257+    { "&Kscr;", { 119974, 0 } },
 258+    { "&LJcy;", { 1033, 0 } },
 259+    { "&LT;", { 60, 0 } },
 260+    { "&Lacute;", { 313, 0 } },
 261+    { "&Lambda;", { 923, 0 } },
 262+    { "&Lang;", { 10218, 0 } },
 263+    { "&Laplacetrf;", { 8466, 0 } },
 264+    { "&Larr;", { 8606, 0 } },
 265+    { "&Lcaron;", { 317, 0 } },
 266+    { "&Lcedil;", { 315, 0 } },
 267+    { "&Lcy;", { 1051, 0 } },
 268+    { "&LeftAngleBracket;", { 10216, 0 } },
 269+    { "&LeftArrow;", { 8592, 0 } },
 270+    { "&LeftArrowBar;", { 8676, 0 } },
 271+    { "&LeftArrowRightArrow;", { 8646, 0 } },
 272+    { "&LeftCeiling;", { 8968, 0 } },
 273+    { "&LeftDoubleBracket;", { 10214, 0 } },
 274+    { "&LeftDownTeeVector;", { 10593, 0 } },
 275+    { "&LeftDownVector;", { 8643, 0 } },
 276+    { "&LeftDownVectorBar;", { 10585, 0 } },
 277+    { "&LeftFloor;", { 8970, 0 } },
 278+    { "&LeftRightArrow;", { 8596, 0 } },
 279+    { "&LeftRightVector;", { 10574, 0 } },
 280+    { "&LeftTee;", { 8867, 0 } },
 281+    { "&LeftTeeArrow;", { 8612, 0 } },
 282+    { "&LeftTeeVector;", { 10586, 0 } },
 283+    { "&LeftTriangle;", { 8882, 0 } },
 284+    { "&LeftTriangleBar;", { 10703, 0 } },
 285+    { "&LeftTriangleEqual;", { 8884, 0 } },
 286+    { "&LeftUpDownVector;", { 10577, 0 } },
 287+    { "&LeftUpTeeVector;", { 10592, 0 } },
 288+    { "&LeftUpVector;", { 8639, 0 } },
 289+    { "&LeftUpVectorBar;", { 10584, 0 } },
 290+    { "&LeftVector;", { 8636, 0 } },
 291+    { "&LeftVectorBar;", { 10578, 0 } },
 292+    { "&Leftarrow;", { 8656, 0 } },
 293+    { "&Leftrightarrow;", { 8660, 0 } },
 294+    { "&LessEqualGreater;", { 8922, 0 } },
 295+    { "&LessFullEqual;", { 8806, 0 } },
 296+    { "&LessGreater;", { 8822, 0 } },
 297+    { "&LessLess;", { 10913, 0 } },
 298+    { "&LessSlantEqual;", { 10877, 0 } },
 299+    { "&LessTilde;", { 8818, 0 } },
 300+    { "&Lfr;", { 120079, 0 } },
 301+    { "&Ll;", { 8920, 0 } },
 302+    { "&Lleftarrow;", { 8666, 0 } },
 303+    { "&Lmidot;", { 319, 0 } },
 304+    { "&LongLeftArrow;", { 10229, 0 } },
 305+    { "&LongLeftRightArrow;", { 10231, 0 } },
 306+    { "&LongRightArrow;", { 10230, 0 } },
 307+    { "&Longleftarrow;", { 10232, 0 } },
 308+    { "&Longleftrightarrow;", { 10234, 0 } },
 309+    { "&Longrightarrow;", { 10233, 0 } },
 310+    { "&Lopf;", { 120131, 0 } },
 311+    { "&LowerLeftArrow;", { 8601, 0 } },
 312+    { "&LowerRightArrow;", { 8600, 0 } },
 313+    { "&Lscr;", { 8466, 0 } },
 314+    { "&Lsh;", { 8624, 0 } },
 315+    { "&Lstrok;", { 321, 0 } },
 316+    { "&Lt;", { 8810, 0 } },
 317+    { "&Map;", { 10501, 0 } },
 318+    { "&Mcy;", { 1052, 0 } },
 319+    { "&MediumSpace;", { 8287, 0 } },
 320+    { "&Mellintrf;", { 8499, 0 } },
 321+    { "&Mfr;", { 120080, 0 } },
 322+    { "&MinusPlus;", { 8723, 0 } },
 323+    { "&Mopf;", { 120132, 0 } },
 324+    { "&Mscr;", { 8499, 0 } },
 325+    { "&Mu;", { 924, 0 } },
 326+    { "&NJcy;", { 1034, 0 } },
 327+    { "&Nacute;", { 323, 0 } },
 328+    { "&Ncaron;", { 327, 0 } },
 329+    { "&Ncedil;", { 325, 0 } },
 330+    { "&Ncy;", { 1053, 0 } },
 331+    { "&NegativeMediumSpace;", { 8203, 0 } },
 332+    { "&NegativeThickSpace;", { 8203, 0 } },
 333+    { "&NegativeThinSpace;", { 8203, 0 } },
 334+    { "&NegativeVeryThinSpace;", { 8203, 0 } },
 335+    { "&NestedGreaterGreater;", { 8811, 0 } },
 336+    { "&NestedLessLess;", { 8810, 0 } },
 337+    { "&NewLine;", { 10, 0 } },
 338+    { "&Nfr;", { 120081, 0 } },
 339+    { "&NoBreak;", { 8288, 0 } },
 340+    { "&NonBreakingSpace;", { 160, 0 } },
 341+    { "&Nopf;", { 8469, 0 } },
 342+    { "&Not;", { 10988, 0 } },
 343+    { "&NotCongruent;", { 8802, 0 } },
 344+    { "&NotCupCap;", { 8813, 0 } },
 345+    { "&NotDoubleVerticalBar;", { 8742, 0 } },
 346+    { "&NotElement;", { 8713, 0 } },
 347+    { "&NotEqual;", { 8800, 0 } },
 348+    { "&NotEqualTilde;", { 8770, 824 } },
 349+    { "&NotExists;", { 8708, 0 } },
 350+    { "&NotGreater;", { 8815, 0 } },
 351+    { "&NotGreaterEqual;", { 8817, 0 } },
 352+    { "&NotGreaterFullEqual;", { 8807, 824 } },
 353+    { "&NotGreaterGreater;", { 8811, 824 } },
 354+    { "&NotGreaterLess;", { 8825, 0 } },
 355+    { "&NotGreaterSlantEqual;", { 10878, 824 } },
 356+    { "&NotGreaterTilde;", { 8821, 0 } },
 357+    { "&NotHumpDownHump;", { 8782, 824 } },
 358+    { "&NotHumpEqual;", { 8783, 824 } },
 359+    { "&NotLeftTriangle;", { 8938, 0 } },
 360+    { "&NotLeftTriangleBar;", { 10703, 824 } },
 361+    { "&NotLeftTriangleEqual;", { 8940, 0 } },
 362+    { "&NotLess;", { 8814, 0 } },
 363+    { "&NotLessEqual;", { 8816, 0 } },
 364+    { "&NotLessGreater;", { 8824, 0 } },
 365+    { "&NotLessLess;", { 8810, 824 } },
 366+    { "&NotLessSlantEqual;", { 10877, 824 } },
 367+    { "&NotLessTilde;", { 8820, 0 } },
 368+    { "&NotNestedGreaterGreater;", { 10914, 824 } },
 369+    { "&NotNestedLessLess;", { 10913, 824 } },
 370+    { "&NotPrecedes;", { 8832, 0 } },
 371+    { "&NotPrecedesEqual;", { 10927, 824 } },
 372+    { "&NotPrecedesSlantEqual;", { 8928, 0 } },
 373+    { "&NotReverseElement;", { 8716, 0 } },
 374+    { "&NotRightTriangle;", { 8939, 0 } },
 375+    { "&NotRightTriangleBar;", { 10704, 824 } },
 376+    { "&NotRightTriangleEqual;", { 8941, 0 } },
 377+    { "&NotSquareSubset;", { 8847, 824 } },
 378+    { "&NotSquareSubsetEqual;", { 8930, 0 } },
 379+    { "&NotSquareSuperset;", { 8848, 824 } },
 380+    { "&NotSquareSupersetEqual;", { 8931, 0 } },
 381+    { "&NotSubset;", { 8834, 8402 } },
 382+    { "&NotSubsetEqual;", { 8840, 0 } },
 383+    { "&NotSucceeds;", { 8833, 0 } },
 384+    { "&NotSucceedsEqual;", { 10928, 824 } },
 385+    { "&NotSucceedsSlantEqual;", { 8929, 0 } },
 386+    { "&NotSucceedsTilde;", { 8831, 824 } },
 387+    { "&NotSuperset;", { 8835, 8402 } },
 388+    { "&NotSupersetEqual;", { 8841, 0 } },
 389+    { "&NotTilde;", { 8769, 0 } },
 390+    { "&NotTildeEqual;", { 8772, 0 } },
 391+    { "&NotTildeFullEqual;", { 8775, 0 } },
 392+    { "&NotTildeTilde;", { 8777, 0 } },
 393+    { "&NotVerticalBar;", { 8740, 0 } },
 394+    { "&Nscr;", { 119977, 0 } },
 395+    { "&Ntilde;", { 209, 0 } },
 396+    { "&Nu;", { 925, 0 } },
 397+    { "&OElig;", { 338, 0 } },
 398+    { "&Oacute;", { 211, 0 } },
 399+    { "&Ocirc;", { 212, 0 } },
 400+    { "&Ocy;", { 1054, 0 } },
 401+    { "&Odblac;", { 336, 0 } },
 402+    { "&Ofr;", { 120082, 0 } },
 403+    { "&Ograve;", { 210, 0 } },
 404+    { "&Omacr;", { 332, 0 } },
 405+    { "&Omega;", { 937, 0 } },
 406+    { "&Omicron;", { 927, 0 } },
 407+    { "&Oopf;", { 120134, 0 } },
 408+    { "&OpenCurlyDoubleQuote;", { 8220, 0 } },
 409+    { "&OpenCurlyQuote;", { 8216, 0 } },
 410+    { "&Or;", { 10836, 0 } },
 411+    { "&Oscr;", { 119978, 0 } },
 412+    { "&Oslash;", { 216, 0 } },
 413+    { "&Otilde;", { 213, 0 } },
 414+    { "&Otimes;", { 10807, 0 } },
 415+    { "&Ouml;", { 214, 0 } },
 416+    { "&OverBar;", { 8254, 0 } },
 417+    { "&OverBrace;", { 9182, 0 } },
 418+    { "&OverBracket;", { 9140, 0 } },
 419+    { "&OverParenthesis;", { 9180, 0 } },
 420+    { "&PartialD;", { 8706, 0 } },
 421+    { "&Pcy;", { 1055, 0 } },
 422+    { "&Pfr;", { 120083, 0 } },
 423+    { "&Phi;", { 934, 0 } },
 424+    { "&Pi;", { 928, 0 } },
 425+    { "&PlusMinus;", { 177, 0 } },
 426+    { "&Poincareplane;", { 8460, 0 } },
 427+    { "&Popf;", { 8473, 0 } },
 428+    { "&Pr;", { 10939, 0 } },
 429+    { "&Precedes;", { 8826, 0 } },
 430+    { "&PrecedesEqual;", { 10927, 0 } },
 431+    { "&PrecedesSlantEqual;", { 8828, 0 } },
 432+    { "&PrecedesTilde;", { 8830, 0 } },
 433+    { "&Prime;", { 8243, 0 } },
 434+    { "&Product;", { 8719, 0 } },
 435+    { "&Proportion;", { 8759, 0 } },
 436+    { "&Proportional;", { 8733, 0 } },
 437+    { "&Pscr;", { 119979, 0 } },
 438+    { "&Psi;", { 936, 0 } },
 439+    { "&QUOT;", { 34, 0 } },
 440+    { "&Qfr;", { 120084, 0 } },
 441+    { "&Qopf;", { 8474, 0 } },
 442+    { "&Qscr;", { 119980, 0 } },
 443+    { "&RBarr;", { 10512, 0 } },
 444+    { "&REG;", { 174, 0 } },
 445+    { "&Racute;", { 340, 0 } },
 446+    { "&Rang;", { 10219, 0 } },
 447+    { "&Rarr;", { 8608, 0 } },
 448+    { "&Rarrtl;", { 10518, 0 } },
 449+    { "&Rcaron;", { 344, 0 } },
 450+    { "&Rcedil;", { 342, 0 } },
 451+    { "&Rcy;", { 1056, 0 } },
 452+    { "&Re;", { 8476, 0 } },
 453+    { "&ReverseElement;", { 8715, 0 } },
 454+    { "&ReverseEquilibrium;", { 8651, 0 } },
 455+    { "&ReverseUpEquilibrium;", { 10607, 0 } },
 456+    { "&Rfr;", { 8476, 0 } },
 457+    { "&Rho;", { 929, 0 } },
 458+    { "&RightAngleBracket;", { 10217, 0 } },
 459+    { "&RightArrow;", { 8594, 0 } },
 460+    { "&RightArrowBar;", { 8677, 0 } },
 461+    { "&RightArrowLeftArrow;", { 8644, 0 } },
 462+    { "&RightCeiling;", { 8969, 0 } },
 463+    { "&RightDoubleBracket;", { 10215, 0 } },
 464+    { "&RightDownTeeVector;", { 10589, 0 } },
 465+    { "&RightDownVector;", { 8642, 0 } },
 466+    { "&RightDownVectorBar;", { 10581, 0 } },
 467+    { "&RightFloor;", { 8971, 0 } },
 468+    { "&RightTee;", { 8866, 0 } },
 469+    { "&RightTeeArrow;", { 8614, 0 } },
 470+    { "&RightTeeVector;", { 10587, 0 } },
 471+    { "&RightTriangle;", { 8883, 0 } },
 472+    { "&RightTriangleBar;", { 10704, 0 } },
 473+    { "&RightTriangleEqual;", { 8885, 0 } },
 474+    { "&RightUpDownVector;", { 10575, 0 } },
 475+    { "&RightUpTeeVector;", { 10588, 0 } },
 476+    { "&RightUpVector;", { 8638, 0 } },
 477+    { "&RightUpVectorBar;", { 10580, 0 } },
 478+    { "&RightVector;", { 8640, 0 } },
 479+    { "&RightVectorBar;", { 10579, 0 } },
 480+    { "&Rightarrow;", { 8658, 0 } },
 481+    { "&Ropf;", { 8477, 0 } },
 482+    { "&RoundImplies;", { 10608, 0 } },
 483+    { "&Rrightarrow;", { 8667, 0 } },
 484+    { "&Rscr;", { 8475, 0 } },
 485+    { "&Rsh;", { 8625, 0 } },
 486+    { "&RuleDelayed;", { 10740, 0 } },
 487+    { "&SHCHcy;", { 1065, 0 } },
 488+    { "&SHcy;", { 1064, 0 } },
 489+    { "&SOFTcy;", { 1068, 0 } },
 490+    { "&Sacute;", { 346, 0 } },
 491+    { "&Sc;", { 10940, 0 } },
 492+    { "&Scaron;", { 352, 0 } },
 493+    { "&Scedil;", { 350, 0 } },
 494+    { "&Scirc;", { 348, 0 } },
 495+    { "&Scy;", { 1057, 0 } },
 496+    { "&Sfr;", { 120086, 0 } },
 497+    { "&ShortDownArrow;", { 8595, 0 } },
 498+    { "&ShortLeftArrow;", { 8592, 0 } },
 499+    { "&ShortRightArrow;", { 8594, 0 } },
 500+    { "&ShortUpArrow;", { 8593, 0 } },
 501+    { "&Sigma;", { 931, 0 } },
 502+    { "&SmallCircle;", { 8728, 0 } },
 503+    { "&Sopf;", { 120138, 0 } },
 504+    { "&Sqrt;", { 8730, 0 } },
 505+    { "&Square;", { 9633, 0 } },
 506+    { "&SquareIntersection;", { 8851, 0 } },
 507+    { "&SquareSubset;", { 8847, 0 } },
 508+    { "&SquareSubsetEqual;", { 8849, 0 } },
 509+    { "&SquareSuperset;", { 8848, 0 } },
 510+    { "&SquareSupersetEqual;", { 8850, 0 } },
 511+    { "&SquareUnion;", { 8852, 0 } },
 512+    { "&Sscr;", { 119982, 0 } },
 513+    { "&Star;", { 8902, 0 } },
 514+    { "&Sub;", { 8912, 0 } },
 515+    { "&Subset;", { 8912, 0 } },
 516+    { "&SubsetEqual;", { 8838, 0 } },
 517+    { "&Succeeds;", { 8827, 0 } },
 518+    { "&SucceedsEqual;", { 10928, 0 } },
 519+    { "&SucceedsSlantEqual;", { 8829, 0 } },
 520+    { "&SucceedsTilde;", { 8831, 0 } },
 521+    { "&SuchThat;", { 8715, 0 } },
 522+    { "&Sum;", { 8721, 0 } },
 523+    { "&Sup;", { 8913, 0 } },
 524+    { "&Superset;", { 8835, 0 } },
 525+    { "&SupersetEqual;", { 8839, 0 } },
 526+    { "&Supset;", { 8913, 0 } },
 527+    { "&THORN;", { 222, 0 } },
 528+    { "&TRADE;", { 8482, 0 } },
 529+    { "&TSHcy;", { 1035, 0 } },
 530+    { "&TScy;", { 1062, 0 } },
 531+    { "&Tab;", { 9, 0 } },
 532+    { "&Tau;", { 932, 0 } },
 533+    { "&Tcaron;", { 356, 0 } },
 534+    { "&Tcedil;", { 354, 0 } },
 535+    { "&Tcy;", { 1058, 0 } },
 536+    { "&Tfr;", { 120087, 0 } },
 537+    { "&Therefore;", { 8756, 0 } },
 538+    { "&Theta;", { 920, 0 } },
 539+    { "&ThickSpace;", { 8287, 8202 } },
 540+    { "&ThinSpace;", { 8201, 0 } },
 541+    { "&Tilde;", { 8764, 0 } },
 542+    { "&TildeEqual;", { 8771, 0 } },
 543+    { "&TildeFullEqual;", { 8773, 0 } },
 544+    { "&TildeTilde;", { 8776, 0 } },
 545+    { "&Topf;", { 120139, 0 } },
 546+    { "&TripleDot;", { 8411, 0 } },
 547+    { "&Tscr;", { 119983, 0 } },
 548+    { "&Tstrok;", { 358, 0 } },
 549+    { "&Uacute;", { 218, 0 } },
 550+    { "&Uarr;", { 8607, 0 } },
 551+    { "&Uarrocir;", { 10569, 0 } },
 552+    { "&Ubrcy;", { 1038, 0 } },
 553+    { "&Ubreve;", { 364, 0 } },
 554+    { "&Ucirc;", { 219, 0 } },
 555+    { "&Ucy;", { 1059, 0 } },
 556+    { "&Udblac;", { 368, 0 } },
 557+    { "&Ufr;", { 120088, 0 } },
 558+    { "&Ugrave;", { 217, 0 } },
 559+    { "&Umacr;", { 362, 0 } },
 560+    { "&UnderBar;", { 95, 0 } },
 561+    { "&UnderBrace;", { 9183, 0 } },
 562+    { "&UnderBracket;", { 9141, 0 } },
 563+    { "&UnderParenthesis;", { 9181, 0 } },
 564+    { "&Union;", { 8899, 0 } },
 565+    { "&UnionPlus;", { 8846, 0 } },
 566+    { "&Uogon;", { 370, 0 } },
 567+    { "&Uopf;", { 120140, 0 } },
 568+    { "&UpArrow;", { 8593, 0 } },
 569+    { "&UpArrowBar;", { 10514, 0 } },
 570+    { "&UpArrowDownArrow;", { 8645, 0 } },
 571+    { "&UpDownArrow;", { 8597, 0 } },
 572+    { "&UpEquilibrium;", { 10606, 0 } },
 573+    { "&UpTee;", { 8869, 0 } },
 574+    { "&UpTeeArrow;", { 8613, 0 } },
 575+    { "&Uparrow;", { 8657, 0 } },
 576+    { "&Updownarrow;", { 8661, 0 } },
 577+    { "&UpperLeftArrow;", { 8598, 0 } },
 578+    { "&UpperRightArrow;", { 8599, 0 } },
 579+    { "&Upsi;", { 978, 0 } },
 580+    { "&Upsilon;", { 933, 0 } },
 581+    { "&Uring;", { 366, 0 } },
 582+    { "&Uscr;", { 119984, 0 } },
 583+    { "&Utilde;", { 360, 0 } },
 584+    { "&Uuml;", { 220, 0 } },
 585+    { "&VDash;", { 8875, 0 } },
 586+    { "&Vbar;", { 10987, 0 } },
 587+    { "&Vcy;", { 1042, 0 } },
 588+    { "&Vdash;", { 8873, 0 } },
 589+    { "&Vdashl;", { 10982, 0 } },
 590+    { "&Vee;", { 8897, 0 } },
 591+    { "&Verbar;", { 8214, 0 } },
 592+    { "&Vert;", { 8214, 0 } },
 593+    { "&VerticalBar;", { 8739, 0 } },
 594+    { "&VerticalLine;", { 124, 0 } },
 595+    { "&VerticalSeparator;", { 10072, 0 } },
 596+    { "&VerticalTilde;", { 8768, 0 } },
 597+    { "&VeryThinSpace;", { 8202, 0 } },
 598+    { "&Vfr;", { 120089, 0 } },
 599+    { "&Vopf;", { 120141, 0 } },
 600+    { "&Vscr;", { 119985, 0 } },
 601+    { "&Vvdash;", { 8874, 0 } },
 602+    { "&Wcirc;", { 372, 0 } },
 603+    { "&Wedge;", { 8896, 0 } },
 604+    { "&Wfr;", { 120090, 0 } },
 605+    { "&Wopf;", { 120142, 0 } },
 606+    { "&Wscr;", { 119986, 0 } },
 607+    { "&Xfr;", { 120091, 0 } },
 608+    { "&Xi;", { 926, 0 } },
 609+    { "&Xopf;", { 120143, 0 } },
 610+    { "&Xscr;", { 119987, 0 } },
 611+    { "&YAcy;", { 1071, 0 } },
 612+    { "&YIcy;", { 1031, 0 } },
 613+    { "&YUcy;", { 1070, 0 } },
 614+    { "&Yacute;", { 221, 0 } },
 615+    { "&Ycirc;", { 374, 0 } },
 616+    { "&Ycy;", { 1067, 0 } },
 617+    { "&Yfr;", { 120092, 0 } },
 618+    { "&Yopf;", { 120144, 0 } },
 619+    { "&Yscr;", { 119988, 0 } },
 620+    { "&Yuml;", { 376, 0 } },
 621+    { "&ZHcy;", { 1046, 0 } },
 622+    { "&Zacute;", { 377, 0 } },
 623+    { "&Zcaron;", { 381, 0 } },
 624+    { "&Zcy;", { 1047, 0 } },
 625+    { "&Zdot;", { 379, 0 } },
 626+    { "&ZeroWidthSpace;", { 8203, 0 } },
 627+    { "&Zeta;", { 918, 0 } },
 628+    { "&Zfr;", { 8488, 0 } },
 629+    { "&Zopf;", { 8484, 0 } },
 630+    { "&Zscr;", { 119989, 0 } },
 631+    { "&aacute;", { 225, 0 } },
 632+    { "&abreve;", { 259, 0 } },
 633+    { "&ac;", { 8766, 0 } },
 634+    { "&acE;", { 8766, 819 } },
 635+    { "&acd;", { 8767, 0 } },
 636+    { "&acirc;", { 226, 0 } },
 637+    { "&acute;", { 180, 0 } },
 638+    { "&acy;", { 1072, 0 } },
 639+    { "&aelig;", { 230, 0 } },
 640+    { "&af;", { 8289, 0 } },
 641+    { "&afr;", { 120094, 0 } },
 642+    { "&agrave;", { 224, 0 } },
 643+    { "&alefsym;", { 8501, 0 } },
 644+    { "&aleph;", { 8501, 0 } },
 645+    { "&alpha;", { 945, 0 } },
 646+    { "&amacr;", { 257, 0 } },
 647+    { "&amalg;", { 10815, 0 } },
 648+    { "&amp;", { 38, 0 } },
 649+    { "&and;", { 8743, 0 } },
 650+    { "&andand;", { 10837, 0 } },
 651+    { "&andd;", { 10844, 0 } },
 652+    { "&andslope;", { 10840, 0 } },
 653+    { "&andv;", { 10842, 0 } },
 654+    { "&ang;", { 8736, 0 } },
 655+    { "&ange;", { 10660, 0 } },
 656+    { "&angle;", { 8736, 0 } },
 657+    { "&angmsd;", { 8737, 0 } },
 658+    { "&angmsdaa;", { 10664, 0 } },
 659+    { "&angmsdab;", { 10665, 0 } },
 660+    { "&angmsdac;", { 10666, 0 } },
 661+    { "&angmsdad;", { 10667, 0 } },
 662+    { "&angmsdae;", { 10668, 0 } },
 663+    { "&angmsdaf;", { 10669, 0 } },
 664+    { "&angmsdag;", { 10670, 0 } },
 665+    { "&angmsdah;", { 10671, 0 } },
 666+    { "&angrt;", { 8735, 0 } },
 667+    { "&angrtvb;", { 8894, 0 } },
 668+    { "&angrtvbd;", { 10653, 0 } },
 669+    { "&angsph;", { 8738, 0 } },
 670+    { "&angst;", { 197, 0 } },
 671+    { "&angzarr;", { 9084, 0 } },
 672+    { "&aogon;", { 261, 0 } },
 673+    { "&aopf;", { 120146, 0 } },
 674+    { "&ap;", { 8776, 0 } },
 675+    { "&apE;", { 10864, 0 } },
 676+    { "&apacir;", { 10863, 0 } },
 677+    { "&ape;", { 8778, 0 } },
 678+    { "&apid;", { 8779, 0 } },
 679+    { "&apos;", { 39, 0 } },
 680+    { "&approx;", { 8776, 0 } },
 681+    { "&approxeq;", { 8778, 0 } },
 682+    { "&aring;", { 229, 0 } },
 683+    { "&ascr;", { 119990, 0 } },
 684+    { "&ast;", { 42, 0 } },
 685+    { "&asymp;", { 8776, 0 } },
 686+    { "&asympeq;", { 8781, 0 } },
 687+    { "&atilde;", { 227, 0 } },
 688+    { "&auml;", { 228, 0 } },
 689+    { "&awconint;", { 8755, 0 } },
 690+    { "&awint;", { 10769, 0 } },
 691+    { "&bNot;", { 10989, 0 } },
 692+    { "&backcong;", { 8780, 0 } },
 693+    { "&backepsilon;", { 1014, 0 } },
 694+    { "&backprime;", { 8245, 0 } },
 695+    { "&backsim;", { 8765, 0 } },
 696+    { "&backsimeq;", { 8909, 0 } },
 697+    { "&barvee;", { 8893, 0 } },
 698+    { "&barwed;", { 8965, 0 } },
 699+    { "&barwedge;", { 8965, 0 } },
 700+    { "&bbrk;", { 9141, 0 } },
 701+    { "&bbrktbrk;", { 9142, 0 } },
 702+    { "&bcong;", { 8780, 0 } },
 703+    { "&bcy;", { 1073, 0 } },
 704+    { "&bdquo;", { 8222, 0 } },
 705+    { "&becaus;", { 8757, 0 } },
 706+    { "&because;", { 8757, 0 } },
 707+    { "&bemptyv;", { 10672, 0 } },
 708+    { "&bepsi;", { 1014, 0 } },
 709+    { "&bernou;", { 8492, 0 } },
 710+    { "&beta;", { 946, 0 } },
 711+    { "&beth;", { 8502, 0 } },
 712+    { "&between;", { 8812, 0 } },
 713+    { "&bfr;", { 120095, 0 } },
 714+    { "&bigcap;", { 8898, 0 } },
 715+    { "&bigcirc;", { 9711, 0 } },
 716+    { "&bigcup;", { 8899, 0 } },
 717+    { "&bigodot;", { 10752, 0 } },
 718+    { "&bigoplus;", { 10753, 0 } },
 719+    { "&bigotimes;", { 10754, 0 } },
 720+    { "&bigsqcup;", { 10758, 0 } },
 721+    { "&bigstar;", { 9733, 0 } },
 722+    { "&bigtriangledown;", { 9661, 0 } },
 723+    { "&bigtriangleup;", { 9651, 0 } },
 724+    { "&biguplus;", { 10756, 0 } },
 725+    { "&bigvee;", { 8897, 0 } },
 726+    { "&bigwedge;", { 8896, 0 } },
 727+    { "&bkarow;", { 10509, 0 } },
 728+    { "&blacklozenge;", { 10731, 0 } },
 729+    { "&blacksquare;", { 9642, 0 } },
 730+    { "&blacktriangle;", { 9652, 0 } },
 731+    { "&blacktriangledown;", { 9662, 0 } },
 732+    { "&blacktriangleleft;", { 9666, 0 } },
 733+    { "&blacktriangleright;", { 9656, 0 } },
 734+    { "&blank;", { 9251, 0 } },
 735+    { "&blk12;", { 9618, 0 } },
 736+    { "&blk14;", { 9617, 0 } },
 737+    { "&blk34;", { 9619, 0 } },
 738+    { "&block;", { 9608, 0 } },
 739+    { "&bne;", { 61, 8421 } },
 740+    { "&bnequiv;", { 8801, 8421 } },
 741+    { "&bnot;", { 8976, 0 } },
 742+    { "&bopf;", { 120147, 0 } },
 743+    { "&bot;", { 8869, 0 } },
 744+    { "&bottom;", { 8869, 0 } },
 745+    { "&bowtie;", { 8904, 0 } },
 746+    { "&boxDL;", { 9559, 0 } },
 747+    { "&boxDR;", { 9556, 0 } },
 748+    { "&boxDl;", { 9558, 0 } },
 749+    { "&boxDr;", { 9555, 0 } },
 750+    { "&boxH;", { 9552, 0 } },
 751+    { "&boxHD;", { 9574, 0 } },
 752+    { "&boxHU;", { 9577, 0 } },
 753+    { "&boxHd;", { 9572, 0 } },
 754+    { "&boxHu;", { 9575, 0 } },
 755+    { "&boxUL;", { 9565, 0 } },
 756+    { "&boxUR;", { 9562, 0 } },
 757+    { "&boxUl;", { 9564, 0 } },
 758+    { "&boxUr;", { 9561, 0 } },
 759+    { "&boxV;", { 9553, 0 } },
 760+    { "&boxVH;", { 9580, 0 } },
 761+    { "&boxVL;", { 9571, 0 } },
 762+    { "&boxVR;", { 9568, 0 } },
 763+    { "&boxVh;", { 9579, 0 } },
 764+    { "&boxVl;", { 9570, 0 } },
 765+    { "&boxVr;", { 9567, 0 } },
 766+    { "&boxbox;", { 10697, 0 } },
 767+    { "&boxdL;", { 9557, 0 } },
 768+    { "&boxdR;", { 9554, 0 } },
 769+    { "&boxdl;", { 9488, 0 } },
 770+    { "&boxdr;", { 9484, 0 } },
 771+    { "&boxh;", { 9472, 0 } },
 772+    { "&boxhD;", { 9573, 0 } },
 773+    { "&boxhU;", { 9576, 0 } },
 774+    { "&boxhd;", { 9516, 0 } },
 775+    { "&boxhu;", { 9524, 0 } },
 776+    { "&boxminus;", { 8863, 0 } },
 777+    { "&boxplus;", { 8862, 0 } },
 778+    { "&boxtimes;", { 8864, 0 } },
 779+    { "&boxuL;", { 9563, 0 } },
 780+    { "&boxuR;", { 9560, 0 } },
 781+    { "&boxul;", { 9496, 0 } },
 782+    { "&boxur;", { 9492, 0 } },
 783+    { "&boxv;", { 9474, 0 } },
 784+    { "&boxvH;", { 9578, 0 } },
 785+    { "&boxvL;", { 9569, 0 } },
 786+    { "&boxvR;", { 9566, 0 } },
 787+    { "&boxvh;", { 9532, 0 } },
 788+    { "&boxvl;", { 9508, 0 } },
 789+    { "&boxvr;", { 9500, 0 } },
 790+    { "&bprime;", { 8245, 0 } },
 791+    { "&breve;", { 728, 0 } },
 792+    { "&brvbar;", { 166, 0 } },
 793+    { "&bscr;", { 119991, 0 } },
 794+    { "&bsemi;", { 8271, 0 } },
 795+    { "&bsim;", { 8765, 0 } },
 796+    { "&bsime;", { 8909, 0 } },
 797+    { "&bsol;", { 92, 0 } },
 798+    { "&bsolb;", { 10693, 0 } },
 799+    { "&bsolhsub;", { 10184, 0 } },
 800+    { "&bull;", { 8226, 0 } },
 801+    { "&bullet;", { 8226, 0 } },
 802+    { "&bump;", { 8782, 0 } },
 803+    { "&bumpE;", { 10926, 0 } },
 804+    { "&bumpe;", { 8783, 0 } },
 805+    { "&bumpeq;", { 8783, 0 } },
 806+    { "&cacute;", { 263, 0 } },
 807+    { "&cap;", { 8745, 0 } },
 808+    { "&capand;", { 10820, 0 } },
 809+    { "&capbrcup;", { 10825, 0 } },
 810+    { "&capcap;", { 10827, 0 } },
 811+    { "&capcup;", { 10823, 0 } },
 812+    { "&capdot;", { 10816, 0 } },
 813+    { "&caps;", { 8745, 65024 } },
 814+    { "&caret;", { 8257, 0 } },
 815+    { "&caron;", { 711, 0 } },
 816+    { "&ccaps;", { 10829, 0 } },
 817+    { "&ccaron;", { 269, 0 } },
 818+    { "&ccedil;", { 231, 0 } },
 819+    { "&ccirc;", { 265, 0 } },
 820+    { "&ccups;", { 10828, 0 } },
 821+    { "&ccupssm;", { 10832, 0 } },
 822+    { "&cdot;", { 267, 0 } },
 823+    { "&cedil;", { 184, 0 } },
 824+    { "&cemptyv;", { 10674, 0 } },
 825+    { "&cent;", { 162, 0 } },
 826+    { "&centerdot;", { 183, 0 } },
 827+    { "&cfr;", { 120096, 0 } },
 828+    { "&chcy;", { 1095, 0 } },
 829+    { "&check;", { 10003, 0 } },
 830+    { "&checkmark;", { 10003, 0 } },
 831+    { "&chi;", { 967, 0 } },
 832+    { "&cir;", { 9675, 0 } },
 833+    { "&cirE;", { 10691, 0 } },
 834+    { "&circ;", { 710, 0 } },
 835+    { "&circeq;", { 8791, 0 } },
 836+    { "&circlearrowleft;", { 8634, 0 } },
 837+    { "&circlearrowright;", { 8635, 0 } },
 838+    { "&circledR;", { 174, 0 } },
 839+    { "&circledS;", { 9416, 0 } },
 840+    { "&circledast;", { 8859, 0 } },
 841+    { "&circledcirc;", { 8858, 0 } },
 842+    { "&circleddash;", { 8861, 0 } },
 843+    { "&cire;", { 8791, 0 } },
 844+    { "&cirfnint;", { 10768, 0 } },
 845+    { "&cirmid;", { 10991, 0 } },
 846+    { "&cirscir;", { 10690, 0 } },
 847+    { "&clubs;", { 9827, 0 } },
 848+    { "&clubsuit;", { 9827, 0 } },
 849+    { "&colon;", { 58, 0 } },
 850+    { "&colone;", { 8788, 0 } },
 851+    { "&coloneq;", { 8788, 0 } },
 852+    { "&comma;", { 44, 0 } },
 853+    { "&commat;", { 64, 0 } },
 854+    { "&comp;", { 8705, 0 } },
 855+    { "&compfn;", { 8728, 0 } },
 856+    { "&complement;", { 8705, 0 } },
 857+    { "&complexes;", { 8450, 0 } },
 858+    { "&cong;", { 8773, 0 } },
 859+    { "&congdot;", { 10861, 0 } },
 860+    { "&conint;", { 8750, 0 } },
 861+    { "&copf;", { 120148, 0 } },
 862+    { "&coprod;", { 8720, 0 } },
 863+    { "&copy;", { 169, 0 } },
 864+    { "&copysr;", { 8471, 0 } },
 865+    { "&crarr;", { 8629, 0 } },
 866+    { "&cross;", { 10007, 0 } },
 867+    { "&cscr;", { 119992, 0 } },
 868+    { "&csub;", { 10959, 0 } },
 869+    { "&csube;", { 10961, 0 } },
 870+    { "&csup;", { 10960, 0 } },
 871+    { "&csupe;", { 10962, 0 } },
 872+    { "&ctdot;", { 8943, 0 } },
 873+    { "&cudarrl;", { 10552, 0 } },
 874+    { "&cudarrr;", { 10549, 0 } },
 875+    { "&cuepr;", { 8926, 0 } },
 876+    { "&cuesc;", { 8927, 0 } },
 877+    { "&cularr;", { 8630, 0 } },
 878+    { "&cularrp;", { 10557, 0 } },
 879+    { "&cup;", { 8746, 0 } },
 880+    { "&cupbrcap;", { 10824, 0 } },
 881+    { "&cupcap;", { 10822, 0 } },
 882+    { "&cupcup;", { 10826, 0 } },
 883+    { "&cupdot;", { 8845, 0 } },
 884+    { "&cupor;", { 10821, 0 } },
 885+    { "&cups;", { 8746, 65024 } },
 886+    { "&curarr;", { 8631, 0 } },
 887+    { "&curarrm;", { 10556, 0 } },
 888+    { "&curlyeqprec;", { 8926, 0 } },
 889+    { "&curlyeqsucc;", { 8927, 0 } },
 890+    { "&curlyvee;", { 8910, 0 } },
 891+    { "&curlywedge;", { 8911, 0 } },
 892+    { "&curren;", { 164, 0 } },
 893+    { "&curvearrowleft;", { 8630, 0 } },
 894+    { "&curvearrowright;", { 8631, 0 } },
 895+    { "&cuvee;", { 8910, 0 } },
 896+    { "&cuwed;", { 8911, 0 } },
 897+    { "&cwconint;", { 8754, 0 } },
 898+    { "&cwint;", { 8753, 0 } },
 899+    { "&cylcty;", { 9005, 0 } },
 900+    { "&dArr;", { 8659, 0 } },
 901+    { "&dHar;", { 10597, 0 } },
 902+    { "&dagger;", { 8224, 0 } },
 903+    { "&daleth;", { 8504, 0 } },
 904+    { "&darr;", { 8595, 0 } },
 905+    { "&dash;", { 8208, 0 } },
 906+    { "&dashv;", { 8867, 0 } },
 907+    { "&dbkarow;", { 10511, 0 } },
 908+    { "&dblac;", { 733, 0 } },
 909+    { "&dcaron;", { 271, 0 } },
 910+    { "&dcy;", { 1076, 0 } },
 911+    { "&dd;", { 8518, 0 } },
 912+    { "&ddagger;", { 8225, 0 } },
 913+    { "&ddarr;", { 8650, 0 } },
 914+    { "&ddotseq;", { 10871, 0 } },
 915+    { "&deg;", { 176, 0 } },
 916+    { "&delta;", { 948, 0 } },
 917+    { "&demptyv;", { 10673, 0 } },
 918+    { "&dfisht;", { 10623, 0 } },
 919+    { "&dfr;", { 120097, 0 } },
 920+    { "&dharl;", { 8643, 0 } },
 921+    { "&dharr;", { 8642, 0 } },
 922+    { "&diam;", { 8900, 0 } },
 923+    { "&diamond;", { 8900, 0 } },
 924+    { "&diamondsuit;", { 9830, 0 } },
 925+    { "&diams;", { 9830, 0 } },
 926+    { "&die;", { 168, 0 } },
 927+    { "&digamma;", { 989, 0 } },
 928+    { "&disin;", { 8946, 0 } },
 929+    { "&div;", { 247, 0 } },
 930+    { "&divide;", { 247, 0 } },
 931+    { "&divideontimes;", { 8903, 0 } },
 932+    { "&divonx;", { 8903, 0 } },
 933+    { "&djcy;", { 1106, 0 } },
 934+    { "&dlcorn;", { 8990, 0 } },
 935+    { "&dlcrop;", { 8973, 0 } },
 936+    { "&dollar;", { 36, 0 } },
 937+    { "&dopf;", { 120149, 0 } },
 938+    { "&dot;", { 729, 0 } },
 939+    { "&doteq;", { 8784, 0 } },
 940+    { "&doteqdot;", { 8785, 0 } },
 941+    { "&dotminus;", { 8760, 0 } },
 942+    { "&dotplus;", { 8724, 0 } },
 943+    { "&dotsquare;", { 8865, 0 } },
 944+    { "&doublebarwedge;", { 8966, 0 } },
 945+    { "&downarrow;", { 8595, 0 } },
 946+    { "&downdownarrows;", { 8650, 0 } },
 947+    { "&downharpoonleft;", { 8643, 0 } },
 948+    { "&downharpoonright;", { 8642, 0 } },
 949+    { "&drbkarow;", { 10512, 0 } },
 950+    { "&drcorn;", { 8991, 0 } },
 951+    { "&drcrop;", { 8972, 0 } },
 952+    { "&dscr;", { 119993, 0 } },
 953+    { "&dscy;", { 1109, 0 } },
 954+    { "&dsol;", { 10742, 0 } },
 955+    { "&dstrok;", { 273, 0 } },
 956+    { "&dtdot;", { 8945, 0 } },
 957+    { "&dtri;", { 9663, 0 } },
 958+    { "&dtrif;", { 9662, 0 } },
 959+    { "&duarr;", { 8693, 0 } },
 960+    { "&duhar;", { 10607, 0 } },
 961+    { "&dwangle;", { 10662, 0 } },
 962+    { "&dzcy;", { 1119, 0 } },
 963+    { "&dzigrarr;", { 10239, 0 } },
 964+    { "&eDDot;", { 10871, 0 } },
 965+    { "&eDot;", { 8785, 0 } },
 966+    { "&eacute;", { 233, 0 } },
 967+    { "&easter;", { 10862, 0 } },
 968+    { "&ecaron;", { 283, 0 } },
 969+    { "&ecir;", { 8790, 0 } },
 970+    { "&ecirc;", { 234, 0 } },
 971+    { "&ecolon;", { 8789, 0 } },
 972+    { "&ecy;", { 1101, 0 } },
 973+    { "&edot;", { 279, 0 } },
 974+    { "&ee;", { 8519, 0 } },
 975+    { "&efDot;", { 8786, 0 } },
 976+    { "&efr;", { 120098, 0 } },
 977+    { "&eg;", { 10906, 0 } },
 978+    { "&egrave;", { 232, 0 } },
 979+    { "&egs;", { 10902, 0 } },
 980+    { "&egsdot;", { 10904, 0 } },
 981+    { "&el;", { 10905, 0 } },
 982+    { "&elinters;", { 9191, 0 } },
 983+    { "&ell;", { 8467, 0 } },
 984+    { "&els;", { 10901, 0 } },
 985+    { "&elsdot;", { 10903, 0 } },
 986+    { "&emacr;", { 275, 0 } },
 987+    { "&empty;", { 8709, 0 } },
 988+    { "&emptyset;", { 8709, 0 } },
 989+    { "&emptyv;", { 8709, 0 } },
 990+    { "&emsp13;", { 8196, 0 } },
 991+    { "&emsp14;", { 8197, 0 } },
 992+    { "&emsp;", { 8195, 0 } },
 993+    { "&eng;", { 331, 0 } },
 994+    { "&ensp;", { 8194, 0 } },
 995+    { "&eogon;", { 281, 0 } },
 996+    { "&eopf;", { 120150, 0 } },
 997+    { "&epar;", { 8917, 0 } },
 998+    { "&eparsl;", { 10723, 0 } },
 999+    { "&eplus;", { 10865, 0 } },
1000+    { "&epsi;", { 949, 0 } },
1001+    { "&epsilon;", { 949, 0 } },
1002+    { "&epsiv;", { 1013, 0 } },
1003+    { "&eqcirc;", { 8790, 0 } },
1004+    { "&eqcolon;", { 8789, 0 } },
1005+    { "&eqsim;", { 8770, 0 } },
1006+    { "&eqslantgtr;", { 10902, 0 } },
1007+    { "&eqslantless;", { 10901, 0 } },
1008+    { "&equals;", { 61, 0 } },
1009+    { "&equest;", { 8799, 0 } },
1010+    { "&equiv;", { 8801, 0 } },
1011+    { "&equivDD;", { 10872, 0 } },
1012+    { "&eqvparsl;", { 10725, 0 } },
1013+    { "&erDot;", { 8787, 0 } },
1014+    { "&erarr;", { 10609, 0 } },
1015+    { "&escr;", { 8495, 0 } },
1016+    { "&esdot;", { 8784, 0 } },
1017+    { "&esim;", { 8770, 0 } },
1018+    { "&eta;", { 951, 0 } },
1019+    { "&eth;", { 240, 0 } },
1020+    { "&euml;", { 235, 0 } },
1021+    { "&euro;", { 8364, 0 } },
1022+    { "&excl;", { 33, 0 } },
1023+    { "&exist;", { 8707, 0 } },
1024+    { "&expectation;", { 8496, 0 } },
1025+    { "&exponentiale;", { 8519, 0 } },
1026+    { "&fallingdotseq;", { 8786, 0 } },
1027+    { "&fcy;", { 1092, 0 } },
1028+    { "&female;", { 9792, 0 } },
1029+    { "&ffilig;", { 64259, 0 } },
1030+    { "&fflig;", { 64256, 0 } },
1031+    { "&ffllig;", { 64260, 0 } },
1032+    { "&ffr;", { 120099, 0 } },
1033+    { "&filig;", { 64257, 0 } },
1034+    { "&fjlig;", { 102, 106 } },
1035+    { "&flat;", { 9837, 0 } },
1036+    { "&fllig;", { 64258, 0 } },
1037+    { "&fltns;", { 9649, 0 } },
1038+    { "&fnof;", { 402, 0 } },
1039+    { "&fopf;", { 120151, 0 } },
1040+    { "&forall;", { 8704, 0 } },
1041+    { "&fork;", { 8916, 0 } },
1042+    { "&forkv;", { 10969, 0 } },
1043+    { "&fpartint;", { 10765, 0 } },
1044+    { "&frac12", { 189, 0 } },
1045+    { "&frac12;", { 189, 0 } },
1046+    { "&frac13;", { 8531, 0 } },
1047+    { "&frac14", { 188, 0 } },
1048+    { "&frac14;", { 188, 0 } },
1049+    { "&frac15;", { 8533, 0 } },
1050+    { "&frac16;", { 8537, 0 } },
1051+    { "&frac18;", { 8539, 0 } },
1052+    { "&frac23;", { 8532, 0 } },
1053+    { "&frac25;", { 8534, 0 } },
1054+    { "&frac34", { 190, 0 } },
1055+    { "&frac34;", { 190, 0 } },
1056+    { "&frac35;", { 8535, 0 } },
1057+    { "&frac38;", { 8540, 0 } },
1058+    { "&frac45;", { 8536, 0 } },
1059+    { "&frac56;", { 8538, 0 } },
1060+    { "&frac58;", { 8541, 0 } },
1061+    { "&frac78;", { 8542, 0 } },
1062+    { "&frasl;", { 8260, 0 } },
1063+    { "&frown;", { 8994, 0 } },
1064+    { "&fscr;", { 119995, 0 } },
1065+    { "&gE;", { 8807, 0 } },
1066+    { "&gEl;", { 10892, 0 } },
1067+    { "&gacute;", { 501, 0 } },
1068+    { "&gamma;", { 947, 0 } },
1069+    { "&gammad;", { 989, 0 } },
1070+    { "&gap;", { 10886, 0 } },
1071+    { "&gbreve;", { 287, 0 } },
1072+    { "&gcirc;", { 285, 0 } },
1073+    { "&gcy;", { 1075, 0 } },
1074+    { "&gdot;", { 289, 0 } },
1075+    { "&ge;", { 8805, 0 } },
1076+    { "&gel;", { 8923, 0 } },
1077+    { "&geq;", { 8805, 0 } },
1078+    { "&geqq;", { 8807, 0 } },
1079+    { "&geqslant;", { 10878, 0 } },
1080+    { "&ges;", { 10878, 0 } },
1081+    { "&gescc;", { 10921, 0 } },
1082+    { "&gesdot;", { 10880, 0 } },
1083+    { "&gesdoto;", { 10882, 0 } },
1084+    { "&gesdotol;", { 10884, 0 } },
1085+    { "&gesl;", { 8923, 65024 } },
1086+    { "&gesles;", { 10900, 0 } },
1087+    { "&gfr;", { 120100, 0 } },
1088+    { "&gg;", { 8811, 0 } },
1089+    { "&ggg;", { 8921, 0 } },
1090+    { "&gimel;", { 8503, 0 } },
1091+    { "&gjcy;", { 1107, 0 } },
1092+    { "&gl;", { 8823, 0 } },
1093+    { "&glE;", { 10898, 0 } },
1094+    { "&gla;", { 10917, 0 } },
1095+    { "&glj;", { 10916, 0 } },
1096+    { "&gnE;", { 8809, 0 } },
1097+    { "&gnap;", { 10890, 0 } },
1098+    { "&gnapprox;", { 10890, 0 } },
1099+    { "&gne;", { 10888, 0 } },
1100+    { "&gneq;", { 10888, 0 } },
1101+    { "&gneqq;", { 8809, 0 } },
1102+    { "&gnsim;", { 8935, 0 } },
1103+    { "&gopf;", { 120152, 0 } },
1104+    { "&grave;", { 96, 0 } },
1105+    { "&gscr;", { 8458, 0 } },
1106+    { "&gsim;", { 8819, 0 } },
1107+    { "&gsime;", { 10894, 0 } },
1108+    { "&gsiml;", { 10896, 0 } },
1109+    { "&gt;", { 62, 0 } },
1110+    { "&gtcc;", { 10919, 0 } },
1111+    { "&gtcir;", { 10874, 0 } },
1112+    { "&gtdot;", { 8919, 0 } },
1113+    { "&gtlPar;", { 10645, 0 } },
1114+    { "&gtquest;", { 10876, 0 } },
1115+    { "&gtrapprox;", { 10886, 0 } },
1116+    { "&gtrarr;", { 10616, 0 } },
1117+    { "&gtrdot;", { 8919, 0 } },
1118+    { "&gtreqless;", { 8923, 0 } },
1119+    { "&gtreqqless;", { 10892, 0 } },
1120+    { "&gtrless;", { 8823, 0 } },
1121+    { "&gtrsim;", { 8819, 0 } },
1122+    { "&gvertneqq;", { 8809, 65024 } },
1123+    { "&gvnE;", { 8809, 65024 } },
1124+    { "&hArr;", { 8660, 0 } },
1125+    { "&hairsp;", { 8202, 0 } },
1126+    { "&half;", { 189, 0 } },
1127+    { "&hamilt;", { 8459, 0 } },
1128+    { "&hardcy;", { 1098, 0 } },
1129+    { "&harr;", { 8596, 0 } },
1130+    { "&harrcir;", { 10568, 0 } },
1131+    { "&harrw;", { 8621, 0 } },
1132+    { "&hbar;", { 8463, 0 } },
1133+    { "&hcirc;", { 293, 0 } },
1134+    { "&hearts;", { 9829, 0 } },
1135+    { "&heartsuit;", { 9829, 0 } },
1136+    { "&hellip;", { 8230, 0 } },
1137+    { "&hercon;", { 8889, 0 } },
1138+    { "&hfr;", { 120101, 0 } },
1139+    { "&hksearow;", { 10533, 0 } },
1140+    { "&hkswarow;", { 10534, 0 } },
1141+    { "&hoarr;", { 8703, 0 } },
1142+    { "&homtht;", { 8763, 0 } },
1143+    { "&hookleftarrow;", { 8617, 0 } },
1144+    { "&hookrightarrow;", { 8618, 0 } },
1145+    { "&hopf;", { 120153, 0 } },
1146+    { "&horbar;", { 8213, 0 } },
1147+    { "&hscr;", { 119997, 0 } },
1148+    { "&hslash;", { 8463, 0 } },
1149+    { "&hstrok;", { 295, 0 } },
1150+    { "&hybull;", { 8259, 0 } },
1151+    { "&hyphen;", { 8208, 0 } },
1152+    { "&iacute;", { 237, 0 } },
1153+    { "&ic;", { 8291, 0 } },
1154+    { "&icirc;", { 238, 0 } },
1155+    { "&icy;", { 1080, 0 } },
1156+    { "&iecy;", { 1077, 0 } },
1157+    { "&iexcl;", { 161, 0 } },
1158+    { "&iff;", { 8660, 0 } },
1159+    { "&ifr;", { 120102, 0 } },
1160+    { "&igrave;", { 236, 0 } },
1161+    { "&ii;", { 8520, 0 } },
1162+    { "&iiiint;", { 10764, 0 } },
1163+    { "&iiint;", { 8749, 0 } },
1164+    { "&iinfin;", { 10716, 0 } },
1165+    { "&iiota;", { 8489, 0 } },
1166+    { "&ijlig;", { 307, 0 } },
1167+    { "&imacr;", { 299, 0 } },
1168+    { "&image;", { 8465, 0 } },
1169+    { "&imagline;", { 8464, 0 } },
1170+    { "&imagpart;", { 8465, 0 } },
1171+    { "&imath;", { 305, 0 } },
1172+    { "&imof;", { 8887, 0 } },
1173+    { "&imped;", { 437, 0 } },
1174+    { "&in;", { 8712, 0 } },
1175+    { "&incare;", { 8453, 0 } },
1176+    { "&infin;", { 8734, 0 } },
1177+    { "&infintie;", { 10717, 0 } },
1178+    { "&inodot;", { 305, 0 } },
1179+    { "&int;", { 8747, 0 } },
1180+    { "&intcal;", { 8890, 0 } },
1181+    { "&integers;", { 8484, 0 } },
1182+    { "&intercal;", { 8890, 0 } },
1183+    { "&intlarhk;", { 10775, 0 } },
1184+    { "&intprod;", { 10812, 0 } },
1185+    { "&iocy;", { 1105, 0 } },
1186+    { "&iogon;", { 303, 0 } },
1187+    { "&iopf;", { 120154, 0 } },
1188+    { "&iota;", { 953, 0 } },
1189+    { "&iprod;", { 10812, 0 } },
1190+    { "&iquest;", { 191, 0 } },
1191+    { "&iscr;", { 119998, 0 } },
1192+    { "&isin;", { 8712, 0 } },
1193+    { "&isinE;", { 8953, 0 } },
1194+    { "&isindot;", { 8949, 0 } },
1195+    { "&isins;", { 8948, 0 } },
1196+    { "&isinsv;", { 8947, 0 } },
1197+    { "&isinv;", { 8712, 0 } },
1198+    { "&it;", { 8290, 0 } },
1199+    { "&itilde;", { 297, 0 } },
1200+    { "&iukcy;", { 1110, 0 } },
1201+    { "&iuml;", { 239, 0 } },
1202+    { "&jcirc;", { 309, 0 } },
1203+    { "&jcy;", { 1081, 0 } },
1204+    { "&jfr;", { 120103, 0 } },
1205+    { "&jmath;", { 567, 0 } },
1206+    { "&jopf;", { 120155, 0 } },
1207+    { "&jscr;", { 119999, 0 } },
1208+    { "&jsercy;", { 1112, 0 } },
1209+    { "&jukcy;", { 1108, 0 } },
1210+    { "&kappa;", { 954, 0 } },
1211+    { "&kappav;", { 1008, 0 } },
1212+    { "&kcedil;", { 311, 0 } },
1213+    { "&kcy;", { 1082, 0 } },
1214+    { "&kfr;", { 120104, 0 } },
1215+    { "&kgreen;", { 312, 0 } },
1216+    { "&khcy;", { 1093, 0 } },
1217+    { "&kjcy;", { 1116, 0 } },
1218+    { "&kopf;", { 120156, 0 } },
1219+    { "&kscr;", { 120000, 0 } },
1220+    { "&lAarr;", { 8666, 0 } },
1221+    { "&lArr;", { 8656, 0 } },
1222+    { "&lAtail;", { 10523, 0 } },
1223+    { "&lBarr;", { 10510, 0 } },
1224+    { "&lE;", { 8806, 0 } },
1225+    { "&lEg;", { 10891, 0 } },
1226+    { "&lHar;", { 10594, 0 } },
1227+    { "&lacute;", { 314, 0 } },
1228+    { "&laemptyv;", { 10676, 0 } },
1229+    { "&lagran;", { 8466, 0 } },
1230+    { "&lambda;", { 955, 0 } },
1231+    { "&lang;", { 10216, 0 } },
1232+    { "&langd;", { 10641, 0 } },
1233+    { "&langle;", { 10216, 0 } },
1234+    { "&lap;", { 10885, 0 } },
1235+    { "&laquo;", { 171, 0 } },
1236+    { "&larr;", { 8592, 0 } },
1237+    { "&larrb;", { 8676, 0 } },
1238+    { "&larrbfs;", { 10527, 0 } },
1239+    { "&larrfs;", { 10525, 0 } },
1240+    { "&larrhk;", { 8617, 0 } },
1241+    { "&larrlp;", { 8619, 0 } },
1242+    { "&larrpl;", { 10553, 0 } },
1243+    { "&larrsim;", { 10611, 0 } },
1244+    { "&larrtl;", { 8610, 0 } },
1245+    { "&lat;", { 10923, 0 } },
1246+    { "&latail;", { 10521, 0 } },
1247+    { "&late;", { 10925, 0 } },
1248+    { "&lates;", { 10925, 65024 } },
1249+    { "&lbarr;", { 10508, 0 } },
1250+    { "&lbbrk;", { 10098, 0 } },
1251+    { "&lbrace;", { 123, 0 } },
1252+    { "&lbrack;", { 91, 0 } },
1253+    { "&lbrke;", { 10635, 0 } },
1254+    { "&lbrksld;", { 10639, 0 } },
1255+    { "&lbrkslu;", { 10637, 0 } },
1256+    { "&lcaron;", { 318, 0 } },
1257+    { "&lcedil;", { 316, 0 } },
1258+    { "&lceil;", { 8968, 0 } },
1259+    { "&lcub;", { 123, 0 } },
1260+    { "&lcy;", { 1083, 0 } },
1261+    { "&ldca;", { 10550, 0 } },
1262+    { "&ldquo;", { 8220, 0 } },
1263+    { "&ldquor;", { 8222, 0 } },
1264+    { "&ldrdhar;", { 10599, 0 } },
1265+    { "&ldrushar;", { 10571, 0 } },
1266+    { "&ldsh;", { 8626, 0 } },
1267+    { "&le;", { 8804, 0 } },
1268+    { "&leftarrow;", { 8592, 0 } },
1269+    { "&leftarrowtail;", { 8610, 0 } },
1270+    { "&leftharpoondown;", { 8637, 0 } },
1271+    { "&leftharpoonup;", { 8636, 0 } },
1272+    { "&leftleftarrows;", { 8647, 0 } },
1273+    { "&leftrightarrow;", { 8596, 0 } },
1274+    { "&leftrightarrows;", { 8646, 0 } },
1275+    { "&leftrightharpoons;", { 8651, 0 } },
1276+    { "&leftrightsquigarrow;", { 8621, 0 } },
1277+    { "&leftthreetimes;", { 8907, 0 } },
1278+    { "&leg;", { 8922, 0 } },
1279+    { "&leq;", { 8804, 0 } },
1280+    { "&leqq;", { 8806, 0 } },
1281+    { "&leqslant;", { 10877, 0 } },
1282+    { "&les;", { 10877, 0 } },
1283+    { "&lescc;", { 10920, 0 } },
1284+    { "&lesdot;", { 10879, 0 } },
1285+    { "&lesdoto;", { 10881, 0 } },
1286+    { "&lesdotor;", { 10883, 0 } },
1287+    { "&lesg;", { 8922, 65024 } },
1288+    { "&lesges;", { 10899, 0 } },
1289+    { "&lessapprox;", { 10885, 0 } },
1290+    { "&lessdot;", { 8918, 0 } },
1291+    { "&lesseqgtr;", { 8922, 0 } },
1292+    { "&lesseqqgtr;", { 10891, 0 } },
1293+    { "&lessgtr;", { 8822, 0 } },
1294+    { "&lesssim;", { 8818, 0 } },
1295+    { "&lfisht;", { 10620, 0 } },
1296+    { "&lfloor;", { 8970, 0 } },
1297+    { "&lfr;", { 120105, 0 } },
1298+    { "&lg;", { 8822, 0 } },
1299+    { "&lgE;", { 10897, 0 } },
1300+    { "&lhard;", { 8637, 0 } },
1301+    { "&lharu;", { 8636, 0 } },
1302+    { "&lharul;", { 10602, 0 } },
1303+    { "&lhblk;", { 9604, 0 } },
1304+    { "&ljcy;", { 1113, 0 } },
1305+    { "&ll;", { 8810, 0 } },
1306+    { "&llarr;", { 8647, 0 } },
1307+    { "&llcorner;", { 8990, 0 } },
1308+    { "&llhard;", { 10603, 0 } },
1309+    { "&lltri;", { 9722, 0 } },
1310+    { "&lmidot;", { 320, 0 } },
1311+    { "&lmoust;", { 9136, 0 } },
1312+    { "&lmoustache;", { 9136, 0 } },
1313+    { "&lnE;", { 8808, 0 } },
1314+    { "&lnap;", { 10889, 0 } },
1315+    { "&lnapprox;", { 10889, 0 } },
1316+    { "&lne;", { 10887, 0 } },
1317+    { "&lneq;", { 10887, 0 } },
1318+    { "&lneqq;", { 8808, 0 } },
1319+    { "&lnsim;", { 8934, 0 } },
1320+    { "&loang;", { 10220, 0 } },
1321+    { "&loarr;", { 8701, 0 } },
1322+    { "&lobrk;", { 10214, 0 } },
1323+    { "&longleftarrow;", { 10229, 0 } },
1324+    { "&longleftrightarrow;", { 10231, 0 } },
1325+    { "&longmapsto;", { 10236, 0 } },
1326+    { "&longrightarrow;", { 10230, 0 } },
1327+    { "&looparrowleft;", { 8619, 0 } },
1328+    { "&looparrowright;", { 8620, 0 } },
1329+    { "&lopar;", { 10629, 0 } },
1330+    { "&lopf;", { 120157, 0 } },
1331+    { "&loplus;", { 10797, 0 } },
1332+    { "&lotimes;", { 10804, 0 } },
1333+    { "&lowast;", { 8727, 0 } },
1334+    { "&lowbar;", { 95, 0 } },
1335+    { "&loz;", { 9674, 0 } },
1336+    { "&lozenge;", { 9674, 0 } },
1337+    { "&lozf;", { 10731, 0 } },
1338+    { "&lpar;", { 40, 0 } },
1339+    { "&lparlt;", { 10643, 0 } },
1340+    { "&lrarr;", { 8646, 0 } },
1341+    { "&lrcorner;", { 8991, 0 } },
1342+    { "&lrhar;", { 8651, 0 } },
1343+    { "&lrhard;", { 10605, 0 } },
1344+    { "&lrm;", { 8206, 0 } },
1345+    { "&lrtri;", { 8895, 0 } },
1346+    { "&lsaquo;", { 8249, 0 } },
1347+    { "&lscr;", { 120001, 0 } },
1348+    { "&lsh;", { 8624, 0 } },
1349+    { "&lsim;", { 8818, 0 } },
1350+    { "&lsime;", { 10893, 0 } },
1351+    { "&lsimg;", { 10895, 0 } },
1352+    { "&lsqb;", { 91, 0 } },
1353+    { "&lsquo;", { 8216, 0 } },
1354+    { "&lsquor;", { 8218, 0 } },
1355+    { "&lstrok;", { 322, 0 } },
1356+    { "&lt;", { 60, 0 } },
1357+    { "&ltcc;", { 10918, 0 } },
1358+    { "&ltcir;", { 10873, 0 } },
1359+    { "&ltdot;", { 8918, 0 } },
1360+    { "&lthree;", { 8907, 0 } },
1361+    { "&ltimes;", { 8905, 0 } },
1362+    { "&ltlarr;", { 10614, 0 } },
1363+    { "&ltquest;", { 10875, 0 } },
1364+    { "&ltrPar;", { 10646, 0 } },
1365+    { "&ltri;", { 9667, 0 } },
1366+    { "&ltrie;", { 8884, 0 } },
1367+    { "&ltrif;", { 9666, 0 } },
1368+    { "&lurdshar;", { 10570, 0 } },
1369+    { "&luruhar;", { 10598, 0 } },
1370+    { "&lvertneqq;", { 8808, 65024 } },
1371+    { "&lvnE;", { 8808, 65024 } },
1372+    { "&mDDot;", { 8762, 0 } },
1373+    { "&macr;", { 175, 0 } },
1374+    { "&male;", { 9794, 0 } },
1375+    { "&malt;", { 10016, 0 } },
1376+    { "&maltese;", { 10016, 0 } },
1377+    { "&map;", { 8614, 0 } },
1378+    { "&mapsto;", { 8614, 0 } },
1379+    { "&mapstodown;", { 8615, 0 } },
1380+    { "&mapstoleft;", { 8612, 0 } },
1381+    { "&mapstoup;", { 8613, 0 } },
1382+    { "&marker;", { 9646, 0 } },
1383+    { "&mcomma;", { 10793, 0 } },
1384+    { "&mcy;", { 1084, 0 } },
1385+    { "&mdash;", { 8212, 0 } },
1386+    { "&measuredangle;", { 8737, 0 } },
1387+    { "&mfr;", { 120106, 0 } },
1388+    { "&mho;", { 8487, 0 } },
1389+    { "&micro;", { 181, 0 } },
1390+    { "&mid;", { 8739, 0 } },
1391+    { "&midast;", { 42, 0 } },
1392+    { "&midcir;", { 10992, 0 } },
1393+    { "&middot;", { 183, 0 } },
1394+    { "&minus;", { 8722, 0 } },
1395+    { "&minusb;", { 8863, 0 } },
1396+    { "&minusd;", { 8760, 0 } },
1397+    { "&minusdu;", { 10794, 0 } },
1398+    { "&mlcp;", { 10971, 0 } },
1399+    { "&mldr;", { 8230, 0 } },
1400+    { "&mnplus;", { 8723, 0 } },
1401+    { "&models;", { 8871, 0 } },
1402+    { "&mopf;", { 120158, 0 } },
1403+    { "&mp;", { 8723, 0 } },
1404+    { "&mscr;", { 120002, 0 } },
1405+    { "&mstpos;", { 8766, 0 } },
1406+    { "&mu;", { 956, 0 } },
1407+    { "&multimap;", { 8888, 0 } },
1408+    { "&mumap;", { 8888, 0 } },
1409+    { "&nGg;", { 8921, 824 } },
1410+    { "&nGt;", { 8811, 8402 } },
1411+    { "&nGtv;", { 8811, 824 } },
1412+    { "&nLeftarrow;", { 8653, 0 } },
1413+    { "&nLeftrightarrow;", { 8654, 0 } },
1414+    { "&nLl;", { 8920, 824 } },
1415+    { "&nLt;", { 8810, 8402 } },
1416+    { "&nLtv;", { 8810, 824 } },
1417+    { "&nRightarrow;", { 8655, 0 } },
1418+    { "&nVDash;", { 8879, 0 } },
1419+    { "&nVdash;", { 8878, 0 } },
1420+    { "&nabla;", { 8711, 0 } },
1421+    { "&nacute;", { 324, 0 } },
1422+    { "&nang;", { 8736, 8402 } },
1423+    { "&nap;", { 8777, 0 } },
1424+    { "&napE;", { 10864, 824 } },
1425+    { "&napid;", { 8779, 824 } },
1426+    { "&napos;", { 329, 0 } },
1427+    { "&napprox;", { 8777, 0 } },
1428+    { "&natur;", { 9838, 0 } },
1429+    { "&natural;", { 9838, 0 } },
1430+    { "&naturals;", { 8469, 0 } },
1431+    { "&nbsp;", { 160, 0 } },
1432+    { "&nbump;", { 8782, 824 } },
1433+    { "&nbumpe;", { 8783, 824 } },
1434+    { "&ncap;", { 10819, 0 } },
1435+    { "&ncaron;", { 328, 0 } },
1436+    { "&ncedil;", { 326, 0 } },
1437+    { "&ncong;", { 8775, 0 } },
1438+    { "&ncongdot;", { 10861, 824 } },
1439+    { "&ncup;", { 10818, 0 } },
1440+    { "&ncy;", { 1085, 0 } },
1441+    { "&ndash;", { 8211, 0 } },
1442+    { "&ne;", { 8800, 0 } },
1443+    { "&neArr;", { 8663, 0 } },
1444+    { "&nearhk;", { 10532, 0 } },
1445+    { "&nearr;", { 8599, 0 } },
1446+    { "&nearrow;", { 8599, 0 } },
1447+    { "&nedot;", { 8784, 824 } },
1448+    { "&nequiv;", { 8802, 0 } },
1449+    { "&nesear;", { 10536, 0 } },
1450+    { "&nesim;", { 8770, 824 } },
1451+    { "&nexist;", { 8708, 0 } },
1452+    { "&nexists;", { 8708, 0 } },
1453+    { "&nfr;", { 120107, 0 } },
1454+    { "&ngE;", { 8807, 824 } },
1455+    { "&nge;", { 8817, 0 } },
1456+    { "&ngeq;", { 8817, 0 } },
1457+    { "&ngeqq;", { 8807, 824 } },
1458+    { "&ngeqslant;", { 10878, 824 } },
1459+    { "&nges;", { 10878, 824 } },
1460+    { "&ngsim;", { 8821, 0 } },
1461+    { "&ngt;", { 8815, 0 } },
1462+    { "&ngtr;", { 8815, 0 } },
1463+    { "&nhArr;", { 8654, 0 } },
1464+    { "&nharr;", { 8622, 0 } },
1465+    { "&nhpar;", { 10994, 0 } },
1466+    { "&ni;", { 8715, 0 } },
1467+    { "&nis;", { 8956, 0 } },
1468+    { "&nisd;", { 8954, 0 } },
1469+    { "&niv;", { 8715, 0 } },
1470+    { "&njcy;", { 1114, 0 } },
1471+    { "&nlArr;", { 8653, 0 } },
1472+    { "&nlE;", { 8806, 824 } },
1473+    { "&nlarr;", { 8602, 0 } },
1474+    { "&nldr;", { 8229, 0 } },
1475+    { "&nle;", { 8816, 0 } },
1476+    { "&nleftarrow;", { 8602, 0 } },
1477+    { "&nleftrightarrow;", { 8622, 0 } },
1478+    { "&nleq;", { 8816, 0 } },
1479+    { "&nleqq;", { 8806, 824 } },
1480+    { "&nleqslant;", { 10877, 824 } },
1481+    { "&nles;", { 10877, 824 } },
1482+    { "&nless;", { 8814, 0 } },
1483+    { "&nlsim;", { 8820, 0 } },
1484+    { "&nlt;", { 8814, 0 } },
1485+    { "&nltri;", { 8938, 0 } },
1486+    { "&nltrie;", { 8940, 0 } },
1487+    { "&nmid;", { 8740, 0 } },
1488+    { "&nopf;", { 120159, 0 } },
1489+    { "&not;", { 172, 0 } },
1490+    { "&notin;", { 8713, 0 } },
1491+    { "&notinE;", { 8953, 824 } },
1492+    { "&notindot;", { 8949, 824 } },
1493+    { "&notinva;", { 8713, 0 } },
1494+    { "&notinvb;", { 8951, 0 } },
1495+    { "&notinvc;", { 8950, 0 } },
1496+    { "&notni;", { 8716, 0 } },
1497+    { "&notniva;", { 8716, 0 } },
1498+    { "&notnivb;", { 8958, 0 } },
1499+    { "&notnivc;", { 8957, 0 } },
1500+    { "&npar;", { 8742, 0 } },
1501+    { "&nparallel;", { 8742, 0 } },
1502+    { "&nparsl;", { 11005, 8421 } },
1503+    { "&npart;", { 8706, 824 } },
1504+    { "&npolint;", { 10772, 0 } },
1505+    { "&npr;", { 8832, 0 } },
1506+    { "&nprcue;", { 8928, 0 } },
1507+    { "&npre;", { 10927, 824 } },
1508+    { "&nprec;", { 8832, 0 } },
1509+    { "&npreceq;", { 10927, 824 } },
1510+    { "&nrArr;", { 8655, 0 } },
1511+    { "&nrarr;", { 8603, 0 } },
1512+    { "&nrarrc;", { 10547, 824 } },
1513+    { "&nrarrw;", { 8605, 824 } },
1514+    { "&nrightarrow;", { 8603, 0 } },
1515+    { "&nrtri;", { 8939, 0 } },
1516+    { "&nrtrie;", { 8941, 0 } },
1517+    { "&nsc;", { 8833, 0 } },
1518+    { "&nsccue;", { 8929, 0 } },
1519+    { "&nsce;", { 10928, 824 } },
1520+    { "&nscr;", { 120003, 0 } },
1521+    { "&nshortmid;", { 8740, 0 } },
1522+    { "&nshortparallel;", { 8742, 0 } },
1523+    { "&nsim;", { 8769, 0 } },
1524+    { "&nsime;", { 8772, 0 } },
1525+    { "&nsimeq;", { 8772, 0 } },
1526+    { "&nsmid;", { 8740, 0 } },
1527+    { "&nspar;", { 8742, 0 } },
1528+    { "&nsqsube;", { 8930, 0 } },
1529+    { "&nsqsupe;", { 8931, 0 } },
1530+    { "&nsub;", { 8836, 0 } },
1531+    { "&nsubE;", { 10949, 824 } },
1532+    { "&nsube;", { 8840, 0 } },
1533+    { "&nsubset;", { 8834, 8402 } },
1534+    { "&nsubseteq;", { 8840, 0 } },
1535+    { "&nsubseteqq;", { 10949, 824 } },
1536+    { "&nsucc;", { 8833, 0 } },
1537+    { "&nsucceq;", { 10928, 824 } },
1538+    { "&nsup;", { 8837, 0 } },
1539+    { "&nsupE;", { 10950, 824 } },
1540+    { "&nsupe;", { 8841, 0 } },
1541+    { "&nsupset;", { 8835, 8402 } },
1542+    { "&nsupseteq;", { 8841, 0 } },
1543+    { "&nsupseteqq;", { 10950, 824 } },
1544+    { "&ntgl;", { 8825, 0 } },
1545+    { "&ntilde;", { 241, 0 } },
1546+    { "&ntlg;", { 8824, 0 } },
1547+    { "&ntriangleleft;", { 8938, 0 } },
1548+    { "&ntrianglelefteq;", { 8940, 0 } },
1549+    { "&ntriangleright;", { 8939, 0 } },
1550+    { "&ntrianglerighteq;", { 8941, 0 } },
1551+    { "&nu;", { 957, 0 } },
1552+    { "&num;", { 35, 0 } },
1553+    { "&numero;", { 8470, 0 } },
1554+    { "&numsp;", { 8199, 0 } },
1555+    { "&nvDash;", { 8877, 0 } },
1556+    { "&nvHarr;", { 10500, 0 } },
1557+    { "&nvap;", { 8781, 8402 } },
1558+    { "&nvdash;", { 8876, 0 } },
1559+    { "&nvge;", { 8805, 8402 } },
1560+    { "&nvgt;", { 62, 8402 } },
1561+    { "&nvinfin;", { 10718, 0 } },
1562+    { "&nvlArr;", { 10498, 0 } },
1563+    { "&nvle;", { 8804, 8402 } },
1564+    { "&nvlt;", { 60, 8402 } },
1565+    { "&nvltrie;", { 8884, 8402 } },
1566+    { "&nvrArr;", { 10499, 0 } },
1567+    { "&nvrtrie;", { 8885, 8402 } },
1568+    { "&nvsim;", { 8764, 8402 } },
1569+    { "&nwArr;", { 8662, 0 } },
1570+    { "&nwarhk;", { 10531, 0 } },
1571+    { "&nwarr;", { 8598, 0 } },
1572+    { "&nwarrow;", { 8598, 0 } },
1573+    { "&nwnear;", { 10535, 0 } },
1574+    { "&oS;", { 9416, 0 } },
1575+    { "&oacute;", { 243, 0 } },
1576+    { "&oast;", { 8859, 0 } },
1577+    { "&ocir;", { 8858, 0 } },
1578+    { "&ocirc;", { 244, 0 } },
1579+    { "&ocy;", { 1086, 0 } },
1580+    { "&odash;", { 8861, 0 } },
1581+    { "&odblac;", { 337, 0 } },
1582+    { "&odiv;", { 10808, 0 } },
1583+    { "&odot;", { 8857, 0 } },
1584+    { "&odsold;", { 10684, 0 } },
1585+    { "&oelig;", { 339, 0 } },
1586+    { "&ofcir;", { 10687, 0 } },
1587+    { "&ofr;", { 120108, 0 } },
1588+    { "&ogon;", { 731, 0 } },
1589+    { "&ograve;", { 242, 0 } },
1590+    { "&ogt;", { 10689, 0 } },
1591+    { "&ohbar;", { 10677, 0 } },
1592+    { "&ohm;", { 937, 0 } },
1593+    { "&oint;", { 8750, 0 } },
1594+    { "&olarr;", { 8634, 0 } },
1595+    { "&olcir;", { 10686, 0 } },
1596+    { "&olcross;", { 10683, 0 } },
1597+    { "&oline;", { 8254, 0 } },
1598+    { "&olt;", { 10688, 0 } },
1599+    { "&omacr;", { 333, 0 } },
1600+    { "&omega;", { 969, 0 } },
1601+    { "&omicron;", { 959, 0 } },
1602+    { "&omid;", { 10678, 0 } },
1603+    { "&ominus;", { 8854, 0 } },
1604+    { "&oopf;", { 120160, 0 } },
1605+    { "&opar;", { 10679, 0 } },
1606+    { "&operp;", { 10681, 0 } },
1607+    { "&oplus;", { 8853, 0 } },
1608+    { "&or;", { 8744, 0 } },
1609+    { "&orarr;", { 8635, 0 } },
1610+    { "&ord;", { 10845, 0 } },
1611+    { "&order;", { 8500, 0 } },
1612+    { "&orderof;", { 8500, 0 } },
1613+    { "&ordf;", { 170, 0 } },
1614+    { "&ordm;", { 186, 0 } },
1615+    { "&origof;", { 8886, 0 } },
1616+    { "&oror;", { 10838, 0 } },
1617+    { "&orslope;", { 10839, 0 } },
1618+    { "&orv;", { 10843, 0 } },
1619+    { "&oscr;", { 8500, 0 } },
1620+    { "&oslash;", { 248, 0 } },
1621+    { "&osol;", { 8856, 0 } },
1622+    { "&otilde;", { 245, 0 } },
1623+    { "&otimes;", { 8855, 0 } },
1624+    { "&otimesas;", { 10806, 0 } },
1625+    { "&ouml;", { 246, 0 } },
1626+    { "&ovbar;", { 9021, 0 } },
1627+    { "&par;", { 8741, 0 } },
1628+    { "&para;", { 182, 0 } },
1629+    { "&parallel;", { 8741, 0 } },
1630+    { "&parsim;", { 10995, 0 } },
1631+    { "&parsl;", { 11005, 0 } },
1632+    { "&part;", { 8706, 0 } },
1633+    { "&pcy;", { 1087, 0 } },
1634+    { "&percnt;", { 37, 0 } },
1635+    { "&period;", { 46, 0 } },
1636+    { "&permil;", { 8240, 0 } },
1637+    { "&perp;", { 8869, 0 } },
1638+    { "&pertenk;", { 8241, 0 } },
1639+    { "&pfr;", { 120109, 0 } },
1640+    { "&phi;", { 966, 0 } },
1641+    { "&phiv;", { 981, 0 } },
1642+    { "&phmmat;", { 8499, 0 } },
1643+    { "&phone;", { 9742, 0 } },
1644+    { "&pi;", { 960, 0 } },
1645+    { "&pitchfork;", { 8916, 0 } },
1646+    { "&piv;", { 982, 0 } },
1647+    { "&planck;", { 8463, 0 } },
1648+    { "&planckh;", { 8462, 0 } },
1649+    { "&plankv;", { 8463, 0 } },
1650+    { "&plus;", { 43, 0 } },
1651+    { "&plusacir;", { 10787, 0 } },
1652+    { "&plusb;", { 8862, 0 } },
1653+    { "&pluscir;", { 10786, 0 } },
1654+    { "&plusdo;", { 8724, 0 } },
1655+    { "&plusdu;", { 10789, 0 } },
1656+    { "&pluse;", { 10866, 0 } },
1657+    { "&plusmn;", { 177, 0 } },
1658+    { "&plussim;", { 10790, 0 } },
1659+    { "&plustwo;", { 10791, 0 } },
1660+    { "&pm;", { 177, 0 } },
1661+    { "&pointint;", { 10773, 0 } },
1662+    { "&popf;", { 120161, 0 } },
1663+    { "&pound;", { 163, 0 } },
1664+    { "&pr;", { 8826, 0 } },
1665+    { "&prE;", { 10931, 0 } },
1666+    { "&prap;", { 10935, 0 } },
1667+    { "&prcue;", { 8828, 0 } },
1668+    { "&pre;", { 10927, 0 } },
1669+    { "&prec;", { 8826, 0 } },
1670+    { "&precapprox;", { 10935, 0 } },
1671+    { "&preccurlyeq;", { 8828, 0 } },
1672+    { "&preceq;", { 10927, 0 } },
1673+    { "&precnapprox;", { 10937, 0 } },
1674+    { "&precneqq;", { 10933, 0 } },
1675+    { "&precnsim;", { 8936, 0 } },
1676+    { "&precsim;", { 8830, 0 } },
1677+    { "&prime;", { 8242, 0 } },
1678+    { "&primes;", { 8473, 0 } },
1679+    { "&prnE;", { 10933, 0 } },
1680+    { "&prnap;", { 10937, 0 } },
1681+    { "&prnsim;", { 8936, 0 } },
1682+    { "&prod;", { 8719, 0 } },
1683+    { "&profalar;", { 9006, 0 } },
1684+    { "&profline;", { 8978, 0 } },
1685+    { "&profsurf;", { 8979, 0 } },
1686+    { "&prop;", { 8733, 0 } },
1687+    { "&propto;", { 8733, 0 } },
1688+    { "&prsim;", { 8830, 0 } },
1689+    { "&prurel;", { 8880, 0 } },
1690+    { "&pscr;", { 120005, 0 } },
1691+    { "&psi;", { 968, 0 } },
1692+    { "&puncsp;", { 8200, 0 } },
1693+    { "&qfr;", { 120110, 0 } },
1694+    { "&qint;", { 10764, 0 } },
1695+    { "&qopf;", { 120162, 0 } },
1696+    { "&qprime;", { 8279, 0 } },
1697+    { "&qscr;", { 120006, 0 } },
1698+    { "&quaternions;", { 8461, 0 } },
1699+    { "&quatint;", { 10774, 0 } },
1700+    { "&quest;", { 63, 0 } },
1701+    { "&questeq;", { 8799, 0 } },
1702+    { "&quot;", { 34, 0 } },
1703+    { "&rAarr;", { 8667, 0 } },
1704+    { "&rArr;", { 8658, 0 } },
1705+    { "&rAtail;", { 10524, 0 } },
1706+    { "&rBarr;", { 10511, 0 } },
1707+    { "&rHar;", { 10596, 0 } },
1708+    { "&race;", { 8765, 817 } },
1709+    { "&racute;", { 341, 0 } },
1710+    { "&radic;", { 8730, 0 } },
1711+    { "&raemptyv;", { 10675, 0 } },
1712+    { "&rang;", { 10217, 0 } },
1713+    { "&rangd;", { 10642, 0 } },
1714+    { "&range;", { 10661, 0 } },
1715+    { "&rangle;", { 10217, 0 } },
1716+    { "&raquo;", { 187, 0 } },
1717+    { "&rarr;", { 8594, 0 } },
1718+    { "&rarrap;", { 10613, 0 } },
1719+    { "&rarrb;", { 8677, 0 } },
1720+    { "&rarrbfs;", { 10528, 0 } },
1721+    { "&rarrc;", { 10547, 0 } },
1722+    { "&rarrfs;", { 10526, 0 } },
1723+    { "&rarrhk;", { 8618, 0 } },
1724+    { "&rarrlp;", { 8620, 0 } },
1725+    { "&rarrpl;", { 10565, 0 } },
1726+    { "&rarrsim;", { 10612, 0 } },
1727+    { "&rarrtl;", { 8611, 0 } },
1728+    { "&rarrw;", { 8605, 0 } },
1729+    { "&ratail;", { 10522, 0 } },
1730+    { "&ratio;", { 8758, 0 } },
1731+    { "&rationals;", { 8474, 0 } },
1732+    { "&rbarr;", { 10509, 0 } },
1733+    { "&rbbrk;", { 10099, 0 } },
1734+    { "&rbrace;", { 125, 0 } },
1735+    { "&rbrack;", { 93, 0 } },
1736+    { "&rbrke;", { 10636, 0 } },
1737+    { "&rbrksld;", { 10638, 0 } },
1738+    { "&rbrkslu;", { 10640, 0 } },
1739+    { "&rcaron;", { 345, 0 } },
1740+    { "&rcedil;", { 343, 0 } },
1741+    { "&rceil;", { 8969, 0 } },
1742+    { "&rcub;", { 125, 0 } },
1743+    { "&rcy;", { 1088, 0 } },
1744+    { "&rdca;", { 10551, 0 } },
1745+    { "&rdldhar;", { 10601, 0 } },
1746+    { "&rdquo;", { 8221, 0 } },
1747+    { "&rdquor;", { 8221, 0 } },
1748+    { "&rdsh;", { 8627, 0 } },
1749+    { "&real;", { 8476, 0 } },
1750+    { "&realine;", { 8475, 0 } },
1751+    { "&realpart;", { 8476, 0 } },
1752+    { "&reals;", { 8477, 0 } },
1753+    { "&rect;", { 9645, 0 } },
1754+    { "&reg;", { 174, 0 } },
1755+    { "&rfisht;", { 10621, 0 } },
1756+    { "&rfloor;", { 8971, 0 } },
1757+    { "&rfr;", { 120111, 0 } },
1758+    { "&rhard;", { 8641, 0 } },
1759+    { "&rharu;", { 8640, 0 } },
1760+    { "&rharul;", { 10604, 0 } },
1761+    { "&rho;", { 961, 0 } },
1762+    { "&rhov;", { 1009, 0 } },
1763+    { "&rightarrow;", { 8594, 0 } },
1764+    { "&rightarrowtail;", { 8611, 0 } },
1765+    { "&rightharpoondown;", { 8641, 0 } },
1766+    { "&rightharpoonup;", { 8640, 0 } },
1767+    { "&rightleftarrows;", { 8644, 0 } },
1768+    { "&rightleftharpoons;", { 8652, 0 } },
1769+    { "&rightrightarrows;", { 8649, 0 } },
1770+    { "&rightsquigarrow;", { 8605, 0 } },
1771+    { "&rightthreetimes;", { 8908, 0 } },
1772+    { "&ring;", { 730, 0 } },
1773+    { "&risingdotseq;", { 8787, 0 } },
1774+    { "&rlarr;", { 8644, 0 } },
1775+    { "&rlhar;", { 8652, 0 } },
1776+    { "&rlm;", { 8207, 0 } },
1777+    { "&rmoust;", { 9137, 0 } },
1778+    { "&rmoustache;", { 9137, 0 } },
1779+    { "&rnmid;", { 10990, 0 } },
1780+    { "&roang;", { 10221, 0 } },
1781+    { "&roarr;", { 8702, 0 } },
1782+    { "&robrk;", { 10215, 0 } },
1783+    { "&ropar;", { 10630, 0 } },
1784+    { "&ropf;", { 120163, 0 } },
1785+    { "&roplus;", { 10798, 0 } },
1786+    { "&rotimes;", { 10805, 0 } },
1787+    { "&rpar;", { 41, 0 } },
1788+    { "&rpargt;", { 10644, 0 } },
1789+    { "&rppolint;", { 10770, 0 } },
1790+    { "&rrarr;", { 8649, 0 } },
1791+    { "&rsaquo;", { 8250, 0 } },
1792+    { "&rscr;", { 120007, 0 } },
1793+    { "&rsh;", { 8625, 0 } },
1794+    { "&rsqb;", { 93, 0 } },
1795+    { "&rsquo;", { 8217, 0 } },
1796+    { "&rsquor;", { 8217, 0 } },
1797+    { "&rthree;", { 8908, 0 } },
1798+    { "&rtimes;", { 8906, 0 } },
1799+    { "&rtri;", { 9657, 0 } },
1800+    { "&rtrie;", { 8885, 0 } },
1801+    { "&rtrif;", { 9656, 0 } },
1802+    { "&rtriltri;", { 10702, 0 } },
1803+    { "&ruluhar;", { 10600, 0 } },
1804+    { "&rx;", { 8478, 0 } },
1805+    { "&sacute;", { 347, 0 } },
1806+    { "&sbquo;", { 8218, 0 } },
1807+    { "&sc;", { 8827, 0 } },
1808+    { "&scE;", { 10932, 0 } },
1809+    { "&scap;", { 10936, 0 } },
1810+    { "&scaron;", { 353, 0 } },
1811+    { "&sccue;", { 8829, 0 } },
1812+    { "&sce;", { 10928, 0 } },
1813+    { "&scedil;", { 351, 0 } },
1814+    { "&scirc;", { 349, 0 } },
1815+    { "&scnE;", { 10934, 0 } },
1816+    { "&scnap;", { 10938, 0 } },
1817+    { "&scnsim;", { 8937, 0 } },
1818+    { "&scpolint;", { 10771, 0 } },
1819+    { "&scsim;", { 8831, 0 } },
1820+    { "&scy;", { 1089, 0 } },
1821+    { "&sdot;", { 8901, 0 } },
1822+    { "&sdotb;", { 8865, 0 } },
1823+    { "&sdote;", { 10854, 0 } },
1824+    { "&seArr;", { 8664, 0 } },
1825+    { "&searhk;", { 10533, 0 } },
1826+    { "&searr;", { 8600, 0 } },
1827+    { "&searrow;", { 8600, 0 } },
1828+    { "&sect;", { 167, 0 } },
1829+    { "&semi;", { 59, 0 } },
1830+    { "&seswar;", { 10537, 0 } },
1831+    { "&setminus;", { 8726, 0 } },
1832+    { "&setmn;", { 8726, 0 } },
1833+    { "&sext;", { 10038, 0 } },
1834+    { "&sfr;", { 120112, 0 } },
1835+    { "&sfrown;", { 8994, 0 } },
1836+    { "&sharp;", { 9839, 0 } },
1837+    { "&shchcy;", { 1097, 0 } },
1838+    { "&shcy;", { 1096, 0 } },
1839+    { "&shortmid;", { 8739, 0 } },
1840+    { "&shortparallel;", { 8741, 0 } },
1841+    { "&shy;", { 173, 0 } },
1842+    { "&sigma;", { 963, 0 } },
1843+    { "&sigmaf;", { 962, 0 } },
1844+    { "&sigmav;", { 962, 0 } },
1845+    { "&sim;", { 8764, 0 } },
1846+    { "&simdot;", { 10858, 0 } },
1847+    { "&sime;", { 8771, 0 } },
1848+    { "&simeq;", { 8771, 0 } },
1849+    { "&simg;", { 10910, 0 } },
1850+    { "&simgE;", { 10912, 0 } },
1851+    { "&siml;", { 10909, 0 } },
1852+    { "&simlE;", { 10911, 0 } },
1853+    { "&simne;", { 8774, 0 } },
1854+    { "&simplus;", { 10788, 0 } },
1855+    { "&simrarr;", { 10610, 0 } },
1856+    { "&slarr;", { 8592, 0 } },
1857+    { "&smallsetminus;", { 8726, 0 } },
1858+    { "&smashp;", { 10803, 0 } },
1859+    { "&smeparsl;", { 10724, 0 } },
1860+    { "&smid;", { 8739, 0 } },
1861+    { "&smile;", { 8995, 0 } },
1862+    { "&smt;", { 10922, 0 } },
1863+    { "&smte;", { 10924, 0 } },
1864+    { "&smtes;", { 10924, 65024 } },
1865+    { "&softcy;", { 1100, 0 } },
1866+    { "&sol;", { 47, 0 } },
1867+    { "&solb;", { 10692, 0 } },
1868+    { "&solbar;", { 9023, 0 } },
1869+    { "&sopf;", { 120164, 0 } },
1870+    { "&spades;", { 9824, 0 } },
1871+    { "&spadesuit;", { 9824, 0 } },
1872+    { "&spar;", { 8741, 0 } },
1873+    { "&sqcap;", { 8851, 0 } },
1874+    { "&sqcaps;", { 8851, 65024 } },
1875+    { "&sqcup;", { 8852, 0 } },
1876+    { "&sqcups;", { 8852, 65024 } },
1877+    { "&sqsub;", { 8847, 0 } },
1878+    { "&sqsube;", { 8849, 0 } },
1879+    { "&sqsubset;", { 8847, 0 } },
1880+    { "&sqsubseteq;", { 8849, 0 } },
1881+    { "&sqsup;", { 8848, 0 } },
1882+    { "&sqsupe;", { 8850, 0 } },
1883+    { "&sqsupset;", { 8848, 0 } },
1884+    { "&sqsupseteq;", { 8850, 0 } },
1885+    { "&squ;", { 9633, 0 } },
1886+    { "&square;", { 9633, 0 } },
1887+    { "&squarf;", { 9642, 0 } },
1888+    { "&squf;", { 9642, 0 } },
1889+    { "&srarr;", { 8594, 0 } },
1890+    { "&sscr;", { 120008, 0 } },
1891+    { "&ssetmn;", { 8726, 0 } },
1892+    { "&ssmile;", { 8995, 0 } },
1893+    { "&sstarf;", { 8902, 0 } },
1894+    { "&star;", { 9734, 0 } },
1895+    { "&starf;", { 9733, 0 } },
1896+    { "&straightepsilon;", { 1013, 0 } },
1897+    { "&straightphi;", { 981, 0 } },
1898+    { "&strns;", { 175, 0 } },
1899+    { "&sub;", { 8834, 0 } },
1900+    { "&subE;", { 10949, 0 } },
1901+    { "&subdot;", { 10941, 0 } },
1902+    { "&sube;", { 8838, 0 } },
1903+    { "&subedot;", { 10947, 0 } },
1904+    { "&submult;", { 10945, 0 } },
1905+    { "&subnE;", { 10955, 0 } },
1906+    { "&subne;", { 8842, 0 } },
1907+    { "&subplus;", { 10943, 0 } },
1908+    { "&subrarr;", { 10617, 0 } },
1909+    { "&subset;", { 8834, 0 } },
1910+    { "&subseteq;", { 8838, 0 } },
1911+    { "&subseteqq;", { 10949, 0 } },
1912+    { "&subsetneq;", { 8842, 0 } },
1913+    { "&subsetneqq;", { 10955, 0 } },
1914+    { "&subsim;", { 10951, 0 } },
1915+    { "&subsub;", { 10965, 0 } },
1916+    { "&subsup;", { 10963, 0 } },
1917+    { "&succ;", { 8827, 0 } },
1918+    { "&succapprox;", { 10936, 0 } },
1919+    { "&succcurlyeq;", { 8829, 0 } },
1920+    { "&succeq;", { 10928, 0 } },
1921+    { "&succnapprox;", { 10938, 0 } },
1922+    { "&succneqq;", { 10934, 0 } },
1923+    { "&succnsim;", { 8937, 0 } },
1924+    { "&succsim;", { 8831, 0 } },
1925+    { "&sum;", { 8721, 0 } },
1926+    { "&sung;", { 9834, 0 } },
1927+    { "&sup1", { 185, 0 } },
1928+    { "&sup1;", { 185, 0 } },
1929+    { "&sup2", { 178, 0 } },
1930+    { "&sup2;", { 178, 0 } },
1931+    { "&sup3", { 179, 0 } },
1932+    { "&sup3;", { 179, 0 } },
1933+    { "&sup;", { 8835, 0 } },
1934+    { "&supE;", { 10950, 0 } },
1935+    { "&supdot;", { 10942, 0 } },
1936+    { "&supdsub;", { 10968, 0 } },
1937+    { "&supe;", { 8839, 0 } },
1938+    { "&supedot;", { 10948, 0 } },
1939+    { "&suphsol;", { 10185, 0 } },
1940+    { "&suphsub;", { 10967, 0 } },
1941+    { "&suplarr;", { 10619, 0 } },
1942+    { "&supmult;", { 10946, 0 } },
1943+    { "&supnE;", { 10956, 0 } },
1944+    { "&supne;", { 8843, 0 } },
1945+    { "&supplus;", { 10944, 0 } },
1946+    { "&supset;", { 8835, 0 } },
1947+    { "&supseteq;", { 8839, 0 } },
1948+    { "&supseteqq;", { 10950, 0 } },
1949+    { "&supsetneq;", { 8843, 0 } },
1950+    { "&supsetneqq;", { 10956, 0 } },
1951+    { "&supsim;", { 10952, 0 } },
1952+    { "&supsub;", { 10964, 0 } },
1953+    { "&supsup;", { 10966, 0 } },
1954+    { "&swArr;", { 8665, 0 } },
1955+    { "&swarhk;", { 10534, 0 } },
1956+    { "&swarr;", { 8601, 0 } },
1957+    { "&swarrow;", { 8601, 0 } },
1958+    { "&swnwar;", { 10538, 0 } },
1959+    { "&szlig;", { 223, 0 } },
1960+    { "&target;", { 8982, 0 } },
1961+    { "&tau;", { 964, 0 } },
1962+    { "&tbrk;", { 9140, 0 } },
1963+    { "&tcaron;", { 357, 0 } },
1964+    { "&tcedil;", { 355, 0 } },
1965+    { "&tcy;", { 1090, 0 } },
1966+    { "&tdot;", { 8411, 0 } },
1967+    { "&telrec;", { 8981, 0 } },
1968+    { "&tfr;", { 120113, 0 } },
1969+    { "&there4;", { 8756, 0 } },
1970+    { "&therefore;", { 8756, 0 } },
1971+    { "&theta;", { 952, 0 } },
1972+    { "&thetasym;", { 977, 0 } },
1973+    { "&thetav;", { 977, 0 } },
1974+    { "&thickapprox;", { 8776, 0 } },
1975+    { "&thicksim;", { 8764, 0 } },
1976+    { "&thinsp;", { 8201, 0 } },
1977+    { "&thkap;", { 8776, 0 } },
1978+    { "&thksim;", { 8764, 0 } },
1979+    { "&thorn;", { 254, 0 } },
1980+    { "&tilde;", { 732, 0 } },
1981+    { "&times;", { 215, 0 } },
1982+    { "&timesb;", { 8864, 0 } },
1983+    { "&timesbar;", { 10801, 0 } },
1984+    { "&timesd;", { 10800, 0 } },
1985+    { "&tint;", { 8749, 0 } },
1986+    { "&toea;", { 10536, 0 } },
1987+    { "&top;", { 8868, 0 } },
1988+    { "&topbot;", { 9014, 0 } },
1989+    { "&topcir;", { 10993, 0 } },
1990+    { "&topf;", { 120165, 0 } },
1991+    { "&topfork;", { 10970, 0 } },
1992+    { "&tosa;", { 10537, 0 } },
1993+    { "&tprime;", { 8244, 0 } },
1994+    { "&trade;", { 8482, 0 } },
1995+    { "&triangle;", { 9653, 0 } },
1996+    { "&triangledown;", { 9663, 0 } },
1997+    { "&triangleleft;", { 9667, 0 } },
1998+    { "&trianglelefteq;", { 8884, 0 } },
1999+    { "&triangleq;", { 8796, 0 } },
2000+    { "&triangleright;", { 9657, 0 } },
2001+    { "&trianglerighteq;", { 8885, 0 } },
2002+    { "&tridot;", { 9708, 0 } },
2003+    { "&trie;", { 8796, 0 } },
2004+    { "&triminus;", { 10810, 0 } },
2005+    { "&triplus;", { 10809, 0 } },
2006+    { "&trisb;", { 10701, 0 } },
2007+    { "&tritime;", { 10811, 0 } },
2008+    { "&trpezium;", { 9186, 0 } },
2009+    { "&tscr;", { 120009, 0 } },
2010+    { "&tscy;", { 1094, 0 } },
2011+    { "&tshcy;", { 1115, 0 } },
2012+    { "&tstrok;", { 359, 0 } },
2013+    { "&twixt;", { 8812, 0 } },
2014+    { "&twoheadleftarrow;", { 8606, 0 } },
2015+    { "&twoheadrightarrow;", { 8608, 0 } },
2016+    { "&uArr;", { 8657, 0 } },
2017+    { "&uHar;", { 10595, 0 } },
2018+    { "&uacute;", { 250, 0 } },
2019+    { "&uarr;", { 8593, 0 } },
2020+    { "&ubrcy;", { 1118, 0 } },
2021+    { "&ubreve;", { 365, 0 } },
2022+    { "&ucirc;", { 251, 0 } },
2023+    { "&ucy;", { 1091, 0 } },
2024+    { "&udarr;", { 8645, 0 } },
2025+    { "&udblac;", { 369, 0 } },
2026+    { "&udhar;", { 10606, 0 } },
2027+    { "&ufisht;", { 10622, 0 } },
2028+    { "&ufr;", { 120114, 0 } },
2029+    { "&ugrave;", { 249, 0 } },
2030+    { "&uharl;", { 8639, 0 } },
2031+    { "&uharr;", { 8638, 0 } },
2032+    { "&uhblk;", { 9600, 0 } },
2033+    { "&ulcorn;", { 8988, 0 } },
2034+    { "&ulcorner;", { 8988, 0 } },
2035+    { "&ulcrop;", { 8975, 0 } },
2036+    { "&ultri;", { 9720, 0 } },
2037+    { "&umacr;", { 363, 0 } },
2038+    { "&uml;", { 168, 0 } },
2039+    { "&uogon;", { 371, 0 } },
2040+    { "&uopf;", { 120166, 0 } },
2041+    { "&uparrow;", { 8593, 0 } },
2042+    { "&updownarrow;", { 8597, 0 } },
2043+    { "&upharpoonleft;", { 8639, 0 } },
2044+    { "&upharpoonright;", { 8638, 0 } },
2045+    { "&uplus;", { 8846, 0 } },
2046+    { "&upsi;", { 965, 0 } },
2047+    { "&upsih;", { 978, 0 } },
2048+    { "&upsilon;", { 965, 0 } },
2049+    { "&upuparrows;", { 8648, 0 } },
2050+    { "&urcorn;", { 8989, 0 } },
2051+    { "&urcorner;", { 8989, 0 } },
2052+    { "&urcrop;", { 8974, 0 } },
2053+    { "&uring;", { 367, 0 } },
2054+    { "&urtri;", { 9721, 0 } },
2055+    { "&uscr;", { 120010, 0 } },
2056+    { "&utdot;", { 8944, 0 } },
2057+    { "&utilde;", { 361, 0 } },
2058+    { "&utri;", { 9653, 0 } },
2059+    { "&utrif;", { 9652, 0 } },
2060+    { "&uuarr;", { 8648, 0 } },
2061+    { "&uuml;", { 252, 0 } },
2062+    { "&uwangle;", { 10663, 0 } },
2063+    { "&vArr;", { 8661, 0 } },
2064+    { "&vBar;", { 10984, 0 } },
2065+    { "&vBarv;", { 10985, 0 } },
2066+    { "&vDash;", { 8872, 0 } },
2067+    { "&vangrt;", { 10652, 0 } },
2068+    { "&varepsilon;", { 1013, 0 } },
2069+    { "&varkappa;", { 1008, 0 } },
2070+    { "&varnothing;", { 8709, 0 } },
2071+    { "&varphi;", { 981, 0 } },
2072+    { "&varpi;", { 982, 0 } },
2073+    { "&varpropto;", { 8733, 0 } },
2074+    { "&varr;", { 8597, 0 } },
2075+    { "&varrho;", { 1009, 0 } },
2076+    { "&varsigma;", { 962, 0 } },
2077+    { "&varsubsetneq;", { 8842, 65024 } },
2078+    { "&varsubsetneqq;", { 10955, 65024 } },
2079+    { "&varsupsetneq;", { 8843, 65024 } },
2080+    { "&varsupsetneqq;", { 10956, 65024 } },
2081+    { "&vartheta;", { 977, 0 } },
2082+    { "&vartriangleleft;", { 8882, 0 } },
2083+    { "&vartriangleright;", { 8883, 0 } },
2084+    { "&vcy;", { 1074, 0 } },
2085+    { "&vdash;", { 8866, 0 } },
2086+    { "&vee;", { 8744, 0 } },
2087+    { "&veebar;", { 8891, 0 } },
2088+    { "&veeeq;", { 8794, 0 } },
2089+    { "&vellip;", { 8942, 0 } },
2090+    { "&verbar;", { 124, 0 } },
2091+    { "&vert;", { 124, 0 } },
2092+    { "&vfr;", { 120115, 0 } },
2093+    { "&vltri;", { 8882, 0 } },
2094+    { "&vnsub;", { 8834, 8402 } },
2095+    { "&vnsup;", { 8835, 8402 } },
2096+    { "&vopf;", { 120167, 0 } },
2097+    { "&vprop;", { 8733, 0 } },
2098+    { "&vrtri;", { 8883, 0 } },
2099+    { "&vscr;", { 120011, 0 } },
2100+    { "&vsubnE;", { 10955, 65024 } },
2101+    { "&vsubne;", { 8842, 65024 } },
2102+    { "&vsupnE;", { 10956, 65024 } },
2103+    { "&vsupne;", { 8843, 65024 } },
2104+    { "&vzigzag;", { 10650, 0 } },
2105+    { "&wcirc;", { 373, 0 } },
2106+    { "&wedbar;", { 10847, 0 } },
2107+    { "&wedge;", { 8743, 0 } },
2108+    { "&wedgeq;", { 8793, 0 } },
2109+    { "&weierp;", { 8472, 0 } },
2110+    { "&wfr;", { 120116, 0 } },
2111+    { "&wopf;", { 120168, 0 } },
2112+    { "&wp;", { 8472, 0 } },
2113+    { "&wr;", { 8768, 0 } },
2114+    { "&wreath;", { 8768, 0 } },
2115+    { "&wscr;", { 120012, 0 } },
2116+    { "&xcap;", { 8898, 0 } },
2117+    { "&xcirc;", { 9711, 0 } },
2118+    { "&xcup;", { 8899, 0 } },
2119+    { "&xdtri;", { 9661, 0 } },
2120+    { "&xfr;", { 120117, 0 } },
2121+    { "&xhArr;", { 10234, 0 } },
2122+    { "&xharr;", { 10231, 0 } },
2123+    { "&xi;", { 958, 0 } },
2124+    { "&xlArr;", { 10232, 0 } },
2125+    { "&xlarr;", { 10229, 0 } },
2126+    { "&xmap;", { 10236, 0 } },
2127+    { "&xnis;", { 8955, 0 } },
2128+    { "&xodot;", { 10752, 0 } },
2129+    { "&xopf;", { 120169, 0 } },
2130+    { "&xoplus;", { 10753, 0 } },
2131+    { "&xotime;", { 10754, 0 } },
2132+    { "&xrArr;", { 10233, 0 } },
2133+    { "&xrarr;", { 10230, 0 } },
2134+    { "&xscr;", { 120013, 0 } },
2135+    { "&xsqcup;", { 10758, 0 } },
2136+    { "&xuplus;", { 10756, 0 } },
2137+    { "&xutri;", { 9651, 0 } },
2138+    { "&xvee;", { 8897, 0 } },
2139+    { "&xwedge;", { 8896, 0 } },
2140+    { "&yacute;", { 253, 0 } },
2141+    { "&yacy;", { 1103, 0 } },
2142+    { "&ycirc;", { 375, 0 } },
2143+    { "&ycy;", { 1099, 0 } },
2144+    { "&yen;", { 165, 0 } },
2145+    { "&yfr;", { 120118, 0 } },
2146+    { "&yicy;", { 1111, 0 } },
2147+    { "&yopf;", { 120170, 0 } },
2148+    { "&yscr;", { 120014, 0 } },
2149+    { "&yucy;", { 1102, 0 } },
2150+    { "&yuml;", { 255, 0 } },
2151+    { "&zacute;", { 378, 0 } },
2152+    { "&zcaron;", { 382, 0 } },
2153+    { "&zcy;", { 1079, 0 } },
2154+    { "&zdot;", { 380, 0 } },
2155+    { "&zeetrf;", { 8488, 0 } },
2156+    { "&zeta;", { 950, 0 } },
2157+    { "&zfr;", { 120119, 0 } },
2158+    { "&zhcy;", { 1078, 0 } },
2159+    { "&zigrarr;", { 8669, 0 } },
2160+    { "&zopf;", { 120171, 0 } },
2161+    { "&zscr;", { 120015, 0 } },
2162+    { "&zwj;", { 8205, 0 } },
2163+    { "&zwnj;", { 8204, 0 } }
2164+};
2165+
2166+
2167+struct entity_key {
2168+    const char* name;
2169+    size_t name_size;
2170+};
2171+
2172+static int
2173+entity_cmp(const void* p_key, const void* p_entity)
2174+{
2175+    struct entity_key* key = (struct entity_key*) p_key;
2176+    struct entity* ent = (struct entity*) p_entity;
2177+
2178+    return strncmp(key->name, ent->name, key->name_size);
2179+}
2180+
2181+const struct entity*
2182+entity_lookup(const char* name, size_t name_size)
2183+{
2184+    struct entity_key key = { name, name_size };
2185+
2186+    return bsearch(&key,
2187+                   entity_table,
2188+                   sizeof(entity_table) / sizeof(entity_table[0]),
2189+                   sizeof(struct entity),
2190+                   entity_cmp);
2191+}

A · entity.h +42, -0

 1@@ -0,0 +1,42 @@
 2+/*
 3+ * MD4C: Markdown parser for C
 4+ * (http://github.com/mity/md4c)
 5+ *
 6+ * Copyright (c) 2016-2019 Martin Mitas
 7+ *
 8+ * Permission is hereby granted, free of charge, to any person obtaining a
 9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#ifndef MD4C_ENTITY_H
28+#define MD4C_ENTITY_H
29+
30+#include <stdlib.h>
31+
32+
33+/* Most entities are formed by a single Unicode codepoint, a few by two codepoints.
34+ * Single-codepoint entities have codepoints[1] set to zero. */
35+struct entity {
36+    const char* name;
37+    unsigned codepoints[2];
38+};
39+
40+const struct entity* entity_lookup(const char* name, size_t name_size);
41+
42+
43+#endif  /* MD4C_ENTITY_H */

A · entity.o +0, -0

A · md4c-html.c +573, -0

  1@@ -0,0 +1,573 @@
  2+/*
  3+ * MD4C: Markdown parser for C
  4+ * (http://github.com/mity/md4c)
  5+ *
  6+ * Copyright (c) 2016-2019 Martin Mitas
  7+ *
  8+ * Permission is hereby granted, free of charge, to any person obtaining a
  9+ * copy of this software and associated documentation files (the "Software"),
 10+ * to deal in the Software without restriction, including without limitation
 11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 12+ * and/or sell copies of the Software, and to permit persons to whom the
 13+ * Software is furnished to do so, subject to the following conditions:
 14+ *
 15+ * The above copyright notice and this permission notice shall be included in
 16+ * all copies or substantial portions of the Software.
 17+ *
 18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 24+ * IN THE SOFTWARE.
 25+ */
 26+
 27+#include <stdio.h>
 28+#include <string.h>
 29+
 30+#include "md4c-html.h"
 31+#include "entity.h"
 32+
 33+
 34+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
 35+    /* C89/90 or old compilers in general may not understand "inline". */
 36+    #if defined __GNUC__
 37+        #define inline __inline__
 38+    #elif defined _MSC_VER
 39+        #define inline __inline
 40+    #else
 41+        #define inline
 42+    #endif
 43+#endif
 44+
 45+#ifdef _WIN32
 46+    #define snprintf _snprintf
 47+#endif
 48+
 49+
 50+
 51+typedef struct MD_HTML_tag MD_HTML;
 52+struct MD_HTML_tag {
 53+    void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
 54+    void* userdata;
 55+    unsigned flags;
 56+    int image_nesting_level;
 57+    char escape_map[256];
 58+};
 59+
 60+#define NEED_HTML_ESC_FLAG   0x1
 61+#define NEED_URL_ESC_FLAG    0x2
 62+
 63+
 64+/*****************************************
 65+ ***  HTML rendering helper functions  ***
 66+ *****************************************/
 67+
 68+#define ISDIGIT(ch)     ('0' <= (ch) && (ch) <= '9')
 69+#define ISLOWER(ch)     ('a' <= (ch) && (ch) <= 'z')
 70+#define ISUPPER(ch)     ('A' <= (ch) && (ch) <= 'Z')
 71+#define ISALNUM(ch)     (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
 72+
 73+
 74+static inline void
 75+render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
 76+{
 77+    r->process_output(text, size, r->userdata);
 78+}
 79+
 80+/* Keep this as a macro. Most compilers should then be smart enough to replace
 81+ * the strlen() call with a compile-time constant if the string is a C literal. */
 82+#define RENDER_VERBATIM(r, verbatim)                                    \
 83+        render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
 84+
 85+
 86+static void
 87+render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
 88+{
 89+    MD_OFFSET beg = 0;
 90+    MD_OFFSET off = 0;
 91+
 92+    /* Some characters need to be escaped in normal HTML text. */
 93+    #define NEED_HTML_ESC(ch)   (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
 94+
 95+    while(1) {
 96+        /* Optimization: Use some loop unrolling. */
 97+        while(off + 3 < size  &&  !NEED_HTML_ESC(data[off+0])  &&  !NEED_HTML_ESC(data[off+1])
 98+                              &&  !NEED_HTML_ESC(data[off+2])  &&  !NEED_HTML_ESC(data[off+3]))
 99+            off += 4;
100+        while(off < size  &&  !NEED_HTML_ESC(data[off]))
101+            off++;
102+
103+        if(off > beg)
104+            render_verbatim(r, data + beg, off - beg);
105+
106+        if(off < size) {
107+            switch(data[off]) {
108+                case '&':   RENDER_VERBATIM(r, "&amp;"); break;
109+                case '<':   RENDER_VERBATIM(r, "&lt;"); break;
110+                case '>':   RENDER_VERBATIM(r, "&gt;"); break;
111+                case '"':   RENDER_VERBATIM(r, "&quot;"); break;
112+            }
113+            off++;
114+        } else {
115+            break;
116+        }
117+        beg = off;
118+    }
119+}
120+
121+static void
122+render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
123+{
124+    static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
125+    MD_OFFSET beg = 0;
126+    MD_OFFSET off = 0;
127+
128+    /* Some characters need to be escaped in URL attributes. */
129+    #define NEED_URL_ESC(ch)    (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
130+
131+    while(1) {
132+        while(off < size  &&  !NEED_URL_ESC(data[off]))
133+            off++;
134+        if(off > beg)
135+            render_verbatim(r, data + beg, off - beg);
136+
137+        if(off < size) {
138+            char hex[3];
139+
140+            switch(data[off]) {
141+                case '&':   RENDER_VERBATIM(r, "&amp;"); break;
142+                default:
143+                    hex[0] = '%';
144+                    hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
145+                    hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
146+                    render_verbatim(r, hex, 3);
147+                    break;
148+            }
149+            off++;
150+        } else {
151+            break;
152+        }
153+
154+        beg = off;
155+    }
156+}
157+
158+static unsigned
159+hex_val(char ch)
160+{
161+    if('0' <= ch && ch <= '9')
162+        return ch - '0';
163+    if('A' <= ch && ch <= 'Z')
164+        return ch - 'A' + 10;
165+    else
166+        return ch - 'a' + 10;
167+}
168+
169+static void
170+render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
171+                      void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
172+{
173+    static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
174+
175+    unsigned char utf8[4];
176+    size_t n;
177+
178+    if(codepoint <= 0x7f) {
179+        n = 1;
180+        utf8[0] = codepoint;
181+    } else if(codepoint <= 0x7ff) {
182+        n = 2;
183+        utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
184+        utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
185+    } else if(codepoint <= 0xffff) {
186+        n = 3;
187+        utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
188+        utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
189+        utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
190+    } else {
191+        n = 4;
192+        utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
193+        utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
194+        utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
195+        utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
196+    }
197+
198+    if(0 < codepoint  &&  codepoint <= 0x10ffff)
199+        fn_append(r, (char*)utf8, n);
200+    else
201+        fn_append(r, utf8_replacement_char, 3);
202+}
203+
204+/* Translate entity to its UTF-8 equivalent, or output the verbatim one
205+ * if such entity is unknown (or if the translation is disabled). */
206+static void
207+render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
208+              void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
209+{
210+    if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
211+        render_verbatim(r, text, size);
212+        return;
213+    }
214+
215+    /* We assume UTF-8 output is what is desired. */
216+    if(size > 3 && text[1] == '#') {
217+        unsigned codepoint = 0;
218+
219+        if(text[2] == 'x' || text[2] == 'X') {
220+            /* Hexadecimal entity (e.g. "&#x1234abcd;"). */
221+            MD_SIZE i;
222+            for(i = 3; i < size-1; i++)
223+                codepoint = 16 * codepoint + hex_val(text[i]);
224+        } else {
225+            /* Decimal entity (e.g. "&#1234;") */
226+            MD_SIZE i;
227+            for(i = 2; i < size-1; i++)
228+                codepoint = 10 * codepoint + (text[i] - '0');
229+        }
230+
231+        render_utf8_codepoint(r, codepoint, fn_append);
232+        return;
233+    } else {
234+        /* Named entity (e.g. "&nbsp;"). */
235+        const struct entity* ent;
236+
237+        ent = entity_lookup(text, size);
238+        if(ent != NULL) {
239+            render_utf8_codepoint(r, ent->codepoints[0], fn_append);
240+            if(ent->codepoints[1])
241+                render_utf8_codepoint(r, ent->codepoints[1], fn_append);
242+            return;
243+        }
244+    }
245+
246+    fn_append(r, text, size);
247+}
248+
249+static void
250+render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
251+                 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
252+{
253+    /* Emit attr substring by substring: NUL placeholders and entities get
254+     * dedicated handling, every other substring goes straight to fn_append(). */
255+    int idx;
256+    for(idx = 0; attr->substr_offsets[idx] < attr->size; idx++) {
257+        MD_OFFSET from = attr->substr_offsets[idx];
258+        MD_SIZE len = attr->substr_offsets[idx+1] - from;
259+        const MD_CHAR* chunk = attr->text + from;
260+        if(attr->substr_types[idx] == MD_TEXT_NULLCHAR)
261+            render_utf8_codepoint(r, 0x0000, render_verbatim);
262+        else if(attr->substr_types[idx] == MD_TEXT_ENTITY)
263+            render_entity(r, chunk, len, fn_append);
264+        else
265+            fn_append(r, chunk, len);
266+    }
267+}
268+
269+
270+static void
271+render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
272+{
273+    /* A list that starts at 1 needs no start attribute. */
274+    if(det->start == 1) {
275+        RENDER_VERBATIM(r, "<ol>\n");
276+    } else {
277+        char tag[64];
278+
279+        snprintf(tag, sizeof(tag), "<ol start=\"%u\">\n", det->start);
280+        RENDER_VERBATIM(r, tag);
281+    }
282+}
283+
284+static void
285+render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
286+{
287+    if(!det->is_task) {
288+        RENDER_VERBATIM(r, "<li>");
289+        return;
290+    }
291+    RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
292+                      "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
293+    if(det->task_mark == 'x' || det->task_mark == 'X')     /* 'x'/'X' => checked box */
294+        RENDER_VERBATIM(r, " checked");
295+    RENDER_VERBATIM(r, ">");
296+}
297+
298+static void
299+render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
300+{
301+    const MD_ATTRIBUTE* lang = &det->lang;
302+
303+    RENDER_VERBATIM(r, "<pre><code");
304+    if(lang->text != NULL) {
305+        /* Language is known: add the HTML 5 class="language-LANGNAME". */
306+        RENDER_VERBATIM(r, " class=\"language-");
307+        render_attribute(r, lang, render_html_escaped);
308+        RENDER_VERBATIM(r, "\"");
309+    }
310+    RENDER_VERBATIM(r, ">");
311+}
312+
313+static void
314+render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
315+{
316+    /* Tail of the opening tag; it carries the align attribute if one is set. */
317+    const MD_CHAR* tail = ">";
318+
319+    if(det->align == MD_ALIGN_LEFT)         tail = " align=\"left\">";
320+    else if(det->align == MD_ALIGN_CENTER)  tail = " align=\"center\">";
321+    else if(det->align == MD_ALIGN_RIGHT)   tail = " align=\"right\">";
322+    RENDER_VERBATIM(r, "<");
323+    RENDER_VERBATIM(r, cell_type);
324+    RENDER_VERBATIM(r, tail);
325+}
326+
327+static void
328+render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
329+{
330+    const MD_ATTRIBUTE* title = &det->title;
331+    /* The link target is URL-escaped, the optional title is HTML-escaped. */
332+    RENDER_VERBATIM(r, "<a href=\"");
333+    render_attribute(r, &det->href, render_url_escaped);
334+    if(title->text != NULL) {
335+        RENDER_VERBATIM(r, "\" title=\"");
336+        render_attribute(r, title, render_html_escaped);
337+    }
338+    RENDER_VERBATIM(r, "\">");
339+}
340+
341+static void
342+render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
343+{
344+    RENDER_VERBATIM(r, "<img src=\"");
345+    render_attribute(r, &det->src, render_url_escaped);
346+    RENDER_VERBATIM(r, "\" alt=\"");
347+
348+    /* The alt attribute stays open; render_close_img_span() finishes it. */
349+    r->image_nesting_level += 1;
350+}
351+
352+static void
353+render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
354+{
355+    const MD_ATTRIBUTE* title = &det->title;
356+    /* End the pending attribute, add the optional title, then end the tag. */
357+    if(title->text != NULL) {
358+        RENDER_VERBATIM(r, "\" title=\"");
359+        render_attribute(r, title, render_html_escaped);
360+    }
361+    RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
362+    r->image_nesting_level -= 1;
363+}
364+
365+static void
366+render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
367+{
368+    /* Wiki links render as a custom element carrying the escaped target. */
369+    RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
370+    render_attribute(r, &det->target, render_html_escaped);
371+    RENDER_VERBATIM(r, "\">");
372+}
373+
374+
375+/**************************************
376+ ***  HTML renderer implementation  ***
377+ **************************************/
378+
379+static int
380+enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
381+{
382+    static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
383+    MD_HTML* r = (MD_HTML*) userdata;
384+    const MD_CHAR* tag = NULL;      /* Fixed markup opening the block, if any. */
385+
386+    switch(type) {
387+        case MD_BLOCK_QUOTE:    tag = "<blockquote>\n"; break;
388+        case MD_BLOCK_UL:       tag = "<ul>\n"; break;
389+        case MD_BLOCK_HR:       tag = (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"; break;
390+        case MD_BLOCK_H:        tag = head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]; break;
391+        case MD_BLOCK_P:        tag = "<p>"; break;
392+        case MD_BLOCK_TABLE:    tag = "<table>\n"; break;
393+        case MD_BLOCK_THEAD:    tag = "<thead>\n"; break;
394+        case MD_BLOCK_TBODY:    tag = "<tbody>\n"; break;
395+        case MD_BLOCK_TR:       tag = "<tr>\n"; break;
396+        case MD_BLOCK_OL:       render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
397+        case MD_BLOCK_LI:       render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
398+        case MD_BLOCK_CODE:     render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break;
399+        case MD_BLOCK_TH:       render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break;
400+        case MD_BLOCK_TD:       render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break;
401+        default:                /* MD_BLOCK_DOC and MD_BLOCK_HTML open nothing. */ break;
402+    }
403+    if(tag != NULL) RENDER_VERBATIM(r, tag);
404+    return 0;
405+}
406+
407+static int
408+leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
409+{
410+    static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
411+    MD_HTML* r = (MD_HTML*) userdata;
412+    const MD_CHAR* tag = NULL;      /* Fixed markup closing the block, if any. */
413+
414+    switch(type) {
415+        case MD_BLOCK_QUOTE:    tag = "</blockquote>\n"; break;
416+        case MD_BLOCK_UL:       tag = "</ul>\n"; break;
417+        case MD_BLOCK_OL:       tag = "</ol>\n"; break;
418+        case MD_BLOCK_LI:       tag = "</li>\n"; break;
419+        case MD_BLOCK_H:        tag = head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]; break;
420+        case MD_BLOCK_CODE:     tag = "</code></pre>\n"; break;
421+        case MD_BLOCK_P:        tag = "</p>\n"; break;
422+        case MD_BLOCK_TABLE:    tag = "</table>\n"; break;
423+        case MD_BLOCK_THEAD:    tag = "</thead>\n"; break;
424+        case MD_BLOCK_TBODY:    tag = "</tbody>\n"; break;
425+        case MD_BLOCK_TR:       tag = "</tr>\n"; break;
426+        case MD_BLOCK_TH:       tag = "</th>\n"; break;
427+        case MD_BLOCK_TD:       tag = "</td>\n"; break;
428+        default:                /* MD_BLOCK_DOC, MD_BLOCK_HR, MD_BLOCK_HTML close nothing. */ break;
429+    }
430+    if(tag != NULL)
431+        RENDER_VERBATIM(r, tag);
432+    return 0;
433+}
434+
435+static int
436+enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
437+{
438+    MD_HTML* r = (MD_HTML*) userdata;
439+    const MD_CHAR* tag = NULL;      /* Plain opening tag to emit, if any. */
440+
441+    /* While an image label is being rendered, spans produce no output.
442+     *
443+     * Markdown permits emphasis and other rich contents in an image label,
444+     * just as in a link label. But a link label becomes the contents of
445+     * <a>...</a>, whereas an image label lands in an attribute:
446+     * <img alt="...">. Nested HTML tags cannot be output there, so they are
447+     * suppressed and only the plain text (i.e. what goes through the text()
448+     * callback) is written.
449+     *
450+     * The CommonMark specification recommends this plain-text approach for
451+     * HTML output.
452+     */
453+    if(r->image_nesting_level > 0)
454+        return 0;
455+
456+    switch(type) {
457+        case MD_SPAN_EM:                tag = "<em>"; break;
458+        case MD_SPAN_STRONG:            tag = "<strong>"; break;
459+        case MD_SPAN_U:                 tag = "<u>"; break;
460+        case MD_SPAN_CODE:              tag = "<code>"; break;
461+        case MD_SPAN_DEL:               tag = "<del>"; break;
462+        case MD_SPAN_LATEXMATH:         tag = "<x-equation>"; break;
463+        case MD_SPAN_LATEXMATH_DISPLAY: tag = "<x-equation type=\"display\">"; break;
464+        case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
465+        case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
466+        case MD_SPAN_WIKILINK:          render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
467+    }
468+
469+    if(tag != NULL)
470+        RENDER_VERBATIM(r, tag);
471+
472+    return 0;
473+}
474+
475+static int
476+leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
477+{
478+    MD_HTML* r = (MD_HTML*) userdata;
479+    const MD_CHAR* tag = NULL;      /* Plain closing tag to emit, if any. */
480+    /* Inside an image label spans stay silent (see enter_span_callback());
481+     * the only exception is the end of the outermost <img> tag itself. */
482+    if(r->image_nesting_level > 0) {
483+        if(type == MD_SPAN_IMG  &&  r->image_nesting_level == 1)
484+            render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
485+        return 0;
486+    }
487+    switch(type) {
488+        case MD_SPAN_EM:                tag = "</em>"; break;
489+        case MD_SPAN_STRONG:            tag = "</strong>"; break;
490+        case MD_SPAN_U:                 tag = "</u>"; break;
491+        case MD_SPAN_A:                 tag = "</a>"; break;
492+        case MD_SPAN_CODE:              tag = "</code>"; break;
493+        case MD_SPAN_DEL:               tag = "</del>"; break;
494+        case MD_SPAN_LATEXMATH:         /*fall through*/
495+        case MD_SPAN_LATEXMATH_DISPLAY: tag = "</x-equation>"; break;
496+        case MD_SPAN_WIKILINK:          tag = "</x-wikilink>"; break;
497+        default:                        /* MD_SPAN_IMG is handled above. */ break;
498+    }
499+    if(tag != NULL)
500+        RENDER_VERBATIM(r, tag);
501+    return 0;
502+}
503+
504+static int
505+text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
506+{
507+    MD_HTML* r = (MD_HTML*) userdata;
508+    int in_image = (r->image_nesting_level != 0);   /* Inside <img alt="...">: breaks become " ". */
509+
510+    switch(type) {
511+        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(r, 0x0000, render_verbatim); break;
512+        case MD_TEXT_BR:
513+            RENDER_VERBATIM(r, (in_image ? " " : ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")));
514+            break;
515+        case MD_TEXT_SOFTBR:    RENDER_VERBATIM(r, (in_image ? " " : "\n")); break;
516+        case MD_TEXT_HTML:      render_verbatim(r, text, size); break;
517+        case MD_TEXT_ENTITY:    render_entity(r, text, size, render_html_escaped); break;
518+        default:                render_html_escaped(r, text, size); break;
519+    }
520+
521+    return 0;
522+}
523+
524+static void
525+debug_log_callback(const char* msg, void* userdata)
526+{
527+    /* Parser diagnostics go to stderr, but only when debugging is enabled. */
528+    if(((MD_HTML*) userdata)->flags & MD_HTML_FLAG_DEBUG)
529+        fprintf(stderr, "MD4C: %s\n", msg);
530+}
531+
532+int
533+md_html(const MD_CHAR* input, MD_SIZE input_size,
534+        void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
535+        void* userdata, unsigned parser_flags, unsigned renderer_flags)
536+{
537+    static const MD_CHAR utf8_bom[3] = { 0xef, 0xbb, 0xbf };
538+    MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
539+    MD_PARSER parser = {
540+        0,
541+        parser_flags,
542+        enter_block_callback,
543+        leave_block_callback,
544+        enter_span_callback,
545+        leave_span_callback,
546+        text_callback,
547+        debug_log_callback,
548+        NULL
549+    };
550+    int code;
551+
552+    /* Mark every byte value that must be escaped in HTML text or in URLs.
553+     * Note strchr() also matches the palette's terminating NUL, so byte 0
554+     * gets the HTML flag and does not get the URL flag. */
555+    for(code = 0; code < 256; code++) {
556+        unsigned char ch = (unsigned char) code;
557+        int url_safe = ISALNUM(ch)  ||  strchr("-_.+!*(),%#@?=;:/,+$", ch) != NULL;
558+
559+        if(strchr("\"&<>", ch) != NULL)
560+            render.escape_map[code] |= NEED_HTML_ESC_FLAG;
561+        if(!url_safe)
562+            render.escape_map[code] |= NEED_URL_ESC_FLAG;
563+    }
564+
565+    /* Skip a leading UTF-8 byte order mark (BOM) if the caller asked for it. */
566+    if((renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM)  &&  sizeof(MD_CHAR) == 1  &&
567+       input_size >= sizeof(utf8_bom)  &&  memcmp(input, utf8_bom, sizeof(utf8_bom)) == 0) {
568+        input += sizeof(utf8_bom);
569+        input_size -= sizeof(utf8_bom);
570+    }
571+
572+    return md_parse(input, input_size, &parser, (void*) &render);
573+}
574+

A · md4c-html.h +68, -0

 1@@ -0,0 +1,68 @@
 2+/*
 3+ * MD4C: Markdown parser for C
 4+ * (http://github.com/mity/md4c)
 5+ *
 6+ * Copyright (c) 2016-2017 Martin Mitas
 7+ *
 8+ * Permission is hereby granted, free of charge, to any person obtaining a
 9+ * copy of this software and associated documentation files (the "Software"),
10+ * to deal in the Software without restriction, including without limitation
11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12+ * and/or sell copies of the Software, and to permit persons to whom the
13+ * Software is furnished to do so, subject to the following conditions:
14+ *
15+ * The above copyright notice and this permission notice shall be included in
16+ * all copies or substantial portions of the Software.
17+ *
18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24+ * IN THE SOFTWARE.
25+ */
26+
27+#ifndef MD4C_HTML_H
28+#define MD4C_HTML_H
29+
30+#include "md4c.h"
31+
32+#ifdef __cplusplus
33+    extern "C" {
34+#endif
35+
36+
37+/* If set, debug output from md_parse() is sent to stderr. */
38+#define MD_HTML_FLAG_DEBUG                  0x0001
39+#define MD_HTML_FLAG_VERBATIM_ENTITIES      0x0002  /* Output entities verbatim, untranslated. */
40+#define MD_HTML_FLAG_SKIP_UTF8_BOM          0x0004  /* Skip a UTF-8 BOM at the start of input. */
41+#define MD_HTML_FLAG_XHTML                  0x0008  /* XHTML-style tags: "<br />", "<hr />". */
42+
43+
44+/* Render Markdown into HTML.
45+ *
46+ * Note only contents of <body> tag is generated. Caller must generate
47+ * HTML header/footer manually before/after calling md_html().
48+ *
49+ * Params input and input_size specify the Markdown input.
50+ * Callback process_output() gets called with chunks of HTML output.
51+ * (Typical implementation may just output the bytes to a file or append to
52+ * some buffer).
53+ * Param userdata is just propagated back to process_output() callback.
54+ * Param parser_flags are flags from md4c.h propagated to md_parse().
55+ * Param renderer_flags is bitmask of MD_HTML_FLAG_xxxx.
56+ *
57+ * Returns -1 on error (if md_parse() fails.)
58+ * Returns 0 on success.
59+ */
60+int md_html(const MD_CHAR* input, MD_SIZE input_size,
61+            void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
62+            void* userdata, unsigned parser_flags, unsigned renderer_flags);
63+
64+
65+#ifdef __cplusplus
66+    }  /* extern "C" { */
67+#endif
68+
69+#endif  /* MD4C_HTML_H */

A · md4c-html.o +0, -0

A · md4c.c +6348, -0

   1@@ -0,0 +1,6348 @@
   2+/*
   3+ * MD4C: Markdown parser for C
   4+ * (http://github.com/mity/md4c)
   5+ *
   6+ * Copyright (c) 2016-2020 Martin Mitas
   7+ *
   8+ * Permission is hereby granted, free of charge, to any person obtaining a
   9+ * copy of this software and associated documentation files (the "Software"),
  10+ * to deal in the Software without restriction, including without limitation
  11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12+ * and/or sell copies of the Software, and to permit persons to whom the
  13+ * Software is furnished to do so, subject to the following conditions:
  14+ *
  15+ * The above copyright notice and this permission notice shall be included in
  16+ * all copies or substantial portions of the Software.
  17+ *
  18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  24+ * IN THE SOFTWARE.
  25+ */
  26+
  27+#include "md4c.h"
  28+
  29+#include <limits.h>
  30+#include <stdio.h>
  31+#include <stdlib.h>
  32+#include <string.h>
  33+
  34+
  35+/*****************************
  36+ ***  Miscellaneous Stuff  ***
  37+ *****************************/
  38+
  39+#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
  40+    /* C89/90 or old compilers in general may not understand "inline". */
  41+    #if defined __GNUC__
  42+        #define inline __inline__
  43+    #elif defined _MSC_VER
  44+        #define inline __inline
  45+    #else
  46+        #define inline
  47+    #endif
  48+#endif
  49+
  50+/* Make the UTF-8 support the default. */
  51+#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
  52+    #define MD4C_USE_UTF8
  53+#endif
  54+
  55+/* Magic for making wide literals with MD4C_USE_UTF16. */
  56+#ifdef _T
  57+    #undef _T
  58+#endif
  59+#if defined MD4C_USE_UTF16
  60+    #define _T(x)           L##x
  61+#else
  62+    #define _T(x)           x
  63+#endif
  64+
  65+/* Misc. macros. */
  66+#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof(a[0]))
  67+
  68+#define STRINGIZE_(x)       #x
  69+#define STRINGIZE(x)        STRINGIZE_(x)
  70+
  71+#ifndef TRUE
  72+    #define TRUE            1
  73+    #define FALSE           0
  74+#endif
  75+/* Pass msg to the debug_log() callback if the parser has one (uses ctx). */
  76+#define MD_LOG(msg)                                                     \
  77+    do {                                                                \
  78+        if(ctx->parser.debug_log != NULL)                               \
  79+            ctx->parser.debug_log((msg), ctx->userdata);                \
  80+    } while(0)
  81+/* With DEBUG: log a failed assertion and exit(1). Otherwise: an optimizer hint or a no-op. */
  82+#ifdef DEBUG
  83+    #define MD_ASSERT(cond)                                             \
  84+            do {                                                        \
  85+                if(!(cond)) {                                           \
  86+                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
  87+                           "Assertion '" STRINGIZE(cond) "' failed.");  \
  88+                    exit(1);                                            \
  89+                }                                                       \
  90+            } while(0)
  91+
  92+    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
  93+#else
  94+    #ifdef __GNUC__
  95+        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
  96+        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
  97+    #elif defined _MSC_VER  &&  _MSC_VER > 120
  98+        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
  99+        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
 100+    #else
 101+        #define MD_ASSERT(cond)     do {} while(0)
 102+        #define MD_UNREACHABLE()    do {} while(0)
 103+    #endif
 104+#endif
 105+
 106+/* For falling through case labels in switch statements. */
 107+#if defined __clang__ && __clang_major__ >= 12
 108+    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
 109+#elif defined __GNUC__ && __GNUC__ >= 7
 110+    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
 111+#else
 112+    #define MD_FALLTHROUGH()        ((void)0)
 113+#endif
 114+
 115+/* Suppress "unused parameter" warnings. */
 116+#define MD_UNUSED(x)                ((void)x)
 117+
 118+
 119+/************************
 120+ ***  Internal Types  ***
 121+ ************************/
 122+
 123+/* These are omnipresent so lets save some typing. */
 124+#define CHAR    MD_CHAR
 125+#define SZ      MD_SIZE
 126+#define OFF     MD_OFFSET
 127+
 128+typedef struct MD_MARK_tag MD_MARK;
 129+typedef struct MD_BLOCK_tag MD_BLOCK;
 130+typedef struct MD_CONTAINER_tag MD_CONTAINER;
 131+typedef struct MD_REF_DEF_tag MD_REF_DEF;
 132+
 133+
 134+/* During analysis of inline marks, we need to manage some "mark chains",
 135+ * of (yet unresolved) openers. This structure holds start/end of the chain.
 136+ * The chain internals are then realized through MD_MARK::prev and ::next.
 137+ */
 138+typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
 139+struct MD_MARKCHAIN_tag {
 140+    int head;   /* Index of first mark in the chain, or -1 if empty. */
 141+    int tail;   /* Index of last mark in the chain, or -1 if empty. */
 142+};
 143+
 144+/* Context propagated through all the parsing. */
 145+typedef struct MD_CTX_tag MD_CTX;
 146+struct MD_CTX_tag {
 147+    /* Immutable stuff (parameters of md_parse()). */
 148+    const CHAR* text;
 149+    SZ size;
 150+    MD_PARSER parser;
 151+    void* userdata;
 152+
 153+    /* When this is true, it allows some optimizations. */
 154+    int doc_ends_with_newline;
 155+
 156+    /* Helper temporary growing buffer. */
 157+    CHAR* buffer;
 158+    unsigned alloc_buffer;
 159+
 160+    /* Reference definitions. */
 161+    MD_REF_DEF* ref_defs;
 162+    int n_ref_defs;
 163+    int alloc_ref_defs;
 164+    void** ref_def_hashtable;
 165+    int ref_def_hashtable_size;
 166+
 167+    /* Stack of inline/span markers.
 168+     * This is only used for parsing a single block contents but by storing it
 169+     * here we may reuse the stack for subsequent blocks; i.e. we have fewer
 170+     * (re)allocations. */
 171+    MD_MARK* marks;
 172+    int n_marks;
 173+    int alloc_marks;
 174+
 175+#if defined MD4C_USE_UTF16
 176+    char mark_char_map[128];
 177+#else
 178+    char mark_char_map[256];
 179+#endif
 180+
 181+    /* For resolving of inline spans. */
 182+    MD_MARKCHAIN mark_chains[13];
 183+#define PTR_CHAIN                               (ctx->mark_chains[0])
 184+#define TABLECELLBOUNDARIES                     (ctx->mark_chains[1])
 185+#define ASTERISK_OPENERS_extraword_mod3_0       (ctx->mark_chains[2])
 186+#define ASTERISK_OPENERS_extraword_mod3_1       (ctx->mark_chains[3])
 187+#define ASTERISK_OPENERS_extraword_mod3_2       (ctx->mark_chains[4])
 188+#define ASTERISK_OPENERS_intraword_mod3_0       (ctx->mark_chains[5])
 189+#define ASTERISK_OPENERS_intraword_mod3_1       (ctx->mark_chains[6])
 190+#define ASTERISK_OPENERS_intraword_mod3_2       (ctx->mark_chains[7])
 191+#define UNDERSCORE_OPENERS                      (ctx->mark_chains[8])
 192+#define TILDE_OPENERS_1                         (ctx->mark_chains[9])
 193+#define TILDE_OPENERS_2                         (ctx->mark_chains[10])
 194+#define BRACKET_OPENERS                         (ctx->mark_chains[11])
 195+#define DOLLAR_OPENERS                          (ctx->mark_chains[12])
 196+#define OPENERS_CHAIN_FIRST                     2
 197+#define OPENERS_CHAIN_LAST                      12
 198+
 199+    int n_table_cell_boundaries;
 200+
 201+    /* For resolving links. */
 202+    int unresolved_link_head;
 203+    int unresolved_link_tail;
 204+
 205+    /* For resolving raw HTML. */
 206+    OFF html_comment_horizon;
 207+    OFF html_proc_instr_horizon;
 208+    OFF html_decl_horizon;
 209+    OFF html_cdata_horizon;
 210+
 211+    /* For block analysis.
 212+     * Notes:
 213+     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
 214+     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
 215+     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
 216+     *      instead of MD_LINE(s).
 217+     */
 218+    void* block_bytes;
 219+    MD_BLOCK* current_block;
 220+    int n_block_bytes;
 221+    int alloc_block_bytes;
 222+
 223+    /* For container block analysis. */
 224+    MD_CONTAINER* containers;
 225+    int n_containers;
 226+    int alloc_containers;
 227+
 228+    /* Minimal indentation to call the block "indented code block". */
 229+    unsigned code_indent_offset;
 230+
 231+    /* Contextual info for line analysis. */
 232+    SZ code_fence_length;   /* For checking closing fence length. */
 233+    int html_block_type;    /* For checking closing raw HTML condition. */
 234+    int last_line_has_list_loosening_effect;
 235+    int last_list_item_starts_with_two_blank_lines;
 236+};
 237+
 238+enum MD_LINETYPE_tag {
 239+    MD_LINE_BLANK,
 240+    MD_LINE_HR,
 241+    MD_LINE_ATXHEADER,
 242+    MD_LINE_SETEXTHEADER,
 243+    MD_LINE_SETEXTUNDERLINE,
 244+    MD_LINE_INDENTEDCODE,
 245+    MD_LINE_FENCEDCODE,
 246+    MD_LINE_HTML,
 247+    MD_LINE_TEXT,
 248+    MD_LINE_TABLE,
 249+    MD_LINE_TABLEUNDERLINE
 250+};
 251+typedef enum MD_LINETYPE_tag MD_LINETYPE;
 252+
 253+typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
 254+struct MD_LINE_ANALYSIS_tag {
 255+    MD_LINETYPE type    : 16;
 256+    unsigned data       : 16;
 257+    OFF beg;
 258+    OFF end;
 259+    unsigned indent;        /* Indentation level. */
 260+};
 261+
 262+typedef struct MD_LINE_tag MD_LINE;
 263+struct MD_LINE_tag {
 264+    OFF beg;
 265+    OFF end;
 266+};
 267+
 268+typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
 269+struct MD_VERBATIMLINE_tag {
 270+    OFF beg;
 271+    OFF end;
 272+    OFF indent;
 273+};
 274+
 275+
 276+/*****************
 277+ ***  Helpers  ***
 278+ *****************/
 279+
 280+/* Character accessors. */
 281+#define CH(off)                 (ctx->text[(off)])
 282+#define STR(off)                (ctx->text + (off))
 283+
 284+/* Character classification.
 285+ * Note we assume ASCII compatibility of code points < 128 here. */
 286+#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
 287+#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
 288+#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
 289+#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
 290+#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
 291+#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
 292+#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
 293+#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
 294+#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
 295+#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
 296+#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
 297+#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
 298+#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
 299+#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
 300+#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
 301+#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))
 302+
 303+#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
 304+#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
 305+#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
 306+#define ISASCII(off)                    ISASCII_(CH(off))
 307+#define ISBLANK(off)                    ISBLANK_(CH(off))
 308+#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
 309+#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
 310+#define ISCNTRL(off)                    ISCNTRL_(CH(off))
 311+#define ISPUNCT(off)                    ISPUNCT_(CH(off))
 312+#define ISUPPER(off)                    ISUPPER_(CH(off))
 313+#define ISLOWER(off)                    ISLOWER_(CH(off))
 314+#define ISALPHA(off)                    ISALPHA_(CH(off))
 315+#define ISDIGIT(off)                    ISDIGIT_(CH(off))
 316+#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
 317+#define ISALNUM(off)                    ISALNUM_(CH(off))
 318+
 319+
 320+#if defined MD4C_USE_UTF16
 321+    #define md_strchr wcschr
 322+#else
 323+    #define md_strchr strchr
 324+#endif
 325+
 326+
 327+/* Compare n characters of s1 and s2 ignoring ASCII letter case; TRUE if equal. */
 328+static inline int
 329+md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
 330+{
 331+    OFF pos = 0;
 332+
 333+    while(pos < n) {
 334+        /* Fold a-z to A-Z on both sides before comparing. */
 335+        CHAR a = s1[pos];
 336+        CHAR b = s2[pos];
 337+        if(ISLOWER_(a))  a += ('A'-'a');
 338+        if(ISLOWER_(b))  b += ('A'-'a');
 339+        if(a != b)
 340+            return FALSE;
 341+        pos++;
 342+    }
 343+    return TRUE;
 344+}
 345+
 346+static inline int
 347+md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
 348+{
 349+    return !memcmp(s1, s2, sizeof(CHAR) * n);   /* Non-zero iff all n characters match. */
 350+}
 351+
 352+static int
 353+md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
 354+{
 355+    /* Pass str to the text() callback in runs free of NUL characters; each NUL
 356+     * is reported on its own as MD_TEXT_NULLCHAR. Returns 0 or the callback's error. */
 357+    while(size > 0) {
 358+        OFF run = 0;
 359+        int ret;
 360+
 361+        while(run < size  &&  str[run] != _T('\0'))
 362+            run++;
 363+        if(run > 0) {
 364+            ret = ctx->parser.text(type, str, run, ctx->userdata);
 365+            if(ret != 0)
 366+                return ret;
 367+            str += run;
 368+            size -= run;
 369+        }
 370+        if(size == 0)
 371+            break;
 372+
 373+        /* str[0] is a NUL: report it and step over it, keeping it out of the next run. */
 374+        ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
 375+        if(ret != 0)
 376+            return ret;
 377+        str++;
 378+        size--;
 379+    }
 380+    return 0;
 381+}
 381+
 382+/* Evaluate func into ret; a negative result jumps to the abort label. */
 383+#define MD_CHECK(func)                                                      \
 384+    do {                                                                    \
 385+        ret = (func);                                                       \
 386+        if(ret < 0)                                                         \
 387+            goto abort;                                                     \
 388+    } while(0)
 389+
 390+/* Make ctx->buffer at least sz bytes large; on failure set ret = -1 and goto abort. */
 391+#define MD_TEMP_BUFFER(sz)                                                  \
 392+    do {                                                                    \
 393+        if((sz) > ctx->alloc_buffer) {                                      \
 394+            CHAR* new_buffer;                                               \
 395+            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
 396+                                                                            \
 397+            new_buffer = realloc(ctx->buffer, new_size);                    \
 398+            if(new_buffer == NULL) {                                        \
 399+                MD_LOG("realloc() failed.");                                \
 400+                ret = -1;                                                   \
 401+                goto abort;                                                 \
 402+            }                                                               \
 403+                                                                            \
 404+            ctx->buffer = new_buffer;                                       \
 405+            ctx->alloc_buffer = new_size;                                   \
 406+        }                                                                   \
 407+    } while(0)
 408+/* Callback helpers: each stores the callback's result in the caller's 'ret'; if non-zero, logs and jumps to 'abort'. */
 409+
 410+#define MD_ENTER_BLOCK(type, arg)                                           \
 411+    do {                                                                    \
 412+        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
 413+        if(ret != 0) {                                                      \
 414+            MD_LOG("Aborted from enter_block() callback.");                 \
 415+            goto abort;                                                     \
 416+        }                                                                   \
 417+    } while(0)
 418+
 419+#define MD_LEAVE_BLOCK(type, arg)                                           \
 420+    do {                                                                    \
 421+        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
 422+        if(ret != 0) {                                                      \
 423+            MD_LOG("Aborted from leave_block() callback.");                 \
 424+            goto abort;                                                     \
 425+        }                                                                   \
 426+    } while(0)
 427+
 428+#define MD_ENTER_SPAN(type, arg)                                            \
 429+    do {                                                                    \
 430+        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
 431+        if(ret != 0) {                                                      \
 432+            MD_LOG("Aborted from enter_span() callback.");                  \
 433+            goto abort;                                                     \
 434+        }                                                                   \
 435+    } while(0)
 436+
 437+#define MD_LEAVE_SPAN(type, arg)                                            \
 438+    do {                                                                    \
 439+        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
 440+        if(ret != 0) {                                                      \
 441+            MD_LOG("Aborted from leave_span() callback.");                  \
 442+            goto abort;                                                     \
 443+        }                                                                   \
 444+    } while(0)
 445+/* Call the text() callback for a non-empty string; a non-zero result is logged and jumps to the caller's 'abort'. */
 446+#define MD_TEXT(type, str, size)                                            \
 447+    do {                                                                    \
 448+        if((size) > 0) {                                                    \
 449+            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
 450+            if(ret != 0) {                                                  \
 451+                MD_LOG("Aborted from text() callback.");                    \
 452+                goto abort;                                                 \
 453+            }                                                               \
 454+        }                                                                   \
 455+    } while(0)
 456+/* Like MD_TEXT(), but routes the string through md_text_with_null_replacement() so NULs are reported separately. */
 457+#define MD_TEXT_INSECURE(type, str, size)                                   \
 458+    do {                                                                    \
 459+        if((size) > 0) {                                                    \
 460+            ret = md_text_with_null_replacement(ctx, (type), (str), (size));\
 461+            if(ret != 0) {                                                  \
 462+                MD_LOG("Aborted from text() callback.");                    \
 463+                goto abort;                                                 \
 464+            }                                                               \
 465+        }                                                                   \
 466+    } while(0)
 467+
 468+
 469+
 470+/*************************
 471+ ***  Unicode Support  ***
 472+ *************************/
 473+/* Result of case-folding one codepoint, as filled in by md_get_unicode_fold_info(). */
 474+typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
 475+struct MD_UNICODE_FOLD_INFO_tag {
 476+    unsigned codepoints[3];     /* Folded codepoints; only the first n_codepoints are set. */
 477+    unsigned n_codepoints;      /* Number of valid entries in codepoints[] (1 to 3). */
 478+};
 479+
 480+
 481+#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
 482+    /* Binary search over a sorted "map" of codepoints. A run of consecutive
 483+     * codepoints may be stored as a pair of adjacent entries:
 484+     * (MIN_CODEPOINT | 0x40000000) followed by (MAX_CODEPOINT | 0x80000000).
 485+     *
 486+     * Returns the index of the matching record in the map (for a range, the
 487+     * index of its MIN entry); or -1 if the codepoint is not in the map. */
 488+    static int
 489+    md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
 490+    {
 491+        int beg, end;
 492+        int pivot_beg, pivot_end;
 493+
 494+        beg = 0;
 495+        end = (int) map_size-1;
 496+        while(beg <= end) {
 497+            /* Pivot may be a range, not just a single value. */
 498+            pivot_beg = pivot_end = (beg + end) / 2;
 499+            if(map[pivot_end] & 0x40000000)     /* Landed on a range MIN: its MAX is the next entry. */
 500+                pivot_end++;
 501+            if(map[pivot_beg] & 0x80000000)     /* Landed on a range MAX: its MIN is the previous entry. */
 502+                pivot_beg--;
 503+
 504+            if(codepoint < (map[pivot_beg] & 0x00ffffff))       /* Mask strips the range flag bits. */
 505+                end = pivot_beg - 1;
 506+            else if(codepoint > (map[pivot_end] & 0x00ffffff))
 507+                beg = pivot_end + 1;
 508+            else
 509+                return pivot_beg;
 510+        }
 511+
 512+        return -1;
 513+    }
 514+    /* Non-zero iff 'codepoint' is whitespace: ISWHITESPACE_() decides for ASCII, WHITESPACE_MAP for the rest. */
 515+    static int
 516+    md_is_unicode_whitespace__(unsigned codepoint)
 517+    {
 518+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 519+#define S(cp)               (cp)
 520+        /* Unicode "Zs" category.
 521+         * (generated by scripts/build_whitespace_map.py) */
 522+        static const unsigned WHITESPACE_MAP[] = {
 523+            S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
 524+        };
 525+#undef R
 526+#undef S
 527+
 528+        /* The ASCII ones are the most frequently used ones, also CommonMark
 529+         * specification requests few more in this range. */
 530+        if(codepoint <= 0x7f)
 531+            return ISWHITESPACE_(codepoint);
 532+
 533+        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);
 534+    }
 535+    /* Non-zero iff 'codepoint' is punctuation: ISPUNCT_() decides for ASCII, PUNCT_MAP for the rest. */
 536+    static int
 537+    md_is_unicode_punct__(unsigned codepoint)
 538+    {
 539+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 540+#define S(cp)               (cp)
 541+        /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
 542+         * (generated by scripts/build_punct_map.py) */
 543+        static const unsigned PUNCT_MAP[] = {
 544+            R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
 545+            R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
 546+            S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
 547+            S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
 548+            R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
 549+            R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
 550+            R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
 551+            R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
 552+            R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
 553+            R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
 554+            R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
 555+            R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
 556+            R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
 557+            R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
 558+            R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
 559+            S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
 560+            R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
 561+            S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
 562+            S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
 563+            R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
 564+            R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
 565+            S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
 566+            R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
 567+            R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
 568+            R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
 569+            R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
 570+            R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
 571+            R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
 572+            S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
 573+            R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
 574+        };
 575+#undef R
 576+#undef S
 577+
 578+        /* The ASCII ones are the most frequently used ones, also CommonMark
 579+         * specification requests few more in this range. */
 580+        if(codepoint <= 0x7f)
 581+            return ISPUNCT_(codepoint);
 582+
 583+        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
 584+    }
 585+    /* Store the case-folded form of 'codepoint' in *info (1 to 3 codepoints); unmapped codepoints fold to themselves. */
 586+    static void
 587+    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
 588+    {
 589+#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 590+#define S(cp)               (cp)
 591+        /* Case-folding tables. FOLD_MAP_n lists the codepoints (or ranges) that fold
 592+         * to n codepoints; FOLD_MAP_n_DATA holds the fold targets, n per map entry. */
 593+        static const unsigned FOLD_MAP_1[] = {
 594+            R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
 595+            R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
 596+            S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
 597+            S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
 598+            R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
 599+            S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
 600+            S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
 601+            R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
 602+            S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
 603+            S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
 604+            S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
 605+            S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
 606+            R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
 607+            R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
 608+            S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
 609+            R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
 610+            R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
 611+            R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
 612+            S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
 613+            S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
 614+            R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
 615+            S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
 616+            S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
 617+            S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
 618+            R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
 619+            S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
 620+            R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
 621+            R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
 622+        };
 623+        static const unsigned FOLD_MAP_1_DATA[] = {
 624+            0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
 625+            0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
 626+            0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
 627+            0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
 628+            0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
 629+            0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
 630+            0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
 631+            0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
 632+            0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
 633+            0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
 634+            0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
 635+            0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
 636+            0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
 637+            0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
 638+            0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
 639+            0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
 640+            0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
 641+            0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
 642+            0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
 643+            0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
 644+            0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
 645+            0x1e943
 646+        };
 647+        static const unsigned FOLD_MAP_2[] = {
 648+            S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
 649+            S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
 650+            R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
 651+            S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
 652+            S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
 653+            S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
 654+        };
 655+        static const unsigned FOLD_MAP_2_DATA[] = {
 656+            0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
 657+            0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
 658+            0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
 659+            0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
 660+            0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
 661+            0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
 662+            0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
 663+            0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
 664+        };
 665+        static const unsigned FOLD_MAP_3[] = {
 666+            S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
 667+            S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
 668+        };
 669+        static const unsigned FOLD_MAP_3_DATA[] = {
 670+            0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
 671+            0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
 672+            0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
 673+            0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
 674+        };
 675+#undef R
 676+#undef S
 677+        static const struct {
 678+            const unsigned* map;
 679+            const unsigned* data;
 680+            size_t map_size;
 681+            unsigned n_codepoints;
 682+        } FOLD_MAP_LIST[] = {
 683+            { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
 684+            { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
 685+            { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
 686+        };
 687+
 688+        int i;
 689+
 690+        /* Fast path for ASCII characters. */
 691+        if(codepoint <= 0x7f) {
 692+            info->codepoints[0] = codepoint;
 693+            if(ISUPPER_(codepoint))
 694+                info->codepoints[0] += 'a' - 'A';
 695+            info->n_codepoints = 1;
 696+            return;
 697+        }
 698+
 699+        /* Try to locate the codepoint in any of the maps. */
 700+        for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
 701+            int index;
 702+
 703+            index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
 704+            if(index >= 0) {
 705+                /* Found the mapping. */
 706+                unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
 707+                const unsigned* map = FOLD_MAP_LIST[i].map;
 708+                const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);   /* Data is indexed by map entry. */
 709+
 710+                memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
 711+                info->n_codepoints = n_codepoints;
 712+
 713+                if(FOLD_MAP_LIST[i].map[index] != codepoint) {   /* A range MIN entry carries a flag bit, so it never compares equal. */
 714+                    /* The found mapping maps whole range of codepoints,
 715+                     * i.e. we have to offset info->codepoints[0] accordingly. */
 716+                    if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
 717+                        /* Alternating type of the range. */
 718+                        info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
 719+                    } else {
 720+                        /* Range to range kind of mapping. */
 721+                        info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
 722+                    }
 723+                }
 724+
 725+                return;
 726+            }
 727+        }
 728+
 729+        /* No mapping found. Map the codepoint to itself. */
 730+        info->codepoints[0] = codepoint;
 731+        info->n_codepoints = 1;
 732+    }
 733+#endif
 734+
 735+
 736+#if defined MD4C_USE_UTF16
 737+    #define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
 738+    #define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
 739+    #define UTF16_DECODE_SURROGATE(hi, lo)  (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
 740+    /* Decode the codepoint at str[0]; *p_size (if non-NULL) receives 2 for a surrogate pair, else 1. */
 741+    static unsigned
 742+    md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
 743+    {
 744+        if(IS_UTF16_SURROGATE_HI(str[0])) {
 745+            if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
 746+                if(p_size != NULL)
 747+                    *p_size = 2;
 748+                return UTF16_DECODE_SURROGATE(str[0], str[1]);
 749+            }
 750+        }
 751+
 752+        if(p_size != NULL)      /* Not a complete pair: return the single unit as is. */
 753+            *p_size = 1;
 754+        return str[0];
 755+    }
 756+    /* Decode the codepoint that ends just before 'off'. NOTE(review): reads CH(off-1), so callers presumably pass off > 0. */
 757+    static unsigned
 758+    md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
 759+    {
 760+        if(off > 1 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))   /* A pair needs two preceding units. */
 761+            return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
 762+
 763+        return CH(off-1);   /* No surrogate pair: the single unit preceding 'off'. */
 764+    }
 765+
 766+    /* No whitespace uses surrogates, so no decoding needed here. */
 767+    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
 768+    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
 769+    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))
 770+
 771+    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
 772+    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
 773+    /* Decode the UTF-16 codepoint starting at str[off]; its length in CHARs goes to *p_char_size (if non-NULL). */
 774+    static inline int
 775+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
 776+    {
 777+        return md_decode_utf16le__(&str[off], str_size - off, p_char_size);
 778+    }
 779+#elif defined MD4C_USE_UTF8
 780+    #define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
 781+    #define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
 782+    #define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
 783+    #define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
 784+    #define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
 785+    /* Decode the UTF-8 sequence at str[0]; *p_size (if non-NULL) receives its length. Malformed input yields str[0], length 1. */
 786+    static unsigned
 787+    md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
 788+    {
 789+        if(!IS_UTF8_LEAD1(str[0])) {
 790+            if(IS_UTF8_LEAD2(str[0])) {
 791+                if(1 < str_size && IS_UTF8_TAIL(str[1])) {
 792+                    if(p_size != NULL)
 793+                        *p_size = 2;
 794+
 795+                    return (((unsigned int)str[0] & 0x1f) << 6) |
 796+                           (((unsigned int)str[1] & 0x3f) << 0);
 797+                }
 798+            } else if(IS_UTF8_LEAD3(str[0])) {
 799+                if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
 800+                    if(p_size != NULL)
 801+                        *p_size = 3;
 802+
 803+                    return (((unsigned int)str[0] & 0x0f) << 12) |
 804+                           (((unsigned int)str[1] & 0x3f) << 6) |
 805+                           (((unsigned int)str[2] & 0x3f) << 0);
 806+                }
 807+            } else if(IS_UTF8_LEAD4(str[0])) {
 808+                if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
 809+                    if(p_size != NULL)
 810+                        *p_size = 4;
 811+
 812+                    return (((unsigned int)str[0] & 0x07) << 18) |
 813+                           (((unsigned int)str[1] & 0x3f) << 12) |
 814+                           (((unsigned int)str[2] & 0x3f) << 6) |
 815+                           (((unsigned int)str[3] & 0x3f) << 0);
 816+                }
 817+            }
 818+        }
 819+
 820+        if(p_size != NULL)      /* ASCII, or a malformed/truncated sequence: one unit, returned as is. */
 821+            *p_size = 1;
 822+        return (unsigned) str[0];
 823+    }
 824+    /* Decode the codepoint ending just before 'off', trying 2-, 3- and 4-byte sequences; otherwise returns CH(off-1). */
 825+    static unsigned
 826+    md_decode_utf8_before__(MD_CTX* ctx, OFF off)
 827+    {
 828+        if(!IS_UTF8_LEAD1(CH(off-1))) {     /* NOTE(review): reads CH(off-1), so callers presumably pass off > 0. */
 829+            if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 830+                return (((unsigned int)CH(off-2) & 0x1f) << 6) |
 831+                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 832+
 833+            if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 834+                return (((unsigned int)CH(off-3) & 0x0f) << 12) |
 835+                       (((unsigned int)CH(off-2) & 0x3f) << 6) |
 836+                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 837+
 838+            if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 839+                return (((unsigned int)CH(off-4) & 0x07) << 18) |
 840+                       (((unsigned int)CH(off-3) & 0x3f) << 12) |
 841+                       (((unsigned int)CH(off-2) & 0x3f) << 6) |
 842+                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 843+        }
 844+
 845+        return (unsigned) CH(off-1);
 846+    }
 847+
 848+    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
 849+    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
 850+    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
 851+
 852+    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
 853+    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
 854+    /* Decode the UTF-8 codepoint starting at str[off]; its length in CHARs goes to *p_char_size (if non-NULL). */
 855+    static inline unsigned
 856+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
 857+    {
 858+        return md_decode_utf8__(&str[off], str_size - off, p_char_size);
 859+    }
 860+#else
 861+    #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
 862+    #define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
 863+    #define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)
 864+
 865+    #define ISUNICODEPUNCT(off)             ISPUNCT(off)
 866+    #define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
 867+    /* Build without MD4C_USE_UTF16/MD4C_USE_UTF8: only upper-case characters (per ISUPPER_) are folded. */
 868+    static inline void
 869+    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
 870+    {
 871+        /* The fold is always a single codepoint: the lower-case form, or the input itself. */
 872+        info->n_codepoints = 1;
 873+        info->codepoints[0] = codepoint;
 874+        if(ISUPPER_(codepoint)) info->codepoints[0] = codepoint + ('a' - 'A');
 875+    }
 876+    /* Build without MD4C_USE_UTF16/MD4C_USE_UTF8: each CHAR is its own codepoint; str_size is not consulted. */
 877+    static inline unsigned
 878+    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
 879+    {
 880+        *p_size = 1;
 881+        return (unsigned) *(str + off);
 882+    }
 883+#endif
 884+
 885+
 886+/*************************************
 887+ ***  Helper string manipulations  ***
 888+ *************************************/
 889+
 890+/* Fill 'buffer' with a copy of the text between 'beg' and 'end', replacing every
 891+ * line break with the given replacement character. The number of CHARs written
 892+ * is stored in *p_size.
 893+ *
 894+ * NOTE: The caller is responsible for making sure the buffer is large enough.
 895+ * ((end - beg) CHARs suffice, assuming each line break spans at least one CHAR.)
 896+ */
 897+static void
 898+md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
 899+               CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
 900+{
 901+    CHAR* ptr = buffer;
 902+    int line_index = 0;
 903+    OFF off = beg;
 904+
 905+    MD_UNUSED(n_lines);     /* Not consulted: the loop stops only when 'off' reaches 'end'. */
 906+
 907+    while(1) {
 908+        const MD_LINE* line = &lines[line_index];
 909+        OFF line_end = line->end;
 910+        if(end < line_end)      /* Never copy past the requested end. */
 911+            line_end = end;
 912+
 913+        while(off < line_end) {
 914+            *ptr = CH(off);
 915+            ptr++;
 916+            off++;
 917+        }
 918+
 919+        if(off >= end) {
 920+            *p_size = ptr - buffer;
 921+            return;
 922+        }
 923+
 924+        *ptr = line_break_replacement_char;
 925+        ptr++;
 926+
 927+        line_index++;
 928+        off = lines[line_index].beg;    /* Continue at the start of the next line. */
 929+    }
 930+}
 931+
 932+/* Wrapper of md_merge_lines() which malloc()s the output buffer. On success returns 0 and stores
 933+ * the buffer in *p_str (ownership passes to the caller); returns -1 if the allocation fails. */
 934+static int
 935+md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
 936+                    CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
 937+{
 938+    CHAR* buffer;
 939+    /* Request at least one CHAR: malloc(0) may legally return NULL, which is not an out-of-memory condition. */
 940+    buffer = (CHAR*) malloc(sizeof(CHAR) * ((end > beg) ? (end - beg) : 1));
 941+    if(buffer == NULL) {
 942+        MD_LOG("malloc() failed.");
 943+        return -1;
 944+    }
 945+
 946+    md_merge_lines(ctx, beg, end, lines, n_lines,
 947+                line_break_replacement_char, buffer, p_size);
 948+
 949+    *p_str = buffer;
 950+    return 0;
 951+}
 952+
 953+static OFF
 954+md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
 955+{
 956+    SZ char_size;
 957+    unsigned codepoint;
 958+
 959+    while(off < size) {
 960+        codepoint = md_decode_unicode(label, off, size, &char_size);
 961+        if(!ISUNICODEWHITESPACE_(codepoint)  &&  !ISNEWLINE_(label[off]))
 962+            break;
 963+        off += char_size;
 964+    }
 965+
 966+    return off;
 967+}
 968+
 969+
 970+/******************************
 971+ ***  Recognizing raw HTML  ***
 972+ ******************************/
 973+
/* Check whether an HTML open/close tag ("<name ...>", "<name .../>" or
 * "</name>") starts at offset 'beg', which must hold '<'.
 *
 * md_is_html_tag() may be called when processing inlines (inline raw HTML)
 * or when breaking document to blocks (checking for start of HTML block type 7).
 *
 * When breaking document to blocks, we do not yet know line boundaries, but
 * in that case the whole tag has to live on a single line. We distinguish this
 * by n_lines == 0.
 *
 * Returns TRUE and stores the offset just past the closing '>' to '*p_end'
 * when a complete tag is recognized and its '>' lies before 'max_end';
 * returns FALSE otherwise ('*p_end' is then not written).
 */
static int
md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    int attr_state;
    OFF off = beg;
    /* With no line info available, the scan is limited only by the document size. */
    OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
    int i = 0;

    MD_ASSERT(CH(beg) == _T('<'));

    if(off + 1 >= line_end)
        return FALSE;
    off++;

    /* For parsing attributes, we need a little state automaton below.
     * State -1: no attributes are allowed.
     * State 0: attribute could follow after some whitespace.
     * State 1: after a whitespace (attribute name may follow).
     * State 2: after attribute name ('=' MAY follow).
     * State 3: after '=' (value specification MUST follow).
     * State 41: in middle of unquoted attribute value.
     * State 42: in middle of single-quoted attribute value.
     * State 43: in middle of double-quoted attribute value.
     */
    attr_state = 0;

    if(CH(off) == _T('/')) {
        /* Closer tag "</ ... >". No attributes may be present. */
        attr_state = -1;
        off++;
    }

    /* Tag name */
    if(off >= line_end  ||  !ISALPHA(off))
        return FALSE;
    off++;
    while(off < line_end  &&  (ISALNUM(off)  ||  CH(off) == _T('-')))
        off++;

    /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
     * and final '>'. */
    while(1) {
        /* Consume the rest of the current line, one state transition per step. */
        while(off < line_end  &&  !ISNEWLINE(off)) {
            if(attr_state > 40) {
                /* Inside an attribute value: look only for its terminator. */
                if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
                    attr_state = 0;
                    off--;  /* Put the char back for re-inspection in the new state. */
                } else if(attr_state == 42 && CH(off) == _T('\'')) {
                    attr_state = 0;
                } else if(attr_state == 43 && CH(off) == _T('"')) {
                    attr_state = 0;
                }
                off++;
            } else if(ISWHITESPACE(off)) {
                if(attr_state == 0)
                    attr_state = 1;
                off++;
            } else if(attr_state <= 2 && CH(off) == _T('>')) {
                /* End. */
                goto done;
            } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
                /* End with digraph '/>' */
                off++;
                goto done;
            } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
                off++;
                /* Attribute name */
                while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
                    off++;
                attr_state = 2;
            } else if(attr_state == 2 && CH(off) == _T('=')) {
                /* Attribute assignment sign */
                off++;
                attr_state = 3;
            } else if(attr_state == 3) {
                /* Expecting start of attribute value. */
                if(CH(off) == _T('"'))
                    attr_state = 43;
                else if(CH(off) == _T('\''))
                    attr_state = 42;
                else if(!ISANYOF(off, _T("\"'=<>`"))  &&  !ISNEWLINE(off))
                    attr_state = 41;
                else
                    return FALSE;
                off++;
            } else {
                /* Anything unexpected. */
                return FALSE;
            }
        }

        /* We have to be on a single line. See definition of start condition
         * of HTML block, type 7. */
        if(n_lines == 0)
            return FALSE;

        /* The tag continues on the next line, if there is one. */
        i++;
        if(i >= n_lines)
            return FALSE;

        off = lines[i].beg;
        line_end = lines[i].end;

        /* The line break acts as whitespace: it ends an unquoted value and
         * allows an attribute name to follow. */
        if(attr_state == 0  ||  attr_state == 41)
            attr_state = 1;

        if(off >= max_end)
            return FALSE;
    }

done:
    /* Here 'off' points at the closing '>'; it has to lie before 'max_end'. */
    if(off >= max_end)
        return FALSE;

    *p_end = off+1;
    return TRUE;
}
1098+
1099+static int
1100+md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1101+                        const MD_LINE* lines, int n_lines,
1102+                        OFF beg, OFF max_end, OFF* p_end,
1103+                        OFF* p_scan_horizon)
1104+{
1105+    OFF off = beg;
1106+    int i = 0;
1107+
1108+    if(off < *p_scan_horizon  &&  *p_scan_horizon >= max_end - len) {
1109+        /* We have already scanned the range up to the max_end so we know
1110+         * there is nothing to see. */
1111+        return FALSE;
1112+    }
1113+
1114+    while(TRUE) {
1115+        while(off + len <= lines[i].end  &&  off + len <= max_end) {
1116+            if(md_ascii_eq(STR(off), str, len)) {
1117+                /* Success. */
1118+                *p_end = off + len;
1119+                return TRUE;
1120+            }
1121+            off++;
1122+        }
1123+
1124+        i++;
1125+        if(off >= max_end  ||  i >= n_lines) {
1126+            /* Failure. */
1127+            *p_scan_horizon = off;
1128+            return FALSE;
1129+        }
1130+
1131+        off = lines[i].beg;
1132+    }
1133+}
1134+
1135+static int
1136+md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1137+{
1138+    OFF off = beg;
1139+
1140+    MD_ASSERT(CH(beg) == _T('<'));
1141+
1142+    if(off + 4 >= lines[0].end)
1143+        return FALSE;
1144+    if(CH(off+1) != _T('!')  ||  CH(off+2) != _T('-')  ||  CH(off+3) != _T('-'))
1145+        return FALSE;
1146+    off += 4;
1147+
1148+    /* ">" and "->" must not follow the opening. */
1149+    if(off < lines[0].end  &&  CH(off) == _T('>'))
1150+        return FALSE;
1151+    if(off+1 < lines[0].end  &&  CH(off) == _T('-')  &&  CH(off+1) == _T('>'))
1152+        return FALSE;
1153+
1154+    /* HTML comment must not contain "--", so we scan just for "--" instead
1155+     * of "-->" and verify manually that '>' follows. */
1156+    if(md_scan_for_html_closer(ctx, _T("--"), 2,
1157+                lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
1158+    {
1159+        if(*p_end < max_end  &&  CH(*p_end) == _T('>')) {
1160+            *p_end = *p_end + 1;
1161+            return TRUE;
1162+        }
1163+    }
1164+
1165+    return FALSE;
1166+}
1167+
1168+static int
1169+md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1170+{
1171+    OFF off = beg;
1172+
1173+    if(off + 2 >= lines[0].end)
1174+        return FALSE;
1175+    if(CH(off+1) != _T('?'))
1176+        return FALSE;
1177+    off += 2;
1178+
1179+    return md_scan_for_html_closer(ctx, _T("?>"), 2,
1180+                lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1181+}
1182+
1183+static int
1184+md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1185+{
1186+    OFF off = beg;
1187+
1188+    if(off + 2 >= lines[0].end)
1189+        return FALSE;
1190+    if(CH(off+1) != _T('!'))
1191+        return FALSE;
1192+    off += 2;
1193+
1194+    /* Declaration name. */
1195+    if(off >= lines[0].end  ||  !ISALPHA(off))
1196+        return FALSE;
1197+    off++;
1198+    while(off < lines[0].end  &&  ISALPHA(off))
1199+        off++;
1200+    if(off < lines[0].end  &&  !ISWHITESPACE(off))
1201+        return FALSE;
1202+
1203+    return md_scan_for_html_closer(ctx, _T(">"), 1,
1204+                lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1205+}
1206+
1207+static int
1208+md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1209+{
1210+    static const CHAR open_str[] = _T("<![CDATA[");
1211+    static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1212+
1213+    OFF off = beg;
1214+
1215+    if(off + open_size >= lines[0].end)
1216+        return FALSE;
1217+    if(memcmp(STR(off), open_str, open_size) != 0)
1218+        return FALSE;
1219+    off += open_size;
1220+
1221+    if(lines[n_lines-1].end < max_end)
1222+        max_end = lines[n_lines-1].end - 2;
1223+
1224+    return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1225+                lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1226+}
1227+
1228+static int
1229+md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1230+{
1231+    MD_ASSERT(CH(beg) == _T('<'));
1232+    return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end)  ||
1233+            md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end)  ||
1234+            md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end)  ||
1235+            md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end)  ||
1236+            md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1237+}
1238+
1239+
1240+/****************************
1241+ ***  Recognizing Entity  ***
1242+ ****************************/
1243+
1244+static int
1245+md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1246+{
1247+    OFF off = beg;
1248+    MD_UNUSED(ctx);
1249+
1250+    while(off < max_end  &&  ISXDIGIT_(text[off])  &&  off - beg <= 8)
1251+        off++;
1252+
1253+    if(1 <= off - beg  &&  off - beg <= 6) {
1254+        *p_end = off;
1255+        return TRUE;
1256+    } else {
1257+        return FALSE;
1258+    }
1259+}
1260+
1261+static int
1262+md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1263+{
1264+    OFF off = beg;
1265+    MD_UNUSED(ctx);
1266+
1267+    while(off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8)
1268+        off++;
1269+
1270+    if(1 <= off - beg  &&  off - beg <= 7) {
1271+        *p_end = off;
1272+        return TRUE;
1273+    } else {
1274+        return FALSE;
1275+    }
1276+}
1277+
1278+static int
1279+md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1280+{
1281+    OFF off = beg;
1282+    MD_UNUSED(ctx);
1283+
1284+    if(off < max_end  &&  ISALPHA_(text[off]))
1285+        off++;
1286+    else
1287+        return FALSE;
1288+
1289+    while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
1290+        off++;
1291+
1292+    if(2 <= off - beg  &&  off - beg <= 48) {
1293+        *p_end = off;
1294+        return TRUE;
1295+    } else {
1296+        return FALSE;
1297+    }
1298+}
1299+
1300+static int
1301+md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1302+{
1303+    int is_contents;
1304+    OFF off = beg;
1305+
1306+    MD_ASSERT(text[off] == _T('&'));
1307+    off++;
1308+
1309+    if(off+2 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X')))
1310+        is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1311+    else if(off+1 < max_end  &&  text[off] == _T('#'))
1312+        is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1313+    else
1314+        is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1315+
1316+    if(is_contents  &&  off < max_end  &&  text[off] == _T(';')) {
1317+        *p_end = off+1;
1318+        return TRUE;
1319+    } else {
1320+        return FALSE;
1321+    }
1322+}
1323+
/* Convenience wrapper of md_is_entity_str() which examines the document text
 * held in ctx->text; the other arguments and the result are passed through
 * unchanged. */
static inline int
md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
}
1329+
1330+
1331+/******************************
1332+ ***  Attribute Management  ***
1333+ ******************************/
1334+
/* Working storage used by md_build_attribute() while it assembles the
 * contents of an MD_ATTRIBUTE, and later handed to md_free_attribute(). */
typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
struct MD_ATTRIBUTE_BUILD_tag {
    CHAR* text;                     /* Attribute text: the raw input itself in the trivial case, a malloc()ed copy otherwise. */
    MD_TEXTTYPE* substr_types;      /* Type of each substring. */
    OFF* substr_offsets;            /* Start offset of each substring in 'text', plus one final end offset. */
    int substr_count;               /* Number of substrings recorded so far. */
    int substr_alloc;               /* Capacity of the two arrays; 0 means nothing is heap-allocated. */
    MD_TEXTTYPE trivial_types[1];   /* In-place storage used instead of the heap in the trivial case. */
    OFF trivial_offsets[2];         /* Ditto for the offsets (start and end). */
};


/* Flag for md_build_attribute(): do not process backslash escapes. */
#define MD_BUILD_ATTR_NO_ESCAPES    0x0001
1348+
1349+static int
1350+md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1351+                            MD_TEXTTYPE type, OFF off)
1352+{
1353+    if(build->substr_count >= build->substr_alloc) {
1354+        MD_TEXTTYPE* new_substr_types;
1355+        OFF* new_substr_offsets;
1356+
1357+        build->substr_alloc = (build->substr_alloc > 0
1358+                ? build->substr_alloc + build->substr_alloc / 2
1359+                : 8);
1360+        new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1361+                                    build->substr_alloc * sizeof(MD_TEXTTYPE));
1362+        if(new_substr_types == NULL) {
1363+            MD_LOG("realloc() failed.");
1364+            return -1;
1365+        }
1366+        /* Note +1 to reserve space for final offset (== raw_size). */
1367+        new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1368+                                    (build->substr_alloc+1) * sizeof(OFF));
1369+        if(new_substr_offsets == NULL) {
1370+            MD_LOG("realloc() failed.");
1371+            free(new_substr_types);
1372+            return -1;
1373+        }
1374+
1375+        build->substr_types = new_substr_types;
1376+        build->substr_offsets = new_substr_offsets;
1377+    }
1378+
1379+    build->substr_types[build->substr_count] = type;
1380+    build->substr_offsets[build->substr_count] = off;
1381+    build->substr_count++;
1382+    return 0;
1383+}
1384+
1385+static void
1386+md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1387+{
1388+    MD_UNUSED(ctx);
1389+
1390+    if(build->substr_alloc > 0) {
1391+        free(build->text);
1392+        free(build->substr_types);
1393+        free(build->substr_offsets);
1394+    }
1395+}
1396+
/* Fill 'attr' with the contents of an attribute made from the 'raw_size'
 * characters at 'raw_text', using 'build' as the backing storage.
 *
 * The text is split into substrings of type MD_TEXT_NORMAL, MD_TEXT_ENTITY
 * (a complete entity as recognized by md_is_entity_str()) and
 * MD_TEXT_NULLCHAR (a single NUL character). Unless 'flags' contains
 * MD_BUILD_ATTR_NO_ESCAPES, a backslash followed by a punctuation character
 * or a newline is dropped and only the character after it is kept.
 *
 * Returns 0 on success; the caller then releases 'build' with
 * md_free_attribute() when done with 'attr'. Returns -1 on failure, in which
 * case the build has already been released here.
 */
static int
md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
                   unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
{
    OFF raw_off, off;
    int is_trivial;
    int ret = 0;    /* NOTE(review): presumably used by the MD_CHECK() macro -- confirm. */

    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));

    /* If there is no backslash, no ampersand and no NUL character, build
     * trivial attribute without any malloc(). */
    is_trivial = TRUE;
    for(raw_off = 0; raw_off < raw_size; raw_off++) {
        if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
            is_trivial = FALSE;
            break;
        }
    }

    if(is_trivial) {
        /* Single normal substring which refers directly to the raw text;
         * substr_alloc == 0 tells md_free_attribute() there is nothing to free. */
        build->text = (CHAR*) (raw_size ? raw_text : NULL);
        build->substr_types = build->trivial_types;
        build->substr_offsets = build->trivial_offsets;
        build->substr_count = 1;
        build->substr_alloc = 0;
        build->trivial_types[0] = MD_TEXT_NORMAL;
        build->trivial_offsets[0] = 0;
        build->trivial_offsets[1] = raw_size;
        off = raw_size;
    } else {
        /* The processed text only ever drops characters (the escaping
         * backslashes), so raw_size characters are enough for the copy. */
        build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
        if(build->text == NULL) {
            MD_LOG("malloc() failed.");
            goto abort;
        }

        raw_off = 0;
        off = 0;

        while(raw_off < raw_size) {
            /* A NUL character forms a substring of its own. */
            if(raw_text[raw_off] == _T('\0')) {
                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
                memcpy(build->text + off, raw_text + raw_off, 1);
                off++;
                raw_off++;
                continue;
            }

            /* A complete entity is copied verbatim as a substring of its own. */
            if(raw_text[raw_off] == _T('&')) {
                OFF ent_end;

                if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
                    MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
                    memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
                    off += ent_end - raw_off;
                    raw_off = ent_end;
                    continue;
                }
            }

            /* Anything else is normal text; open a new normal substring
             * unless the previous one is normal too and can be extended. */
            if(build->substr_count == 0  ||  build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));

            /* Skip the backslash of an escape; the escaped character itself
             * is copied below. */
            if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
               raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
               (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
                raw_off++;

            build->text[off++] = raw_text[raw_off++];
        }
        /* Final offset marking the end of the last substring. */
        build->substr_offsets[build->substr_count] = off;
    }

    attr->text = build->text;
    attr->size = off;
    attr->substr_offsets = build->substr_offsets;
    attr->substr_types = build->substr_types;
    return 0;

abort:
    md_free_attribute(ctx, build);
    return -1;
}
1481+
1482+
1483+/*********************************************
1484+ ***  Dictionary of Reference Definitions  ***
1485+ *********************************************/
1486+
/* Parameters of the 32-bit FNV-1a hash: initial value and multiplier. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Fold 'n' bytes at 'data' into the FNV-1a hash value 'base' and return the
 * result. Passing the result of one call as 'base' of the next one allows
 * hashing of data which come in several pieces; start with MD_FNV1A_BASE. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* byte = (const unsigned char*) data;
    unsigned hash = base;

    while(n-- > 0) {
        /* FNV-1a: first xor the byte in, then multiply. */
        hash = (hash ^ *byte++) * MD_FNV1A_PRIME;
    }

    return hash;
}
1504+
1505+
/* One link reference definition. */
struct MD_REF_DEF_tag {
    CHAR* label;        /* Label text; used for hashing and for comparing definitions. */
    CHAR* title;        /* NOTE(review): presumably the link title -- confirm against the code which fills it. */
    unsigned hash;      /* Hash of the label, computed in md_build_ref_def_hashtable(). */
    SZ label_size;      /* Length of 'label'. */
    SZ title_size;      /* NOTE(review): presumably the length of 'title' -- confirm. */
    OFF dest_beg;       /* NOTE(review): presumably where the link destination starts -- confirm. */
    OFF dest_end;       /* NOTE(review): presumably where the link destination ends -- confirm. */
    unsigned char label_needs_free : 1;     /* NOTE(review): presumably set when 'label' is heap-allocated -- confirm. */
    unsigned char title_needs_free : 1;     /* NOTE(review): presumably set when 'title' is heap-allocated -- confirm. */
};
1517+
1518+/* Label equivalence is quite complicated with regards to whitespace and case
1519+ * folding. This complicates computing a hash of it as well as direct comparison
1520+ * of two labels. */
1521+
1522+static unsigned
1523+md_link_label_hash(const CHAR* label, SZ size)
1524+{
1525+    unsigned hash = MD_FNV1A_BASE;
1526+    OFF off;
1527+    unsigned codepoint;
1528+    int is_whitespace = FALSE;
1529+
1530+    off = md_skip_unicode_whitespace(label, 0, size);
1531+    while(off < size) {
1532+        SZ char_size;
1533+
1534+        codepoint = md_decode_unicode(label, off, size, &char_size);
1535+        is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1536+
1537+        if(is_whitespace) {
1538+            codepoint = ' ';
1539+            hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1540+            off = md_skip_unicode_whitespace(label, off, size);
1541+        } else {
1542+            MD_UNICODE_FOLD_INFO fold_info;
1543+
1544+            md_get_unicode_fold_info(codepoint, &fold_info);
1545+            hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1546+            off += char_size;
1547+        }
1548+    }
1549+
1550+    return hash;
1551+}
1552+
1553+static OFF
1554+md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1555+                                 MD_UNICODE_FOLD_INFO* fold_info)
1556+{
1557+    unsigned codepoint;
1558+    SZ char_size;
1559+
1560+    if(off >= size) {
1561+        /* Treat end of a link label as a whitespace. */
1562+        goto whitespace;
1563+    }
1564+
1565+    codepoint = md_decode_unicode(label, off, size, &char_size);
1566+    off += char_size;
1567+    if(ISUNICODEWHITESPACE_(codepoint)) {
1568+        /* Treat all whitespace as equivalent */
1569+        goto whitespace;
1570+    }
1571+
1572+    /* Get real folding info. */
1573+    md_get_unicode_fold_info(codepoint, fold_info);
1574+    return off;
1575+
1576+whitespace:
1577+    fold_info->codepoints[0] = _T(' ');
1578+    fold_info->n_codepoints = 1;
1579+    return md_skip_unicode_whitespace(label, off, size);
1580+}
1581+
1582+static int
1583+md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1584+{
1585+    OFF a_off;
1586+    OFF b_off;
1587+    MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1588+    MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1589+    OFF a_fi_off = 0;
1590+    OFF b_fi_off = 0;
1591+    int cmp;
1592+
1593+    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1594+    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1595+    while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1596+          b_off < b_size || b_fi_off < b_fi.n_codepoints)
1597+    {
1598+        /* If needed, load fold info for next char. */
1599+        if(a_fi_off >= a_fi.n_codepoints) {
1600+            a_fi_off = 0;
1601+            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1602+        }
1603+        if(b_fi_off >= b_fi.n_codepoints) {
1604+            b_fi_off = 0;
1605+            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1606+        }
1607+
1608+        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1609+        if(cmp != 0)
1610+            return cmp;
1611+
1612+        a_fi_off++;
1613+        b_fi_off++;
1614+    }
1615+
1616+    return 0;
1617+}
1618+
/* Contents of a hash table bucket which has to hold more than one reference
 * definition. The structure is allocated together with its trailing array. */
typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
struct MD_REF_DEF_LIST_tag {
    int n_ref_defs;             /* Number of used items in ref_defs[]. */
    int alloc_ref_defs;         /* Number of items ref_defs[] has room for. */
    MD_REF_DEF* ref_defs[];  /* Valid items always point into ctx->ref_defs[] */
};
1625+
1626+static int
1627+md_ref_def_cmp(const void* a, const void* b)
1628+{
1629+    const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1630+    const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1631+
1632+    if(a_ref->hash < b_ref->hash)
1633+        return -1;
1634+    else if(a_ref->hash > b_ref->hash)
1635+        return +1;
1636+    else
1637+        return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1638+}
1639+
1640+static int
1641+md_ref_def_cmp_for_sort(const void* a, const void* b)
1642+{
1643+    int cmp;
1644+
1645+    cmp = md_ref_def_cmp(a, b);
1646+
1647+    /* Ensure stability of the sorting. */
1648+    if(cmp == 0) {
1649+        const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1650+        const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1651+
1652+        if(a_ref < b_ref)
1653+            cmp = -1;
1654+        else if(a_ref > b_ref)
1655+            cmp = +1;
1656+        else
1657+            cmp = 0;
1658+    }
1659+
1660+    return cmp;
1661+}
1662+
/* Build the hash table ctx->ref_def_hashtable[] over all the reference
 * definitions in ctx->ref_defs[], keyed by the link label hash.
 *
 * Returns 0 on success (also when there are no definitions and no table is
 * created), or -1 when an allocation fails. On failure, what has been
 * allocated so far stays in ctx; nothing is released here.
 */
static int
md_build_ref_def_hashtable(MD_CTX* ctx)
{
    int i, j;

    if(ctx->n_ref_defs == 0)
        return 0;

    /* The table gets 25 % more buckets than there are definitions. */
    ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
    ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
    if(ctx->ref_def_hashtable == NULL) {
        MD_LOG("malloc() failed.");
        goto abort;
    }
    memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));

    /* Each member of ctx->ref_def_hashtable[] can be:
     *  -- NULL,
     *  -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
     *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
     *     such MD_REF_DEFs.
     *
     * The latter two are told apart by testing whether the pointer falls
     * into the ctx->ref_defs[] array.
     */
    for(i = 0; i < ctx->n_ref_defs; i++) {
        MD_REF_DEF* def = &ctx->ref_defs[i];
        void* bucket;
        MD_REF_DEF_LIST* list;

        def->hash = md_link_label_hash(def->label, def->label_size);
        bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];

        if(bucket == NULL) {
            /* The bucket is empty. Make it just point to the def. */
            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
            continue;
        }

        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
            /* The bucket already contains one ref. def. Let's see whether it
             * is the same label (ref. def. duplicate) or different one
             * (hash conflict). */
            MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;

            if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
                /* Duplicate label: Ignore this ref. def. */
                continue;
            }

            /* Make the bucket complex, i.e. able to hold more ref. defs. */
            list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
            if(list == NULL) {
                MD_LOG("malloc() failed.");
                goto abort;
            }
            list->ref_defs[0] = old_def;
            list->ref_defs[1] = def;
            list->n_ref_defs = 2;
            list->alloc_ref_defs = 2;
            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
            continue;
        }

        /* Append the def to the complex bucket list.
         *
         * Note in this case we ignore potential duplicates to avoid expensive
         * iterating over the complex bucket. Below, we revisit all the complex
         * buckets and handle it more cheaply after the complex bucket contents
         * is sorted. */
        list = (MD_REF_DEF_LIST*) bucket;
        if(list->n_ref_defs >= list->alloc_ref_defs) {
            /* The list is full: grow it by one half. */
            int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
            MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
                        sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
            if(list_tmp == NULL) {
                MD_LOG("realloc() failed.");
                goto abort;
            }
            list = list_tmp;
            list->alloc_ref_defs = alloc_ref_defs;
            /* The list may have moved; update the bucket. */
            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
        }

        list->ref_defs[list->n_ref_defs] = def;
        list->n_ref_defs++;
    }

    /* Sort the complex buckets so we can use bsearch() with them. */
    for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
        void* bucket = ctx->ref_def_hashtable[i];
        MD_REF_DEF_LIST* list;

        /* Empty and single-definition buckets need no further work. */
        if(bucket == NULL)
            continue;
        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
            continue;

        list = (MD_REF_DEF_LIST*) bucket;
        qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);

        /* Disable all duplicates in the complex bucket by forcing all such
         * records to point to the 1st such ref. def. I.e. no matter which
         * record is found during the lookup, it will always point to the right
         * ref. def. in ctx->ref_defs[]. */
        for(j = 1; j < list->n_ref_defs; j++) {
            if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
                list->ref_defs[j] = list->ref_defs[j-1];
        }
    }

    return 0;

abort:
    return -1;
}
1776+
1777+static void
1778+md_free_ref_def_hashtable(MD_CTX* ctx)
1779+{
1780+    if(ctx->ref_def_hashtable != NULL) {
1781+        int i;
1782+
1783+        for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1784+            void* bucket = ctx->ref_def_hashtable[i];
1785+            if(bucket == NULL)
1786+                continue;
1787+            if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1788+                continue;
1789+            free(bucket);
1790+        }
1791+
1792+        free(ctx->ref_def_hashtable);
1793+    }
1794+}
1795+
1796+static const MD_REF_DEF*
1797+md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1798+{
1799+    unsigned hash;
1800+    void* bucket;
1801+
1802+    if(ctx->ref_def_hashtable_size == 0)
1803+        return NULL;
1804+
1805+    hash = md_link_label_hash(label, label_size);
1806+    bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1807+
1808+    if(bucket == NULL) {
1809+        return NULL;
1810+    } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1811+        const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1812+
1813+        if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1814+            return def;
1815+        else
1816+            return NULL;
1817+    } else {
1818+        MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1819+        MD_REF_DEF key_buf;
1820+        const MD_REF_DEF* key = &key_buf;
1821+        const MD_REF_DEF** ret;
1822+
1823+        key_buf.label = (CHAR*) label;
1824+        key_buf.label_size = label_size;
1825+        key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1826+
1827+        ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1828+                    list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1829+        if(ret != NULL)
1830+            return *ret;
1831+        else
1832+            return NULL;
1833+    }
1834+}
1835+
1836+
1837+/***************************
1838+ ***  Recognizing Links  ***
1839+ ***************************/
1840+
1841+/* Note this code is partially shared between processing inlines and blocks
1842+ * as reference definitions and links share some helper parser functions.
1843+ */
1844+
/* Attributes of a link as collected by the link recognizing code.
 * NOTE(review): the member descriptions below are inferred from the names
 * only; confirm against the code which fills and reads the structure. */
typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
struct MD_LINK_ATTR_tag {
    OFF dest_beg;           /* Presumably the offset where the link destination starts. */
    OFF dest_end;           /* Presumably the offset where the link destination ends. */

    CHAR* title;            /* Presumably the link title text. */
    SZ title_size;          /* Presumably the length of 'title'. */
    int title_needs_free;   /* Presumably non-zero when 'title' is heap-allocated. */
};
1854+
1855+
/* Check whether a link label ("[...]") starts at offset beg.
 *
 * lines, n_lines: the lines the label may span; scanning starts in lines[0].
 *
 * On success returns TRUE and sets:
 *   p_end              offset just past the closing ']'
 *   p_beg_line_index   index (into lines) of the line where the contents start
 *   p_end_line_index   index of the line holding the closing ']'
 *   p_contents_beg     offset of the first non-whitespace character or escape
 *   p_contents_end     offset just past the last non-whitespace character or escape
 *
 * Returns FALSE when there is no '[' at beg, an unescaped '[' occurs inside,
 * the contents are empty or whitespace only, the step counter exceeds 999,
 * or no closing ']' is found in the given lines.
 * Note p_beg_line_index may be written even when FALSE is returned.
 */
1856+static int
1857+md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1858+                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1859+                 OFF* p_contents_beg, OFF* p_contents_end)
1860+{
1861+    OFF off = beg;
1862+    OFF contents_beg = 0;
1863+    OFF contents_end = 0;
1864+    int line_index = 0;
1865+    int len = 0;
1866+
1867+    if(CH(off) != _T('['))
1868+        return FALSE;
1869+    off++;
1870+
1871+    while(1) {
1872+        OFF line_end = lines[line_index].end;
1873+
1874+        while(off < line_end) {
1875+            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1876+                if(contents_end == 0) {     /* contents_end == 0 means no contents seen yet. */
1877+                    contents_beg = off;
1878+                    *p_beg_line_index = line_index;
1879+                }
1880+                contents_end = off + 2;     /* The escape pair counts as contents. */
1881+                off += 2;
1882+            } else if(CH(off) == _T('[')) {
1883+                return FALSE;
1884+            } else if(CH(off) == _T(']')) {
1885+                if(contents_beg < contents_end) {
1886+                    /* Success. */
1887+                    *p_contents_beg = contents_beg;
1888+                    *p_contents_end = contents_end;
1889+                    *p_end = off+1;
1890+                    *p_end_line_index = line_index;
1891+                    return TRUE;
1892+                } else {
1893+                    /* Link label must have some non-whitespace contents. */
1894+                    return FALSE;
1895+                }
1896+            } else {
1897+                unsigned codepoint;
1898+                SZ char_size;
1899+
1900+                codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1901+                if(!ISUNICODEWHITESPACE_(codepoint)) {
1902+                    if(contents_end == 0) {
1903+                        contents_beg = off;
1904+                        *p_beg_line_index = line_index;
1905+                    }
1906+                    contents_end = off + char_size;
1907+                }
1908+
1909+                off += char_size;
1910+            }
1911+
1912+            len++;              /* One step per escape pair or decoded character. */
1913+            if(len > 999)
1914+                return FALSE;
1915+        }
1916+
1917+        line_index++;
1918+        len++;                  /* A line break counts as one step too. */
1919+        if(line_index < n_lines)
1920+            off = lines[line_index].beg;
1921+        else
1922+            break;
1923+    }
1924+
1925+    return FALSE;
1926+}
1927+
/* Check for a link destination written in angle brackets ("<...>") at offset
 * beg, looking no further than max_end.
 *
 * On success returns TRUE, sets p_contents_beg and p_contents_end to the text
 * between the brackets, and p_end just past the closing '>'.
 * Returns FALSE when beg is not before max_end, there is no '<' at beg, a
 * newline or an unescaped '<' occurs inside, or no closing '>' is found.
 */
1928+static int
1929+md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1930+                         OFF* p_contents_beg, OFF* p_contents_end)
1931+{
1932+    OFF off = beg;
1933+
1934+    if(off >= max_end  ||  CH(off) != _T('<'))
1935+        return FALSE;
1936+    off++;
1937+
1938+    while(off < max_end) {
1939+        if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1940+            off += 2;           /* Skip the backslash together with the escaped character. */
1941+            continue;
1942+        }
1943+
1944+        if(ISNEWLINE(off)  ||  CH(off) == _T('<'))
1945+            return FALSE;
1946+
1947+        if(CH(off) == _T('>')) {
1948+            /* Success. */
1949+            *p_contents_beg = beg+1;
1950+            *p_contents_end = off;
1951+            *p_end = off+1;
1952+            return TRUE;
1953+        }
1954+
1955+        off++;
1956+    }
1957+
1958+    return FALSE;
1959+}
1960+
/* Check for a link destination that is not enclosed in angle brackets at
 * offset beg, looking no further than max_end.
 *
 * The destination runs until whitespace, a control character, a ')' with no
 * matching '(' inside the destination, or max_end. A backslash followed by
 * punctuation is skipped as a pair.
 *
 * On success returns TRUE, sets p_contents_beg to beg, and sets both
 * p_contents_end and p_end to the offset where the destination stops.
 * Returns FALSE when the destination would be empty, parentheses are left
 * open, or they nest deeper than 32 levels.
 */
1961+static int
1962+md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1963+                         OFF* p_contents_beg, OFF* p_contents_end)
1964+{
1965+    OFF off = beg;
1966+    int parenthesis_level = 0;
1967+
1968+    while(off < max_end) {
1969+        if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1970+            off += 2;
1971+            continue;
1972+        }
1973+
1974+        if(ISWHITESPACE(off) || ISCNTRL(off))
1975+            break;
1976+
1977+        /* Link destination may include balanced pairs of unescaped '(' ')'.
1978+         * Note we limit the maximal nesting level by 32 to protect us from
1979+         * https://github.com/jgm/cmark/issues/214 */
1980+        if(CH(off) == _T('(')) {
1981+            parenthesis_level++;
1982+            if(parenthesis_level > 32)
1983+                return FALSE;
1984+        } else if(CH(off) == _T(')')) {
1985+            if(parenthesis_level == 0)
1986+                break;          /* This ')' is not ours; it ends the destination. */
1987+            parenthesis_level--;
1988+        }
1989+
1990+        off++;
1991+    }
1992+
1993+    if(parenthesis_level != 0  ||  off == beg)
1994+        return FALSE;
1995+
1996+    /* Success. */
1997+    *p_contents_beg = beg;
1998+    *p_contents_end = off;
1999+    *p_end = off;
2000+    return TRUE;
2001+}
2002+
/* Check for a link destination at offset beg, looking no further than max_end.
 *
 * Dispatches to md_is_link_destination_A() when the destination starts with
 * '<' and to md_is_link_destination_B() otherwise; the output parameters and
 * the TRUE/FALSE result are those of the chosen helper.
 *
 * The character at beg is inspected only when beg lies before max_end, so an
 * empty range is never read. For such a range the B variant returns FALSE,
 * the same result the A variant gives, so the outcome is unchanged.
 */
2003+static inline int
2004+md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2005+                       OFF* p_contents_beg, OFF* p_contents_end)
2006+{
2007+    if(beg < max_end  &&  CH(beg) == _T('<'))
2008+        return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2009+    else
2010+        return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2011+}
2012+
/* Check for a link title at offset beg: white space (with at most one line
 * break) followed by text enclosed in "...", '...' or (...).
 *
 * lines, n_lines: the lines the title may span; scanning starts in lines[0].
 *
 * On success returns TRUE and sets:
 *   p_end              offset just past the closing delimiter
 *   p_beg_line_index   index (into lines) of the line holding the opening delimiter
 *   p_end_line_index   index of the line holding the closing delimiter
 *   p_contents_beg     offset just past the opening delimiter
 *   p_contents_end     offset of the closing delimiter
 *
 * Returns FALSE when neither white space nor a line break separates beg from
 * the title, the first character is not an opening delimiter, a (...) title
 * contains an unescaped '(', or no closing delimiter is found.
 * p_beg_line_index and p_contents_beg may be written even when FALSE is returned.
 */
2013+static int
2014+md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2015+                 OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2016+                 OFF* p_contents_beg, OFF* p_contents_end)
2017+{
2018+    OFF off = beg;
2019+    CHAR closer_char;
2020+    int line_index = 0;
2021+
2022+    /* White space with up to one line break. */
2023+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2024+        off++;
2025+    if(off >= lines[line_index].end) {
2026+        line_index++;
2027+        if(line_index >= n_lines)
2028+            return FALSE;
2029+        off = lines[line_index].beg;
2030+    }
2031+    if(off == beg)              /* Nothing was skipped: the title must be separated from what precedes it. */
2032+        return FALSE;
2033+
2034+    *p_beg_line_index = line_index;
2035+
2036+    /* First char determines how to detect end of it. */
2037+    switch(CH(off)) {
2038+        case _T('"'):   closer_char = _T('"'); break;
2039+        case _T('\''):  closer_char = _T('\''); break;
2040+        case _T('('):   closer_char = _T(')'); break;
2041+        default:        return FALSE;
2042+    }
2043+    off++;
2044+
2045+    *p_contents_beg = off;
2046+
2047+    while(line_index < n_lines) {
2048+        OFF line_end = lines[line_index].end;
2049+
2050+        while(off < line_end) {
2051+            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
2052+                off++;          /* Together with the off++ below this skips the escaped character. */
2053+            } else if(CH(off) == closer_char) {
2054+                /* Success. */
2055+                *p_contents_end = off;
2056+                *p_end = off+1;
2057+                *p_end_line_index = line_index;
2058+                return TRUE;
2059+            } else if(closer_char == _T(')')  &&  CH(off) == _T('(')) {
2060+                /* A ()-style title cannot contain an unescaped '('. */
2061+                return FALSE;
2062+            }
2063+
2064+            off++;
2065+        }
2066+
2067+        line_index++;           /* NOTE(review): off is not moved to lines[line_index].beg; scanning continues from the old offset. */
2068+    }
2069+
2070+    return FALSE;
2071+}
2072+
2073+/* Check whether the given lines start with a link reference definition,
2074+ * i.e. a link label, a colon, a link destination and an optional title.
2075+ *
2076+ * Returns 0 if they do not. Returns N > 0 if they do (N being the number of
2077+ * lines forming the definition); the definition is then stored in
2078+ * ctx->ref_defs[] for resolving any links referring to it.
2079+ * Returns a negative value in case of an error (out of memory).
2080+ */
2081+static int
2082+md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2083+{
2084+    OFF label_contents_beg;
2085+    OFF label_contents_end;
2086+    int label_contents_line_index = -1;
2087+    int label_is_multiline = FALSE;
2088+    OFF dest_contents_beg;
2089+    OFF dest_contents_end;
2090+    OFF title_contents_beg;
2091+    OFF title_contents_end;
2092+    int title_contents_line_index;
2093+    int title_is_multiline = FALSE;
2094+    OFF off;
2095+    int line_index = 0;
2096+    int tmp_line_index;
2097+    MD_REF_DEF* def = NULL;
2098+    int ret = 0;
2099+
2100+    /* Link label. */
2101+    if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
2102+                &off, &label_contents_line_index, &line_index,
2103+                &label_contents_beg, &label_contents_end))
2104+        return FALSE;
2105+    label_is_multiline = (label_contents_line_index != line_index);
2106+
2107+    /* Colon. */
2108+    if(off >= lines[line_index].end  ||  CH(off) != _T(':'))
2109+        return FALSE;
2110+    off++;
2111+
2112+    /* Optional white space with up to one line break. */
2113+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2114+        off++;
2115+    if(off >= lines[line_index].end) {
2116+        line_index++;
2117+        if(line_index >= n_lines)
2118+            return FALSE;
2119+        off = lines[line_index].beg;
2120+    }
2121+
2122+    /* Link destination. */
2123+    if(!md_is_link_destination(ctx, off, lines[line_index].end,
2124+                &off, &dest_contents_beg, &dest_contents_end))
2125+        return FALSE;
2126+
2127+    /* (Optional) title. Note we interpret it as a title only if nothing
2128+     * more follows on its last line. */
2129+    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2130+                &off, &title_contents_line_index, &tmp_line_index,
2131+                &title_contents_beg, &title_contents_end)
2132+        &&  off >= lines[line_index + tmp_line_index].end)
2133+    {
2134+        title_is_multiline = (tmp_line_index != title_contents_line_index);
2135+        title_contents_line_index += line_index;    /* Make the index relative to lines[] again. */
2136+        line_index += tmp_line_index;
2137+    } else {
2138+        /* Not a title. */
2139+        title_is_multiline = FALSE;
2140+        title_contents_beg = off;
2141+        title_contents_end = off;
2142+        title_contents_line_index = 0;
2143+    }
2144+
2145+    /* Nothing more can follow on the last line. */
2146+    if(off < lines[line_index].end)
2147+        return FALSE;
2148+
2149+    /* So, it _is_ a reference definition. Remember it. */
2150+    if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2151+        MD_REF_DEF* new_defs;
2152+        int new_alloc = (ctx->alloc_ref_defs > 0
2153+                ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
2154+                : 16);
2155+        new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, new_alloc * sizeof(MD_REF_DEF));
2156+        if(new_defs == NULL) {
2157+            MD_LOG("realloc() failed.");
2158+            ret = -1;           /* Report out-of-memory as an error, not as "no definition". */
2159+            goto abort;
2160+        }
2161+        ctx->alloc_ref_defs = new_alloc;    /* Commit the capacity only once the buffer really has it. */
2162+        ctx->ref_defs = new_defs;
2163+    }
2164+    def = &ctx->ref_defs[ctx->n_ref_defs];
2165+    memset(def, 0, sizeof(MD_REF_DEF));
2166+
2167+    if(label_is_multiline) {
2168+        MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2169+                    lines + label_contents_line_index, n_lines - label_contents_line_index,
2170+                    _T(' '), &def->label, &def->label_size));
2171+        def->label_needs_free = TRUE;
2172+    } else {
2173+        def->label = (CHAR*) STR(label_contents_beg);
2174+        def->label_size = label_contents_end - label_contents_beg;
2175+    }
2176+
2177+    if(title_is_multiline) {
2178+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2179+                    lines + title_contents_line_index, n_lines - title_contents_line_index,
2180+                    _T('\n'), &def->title, &def->title_size));
2181+        def->title_needs_free = TRUE;
2182+    } else {
2183+        def->title = (CHAR*) STR(title_contents_beg);
2184+        def->title_size = title_contents_end - title_contents_beg;
2185+    }
2186+
2187+    def->dest_beg = dest_contents_beg;
2188+    def->dest_end = dest_contents_end;
2189+
2190+    /* Success. */
2191+    ctx->n_ref_defs++;
2192+    return line_index + 1;
2193+
2194+abort:
2195+    /* Failure. The slot was not counted in n_ref_defs, so release what it owns. */
2196+    if(def != NULL  &&  def->label_needs_free)
2197+        free(def->label);
2198+    if(def != NULL  &&  def->title_needs_free)
2199+        free(def->title);
2200+    return ret;
2201+}
2202+
/* Check whether the bracketed text in the range beg to end refers to a known
 * link reference definition.
 *
 * beg must point at '[' or at the '!' of an image, and end just past the
 * closing ']' (both are asserted). The label between the brackets is looked
 * up with md_lookup_ref_def(); a label spanning several lines is first merged
 * into a temporary buffer by md_merge_lines_alloc(), which is freed again
 * before returning.
 *
 * Returns TRUE and fills attr from the definition when one is found, FALSE
 * when none matches. If merging fails, the value MD_CHECK() leaves in ret is
 * returned (MD_CHECK is defined outside this chunk; presumably a negative
 * error code - confirm).
 */
2203+static int
2204+md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2205+                     OFF beg, OFF end, MD_LINK_ATTR* attr)
2206+{
2207+    const MD_REF_DEF* def;
2208+    const MD_LINE* beg_line;
2209+    const MD_LINE* end_line;
2210+    CHAR* label;
2211+    SZ label_size;
2212+    int ret;
2213+
2214+    MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2215+    MD_ASSERT(CH(end-1) == _T(']'));
2216+
2217+    beg += (CH(beg) == _T('!') ? 2 : 1);    /* Step over "![" or "[". */
2218+    end--;                                  /* Leave out the closing ']'. */
2219+
2220+    /* Find lines corresponding to the beg and end positions. */
2221+    MD_ASSERT(lines[0].beg <= beg);
2222+    beg_line = lines;
2223+    while(beg >= beg_line->end)
2224+        beg_line++;
2225+
2226+    MD_ASSERT(end <= lines[n_lines-1].end);
2227+    end_line = beg_line;
2228+    while(end >= end_line->end)
2229+        end_line++;
2230+
2231+    if(beg_line != end_line) {
2232+        MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2233+                 n_lines - (beg_line - lines), _T(' '), &label, &label_size));
2234+    } else {
2235+        label = (CHAR*) STR(beg);
2236+        label_size = end - beg;
2237+    }
2238+
2239+    def = md_lookup_ref_def(ctx, label, label_size);
2240+    if(def != NULL) {
2241+        attr->dest_beg = def->dest_beg;
2242+        attr->dest_end = def->dest_end;
2243+        attr->title = def->title;
2244+        attr->title_size = def->title_size;
2245+        attr->title_needs_free = FALSE;     /* The title stays owned by the definition. */
2246+    }
2247+
2248+    if(beg_line != end_line)
2249+        free(label);
2250+
2251+    ret = (def != NULL);
2252+
2253+abort:
2254+    return ret;
2255+}
2256+
/* Check whether an inline link specification, i.e. "(" followed by an
 * optional destination, an optional title and ")", starts at offset beg.
 * beg must point at the opening '(' (asserted).
 *
 * On success returns TRUE, fills attr and sets p_end just past the closing
 * ')'. attr->title_needs_free is TRUE only when the title spans several lines
 * and was merged by md_merge_lines_alloc().
 * Returns FALSE when the text does not form a valid specification; note that
 * attr->dest_beg and attr->dest_end may already have been written by then.
 * If merging the title fails, the value MD_CHECK() leaves in ret is returned
 * (MD_CHECK is defined outside this chunk; presumably a negative error code).
 *
 * When skipping white space, the end of the document is treated like a line
 * break, so the character at off is never read once off has reached ctx->size.
 */
2257+static int
2258+md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2259+                       OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2260+{
2261+    int line_index = 0;
2262+    int tmp_line_index;
2263+    OFF title_contents_beg;
2264+    OFF title_contents_end;
2265+    int title_contents_line_index;
2266+    int title_is_multiline;
2267+    OFF off = beg;
2268+    int ret = FALSE;
2269+
2270+    while(off >= lines[line_index].end)
2271+        line_index++;
2272+
2273+    MD_ASSERT(CH(off) == _T('('));
2274+    off++;
2275+
2276+    /* Optional white space with up to one line break. */
2277+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2278+        off++;
2279+    if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
2280+        line_index++;
2281+        if(line_index >= n_lines)
2282+            return FALSE;
2283+        off = lines[line_index].beg;
2284+    }
2285+
2286+    /* Link destination may be omitted, but only when not also having a title. */
2287+    if(off < ctx->size  &&  CH(off) == _T(')')) {
2288+        attr->dest_beg = off;
2289+        attr->dest_end = off;
2290+        attr->title = NULL;
2291+        attr->title_size = 0;
2292+        attr->title_needs_free = FALSE;
2293+        off++;
2294+        *p_end = off;
2295+        return TRUE;
2296+    }
2297+
2298+    /* Link destination. */
2299+    if(!md_is_link_destination(ctx, off, lines[line_index].end,
2300+                        &off, &attr->dest_beg, &attr->dest_end))
2301+        return FALSE;
2302+
2303+    /* (Optional) title. */
2304+    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2305+                &off, &title_contents_line_index, &tmp_line_index,
2306+                &title_contents_beg, &title_contents_end))
2307+    {
2308+        title_is_multiline = (tmp_line_index != title_contents_line_index);
2309+        title_contents_line_index += line_index;    /* Make the index relative to lines[] again. */
2310+        line_index += tmp_line_index;
2311+    } else {
2312+        /* Not a title. */
2313+        title_is_multiline = FALSE;
2314+        title_contents_beg = off;
2315+        title_contents_end = off;
2316+        title_contents_line_index = 0;
2317+    }
2318+
2319+    /* Optional whitespace followed with final ')'. */
2320+    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2321+        off++;
2322+    if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
2323+        line_index++;
2324+        if(line_index >= n_lines)
2325+            return FALSE;
2326+        off = lines[line_index].beg;
2327+    }
2328+    if(CH(off) != _T(')'))
2329+        goto abort;
2330+    off++;
2331+
2332+    if(title_contents_beg >= title_contents_end) {
2333+        attr->title = NULL;
2334+        attr->title_size = 0;
2335+        attr->title_needs_free = FALSE;
2336+    } else if(!title_is_multiline) {
2337+        attr->title = (CHAR*) STR(title_contents_beg);
2338+        attr->title_size = title_contents_end - title_contents_beg;
2339+        attr->title_needs_free = FALSE;
2340+    } else {
2341+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2342+                    lines + title_contents_line_index, n_lines - title_contents_line_index,
2343+                    _T('\n'), &attr->title, &attr->title_size));
2344+        attr->title_needs_free = TRUE;
2345+    }
2346+
2347+    *p_end = off;
2348+    ret = TRUE;
2349+
2350+abort:
2351+    return ret;
2352+}
2353+
/* Release all stored link reference definitions: every label and title that
 * is flagged as separately allocated, and then the ctx->ref_defs array itself.
 * The counters in ctx are left untouched. */
2354+static void
2355+md_free_ref_defs(MD_CTX* ctx)
2356+{
2357+    int i;
2358+
2359+    for(i = 0; i < ctx->n_ref_defs; i++) {
2360+        MD_REF_DEF* def = &ctx->ref_defs[i];
2361+
2362+        if(def->label_needs_free)
2363+            free(def->label);
2364+        if(def->title_needs_free)
2365+            free(def->title);
2366+    }
2367+
2368+    free(ctx->ref_defs);
2369+}
2370+
2371+
2372+/******************************************
2373+ ***  Processing Inlines (a.k.a Spans)  ***
2374+ ******************************************/
2375+
2376+/* We process inlines in a few phases:
2377+ *
2378+ * (1) We go through the block text and collect all significant characters
2379+ *     which may start/end a span or some other significant position into
2380+ *     ctx->marks[]. Core of this is what md_collect_marks() does.
2381+ *
2382+ *     We also do some very brief preliminary context-less analysis, whether
2383+ *     it might be opener or closer (e.g. of an emphasis span).
2384+ *
2385+ *     This speeds the other steps as we do not need to re-iterate over all
2386+ *     characters anymore.
2387+ *
2388+ * (2) We analyze each potential mark type, in order of their precedence.
2389+ *
2390+ *     In each md_analyze_XXX() function, we re-iterate list of the marks,
2391+ *     skipping already resolved regions (in preceding precedences) and try to
2392+ *     resolve them.
2393+ *
2394+ * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
2395+ *       them as resolved.
2396+ *
2397+ * (2.2) For range-type marks, we analyze whether the mark could be closer
2398+ *       and, if yes, whether there is some preceding opener it could satisfy.
2399+ *
2400+ *       If not we check whether it could be really an opener and if yes, we
2401+ *       remember it so subsequent closers may resolve it.
2402+ *
2403+ * (3) Finally, when all marks have been analyzed, we render the block contents
2404+ *     by calling the MD_RENDERER::text() callback, interleaved with
2405+ *     ::enter_span() or ::close_span() whenever we reach a resolved mark.
2406+ */
2407+
2408+
2409+/* The mark structure.
2410+ *
2411+ * '\\': Maybe escape sequence.
2412+ * '\0': NULL char.
2413+ *  '*': Maybe (strong) emphasis start/end.
2414+ *  '_': Maybe (strong) emphasis start/end.
2415+ *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
2416+ *  '`': Maybe code span start/end.
2417+ *  '&': Maybe start of entity.
2418+ *  ';': Maybe end of entity.
2419+ *  '<': Maybe start of raw HTML or autolink.
2420+ *  '>': Maybe end of raw HTML or autolink.
2421+ *  '[': Maybe start of link label or link text.
2422+ *  '!': Equivalent of '[' for image.
2423+ *  ']': Maybe end of link label or link text.
2424+ *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
2425+ *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
2426+ *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
2427+ *  'D': Dummy mark, it reserves a space for splitting a previous mark
2428+ *       (e.g. emphasis) or to make more space for storing some special data
2429+ *       related to the preceding mark (e.g. link).
2430+ *
2431+ * Note that not all instances of these chars in the text imply creation of the
2432+ * structure. Only those which have (or may have, after we see more context)
2433+ * the special meaning.
2434+ *
2435+ * (Keep this struct as small as possible so that as many of them as possible
2436+ * fit into a CPU cache line.)
2437+ */
2438+struct MD_MARK_tag {
2439+    OFF beg;    /* Offset where the mark begins. For 'D' marks, md_mark_store_ptr() may reuse beg and end as pointer storage. */
2440+    OFF end;    /* Offset where the mark ends. */
2441+
2442+    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
2443+     * of given type 'ch'.
2444+     *
2445+     * During resolving, we disconnect from the chain and point to the
2446+     * corresponding counterpart so opener points to its closer and vice versa.
2447+     */
2448+    int prev;   /* Index into ctx->marks[], or -1. */
2449+    int next;   /* Index into ctx->marks[], or -1. */
2450+    CHAR ch;    /* Mark character. Besides the list above, md_build_mark_char_map() also registers '|' and '$' under their flags. */
2451+    unsigned char flags;    /* Combination of the MD_MARK_xxx flags. */
2452+};
2453+
2454+/* Mark flags (these apply to ALL mark types). */
2455+#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
2456+#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
2457+#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
2458+#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
2459+#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */
2460+
2461+/* Mark flags specific for various mark types (so they can share bits).
 * Note 0x20 is used by three of them; which meaning applies depends on the
 * mark type. */
2462+#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
2463+#define MD_MARK_EMPH_MOD3_0                 0x40  /* MOD3_0, MOD3_1 and MOD3_2 are the values of the two-bit field ... */
2464+#define MD_MARK_EMPH_MOD3_1                 0x80
2465+#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
2466+#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)  /* ... selected by this mask. */
2467+#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
2468+#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
2469+
/* Select the chain of open '*' openers matching the given mark flags.
 *
 * Only MD_MARK_EMPH_INTRAWORD and the MD_MARK_EMPH_MOD3_xxx bits of flags are
 * considered; each of the six combinations has its own chain. Flags with no
 * MOD3 bit set hit MD_UNREACHABLE(). The ASTERISK_OPENERS_xxx names are
 * defined outside this chunk; they make use of the ctx parameter.
 */
2470+static MD_MARKCHAIN*
2471+md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2472+{
2473+    switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
2474+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return &ASTERISK_OPENERS_intraword_mod3_0;
2475+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return &ASTERISK_OPENERS_intraword_mod3_1;
2476+        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return &ASTERISK_OPENERS_intraword_mod3_2;
2477+        case MD_MARK_EMPH_MOD3_0:                           return &ASTERISK_OPENERS_extraword_mod3_0;
2478+        case MD_MARK_EMPH_MOD3_1:                           return &ASTERISK_OPENERS_extraword_mod3_1;
2479+        case MD_MARK_EMPH_MOD3_2:                           return &ASTERISK_OPENERS_extraword_mod3_2;
2480+        default:                                            MD_UNREACHABLE();
2481+    }
2482+    return NULL;
2483+}
2484+
/* Return the chain the mark ctx->marks[mark_index] belongs to, chosen by its
 * mark character (and, for '*' and '~', by its flags or length).
 * Returns NULL for mark characters that have no chain.
 */
2485+static MD_MARKCHAIN*
2486+md_mark_chain(MD_CTX* ctx, int mark_index)
2487+{
2488+    MD_MARK* mark = &ctx->marks[mark_index];
2489+
2490+    switch(mark->ch) {
2491+        case _T('*'):   return md_asterisk_chain(ctx, mark->flags);
2492+        case _T('_'):   return &UNDERSCORE_OPENERS;
2493+        case _T('~'):   return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2494+        case _T('['):   return &BRACKET_OPENERS;
2495+        case _T('|'):   return &TABLECELLBOUNDARIES;
2496+        default:        return NULL;
2497+    }
2498+}
2499+
/* Reserve the next slot in ctx->marks[] and return a pointer to it.
 *
 * The array grows by half of its capacity (starting at 64 entries) whenever
 * it is full. The returned slot is not initialized. Returns NULL if realloc()
 * fails; the previously stored marks stay valid in that case.
 *
 * NOTE(review): ctx->alloc_marks is raised before the realloc() result is
 * checked, so after a failure it no longer matches the real capacity of
 * ctx->marks. Callers in this chunk (PUSH_MARK_) abort on NULL.
 */
2500+static MD_MARK*
2501+md_push_mark(MD_CTX* ctx)
2502+{
2503+    if(ctx->n_marks >= ctx->alloc_marks) {
2504+        MD_MARK* new_marks;
2505+
2506+        ctx->alloc_marks = (ctx->alloc_marks > 0
2507+                ? ctx->alloc_marks + ctx->alloc_marks / 2
2508+                : 64);
2509+        new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2510+        if(new_marks == NULL) {
2511+            MD_LOG("realloc() failed.");
2512+            return NULL;
2513+        }
2514+
2515+        ctx->marks = new_marks;
2516+    }
2517+
2518+    return &ctx->marks[ctx->n_marks++];
2519+}
2520+
/* PUSH_MARK_() reserves a new mark via md_push_mark() and stores it in the
 * local variable 'mark'. On failure it sets the local 'ret' to -1 and jumps
 * to the label 'abort'. The expanding function therefore has to provide
 * 'ctx', 'mark', 'ret' and 'abort'. */
2521+#define PUSH_MARK_()                                                    \
2522+        do {                                                            \
2523+            mark = md_push_mark(ctx);                                   \
2524+            if(mark == NULL) {                                          \
2525+                ret = -1;                                               \
2526+                goto abort;                                             \
2527+            }                                                           \
2528+        } while(0)
2529+
/* PUSH_MARK() does the same and also initializes the new mark: the given
 * character, offsets and flags are stored and both chain links are set to -1.
 * NOTE(review): ch_ is cast to char although MD_MARK::ch is declared as CHAR. */
2530+#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
2531+        do {                                                            \
2532+            PUSH_MARK_();                                               \
2533+            mark->beg = (beg_);                                         \
2534+            mark->end = (end_);                                         \
2535+            mark->prev = -1;                                            \
2536+            mark->next = -1;                                            \
2537+            mark->ch = (char)(ch_);                                     \
2538+            mark->flags = (flags_);                                     \
2539+        } while(0)
2540+
2541+
/* Append the mark ctx->marks[mark_index] to the end of the given chain.
 * The mark becomes the new tail (and also the head if the chain was empty);
 * its prev link points to the former tail and its next link is reset to -1. */
2542+static void
2543+md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2544+{
2545+    if(chain->tail >= 0)
2546+        ctx->marks[chain->tail].next = mark_index;
2547+    else
2548+        chain->head = mark_index;   /* A negative tail means the chain is empty. */
2549+
2550+    ctx->marks[mark_index].prev = chain->tail;
2551+    ctx->marks[mark_index].next = -1;
2552+    chain->tail = mark_index;
2553+}
2554+
2555+/* Sometimes, we need to store a pointer into the mark. It is quite rare
2556+ * so we do not bother to make MD_MARK use union, and it can only happen
2557+ * for dummy marks.
 *
 * The pointer is copied over the first bytes of the mark ctx->marks[mark_index],
 * which must be a 'D' mark (asserted). Use md_mark_get_ptr() to read it back. */
2558+static inline void
2559+md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2560+{
2561+    MD_MARK* mark = &ctx->marks[mark_index];
2562+    MD_ASSERT(mark->ch == 'D');
2563+
2564+    /* Check only members beg and end are misused for this. */
2565+    MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2566+    memcpy(mark, &ptr, sizeof(void*));      /* beg and end are the leading members of MD_MARK. */
2567+}
2568+
/* Read back a pointer previously stored by md_mark_store_ptr() in the mark
 * ctx->marks[mark_index], which must be a 'D' mark (asserted). */
2569+static inline void*
2570+md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2571+{
2572+    void* ptr;
2573+    MD_MARK* mark = &ctx->marks[mark_index];
2574+    MD_ASSERT(mark->ch == 'D');
2575+    memcpy(&ptr, mark, sizeof(void*));
2576+    return ptr;
2577+}
2578+
/* Resolve the marks at opener_index and closer_index as a matching pair.
 *
 * If chain is not NULL, the opener is first unlinked from it (the chain is
 * expected to be the one the opener is currently linked in). Then the opener's
 * next is set to the closer, the closer's prev to the opener, and both marks
 * get the respective MD_MARK_OPENER or MD_MARK_CLOSER flag together with
 * MD_MARK_RESOLVED.
 */
2579+static void
2580+md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2581+{
2582+    MD_MARK* opener = &ctx->marks[opener_index];
2583+    MD_MARK* closer = &ctx->marks[closer_index];
2584+
2585+    /* Remove opener from the list of openers. */
2586+    if(chain != NULL) {
2587+        if(opener->prev >= 0)
2588+            ctx->marks[opener->prev].next = opener->next;
2589+        else
2590+            chain->head = opener->next;
2591+
2592+        if(opener->next >= 0)
2593+            ctx->marks[opener->next].prev = opener->prev;
2594+        else
2595+            chain->tail = opener->prev;
2596+    }
2597+
2598+    /* Interconnect opener and closer and mark both as resolved. */
2599+    opener->next = closer_index;
2600+    opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
2601+    closer->prev = opener_index;
2602+    closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
2603+}
2604+
2605+
2606+#define MD_ROLLBACK_ALL         0
2607+#define MD_ROLLBACK_CROSSING    1
2608+
2609+/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
2610+ * resolvings according to these rules:
2611+ *
2612+ * (1) All openers BEFORE the range corresponding to any closer inside the
2613+ *     range are un-resolved and they are re-added to their respective chains
2614+ *     of unresolved openers. This ensures we can reuse the opener for closers
2615+ *     AFTER the range.
2616+ *
2617+ * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2618+ *     are discarded.
2619+ *
2620+ * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
2621+ *     in (1) are discarded. I.e. pairs of openers and closers which are both
2622+ *     inside the range are retained as well as any unpaired marks.
2623+ */
2624+static void
2625+md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2626+{
2627+    int i;
2628+    int mark_index;
2629+
2630+    /* Cut all unresolved openers at the mark index. */
2631+    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
2632+        MD_MARKCHAIN* chain = &ctx->mark_chains[i];
2633+
2634+        while(chain->tail >= opener_index)
2635+            chain->tail = ctx->marks[chain->tail].prev;
2636+
2637+        if(chain->tail >= 0)
2638+            ctx->marks[chain->tail].next = -1;
2639+        else
2640+            chain->head = -1;
2641+    }
2642+
2643+    /* Go backwards so that unresolved openers are re-added into their
2644+     * respective chains, in the right order. */
2645+    mark_index = closer_index - 1;
2646+    while(mark_index > opener_index) {
2647+        MD_MARK* mark = &ctx->marks[mark_index];
2648+        int mark_flags = mark->flags;   /* Snapshot taken before the flags are reset below. */
2649+        int discard_flag = (how == MD_ROLLBACK_ALL);
2650+
2651+        if(mark->flags & MD_MARK_CLOSER) {
2652+            int mark_opener_index = mark->prev;
2653+
2654+            /* Undo opener BEFORE the range. */
2655+            if(mark_opener_index < opener_index) {
2656+                MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
2657+                MD_MARKCHAIN* chain;
2658+
2659+                mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
                /* Rule (1): the chain is chosen by the un-resolved opener
                 * itself, not by the opener of the range being rolled back,
                 * which may be of a different type. */
2660+                chain = md_mark_chain(ctx, mark_opener_index);
2661+                if(chain != NULL) {
2662+                    md_mark_chain_append(ctx, chain, mark_opener_index);
2663+                    discard_flag = 1;
2664+                }
2665+            }
2666+        }
2667+
2668+        /* And reset our flags. */
2669+        if(discard_flag)
2670+            mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
2671+
2672+        /* Jump as far as we can over unresolved or non-interesting marks. */
2673+        switch(how) {
2674+            case MD_ROLLBACK_CROSSING:
2675+                if((mark_flags & MD_MARK_CLOSER)  &&  mark->prev > opener_index) {
2676+                    /* If we are closer with opener INSIDE the range, there may
2677+                     * not be any other crosser inside the subrange. */
2678+                    mark_index = mark->prev;
2679+                    break;
2680+                }
2681+                MD_FALLTHROUGH();
2682+            default:
2683+                mark_index--;
2684+                break;
2685+        }
2686+    }
2687+}
2688+
/* (Re)build ctx->mark_char_map[], the lookup table telling which characters
 * may form a mark.
 *
 * The table is cleared first. A fixed set of characters is always enabled;
 * '~', '$', '@', ':', '.' and '|' are enabled only when the corresponding
 * flag is set in ctx->parser.flags. With MD_FLAG_COLLAPSEWHITESPACE, every
 * index for which ISWHITESPACE_() holds is enabled too.
 */
2689+static void
2690+md_build_mark_char_map(MD_CTX* ctx)
2691+{
2692+    memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2693+
2694+    ctx->mark_char_map['\\'] = 1;
2695+    ctx->mark_char_map['*'] = 1;
2696+    ctx->mark_char_map['_'] = 1;
2697+    ctx->mark_char_map['`'] = 1;
2698+    ctx->mark_char_map['&'] = 1;
2699+    ctx->mark_char_map[';'] = 1;
2700+    ctx->mark_char_map['<'] = 1;
2701+    ctx->mark_char_map['>'] = 1;
2702+    ctx->mark_char_map['['] = 1;
2703+    ctx->mark_char_map['!'] = 1;
2704+    ctx->mark_char_map[']'] = 1;
2705+    ctx->mark_char_map['\0'] = 1;
2706+
2707+    if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2708+        ctx->mark_char_map['~'] = 1;
2709+
2710+    if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2711+        ctx->mark_char_map['$'] = 1;
2712+
2713+    if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2714+        ctx->mark_char_map['@'] = 1;
2715+
2716+    if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2717+        ctx->mark_char_map[':'] = 1;
2718+
2719+    if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2720+        ctx->mark_char_map['.'] = 1;
2721+
2722+    if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2723+        ctx->mark_char_map['|'] = 1;
2724+
2725+    if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2726+        int i;
2727+
2728+        /* NOTE(review): using sizeof() as the entry count assumes one-byte map entries - confirm. */
2729+        for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2730+            if(ISWHITESPACE_(i))
2731+                ctx->mark_char_map[i] = 1;
2732+        }
2733+    }
2734+}
2734+
2735+/* We limit code span marks to at most CODESPAN_MARK_MAXLEN backticks. This
2736+ * bounds the pathological case of many openers, each of a different length:
2737+ * resolving them would otherwise be O(n^2). */
2738+#define CODESPAN_MARK_MAXLEN    32
2739+/* Test whether a code span opens at `beg` (a backtick); on TRUE all four mark offsets are stored, on FALSE only *p_opener_end is. */
2740+static int
2741+md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2742+                OFF* p_opener_beg, OFF* p_opener_end,
2743+                OFF* p_closer_beg, OFF* p_closer_end,
2744+                OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2745+                int* p_reached_paragraph_end)
2746+{
2747+    OFF opener_beg = beg;
2748+    OFF opener_end;
2749+    OFF closer_beg;
2750+    OFF closer_end;
2751+    SZ mark_len;
2752+    OFF line_end;
2753+    int has_space_after_opener = FALSE;
2754+    int has_eol_after_opener = FALSE;
2755+    int has_space_before_closer = FALSE;
2756+    int has_eol_before_closer = FALSE;
2757+    int has_only_space = TRUE;
2758+    int line_index = 0;
2759+
2760+    line_end = lines[0].end;
2761+    opener_end = opener_beg;
2762+    while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
2763+        opener_end++;
2764+    has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
2765+    has_eol_after_opener = (opener_end == line_end);
2766+
2767+    /* The caller needs to know end of the opening mark even if we fail. */
2768+    *p_opener_end = opener_end;
2769+
2770+    mark_len = opener_end - opener_beg;
2771+    if(mark_len > CODESPAN_MARK_MAXLEN)
2772+        return FALSE;
2773+    /* last_potential_closers[len-1] is the greatest start offset of a backtick run of length len seen by earlier scans. */
2774+    /* Check whether we already know there is no closer of this length.
2775+     * If so, a re-scan makes no sense. This fixes issue #59. */
2776+    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
2777+       (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
2778+        return FALSE;
2779+
2780+    closer_beg = opener_end;
2781+    closer_end = opener_end;
2782+
2783+    /* Find closer mark. */
2784+    while(TRUE) {
2785+        while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
2786+            if(CH(closer_beg) != _T(' '))
2787+                has_only_space = FALSE;
2788+            closer_beg++;
2789+        }
2790+        closer_end = closer_beg;
2791+        while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
2792+            closer_end++;
2793+
2794+        if(closer_end - closer_beg == mark_len) {
2795+            /* Success. */
2796+            has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
2797+            has_eol_before_closer = (closer_beg == lines[line_index].beg);
2798+            break;
2799+        }
2800+
2801+        if(closer_end - closer_beg > 0) {
2802+            /* We have found a back-tick which is not part of the closer. */
2803+            has_only_space = FALSE;
2804+
2805+            /* But if we eventually fail, remember it as a potential closer
2806+             * of its own length for future attempts, avoiding rescans. Runs of
2807+             * exactly CODESPAN_MARK_MAXLEN backticks must be recorded too. */
2808+            if(closer_end - closer_beg <= CODESPAN_MARK_MAXLEN) {
2809+                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
2810+                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
2811+            }
2812+        }
2813+
2814+        if(closer_end >= line_end) {
2815+            line_index++;
2816+            if(line_index >= n_lines) {
2817+                /* Reached end of the paragraph and still nothing. */
2818+                *p_reached_paragraph_end = TRUE;
2819+                return FALSE;
2820+            }
2821+            /* Try on the next line. */
2822+            line_end = lines[line_index].end;
2823+            closer_beg = lines[line_index].beg;
2824+        } else {
2825+            closer_beg = closer_end;
2826+        }
2827+    }
2828+
2829+    /* If there is a space or a new line both after the opener and before the
2830+     * closer (and if the code span is not made of spaces only), consume one
2831+     * initial and one trailing space as part of the marks. */
2832+    if(!has_only_space  &&
2833+       (has_space_after_opener || has_eol_after_opener)  &&
2834+       (has_space_before_closer || has_eol_before_closer))
2835+    {
2836+        if(has_space_after_opener)
2837+            opener_end++;
2838+        else
2839+            opener_end = lines[1].beg;
2840+
2841+        if(has_space_before_closer)
2842+            closer_beg--;
2843+        else {
2844+            closer_beg = lines[line_index-1].end;
2845+            /* We need to eat the preceding "\r\n" but not any line trailing
2846+             * spaces. */
2847+            while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
2848+                closer_beg++;
2849+        }
2850+    }
2851+
2852+    *p_opener_beg = opener_beg;
2853+    *p_opener_end = opener_end;
2854+    *p_closer_beg = closer_beg;
2855+    *p_closer_end = closer_end;
2856+    return TRUE;
2857+}
2858+/* Test for an URI autolink "<scheme:path>" whose '<' is at `beg`; on TRUE, *p_end receives the offset just past the closing '>'. */
2859+static int
2860+md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2861+{
2862+    OFF off = beg+1;
2863+
2864+    MD_ASSERT(CH(beg) == _T('<'));
2865+
2866+    /* Check for scheme: 2 to 32 characters terminated by ':'. */
2867+    if(off >= max_end  ||  !ISASCII(off))   /* NOTE(review): CommonMark wants an ASCII letter first; ISASCII() is broader -- confirm. */
2868+        return FALSE;
2869+    off++;
2870+    while(1) {
2871+        if(off >= max_end)
2872+            return FALSE;
2873+        if(off - (beg+1) > 32)   /* Scheme starts at beg+1, just past '<'; counting from beg rejected 32-char schemes. */
2874+            return FALSE;
2875+        if(CH(off) == _T(':')  &&  off - beg >= 3)
2876+            break;
2877+        if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2878+            return FALSE;
2879+        off++;
2880+    }
2881+
2882+    /* Check the path after the scheme: no whitespace, control chars or '<'. */
2883+    while(off < max_end  &&  CH(off) != _T('>')) {
2884+        if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2885+            return FALSE;
2886+        off++;
2887+    }
2888+
2889+    if(off >= max_end)
2890+        return FALSE;
2891+
2892+    MD_ASSERT(CH(off) == _T('>'));
2893+    *p_end = off+1;
2894+    return TRUE;
2895+}
2896+/* Test for an e-mail autolink "<user@host>" whose '<' is at `beg`; on TRUE, *p_end receives the offset just past the closing '>'. */
2897+static int
2898+md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2899+{
2900+    OFF off = beg + 1;
2901+    int label_len;
2902+
2903+    MD_ASSERT(CH(beg) == _T('<'));
2904+
2905+    /* The code should correspond to this regexp:
2906+            /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
2907+            @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2908+            (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
2909+     */
2910+
2911+    /* Username (before '@'); it must not be empty. */
2912+    while(off < max_end  &&  (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2913+        off++;
2914+    if(off <= beg+1)
2915+        return FALSE;
2916+
2917+    /* '@' */
2918+    if(off >= max_end  ||  CH(off) != _T('@'))
2919+        return FALSE;
2920+    off++;
2921+
2922+    /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
2923+     * characters or '-', but '-' is not allowed as first or last char. */
2924+    label_len = 0;
2925+    while(off < max_end) {
2926+        if(ISALNUM(off))
2927+            label_len++;
2928+        else if(CH(off) == _T('-')  &&  label_len > 0)
2929+            label_len++;
2930+        else if(CH(off) == _T('.')  &&  label_len > 0  &&  CH(off-1) != _T('-'))
2931+            label_len = 0;
2932+        else
2933+            break;
2934+
2935+        if(label_len > 63)
2936+            return FALSE;
2937+
2938+        off++;
2939+    }
2940+    /* The last label must be non-empty, must not end with '-', and must be directly followed by '>'. */
2941+    if(label_len <= 0  || off >= max_end  ||  CH(off) != _T('>') ||  CH(off-1) == _T('-'))
2942+        return FALSE;
2943+
2944+    *p_end = off+1;
2945+    return TRUE;
2946+}
2947+/* Try an URI autolink first, then an e-mail autolink; *p_missing_mailto is FALSE for the former, TRUE for the latter, untouched on failure. */
2948+static int
2949+md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2950+{
2951+    if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2952+        *p_missing_mailto = FALSE;
2953+        return TRUE;
2954+    }
2955+
2956+    if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2957+        *p_missing_mailto = TRUE;
2958+        return TRUE;
2959+    }
2960+
2961+    return FALSE;
2962+}
2963+/* Scan the given lines and push a mark for every potential inline construct; returns `ret` (0 here unless PUSH_MARK, defined elsewhere, presumably changes it). */
2964+static int
2965+md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2966+{
2967+    int i;
2968+    int ret = 0;
2969+    MD_MARK* mark;
2970+    OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
2971+    int codespan_scanned_till_paragraph_end = FALSE;
2972+
2973+    for(i = 0; i < n_lines; i++) {
2974+        const MD_LINE* line = &lines[i];
2975+        OFF off = line->beg;
2976+        OFF line_end = line->end;
2977+
2978+        while(TRUE) {
2979+            CHAR ch;
2980+
2981+#ifdef MD4C_USE_UTF16
2982+    /* For UTF-16, mark_char_map[] covers only ASCII. */
2983+    #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
2984+                                (ctx->mark_char_map[(unsigned char) CH(off)]))
2985+#else
2986+    /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
2987+    #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
2988+#endif
2989+
2990+            /* Optimization: Use some loop unrolling. */
2991+            while(off + 3 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
2992+                                      &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
2993+                off += 4;
2994+            while(off < line_end  &&  !IS_MARK_CHAR(off+0))
2995+                off++;
2996+
2997+            if(off >= line_end)
2998+                break;
2999+
3000+            ch = CH(off);
3001+
3002+            /* A backslash escape.
3003+             * It can go beyond line->end as it may involve escaped new
3004+             * line to form a hard break. */
3005+            if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
3006+                /* Hard-break cannot be on the last line of the block. */
3007+                if(!ISNEWLINE(off+1)  ||  i+1 < n_lines)
3008+                    PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
3009+                off += 2;
3010+                continue;
3011+            }
3012+
3013+            /* A potential (string) emphasis start/end. */
3014+            if(ch == _T('*')  ||  ch == _T('_')) {
3015+                OFF tmp = off+1;
3016+                int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
3017+                int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */
3018+
3019+                while(tmp < line_end  &&  CH(tmp) == ch)
3020+                    tmp++;
3021+
3022+                if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
3023+                    left_level = 0;
3024+                else if(ISUNICODEPUNCTBEFORE(off))
3025+                    left_level = 1;
3026+                else
3027+                    left_level = 2;
3028+
3029+                if(tmp == line_end  ||  ISUNICODEWHITESPACE(tmp))
3030+                    right_level = 0;
3031+                else if(ISUNICODEPUNCT(tmp))
3032+                    right_level = 1;
3033+                else
3034+                    right_level = 2;
3035+
3036+                /* Intra-word underscore doesn't have special meaning. */
3037+                if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
3038+                    left_level = 0;
3039+                    right_level = 0;
3040+                }
3041+
3042+                if(left_level != 0  ||  right_level != 0) {
3043+                    unsigned flags = 0;
3044+
3045+                    if(left_level > 0  &&  left_level >= right_level)
3046+                        flags |= MD_MARK_POTENTIAL_CLOSER;
3047+                    if(right_level > 0  &&  right_level >= left_level)
3048+                        flags |= MD_MARK_POTENTIAL_OPENER;
3049+                    if(left_level == 2  &&  right_level == 2)
3050+                        flags |= MD_MARK_EMPH_INTRAWORD;
3051+
3052+                    /* For "the rule of three" we need to remember the original
3053+                     * size of the mark (modulo three), before we potentially
3054+                     * split the mark when being later resolved partially by some
3055+                     * shorter closer. */
3056+                    switch((tmp - off) % 3) {
3057+                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
3058+                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
3059+                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
3060+                    }
3061+
3062+                    PUSH_MARK(ch, off, tmp, flags);
3063+
3064+                    /* During resolving, multiple asterisks may have to be
3065+                     * split into independent span start/ends. Consider e.g.
3066+                     * "**foo* bar*". Therefore we push also some empty dummy
3067+                     * marks to have enough space for that. */
3068+                    off++;
3069+                    while(off < tmp) {
3070+                        PUSH_MARK('D', off, off, 0);
3071+                        off++;
3072+                    }
3073+                    continue;
3074+                }
3075+
3076+                off = tmp;
3077+                continue;
3078+            }
3079+
3080+            /* A potential code span start/end. */
3081+            if(ch == _T('`')) {
3082+                OFF opener_beg, opener_end;
3083+                OFF closer_beg, closer_end;
3084+                int is_code_span;
3085+
3086+                is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
3087+                                    &opener_beg, &opener_end, &closer_beg, &closer_end,
3088+                                    codespan_last_potential_closers,
3089+                                    &codespan_scanned_till_paragraph_end);
3090+                if(is_code_span) {
3091+                    PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
3092+                    PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3093+                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3094+                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3095+
3096+                    off = closer_end;
3097+
3098+                    /* Advance the current line accordingly. */
3099+                    while(off > line_end) {
3100+                        i++;
3101+                        line++;
3102+                        line_end = line->end;
3103+                    }
3104+                    continue;
3105+                }
3106+
3107+                off = opener_end;
3108+                continue;
3109+            }
3110+
3111+            /* A potential entity start. */
3112+            if(ch == _T('&')) {
3113+                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3114+                off++;
3115+                continue;
3116+            }
3117+
3118+            /* A potential entity end. */
3119+            if(ch == _T(';')) {
3120+                /* We surely cannot be entity unless the previous mark is '&'. */
3121+                if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
3122+                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3123+
3124+                off++;
3125+                continue;
3126+            }
3127+
3128+            /* A potential autolink or raw HTML start/end. */
3129+            if(ch == _T('<')) {
3130+                int is_autolink;
3131+                OFF autolink_end;
3132+                int missing_mailto;
3133+
3134+                if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3135+                    int is_html;
3136+                    OFF html_end;
3137+
3138+                    /* Given the nature of the raw HTML, we have to recognize
3139+                     * it here. Doing so later in md_analyze_lt_gt() could
3140+                     * open can of worms of quadratic complexity. */
3141+                    is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
3142+                                    lines[n_lines-1].end, &html_end);
3143+                    if(is_html) {
3144+                        PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
3145+                        PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3146+                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3147+                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3148+                        off = html_end;
3149+
3150+                        /* Advance the current line accordingly. */
3151+                        while(off > line_end) {
3152+                            i++;
3153+                            line++;
3154+                            line_end = line->end;
3155+                        }
3156+                        continue;
3157+                    }
3158+                }
3159+
3160+                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
3161+                                    &autolink_end, &missing_mailto);
3162+                if(is_autolink) {
3163+                    PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
3164+                                MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3165+                    PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
3166+                                MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
3167+                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3168+                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3169+                    off = autolink_end;
3170+                    continue;
3171+                }
3172+
3173+                off++;
3174+                continue;
3175+            }
3176+
3177+            /* A potential link or its part. */
3178+            if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
3179+                OFF tmp = (ch == _T('[') ? off+1 : off+2);
3180+                PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
3181+                off = tmp;
3182+                /* Two dummies to make enough place for data we need if it is
3183+                 * a link. */
3184+                PUSH_MARK('D', off, off, 0);
3185+                PUSH_MARK('D', off, off, 0);
3186+                continue;
3187+            }
3188+            if(ch == _T(']')) {
3189+                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3190+                off++;
3191+                continue;
3192+            }
3193+
3194+            /* A potential permissive e-mail autolink. */
3195+            if(ch == _T('@')) {
3196+                if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
3197+                    off + 3 < line->end  &&  ISALNUM(off+1))
3198+                {
3199+                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3200+                    /* Push a dummy as a reserve for a closer. */
3201+                    PUSH_MARK('D', off, off, 0);
3202+                }
3203+
3204+                off++;
3205+                continue;
3206+            }
3207+
3208+            /* A potential permissive URL autolink. */
3209+            if(ch == _T(':')) {
3210+                static struct {
3211+                    const CHAR* scheme;
3212+                    SZ scheme_size;
3213+                    const CHAR* suffix;
3214+                    SZ suffix_size;
3215+                } scheme_map[] = {
3216+                    /* In the order from the most frequently used, arguably. */
3217+                    { _T("http"), 4,    _T("//"), 2 },
3218+                    { _T("https"), 5,   _T("//"), 2 },
3219+                    { _T("ftp"), 3,     _T("//"), 2 }
3220+                };
3221+                int scheme_index;
3222+
3223+                for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3224+                    const CHAR* scheme = scheme_map[scheme_index].scheme;
3225+                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3226+                    const CHAR* suffix = scheme_map[scheme_index].suffix;
3227+                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3228+
3229+                    if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
3230+                        (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~([")))  &&
3231+                        off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
3232+                    {
3233+                        PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
3234+                        /* Push a dummy as a reserve for a closer. */
3235+                        PUSH_MARK('D', off, off, 0);
3236+                        off += 1 + suffix_size;
3237+                        break;
3238+                    }
3239+                }
3240+
3241+                off++;   /* NOTE(review): also runs after a match above, so one character past the suffix is skipped unscanned -- confirm intended. */
3242+                continue;
3243+            }
3244+
3245+            /* A potential permissive WWW autolink. */
3246+            if(ch == _T('.')) {
3247+                if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
3248+                    (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~([")))  &&
3249+                    off + 1 < line_end)
3250+                {
3251+                    PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
3252+                    /* Push a dummy as a reserve for a closer. */
3253+                    PUSH_MARK('D', off, off, 0);
3254+                    off++;
3255+                    continue;
3256+                }
3257+
3258+                off++;
3259+                continue;
3260+            }
3261+
3262+            /* A potential table cell boundary or wiki link label delimiter. */
3263+            if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
3264+                PUSH_MARK(ch, off, off+1, 0);
3265+                off++;
3266+                continue;
3267+            }
3268+
3269+            /* A potential strikethrough start/end. */
3270+            if(ch == _T('~')) {
3271+                OFF tmp = off+1;
3272+
3273+                while(tmp < line_end  &&  CH(tmp) == _T('~'))
3274+                    tmp++;
3275+
3276+                if(tmp - off < 3) {
3277+                    unsigned flags = 0;
3278+
3279+                    if(tmp < line_end  &&  !ISUNICODEWHITESPACE(tmp))
3280+                        flags |= MD_MARK_POTENTIAL_OPENER;
3281+                    if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off))
3282+                        flags |= MD_MARK_POTENTIAL_CLOSER;
3283+                    if(flags != 0)
3284+                        PUSH_MARK(ch, off, tmp, flags);
3285+                }
3286+
3287+                off = tmp;
3288+                continue;
3289+            }
3290+
3291+            /* A potential equation start/end */
3292+            if(ch == _T('$')) {
3293+                /* We can have at most two consecutive $ signs,
3294+                 * where two dollar signs signify a display equation. */
3295+                OFF tmp = off+1;
3296+
3297+                while(tmp < line_end && CH(tmp) == _T('$'))
3298+                    tmp++;
3299+
3300+                if (tmp - off <= 2)
3301+                    PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
3302+                off = tmp;
3303+                continue;
3304+            }
3305+
3306+            /* Turn non-trivial whitespace into single space. */
3307+            if(ISWHITESPACE_(ch)) {
3308+                OFF tmp = off+1;
3309+
3310+                while(tmp < line_end  &&  ISWHITESPACE(tmp))
3311+                    tmp++;
3312+
3313+                if(tmp - off > 1  ||  ch != _T(' '))
3314+                    PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3315+
3316+                off = tmp;
3317+                continue;
3318+            }
3319+
3320+            /* NULL character. */
3321+            if(ch == _T('\0')) {
3322+                PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
3323+                off++;
3324+                continue;
3325+            }
3326+
3327+            off++;
3328+        }
3329+    }
3330+
3331+    /* Add a dummy mark at the end of the mark vector to simplify
3332+     * process_inlines(). */
3333+    PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);
3334+
3335+abort:
3336+    return ret;
3337+}
3338+/* Handle one bracket mark: a potential opener is queued in BRACKET_OPENERS; any other mark is paired with the most recently queued opener, if there is one. */
3339+static void
3340+md_analyze_bracket(MD_CTX* ctx, int mark_index)
3341+{
3342+    /* We cannot really resolve links here as for that we would need
3343+     * more context. E.g. a following pair of brackets (reference link),
3344+     * or enclosing pair of brackets (if the inner is the link, the outer
3345+     * one cannot be.)
3346+     *
3347+     * Therefore we here only construct a list of resolved '[' ']' pairs
3348+     * ordered by position of the closer. This allows us to analyze what is
3349+     * or is not link in the right order, from inside to outside in case
3350+     * of nested brackets.
3351+     *
3352+     * The resolving itself is deferred into md_resolve_links().
3353+     */
3354+
3355+    MD_MARK* mark = &ctx->marks[mark_index];
3356+
3357+    if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3358+        md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
3359+        return;
3360+    }
3361+
3362+    if(BRACKET_OPENERS.tail >= 0) {
3363+        /* Pop the opener from the chain. */
3364+        int opener_index = BRACKET_OPENERS.tail;
3365+        MD_MARK* opener = &ctx->marks[opener_index];
3366+        if(opener->prev >= 0)
3367+            ctx->marks[opener->prev].next = -1;
3368+        else
3369+            BRACKET_OPENERS.head = -1;
3370+        BRACKET_OPENERS.tail = opener->prev;
3371+
3372+        /* Interconnect the opener and closer. */
3373+        opener->next = mark_index;
3374+        mark->prev = opener_index;
3375+
3376+        /* Add the pair into chain of potential links for md_resolve_links().
3377+         * Note we misuse opener->prev for this as opener->next points to its
3378+         * closer. */
3379+        if(ctx->unresolved_link_tail >= 0)
3380+            ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3381+        else
3382+            ctx->unresolved_link_head = opener_index;
3383+        ctx->unresolved_link_tail = opener_index;
3384+        opener->prev = -1;
3385+    }
3386+}
3387+
3388+/* Forward declaration: md_resolve_links() below calls this function. */
3389+static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3390+                                     int mark_beg, int mark_end);
3391+
3392+static int
3393+md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3394+{
3395+    int opener_index = ctx->unresolved_link_head;
3396+    OFF last_link_beg = 0;
3397+    OFF last_link_end = 0;
3398+    OFF last_img_beg = 0;
3399+    OFF last_img_end = 0;
3400+
3401+    while(opener_index >= 0) {
3402+        MD_MARK* opener = &ctx->marks[opener_index];
3403+        int closer_index = opener->next;
3404+        MD_MARK* closer = &ctx->marks[closer_index];
3405+        int next_index = opener->prev;
3406+        MD_MARK* next_opener;
3407+        MD_MARK* next_closer;
3408+        MD_LINK_ATTR attr;
3409+        int is_link = FALSE;
3410+
3411+        if(next_index >= 0) {
3412+            next_opener = &ctx->marks[next_index];
3413+            next_closer = &ctx->marks[next_opener->next];
3414+        } else {
3415+            next_opener = NULL;
3416+            next_closer = NULL;
3417+        }
3418+
3419+        /* If nested ("[ [ ] ]"), we need to make sure that:
3420+         *   - The outer does not end inside of (...) belonging to the inner.
3421+         *   - The outer cannot be link if the inner is link (i.e. not image).
3422+         *
3423+         * (Note we here analyze from inner to outer as the marks are ordered
3424+         * by closer->beg.)
3425+         */
3426+        if((opener->beg < last_link_beg  &&  closer->end < last_link_end)  ||
3427+           (opener->beg < last_img_beg  &&  closer->end < last_img_end)  ||
3428+           (opener->beg < last_link_end  &&  opener->ch == '['))
3429+        {
3430+            opener_index = next_index;
3431+            continue;
3432+        }
3433+
3434+        /* Recognize and resolve wiki links.
3435+         * Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
3436+         */
3437+        if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3438+            (opener->end - opener->beg == 1) &&         /* not image */
3439+            next_opener != NULL &&                      /* double '[' opener */
3440+            next_opener->ch == '[' &&
3441+            (next_opener->beg == opener->beg - 1) &&
3442+            (next_opener->end - next_opener->beg == 1) &&
3443+            next_closer != NULL &&                      /* double ']' closer */
3444+            next_closer->ch == ']' &&
3445+            (next_closer->beg == closer->beg + 1) &&
3446+            (next_closer->end - next_closer->beg == 1))
3447+        {
3448+            MD_MARK* delim = NULL;
3449+            int delim_index;
3450+            OFF dest_beg, dest_end;
3451+
3452+            is_link = TRUE;
3453+
3454+            /* We don't allow destination to be longer than 100 characters.
3455+             * Lets scan to see whether there is '|'. (If not then the whole
3456+             * wiki-link has to be below the 100 characters.) */
3457+            delim_index = opener_index + 1;
3458+            while(delim_index < closer_index) {
3459+                MD_MARK* m = &ctx->marks[delim_index];
3460+                if(m->ch == '|') {
3461+                    delim = m;
3462+                    break;
3463+                }
3464+                if(m->ch != 'D'  &&  m->beg - opener->end > 100)
3465+                    break;
3466+                delim_index++;
3467+            }
3468+            dest_beg = opener->end;
3469+            dest_end = (delim != NULL) ? delim->beg : closer->beg;
3470+            if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
3471+                is_link = FALSE;
3472+
3473+            /* There may not be any new line in the destination. */
3474+            if(is_link) {
3475+                OFF off;
3476+                for(off = dest_beg; off < dest_end; off++) {
3477+                    if(ISNEWLINE(off)) {
3478+                        is_link = FALSE;
3479+                        break;
3480+                    }
3481+                }
3482+            }
3483+
3484+            if(is_link) {
3485+                if(delim != NULL) {
3486+                    if(delim->end < closer->beg) {
3487+                        opener->end = delim->beg;
3488+                    } else {
3489+                        /* The pipe is just before the closer: [[foo|]] */
3490+                        closer->beg = delim->beg;
3491+                        delim = NULL;
3492+                    }
3493+                }
3494+
3495+                opener->beg = next_opener->beg;
3496+                opener->next = closer_index;
3497+                opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3498+
3499+                closer->end = next_closer->end;
3500+                closer->prev = opener_index;
3501+                closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3502+
3503+                last_link_beg = opener->beg;
3504+                last_link_end = closer->end;
3505+
3506+                if(delim != NULL) {
3507+                    delim->flags |= MD_MARK_RESOLVED;
3508+                    md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3509+                    md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3510+                } else {
3511+                    md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3512+                }
3513+
3514+                opener_index = next_opener->prev;
3515+                continue;
3516+            }
3517+        }
3518+
3519+        if(next_opener != NULL  &&  next_opener->beg == closer->end) {
3520+            if(next_closer->beg > closer->end + 1) {
3521+                /* Might be full reference link. */
3522+                is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3523+            } else {
3524+                /* Might be shortcut reference link. */
3525+                is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3526+            }
3527+
3528+            if(is_link < 0)
3529+                return -1;
3530+
3531+            if(is_link) {
3532+                /* Eat the 2nd "[...]". */
3533+                closer->end = next_closer->end;
3534+
3535+                /* Do not analyze the label as a standalone link in the next
3536+                 * iteration. */
3537+                next_index = ctx->marks[next_index].prev;
3538+            }
3539+        } else {
3540+            if(closer->end < ctx->size  &&  CH(closer->end) == _T('(')) {
3541+                /* Might be inline link. */
3542+                OFF inline_link_end = UINT_MAX;
3543+
3544+                is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3545+                if(is_link < 0)
3546+                    return -1;
3547+
3548+                /* Check the closing ')' is not inside an already resolved range
3549+                 * (i.e. a range with a higher priority), e.g. a code span. */
3550+                if(is_link) {
3551+                    int i = closer_index + 1;
3552+
3553+                    while(i < ctx->n_marks) {
3554+                        MD_MARK* mark = &ctx->marks[i];
3555+
3556+                        if(mark->beg >= inline_link_end)
3557+                            break;
3558+                        if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
3559+                            if(ctx->marks[mark->next].beg >= inline_link_end) {
3560+                                /* Cancel the link status. */
3561+                                if(attr.title_needs_free)
3562+                                    free(attr.title);
3563+                                is_link = FALSE;
3564+                                break;
3565+                            }
3566+
3567+                            i = mark->next + 1;
3568+                        } else {
3569+                            i++;
3570+                        }
3571+                    }
3572+                }
3573+
3574+                if(is_link) {
3575+                    /* Eat the "(...)" */
3576+                    closer->end = inline_link_end;
3577+                }
3578+            }
3579+
3580+            if(!is_link) {
3581+                /* Might be collapsed reference link. */
3582+                is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3583+                if(is_link < 0)
3584+                    return -1;
3585+            }
3586+        }
3587+
3588+        if(is_link) {
3589+            /* Resolve the brackets as a link. */
3590+            opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3591+            closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3592+
3593+            /* If it is a link, we store the destination and title in the two
3594+             * dummy marks after the opener. */
3595+            MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
3596+            ctx->marks[opener_index+1].beg = attr.dest_beg;
3597+            ctx->marks[opener_index+1].end = attr.dest_end;
3598+
3599+            MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
3600+            md_mark_store_ptr(ctx, opener_index+2, attr.title);
3601+            /* The title might or might not have been allocated for us. */
3602+            if(attr.title_needs_free)
3603+                md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
3604+            ctx->marks[opener_index+2].prev = attr.title_size;
3605+
3606+            if(opener->ch == '[') {
3607+                last_link_beg = opener->beg;
3608+                last_link_end = closer->end;
3609+            } else {
3610+                last_img_beg = opener->beg;
3611+                last_img_end = closer->end;
3612+            }
3613+
3614+            md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3615+        }
3616+
3617+        opener_index = next_index;
3618+    }
3619+
3620+    return 0;
3621+}
3622+
3623+/* Analyze whether the mark '&' starts an HTML entity.
3624+ * If so, update its flags as well as flags of corresponding closer ';' and stretch the opener up to the closer's end. */
3625+static void
3626+md_analyze_entity(MD_CTX* ctx, int mark_index)
3627+{
3628+    MD_MARK* opener = &ctx->marks[mark_index];
3629+    MD_MARK* closer;
3630+    OFF off;
3631+
3632+    /* Cannot be an entity if the closer ';' is not the very next mark.
3633+     * (Any other mark in between would mean a strange character which
3634+     * cannot be part of the entity.)
3635+     *
3636+     * So we can do all the work on '&' and need not call this later for
3637+     * the closing mark ';'.
3638+     */
3639+    if(mark_index + 1 >= ctx->n_marks)
3640+        return;
3641+    closer = &ctx->marks[mark_index+1];
3642+    if(closer->ch != ';')
3643+        return;
3644+
3645+    if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3646+        MD_ASSERT(off == closer->end);
3647+
3648+        md_resolve_range(ctx, NULL, mark_index, mark_index+1);
3649+        opener->end = closer->end;   /* The opener now covers everything up to and including ';'. */
3650+    }
3651+}
3652+/* Flag the mark (a '|') as resolved, append it to the TABLECELLBOUNDARIES chain and count it. */
3653+static void
3654+md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3655+{
3656+    MD_MARK* mark = &ctx->marks[mark_index];
3657+    mark->flags |= MD_MARK_RESOLVED;
3658+
3659+    md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
3660+    ctx->n_table_cell_boundaries++;
3661+}
3662+
3663+/* Split a longer mark into two. The new mark takes the given count of
3664+ * characters from the end of the original, which is shortened accordingly.
3665+ * May only be called if an adequate number of dummy 'D' marks follows.
3666+ * Returns the index of the new mark. */
3667+static int
3668+md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3669+{
3670+    MD_MARK* mark = &ctx->marks[mark_index];
3671+    int new_mark_index = mark_index + (mark->end - mark->beg - n);   /* Slot (length - n) positions after the mark. */
3672+    MD_MARK* dummy = &ctx->marks[new_mark_index];
3673+
3674+    MD_ASSERT(mark->end - mark->beg > n);
3675+    MD_ASSERT(dummy->ch == 'D');
3676+
3677+    memcpy(dummy, mark, sizeof(MD_MARK));   /* Copy all fields first; only the offsets are adjusted below. */
3678+    mark->end -= n;
3679+    dummy->beg = mark->end;
3680+
3681+    return new_mark_index;
3682+}
3683+/* Analyze an emphasis mark ('*' or '_'): pair it with a preceding opener, or queue it as an opener. */
3684+static void
3685+md_analyze_emph(MD_CTX* ctx, int mark_index)
3686+{
3687+    MD_MARK* mark = &ctx->marks[mark_index];
3688+    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3689+
3690+    /* If we can be a closer, try to resolve with the preceding opener. */
3691+    if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3692+        MD_MARK* opener = NULL;
3693+        int opener_index = 0;
3694+
3695+        if(mark->ch == _T('*')) {
3696+            MD_MARKCHAIN* opener_chains[6];
3697+            int i, n_opener_chains;
3698+            unsigned flags = mark->flags;
3699+
3700+            /* Apply the "rule of three": collect only the opener chains this closer may pair with. */
3701+            n_opener_chains = 0;
3702+            opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
3703+            if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3704+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
3705+            if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3706+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
3707+            opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
3708+            if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3709+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
3710+            if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3711+                opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
3712+
3713+            /* Opener is the most recent mark (largest end offset) among the tails of the allowed chains. */
3714+            for(i = 0; i < n_opener_chains; i++) {
3715+                if(opener_chains[i]->tail >= 0) {
3716+                    int tmp_index = opener_chains[i]->tail;
3717+                    MD_MARK* tmp_mark = &ctx->marks[tmp_index];
3718+                    if(opener == NULL  ||  tmp_mark->end > opener->end) {
3719+                        opener_index = tmp_index;
3720+                        opener = tmp_mark;
3721+                    }
3722+                }
3723+            }
3724+        } else {
3725+            /* Simple emph. mark: the opener is the tail of this mark's own chain. */
3726+            if(chain->tail >= 0) {
3727+                opener_index = chain->tail;
3728+                opener = &ctx->marks[opener_index];
3729+            }
3730+        }
3731+
3732+        /* Resolve, if we have found matching opener. */
3733+        if(opener != NULL) {
3734+            SZ opener_size = opener->end - opener->beg;
3735+            SZ closer_size = mark->end - mark->beg;
3736+            MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
3737+
3738+            if(opener_size > closer_size) {
3739+                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);   /* Pair with the split-off part. */
3740+                md_mark_chain_append(ctx, opener_chain, opener_index);
3741+            } else if(opener_size < closer_size) {
3742+                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);   /* Surplus becomes a separate, later mark. */
3743+            }
3744+
3745+            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3746+            md_resolve_range(ctx, opener_chain, opener_index, mark_index);
3747+            return;
3748+        }
3749+    }
3750+
3751+    /* If we could not resolve as closer, we may yet be an opener. */
3752+    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3753+        md_mark_chain_append(ctx, chain, mark_index);
3754+}
3755+/* Analyze a '~' mark: close the span opened by the head of its chain, or queue the mark as an opener. */
3756+static void
3757+md_analyze_tilde(MD_CTX* ctx, int mark_index)
3758+{
3759+    MD_MARK* mark = &ctx->marks[mark_index];
3760+    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3761+
3762+    /* We attempt to be Github Flavored Markdown compatible here. GFM accepts
3763+     * only tildes sequences of length 1 and 2, and the length of the opener
3764+     * and closer has to match. */
3765+
3766+    if((mark->flags & MD_MARK_POTENTIAL_CLOSER)  &&  chain->head >= 0) {
3767+        int opener_index = chain->head;   /* The first opener in the chain, not the last one. */
3768+
3769+        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3770+        md_resolve_range(ctx, chain, opener_index, mark_index);
3771+        return;
3772+    }
3773+
3774+    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3775+        md_mark_chain_append(ctx, chain, mark_index);
3776+}
3777+/* Analyze a '$' mark: close the pending opener if the lengths match, otherwise become the opener. */
3778+static void
3779+md_analyze_dollar(MD_CTX* ctx, int mark_index)
3780+{
3781+    /* This should mimic the way inline equations work in LaTeX, so there
3782+     * can only ever be one item in the chain (i.e. the dollars can't be
3783+     * nested). This is basically the same as the md_analyze_tilde function,
3784+     * except that we require matching openers and closers to be of the same
3785+     * length.
3786+     *
3787+     * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
3788+    if(DOLLAR_OPENERS.head >= 0) {
3789+        /* If the potential closer has a non-matching number of $, discard */
3790+        MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
3791+        MD_MARK* close = &ctx->marks[mark_index];
3792+
3793+        int opener_index = DOLLAR_OPENERS.head;
3794+        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);   /* Done before the length check, i.e. for both branches. */
3795+        if (open->end - open->beg == close->end - close->beg) {
3796+            /* We are the matching closer */
3797+            md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
3798+        } else {
3799+            /* We don't match the opener, so discard old opener and insert as opener */
3800+            md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3801+        }
3802+    } else {
3803+        /* No unmatched openers, so we are opener */
3804+        md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3805+    }
3806+}
3807+/* Analyze a ':' or '.' mark as a potential permissive autolink: validate the domain, then scan the path. */
3808+static void
3809+md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
3810+{
3811+    MD_MARK* opener = &ctx->marks[mark_index];
3812+    int closer_index = mark_index + 1;
3813+    MD_MARK* closer = &ctx->marks[closer_index];
3814+    MD_MARK* next_resolved_mark;
3815+    OFF off = opener->end;
3816+    int n_dots = FALSE;   /* Counter of '.' seen in the domain; starts from FALSE (presumably 0). */
3817+    int has_underscore_in_last_seg = FALSE;
3818+    int has_underscore_in_next_to_last_seg = FALSE;
3819+    int n_opened_parenthesis = 0;
3820+    int n_excess_parenthesis = 0;
3821+
3822+    /* Check for domain. */
3823+    while(off < ctx->size) {
3824+        if(ISALNUM(off) || CH(off) == _T('-')) {
3825+            off++;
3826+        } else if(CH(off) == _T('.')) {
3827+            /* We must see at least one period. */
3828+            n_dots++;
3829+            has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
3830+            has_underscore_in_last_seg = FALSE;
3831+            off++;
3832+        } else if(CH(off) == _T('_')) {
3833+            /* No underscore may be present in the last two domain segments. */
3834+            has_underscore_in_last_seg = TRUE;
3835+            off++;
3836+        } else {
3837+            break;
3838+        }
3839+    }
3840+    if(off > opener->end  &&  CH(off-1) == _T('.')) {   /* A trailing '.' is not counted as part of the domain. */
3841+        off--;
3842+        n_dots--;
3843+    }
3844+    if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
3845+        return;
3846+
3847+    /* Check for path. It ends at the latest where the next resolved mark begins. */
3848+    next_resolved_mark = closer + 1;
3849+    while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED))
3850+        next_resolved_mark++;   /* No explicit bound; NOTE(review): presumably a resolved mark always follows - confirm. */
3851+    while(off < next_resolved_mark->beg  &&  CH(off) != _T('<')  &&  !ISWHITESPACE(off)  &&  !ISNEWLINE(off)) {
3852+        /* Parenthesis must be balanced. */
3853+        if(CH(off) == _T('(')) {
3854+            n_opened_parenthesis++;
3855+        } else if(CH(off) == _T(')')) {
3856+            if(n_opened_parenthesis > 0)
3857+                n_opened_parenthesis--;
3858+            else
3859+                n_excess_parenthesis++;
3860+        }
3861+
3862+        off++;
3863+    }
3864+
3865+    /* Trim trailing punctuation from the end. */
3866+    while(TRUE) {
3867+        if(ISANYOF(off-1, _T("?!.,:*_~"))) {
3868+            off--;
3869+        } else if(CH(off-1) == ')'  &&  n_excess_parenthesis > 0) {
3870+            /* Unmatched ')' can be in an interior of the path but not at the end
3871+             * of it, so the auto-link may be safely nested in a parenthesis
3872+             * pair. */
3873+            off--;
3874+            n_excess_parenthesis--;
3875+        } else {
3876+            break;
3877+        }
3878+    }
3879+
3880+    /* Ok. Let's call it an auto-link. Adapt opener and create closer to zero
3881+     * length so all the contents becomes the link text. */
3882+    MD_ASSERT(closer->ch == 'D');
3883+    opener->end = opener->beg;
3884+    closer->ch = opener->ch;
3885+    closer->beg = off;
3886+    closer->end = off;
3887+    md_resolve_range(ctx, NULL, mark_index, closer_index);
3888+}
3889+
3890+/* The permissive autolinks do not have to be enclosed in '<' '>' but we
3891+ * instead impose stricter rules what is understood as an e-mail address
3892+ * here. Besides alphanumeric characters, only '.', '-', '_' and '+' are
3893+ * accepted in the username and only '.', '-' and '_' after '@'. */
3894+static void
3895+md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
3896+{
3897+    MD_MARK* opener = &ctx->marks[mark_index];
3898+    int closer_index;
3899+    MD_MARK* closer;
3900+    OFF beg = opener->beg;
3901+    OFF end = opener->end;
3902+    int dot_count = 0;
3903+
3904+    MD_ASSERT(CH(beg) == _T('@'));
3905+
3906+    /* Scan backwards for the name before '@'. */
3907+    while(beg > 0  &&  (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+"))))
3908+        beg--;
3909+
3910+    /* Scan forwards for the domain after '@', counting its dots. */
3911+    while(end < ctx->size  &&  (ISALNUM(end) || ISANYOF(end, _T(".-_")))) {
3912+        if(CH(end) == _T('.'))
3913+            dot_count++;
3914+        end++;
3915+    }
3916+    if(CH(end-1) == _T('.')) {  /* Final '.' not part of it. */
3917+        dot_count--;
3918+        end--;
3919+    }
3920+    else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */
3921+        return;
3922+    if(CH(end-1) == _T('@')  ||  dot_count == 0)   /* Empty domain, or a domain without any dot. */
3923+        return;
3924+
3925+    /* Ok. Let's call it auto-link. Adapt opener and create closer to zero
3926+     * length so all the contents becomes the link text. */
3927+    closer_index = mark_index + 1;
3928+    closer = &ctx->marks[closer_index];
3929+    MD_ASSERT(closer->ch == 'D');
3930+
3931+    opener->beg = beg;
3932+    opener->end = beg;
3933+    closer->ch = opener->ch;
3934+    closer->beg = end;
3935+    closer->end = end;
3936+    md_resolve_range(ctx, NULL, mark_index, closer_index);
3937+}
3938+/* Dispatch each unresolved mark in [mark_beg, mark_end) whose character is in mark_chars to its analyzer. */
3939+static inline void
3940+md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3941+                 int mark_beg, int mark_end, const CHAR* mark_chars)
3942+{
3943+    int i = mark_beg;
3944+    MD_UNUSED(lines);
3945+    MD_UNUSED(n_lines);
3946+
3947+    while(i < mark_end) {
3948+        MD_MARK* mark = &ctx->marks[i];
3949+
3950+        /* Skip resolved spans: jump past the closer of a resolved opener. */
3951+        if(mark->flags & MD_MARK_RESOLVED) {
3952+            if(mark->flags & MD_MARK_OPENER) {
3953+                MD_ASSERT(i < mark->next);
3954+                i = mark->next + 1;
3955+            } else {
3956+                i++;
3957+            }
3958+            continue;
3959+        }
3960+
3961+        /* Skip marks we do not want to deal with. */
3962+        if(!ISANYOF_(mark->ch, mark_chars)) {
3963+            i++;
3964+            continue;
3965+        }
3966+
3967+        /* Analyze the mark. Characters without a case below are silently ignored. */
3968+        switch(mark->ch) {
3969+            case '[':   /* Fall through. */
3970+            case '!':   /* Fall through. */
3971+            case ']':   md_analyze_bracket(ctx, i); break;
3972+            case '&':   md_analyze_entity(ctx, i); break;
3973+            case '|':   md_analyze_table_cell_boundary(ctx, i); break;
3974+            case '_':   /* Fall through. */
3975+            case '*':   md_analyze_emph(ctx, i); break;
3976+            case '~':   md_analyze_tilde(ctx, i); break;
3977+            case '$':   md_analyze_dollar(ctx, i); break;
3978+            case '.':   /* Fall through. */
3979+            case ':':   md_analyze_permissive_url_autolink(ctx, i); break;
3980+            case '@':   md_analyze_permissive_email_autolink(ctx, i); break;
3981+        }
3982+
3983+        i++;
3984+    }
3985+}
3986+
3987+/* Analyze marks (build ctx->marks): collect them, then analyze them group by group in order of precedence. */
3988+static int
3989+md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
3990+{
3991+    int ret;   /* NOTE(review): presumably assigned by MD_CHECK(), which is defined outside this view - confirm. */
3992+
3993+    /* Reset the previously collected stack of marks. */
3994+    ctx->n_marks = 0;
3995+
3996+    /* Collect all marks. */
3997+    MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
3998+
3999+    /* We analyze marks in few groups to handle their precedence. */
4000+    /* (1) Entities; code spans; autolinks; raw HTML. (Only '&' marks are analyzed by this call.) */
4001+    md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("&"));
4002+
4003+    /* (2) Links. Afterwards the bracket opener chain and the unresolved link list are emptied. */
4004+    md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"));
4005+    MD_CHECK(md_resolve_links(ctx, lines, n_lines));
4006+    BRACKET_OPENERS.head = -1;
4007+    BRACKET_OPENERS.tail = -1;
4008+    ctx->unresolved_link_head = -1;
4009+    ctx->unresolved_link_tail = -1;
4010+
4011+    if(table_mode) {
4012+        /* (3) Analyze table cell boundaries.
4013+         * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
4014+         * not after, because caller may need it. */
4015+        MD_ASSERT(n_lines == 1);
4016+        TABLECELLBOUNDARIES.head = -1;
4017+        TABLECELLBOUNDARIES.tail = -1;
4018+        ctx->n_table_cell_boundaries = 0;
4019+        md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"));
4020+        return ret;   /* In table mode, group (4) below is not run. */
4021+    }
4022+
4023+    /* (4) Emphasis and strong emphasis; permissive autolinks. */
4024+    md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
4025+
4026+abort:
4027+    return ret;
4028+}
4029+/* Analyze the marks "*_~$@:." in [mark_beg, mark_end), then empty all opener chains. */
4030+static void
4031+md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
4032+                         int mark_beg, int mark_end)
4033+{
4034+    int i;
4035+
4036+    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
4037+
4038+    for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {   /* Reset so openers left unpaired do not leak out of this range. */
4039+        ctx->mark_chains[i].head = -1;
4040+        ctx->mark_chains[i].tail = -1;
4041+    }
4042+}
4043+/* Build the href and title attributes, then run MD_ENTER_SPAN (enter != 0) or MD_LEAVE_SPAN for the given type. */
4044+static int
4045+md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4046+                      const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
4047+                      const CHAR* title, SZ title_size)
4048+{
4049+    MD_ATTRIBUTE_BUILD href_build = { 0 };
4050+    MD_ATTRIBUTE_BUILD title_build = { 0 };
4051+    MD_SPAN_A_DETAIL det;
4052+    int ret = 0;
4053+
4054+    /* Note we here rely on fact that MD_SPAN_A_DETAIL and
4055+     * MD_SPAN_IMG_DETAIL are binary-compatible. */
4056+    memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
4057+    MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4058+                    (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
4059+                    &det.href, &href_build));
4060+    MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
4061+
4062+    if(enter)
4063+        MD_ENTER_SPAN(type, &det);
4064+    else
4065+        MD_LEAVE_SPAN(type, &det);
4066+
4067+abort:   /* Also reached on the normal path: both attribute builds are always freed. */
4068+    md_free_attribute(ctx, &href_build);
4069+    md_free_attribute(ctx, &title_build);
4070+    return ret;
4071+}
4072+/* Build the target attribute, then run MD_ENTER_SPAN (enter != 0) or MD_LEAVE_SPAN for MD_SPAN_WIKILINK. */
4073+static int
4074+md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4075+{
4076+    MD_ATTRIBUTE_BUILD target_build = { 0 };
4077+    MD_SPAN_WIKILINK_DETAIL det;
4078+    int ret = 0;
4079+
4080+    memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
4081+    MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
4082+
4083+    if (enter)
4084+        MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
4085+    else
4086+        MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
4087+
4088+abort:   /* Also reached on the normal path: the attribute build is always freed. */
4089+    md_free_attribute(ctx, &target_build);
4090+    return ret;
4091+}
4092+
4093+
4094+/* Render the output according to the analyzed ctx->marks. */
4095+static int
4096+md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4097+{
4098+    MD_TEXTTYPE text_type;
4099+    const MD_LINE* line = lines;
4100+    MD_MARK* prev_mark = NULL;
4101+    MD_MARK* mark;
4102+    OFF off = lines[0].beg;
4103+    OFF end = lines[n_lines-1].end;
4104+    int enforce_hardbreak = 0;
4105+    int ret = 0;
4106+
4107+    /* Find first resolved mark. Note there is always at least one resolved
4108+     * mark,  the dummy last one after the end of the latest line we actually
4109+     * never really reach. This saves us of a lot of special checks and cases
4110+     * in this function. */
4111+    mark = ctx->marks;
4112+    while(!(mark->flags & MD_MARK_RESOLVED))
4113+        mark++;
4114+
4115+    text_type = MD_TEXT_NORMAL;
4116+
4117+    while(1) {
4118+        /* Process the text up to the next mark or end-of-line. */
4119+        OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
4120+        if(tmp > off) {
4121+            MD_TEXT(text_type, STR(off), tmp - off);
4122+            off = tmp;
4123+        }
4124+
4125+        /* If reached the mark, process it and move to next one. */
4126+        if(off >= mark->beg) {
4127+            switch(mark->ch) {
4128+                case '\\':      /* Backslash escape. */
4129+                    if(ISNEWLINE(mark->beg+1))
4130+                        enforce_hardbreak = 1;
4131+                    else
4132+                        MD_TEXT(text_type, STR(mark->beg+1), 1);
4133+                    break;
4134+
4135+                case ' ':       /* Non-trivial space. */
4136+                    MD_TEXT(text_type, _T(" "), 1);
4137+                    break;
4138+
4139+                case '`':       /* Code span. */
4140+                    if(mark->flags & MD_MARK_OPENER) {
4141+                        MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
4142+                        text_type = MD_TEXT_CODE;
4143+                    } else {
4144+                        MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
4145+                        text_type = MD_TEXT_NORMAL;
4146+                    }
4147+                    break;
4148+
4149+                case '_':       /* Underline (or emphasis if we fall through). */
4150+                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
4151+                        if(mark->flags & MD_MARK_OPENER) {
4152+                            while(off < mark->end) {
4153+                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
4154+                                off++;
4155+                            }
4156+                        } else {
4157+                            while(off < mark->end) {
4158+                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
4159+                                off++;
4160+                            }
4161+                        }
4162+                        break;
4163+                    }
4164+                    MD_FALLTHROUGH();
4165+
4166+                case '*':       /* Emphasis, strong emphasis. */
4167+                    if(mark->flags & MD_MARK_OPENER) {
4168+                        if((mark->end - off) % 2) {
4169+                            MD_ENTER_SPAN(MD_SPAN_EM, NULL);
4170+                            off++;
4171+                        }
4172+                        while(off + 1 < mark->end) {
4173+                            MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
4174+                            off += 2;
4175+                        }
4176+                    } else {
4177+                        while(off + 1 < mark->end) {
4178+                            MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
4179+                            off += 2;
4180+                        }
4181+                        if((mark->end - off) % 2) {
4182+                            MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
4183+                            off++;
4184+                        }
4185+                    }
4186+                    break;
4187+
4188+                case '~':
4189+                    if(mark->flags & MD_MARK_OPENER)
4190+                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
4191+                    else
4192+                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
4193+                    break;
4194+
4195+                case '$':
4196+                    if(mark->flags & MD_MARK_OPENER) {
4197+                        MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4198+                        text_type = MD_TEXT_LATEXMATH;
4199+                    } else {
4200+                        MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
4201+                        text_type = MD_TEXT_NORMAL;
4202+                    }
4203+                    break;
4204+
4205+                case '[':       /* Link, wiki link, image. */
4206+                case '!':
4207+                case ']':
4208+                {
4209+                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
4210+                    const MD_MARK* closer = &ctx->marks[opener->next];
4211+                    const MD_MARK* dest_mark;
4212+                    const MD_MARK* title_mark;
4213+
4214+                    if ((opener->ch == '[' && closer->ch == ']') &&
4215+                        opener->end - opener->beg >= 2 &&
4216+                        closer->end - closer->beg >= 2)
4217+                    {
4218+                        int has_label = (opener->end - opener->beg > 2);
4219+                        SZ target_sz;
4220+
4221+                        if(has_label)
4222+                            target_sz = opener->end - (opener->beg+2);
4223+                        else
4224+                            target_sz = closer->beg - opener->end;
4225+
4226+                        MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
4227+                                 has_label ? STR(opener->beg+2) : STR(opener->end),
4228+                                 target_sz));
4229+
4230+                        break;
4231+                    }
4232+
4233+                    dest_mark = opener+1;
4234+                    MD_ASSERT(dest_mark->ch == 'D');
4235+                    title_mark = opener+2;
4236+                    MD_ASSERT(title_mark->ch == 'D');
4237+
4238+                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
4239+                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
4240+                                STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
4241+                                md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));
4242+
4243+                    /* link/image closer may span multiple lines. */
4244+                    if(mark->ch == ']') {
4245+                        while(mark->end > line->end)
4246+                            line++;
4247+                    }
4248+
4249+                    break;
4250+                }
4251+
4252+                case '<':
4253+                case '>':       /* Autolink or raw HTML. */
4254+                    if(!(mark->flags & MD_MARK_AUTOLINK)) {
4255+                        /* Raw HTML. */
4256+                        if(mark->flags & MD_MARK_OPENER)
4257+                            text_type = MD_TEXT_HTML;
4258+                        else
4259+                            text_type = MD_TEXT_NORMAL;
4260+                        break;
4261+                    }
4262+                    /* Pass through, if auto-link. */
4263+                    MD_FALLTHROUGH();
4264+
4265+                case '@':       /* Permissive e-mail autolink. */
4266+                case ':':       /* Permissive URL autolink. */
4267+                case '.':       /* Permissive WWW autolink. */
4268+                {
4269+                    MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
4270+                    MD_MARK* closer = &ctx->marks[opener->next];
4271+                    const CHAR* dest = STR(opener->end);
4272+                    SZ dest_size = closer->beg - opener->end;
4273+
4274+                    /* For permissive auto-links we do not know closer mark
4275+                     * position at the time of md_collect_marks(), therefore
4276+                     * it can be out-of-order in ctx->marks[].
4277+                     *
4278+                     * With this flag, we make sure that we output the closer
4279+                     * only if we processed the opener. */
4280+                    if(mark->flags & MD_MARK_OPENER)
4281+                        closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;
4282+
4283+                    if(opener->ch == '@' || opener->ch == '.') {
4284+                        dest_size += 7;
4285+                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
4286+                        memcpy(ctx->buffer,
4287+                                (opener->ch == '@' ? _T("mailto:") : _T("http://")),
4288+                                7 * sizeof(CHAR));
4289+                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
4290+                        dest = ctx->buffer;
4291+                    }
4292+
4293+                    if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
4294+                        MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
4295+                                    MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
4296+                    break;
4297+                }
4298+
4299+                case '&':       /* Entity. */
4300+                    MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
4301+                    break;
4302+
4303+                case '\0':
4304+                    MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
4305+                    break;
4306+
4307+                case 127:
4308+                    goto abort;
4309+            }
4310+
4311+            off = mark->end;
4312+
4313+            /* Move to next resolved mark. */
4314+            prev_mark = mark;
4315+            mark++;
4316+            while(!(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off)
4317+                mark++;
4318+        }
4319+
4320+        /* If reached end of line, move to next one. */
4321+        if(off >= line->end) {
4322+            /* If it is the last line, we are done. */
4323+            if(off >= end)
4324+                break;
4325+
4326+            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
4327+                OFF tmp;
4328+
4329+                MD_ASSERT(prev_mark != NULL);
4330+                MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$')  &&  (prev_mark->flags & MD_MARK_OPENER));
4331+                MD_ASSERT(ISANYOF2_(mark->ch, '`', '$')  &&  (mark->flags & MD_MARK_CLOSER));
4332+
4333+                /* Inside a code span, trailing line whitespace has to be
4334+                 * outputted. */
4335+                tmp = off;
4336+                while(off < ctx->size  &&  ISBLANK(off))
4337+                    off++;
4338+                if(off > tmp)
4339+                    MD_TEXT(text_type, STR(tmp), off-tmp);
4340+
4341+                /* and new lines are transformed into single spaces. */
4342+                if(prev_mark->end < off  &&  off < mark->beg)
4343+                    MD_TEXT(text_type, _T(" "), 1);
4344+            } else if(text_type == MD_TEXT_HTML) {
4345+                /* Inside raw HTML, we output the new line verbatim, including
4346+                 * any trailing spaces. */
4347+                OFF tmp = off;
4348+
4349+                while(tmp < end  &&  ISBLANK(tmp))
4350+                    tmp++;
4351+                if(tmp > off)
4352+                    MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
4353+                MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
4354+            } else {
4355+                /* Output soft or hard line break. */
4356+                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
4357+
4358+                if(text_type == MD_TEXT_NORMAL) {
4359+                    if(enforce_hardbreak)
4360+                        break_type = MD_TEXT_BR;
4361+                    else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
4362+                        break_type = MD_TEXT_BR;
4363+                }
4364+
4365+                MD_TEXT(break_type, _T("\n"), 1);
4366+            }
4367+
4368+            /* Move to the next line. */
4369+            line++;
4370+            off = line->beg;
4371+
4372+            enforce_hardbreak = 0;
4373+        }
4374+    }
4375+
4376+abort:
4377+    return ret;
4378+}
4379+
4380+
4381+/***************************
4382+ ***  Processing Tables  ***
4383+ ***************************/
4384+
4385+static void /* Fill align[0..n_align-1] from the ':' markers around each dash run of the table underline. */
4386+md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4387+{
4388+    static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4389+    OFF off = beg;
4390+
4391+    while(n_align > 0) {
4392+        int index = 0;  /* index into align_map[]: bit 0 = ':' before the dashes, bit 1 = ':' after them */
4393+
4394+        while(CH(off) != _T('-'))  /* NOTE(review): this scan is not bounded by end; presumably the underline was already validated to hold n_align dash runs -- confirm */
4395+            off++;
4396+        if(off > beg  &&  CH(off-1) == _T(':'))
4397+            index |= 1;
4398+        while(off < end  &&  CH(off) == _T('-'))
4399+            off++;
4400+        if(off < end  &&  CH(off) == _T(':'))
4401+            index |= 2;
4402+
4403+        *align = align_map[index];
4404+        align++;
4405+        n_align--;
4406+    }
4407+
4408+}
4409+
4410+/* Forward declaration: md_process_table_cell() below calls it before its definition. */
4411+static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
4412+
4413+static int /* Emit one cell block of type cell_type whose contents are the whitespace-trimmed range [beg, end). */
4414+md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4415+{
4416+    MD_LINE line;
4417+    MD_BLOCK_TD_DETAIL det;
4418+    int ret = 0;
4419+
4420+    while(beg < end  &&  ISWHITESPACE(beg))  /* Trim leading whitespace. */
4421+        beg++;
4422+    while(end > beg  &&  ISWHITESPACE(end-1))  /* Trim trailing whitespace. */
4423+        end--;
4424+
4425+    det.align = align;
4426+    line.beg = beg;
4427+    line.end = end;
4428+
4429+    MD_ENTER_BLOCK(cell_type, &det);
4430+    MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));  /* The cell is processed as a single-line normal block. */
4431+    MD_LEAVE_BLOCK(cell_type, &det);
4432+
4433+abort:
4434+    return ret;
4435+}
4436+
4437+static int /* Emit one MD_BLOCK_TR for the line [beg, end), producing exactly col_count cells of type cell_type. */
4438+md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4439+                     const MD_ALIGN* align, int col_count)
4440+{
4441+    MD_LINE line;
4442+    OFF* pipe_offs = NULL;
4443+    int i, j, k, n;
4444+    int ret = 0;
4445+
4446+    line.beg = beg;
4447+    line.end = end;
4448+
4449+    /* Break the line into table cells by identifying the pipe characters
4450+     * which form the cell boundaries. */
4451+    MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
4452+
4453+    /* We have to remember the cell boundaries in a local buffer because
4454+     * ctx->marks[] is reused during cell contents processing. */
4455+    n = ctx->n_table_cell_boundaries + 2;  /* +2: one slot for beg, one for the end+1 sentinel. */
4456+    pipe_offs = (OFF*) malloc(n * sizeof(OFF));
4457+    if(pipe_offs == NULL) {
4458+        MD_LOG("malloc() failed.");
4459+        ret = -1;
4460+        goto abort;
4461+    }
4462+    j = 0;
4463+    pipe_offs[j++] = beg;
4464+    for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) {
4465+        MD_MARK* mark = &ctx->marks[i];
4466+        pipe_offs[j++] = mark->end;  /* Offset just past the boundary mark. */
4467+    }
4468+    pipe_offs[j++] = end+1;  /* Sentinel: the "-1" applied to every cell end below turns it back into end. */
4469+
4470+    /* Process cells. */
4471+    MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4472+    k = 0;
4473+    for(i = 0; i < j-1  &&  k < col_count; i++) {
4474+        if(pipe_offs[i] < pipe_offs[i+1]-1)  /* Skip empty ranges between adjacent offsets. */
4475+            MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
4476+    }
4477+    /* Make sure we emit enough table cells even if the current row contains
4478+     * too few of them (the padding cells use the empty range [0, 0)). */
4479+    while(k < col_count)
4480+        MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
4481+    MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4482+
4483+abort:
4484+    free(pipe_offs);
4485+
4486+    /* Free any temporary memory blocks stored within some dummy marks. */
4487+    for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4488+        free(md_mark_get_ptr(ctx, i));
4489+    PTR_CHAIN.head = -1;
4490+    PTR_CHAIN.tail = -1;
4491+
4492+    return ret;
4493+}
4494+
4495+static int /* Emit THEAD (from lines[0]) and, if present, TBODY (from lines[2..]) of a table with col_count columns. */
4496+md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
4497+{
4498+    MD_ALIGN* align;
4499+    int i;
4500+    int ret = 0;
4501+
4502+    /* At least two lines have to be present: The column headers and the line
4503+     * with the underlines. */
4504+    MD_ASSERT(n_lines >= 2);
4505+
4506+    align = malloc(col_count * sizeof(MD_ALIGN));  /* One alignment per column; released at abort. */
4507+    if(align == NULL) {
4508+        MD_LOG("malloc() failed.");
4509+        ret = -1;
4510+        goto abort;
4511+    }
4512+
4513+    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);  /* lines[1] is the underline. */
4514+
4515+    MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4516+    MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4517+                        lines[0].beg, lines[0].end, align, col_count));
4518+    MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4519+
4520+    if(n_lines > 2) {  /* TBODY is emitted only when there is at least one body row. */
4521+        MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4522+        for(i = 2; i < n_lines; i++) {
4523+            MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4524+                     lines[i].beg, lines[i].end, align, col_count));
4525+        }
4526+        MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4527+    }
4528+
4529+abort:
4530+    free(align);
4531+    return ret;
4532+}
4533+
4534+
4535+/**************************
4536+ ***  Processing Block  ***
4537+ **************************/
4538+
4539+#define MD_BLOCK_CONTAINER_OPENER   0x01  /* Record opens a container: md_process_all_blocks() enters the block. */
4540+#define MD_BLOCK_CONTAINER_CLOSER   0x02  /* Record closes a container: md_process_all_blocks() leaves the block. */
4541+#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)  /* Mask matching either container flag. */
4542+#define MD_BLOCK_LOOSE_LIST         0x04  /* List is loose; reported to the renderer as is_tight == FALSE. */
4543+#define MD_BLOCK_SETEXT_HEADER      0x08  /* MD_BLOCK_H with a setext underline; md_end_current_block() drops the underline line. */
4544+
4545+struct MD_BLOCK_tag {  /* Block record stored in ctx->block_bytes; leaf blocks are followed by their line records. */
4546+    MD_BLOCKTYPE type  :  8;
4547+    unsigned flags     :  8;  /* Combination of the MD_BLOCK_xxx flag macros. */
4548+
4549+    /* MD_BLOCK_H:      Header level (1 - 6)
4550+     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
4551+     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
4552+     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
4553+     */
4554+    unsigned data      : 16;
4555+
4556+    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) in the block.
4557+     * MD_BLOCK_LI:     Task mark offset in the input doc.
4558+     * MD_BLOCK_OL:     Start item number.
4559+     */
4560+    unsigned n_lines;
4561+};
4562+
4563+struct MD_CONTAINER_tag {  /* Per-container state kept in ctx->containers[]. */
4564+    CHAR ch;
4565+    unsigned is_loose    : 8;  /* Read by md_process_leaf_block() to decide whether MD_BLOCK_P is reported. */
4566+    unsigned is_task     : 8;
4567+    unsigned start;
4568+    unsigned mark_indent;
4569+    unsigned contents_indent;
4570+    OFF block_byte_off;
4571+    OFF task_mark_off;
4572+};
4573+
4574+
4575+static int /* Analyze and then render the inline contents of the given lines, releasing temporary mark memory afterwards. */
4576+md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4577+{
4578+    int i;
4579+    int ret;  /* NOTE(review): not initialized here; presumably MD_CHECK assigns it before any jump to abort -- confirm */
4580+
4581+    MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4582+    MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4583+
4584+abort:
4585+    /* Free any temporary memory blocks stored within some dummy marks.
4586+     * This runs on both the success and the failure path. */
4587+    for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4588+        free(md_mark_get_ptr(ctx, i));
4589+    PTR_CHAIN.head = -1;
4590+    PTR_CHAIN.tail = -1;
4591+    return ret;
4592+}
4593+
4594+static int /* Output each verbatim line as text_type: its indentation as spaces, the line itself, then a newline. */
4595+md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
4596+{
4597+    static const CHAR indent_chunk_str[] = _T("                ");
4598+    static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;  /* Length without the terminator. */
4599+
4600+    int i;
4601+    int ret = 0;
4602+
4603+    for(i = 0; i < n_lines; i++) {
4604+        const MD_VERBATIMLINE* line = &lines[i];
4605+        int indent = line->indent;
4606+
4607+        MD_ASSERT(indent >= 0);
4608+
4609+        /* Output code indentation: whole chunks first, then the remainder. */
4610+        while(indent > (int) indent_chunk_size) {
4611+            MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4612+            indent -= indent_chunk_size;
4613+        }
4614+        if(indent > 0)
4615+            MD_TEXT(text_type, indent_chunk_str, indent);
4616+
4617+        /* Output the code line itself. */
4618+        MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4619+
4620+        /* Enforce end-of-line. */
4621+        MD_TEXT(text_type, _T("\n"), 1);
4622+    }
4623+
4624+abort:
4625+    return ret;
4626+}
4627+
4628+static int /* Output the lines of a code block as MD_TEXT_CODE, minus the opening fence or the blank edge lines. */
4629+md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
4630+{
4631+    if(is_fenced) {
4632+        /* Skip the first line in case of fenced code: It is the fence.
4633+         * (Only the starting fence is present due to logic in md_analyze_line().) */
4634+        lines++;
4635+        n_lines--;
4636+    } else {
4637+        /* Ignore blank lines at start/end of indented code block. */
4638+        while(n_lines > 0  &&  lines[0].beg == lines[0].end) {
4639+            lines++;
4640+            n_lines--;
4641+        }
4642+        while(n_lines > 0  &&  lines[n_lines-1].beg == lines[n_lines-1].end) {
4643+            n_lines--;
4644+        }
4645+    }
4646+
4647+    if(n_lines == 0)  /* Nothing left to output. */
4648+        return 0;
4649+
4650+    return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4651+}
4652+
4653+static int /* Fill det->info, det->lang and det->fence_char from the opening fence line stored right after block. */
4654+md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4655+                            MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4656+{
4657+    const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
4658+    OFF beg = fence_line->beg;
4659+    OFF end = fence_line->end;
4660+    OFF lang_end;
4661+    CHAR fence_ch = CH(fence_line->beg);
4662+    int ret = 0;
4663+
4664+    /* Skip the fence itself. */
4665+    while(beg < ctx->size  &&  CH(beg) == fence_ch)
4666+        beg++;
4667+    /* Trim initial spaces. */
4668+    while(beg < ctx->size  &&  CH(beg) == _T(' '))
4669+        beg++;
4670+
4671+    /* Trim trailing spaces. */
4672+    while(end > beg  &&  CH(end-1) == _T(' '))
4673+        end--;
4674+
4675+    /* Build info string attribute (everything after the fence, trimmed). */
4676+    MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
4677+
4678+    /* Build lang attribute: the info string up to its first whitespace. */
4679+    lang_end = beg;
4680+    while(lang_end < end  &&  !ISWHITESPACE(lang_end))
4681+        lang_end++;
4682+    MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
4683+
4684+    det->fence_char = fence_ch;
4685+
4686+abort:
4687+    return ret;
4688+}
4689+
4690+static int /* Report one leaf block: set up its detail, enter it, process its contents by type, leave it. */
4691+md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4692+{
4693+    union {
4694+        MD_BLOCK_H_DETAIL header;
4695+        MD_BLOCK_CODE_DETAIL code;
4696+        MD_BLOCK_TABLE_DETAIL table;
4697+    } det;
4698+    MD_ATTRIBUTE_BUILD info_build;
4699+    MD_ATTRIBUTE_BUILD lang_build;
4700+    int is_in_tight_list;
4701+    int clean_fence_code_detail = FALSE;  /* Set once info_build/lang_build must be freed at abort. */
4702+    int ret = 0;
4703+
4704+    memset(&det, 0, sizeof(det));
4705+
4706+    if(ctx->n_containers == 0)
4707+        is_in_tight_list = FALSE;
4708+    else
4709+        is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;  /* Innermost container decides. */
4710+
4711+    switch(block->type) {
4712+        case MD_BLOCK_H:
4713+            det.header.level = block->data;
4714+            break;
4715+
4716+        case MD_BLOCK_CODE:
4717+            /* For fenced code block, we may need to set the info string. */
4718+            if(block->data != 0) {
4719+                memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
4720+                clean_fence_code_detail = TRUE;
4721+                MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4722+            }
4723+            break;
4724+
4725+        case MD_BLOCK_TABLE:
4726+            det.table.col_count = block->data;
4727+            det.table.head_row_count = 1;
4728+            det.table.body_row_count = block->n_lines - 2;  /* All lines except the header and the underline. */
4729+            break;
4730+
4731+        default:
4732+            /* Noop. */
4733+            break;
4734+    }
4735+
4736+    if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)  /* Paragraphs in a tight list are not reported as blocks. */
4737+        MD_ENTER_BLOCK(block->type, (void*) &det);
4738+
4739+    /* Process the block contents according to its type. */
4740+    switch(block->type) {
4741+        case MD_BLOCK_HR:
4742+            /* noop */
4743+            break;
4744+
4745+        case MD_BLOCK_CODE:
4746+            MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
4747+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4748+            break;
4749+
4750+        case MD_BLOCK_HTML:
4751+            MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4752+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4753+            break;
4754+
4755+        case MD_BLOCK_TABLE:
4756+            MD_CHECK(md_process_table_block_contents(ctx, block->data,
4757+                            (const MD_LINE*)(block + 1), block->n_lines));
4758+            break;
4759+
4760+        default:
4761+            MD_CHECK(md_process_normal_block_contents(ctx,
4762+                            (const MD_LINE*)(block + 1), block->n_lines));
4763+            break;
4764+    }
4765+
4766+    if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)  /* Mirrors the condition used for MD_ENTER_BLOCK above. */
4767+        MD_LEAVE_BLOCK(block->type, (void*) &det);
4768+
4769+abort:
4770+    if(clean_fence_code_detail) {
4771+        md_free_attribute(ctx, &info_build);
4772+        md_free_attribute(ctx, &lang_build);
4773+    }
4774+    return ret;
4775+}
4776+
4777+static int /* Walk the block records in ctx->block_bytes, reporting container enter/leave events and leaf blocks. */
4778+md_process_all_blocks(MD_CTX* ctx)
4779+{
4780+    int byte_off = 0;
4781+    int ret = 0;
4782+
4783+    /* ctx->containers now is not needed for detection of lists and list items
4784+     * so we reuse it for tracking what lists are loose or tight. We rely
4785+     * on the fact the vector is large enough to hold the deepest nesting
4786+     * level of lists. */
4787+    ctx->n_containers = 0;
4788+
4789+    while(byte_off < ctx->n_block_bytes) {
4790+        MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
4791+        union {
4792+            MD_BLOCK_UL_DETAIL ul;
4793+            MD_BLOCK_OL_DETAIL ol;
4794+            MD_BLOCK_LI_DETAIL li;
4795+        } det;  /* Only filled for UL, OL and LI; left unset for other block types. */
4796+
4797+        switch(block->type) {
4798+            case MD_BLOCK_UL:
4799+                det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4800+                det.ul.mark = (CHAR) block->data;
4801+                break;
4802+
4803+            case MD_BLOCK_OL:
4804+                det.ol.start = block->n_lines;  /* For MD_BLOCK_OL, n_lines carries the start item number. */
4805+                det.ol.is_tight =  (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4806+                det.ol.mark_delimiter = (CHAR) block->data;
4807+                break;
4808+
4809+            case MD_BLOCK_LI:
4810+                det.li.is_task = (block->data != 0);
4811+                det.li.task_mark = (CHAR) block->data;
4812+                det.li.task_mark_offset = (OFF) block->n_lines;  /* For MD_BLOCK_LI, n_lines carries the task mark offset. */
4813+                break;
4814+
4815+            default:
4816+                /* noop */
4817+                break;
4818+        }
4819+
4820+        if(block->flags & MD_BLOCK_CONTAINER) {
4821+            if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4822+                MD_LEAVE_BLOCK(block->type, &det);
4823+
4824+                if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
4825+                    ctx->n_containers--;  /* Pop the entry pushed by the matching opener. */
4826+            }
4827+
4828+            if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4829+                MD_ENTER_BLOCK(block->type, &det);
4830+
4831+                if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
4832+                    ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4833+                    ctx->n_containers++;
4834+                } else if(block->type == MD_BLOCK_QUOTE) {
4835+                    /* This ensures that any text in a block quote, even if
4836+                     * nested inside a tight list item, is wrapped with
4837+                     * <p>...</p>. */
4838+                    ctx->containers[ctx->n_containers].is_loose = TRUE;
4839+                    ctx->n_containers++;
4840+                }
4841+            }
4842+        } else {
4843+            MD_CHECK(md_process_leaf_block(ctx, block));
4844+
4845+            if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)  /* Skip the line records that follow a leaf block. */
4846+                byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4847+            else
4848+                byte_off += block->n_lines * sizeof(MD_LINE);
4849+        }
4850+
4851+        byte_off += sizeof(MD_BLOCK);
4852+    }
4853+
4854+    ctx->n_block_bytes = 0;  /* All records consumed; not reached when a callback aborts. */
4855+
4856+abort:
4857+    return ret;
4858+}
4859+
4860+
4861+/************************************
4862+ ***  Grouping Lines into Blocks  ***
4863+ ************************************/
4864+
4865+static void* /* Reserve n_bytes at the end of ctx->block_bytes and return a pointer to them, or NULL if realloc() fails. */
4866+md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4867+{
4868+    void* ptr;
4869+
4870+    if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4871+        void* new_block_bytes;
4872+
4873+        ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0  /* NOTE(review): grows once (x1.5, or to 512) without re-checking that n_bytes now fits; the callers visible here only push small records. */
4874+                ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
4875+                : 512);
4876+        new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
4877+        if(new_block_bytes == NULL) {  /* NOTE(review): alloc_block_bytes already holds the enlarged value on this failure path. */
4878+            MD_LOG("realloc() failed.");
4879+            return NULL;
4880+        }
4881+
4882+        /* Fix the ->current_block after the reallocation. */
4883+        if(ctx->current_block != NULL) {
4884+            OFF off_current_block = (char*) ctx->current_block - (char*) ctx->block_bytes;
4885+            ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
4886+        }
4887+
4888+        ctx->block_bytes = new_block_bytes;
4889+    }
4890+
4891+    ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4892+    ctx->n_block_bytes += n_bytes;
4893+    return ptr;
4894+}
4895+
4896+static int /* Push a new leaf block record whose type follows from line->type and make it ctx->current_block. Returns 0, or -1 on allocation failure. */
4897+md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4898+{
4899+    MD_BLOCK* block;
4900+
4901+    MD_ASSERT(ctx->current_block == NULL);
4902+
4903+    block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4904+    if(block == NULL)
4905+        return -1;
4906+
4907+    switch(line->type) {
4908+        case MD_LINE_HR:
4909+            block->type = MD_BLOCK_HR;
4910+            break;
4911+
4912+        case MD_LINE_ATXHEADER:
4913+        case MD_LINE_SETEXTHEADER:
4914+            block->type = MD_BLOCK_H;
4915+            break;
4916+
4917+        case MD_LINE_FENCEDCODE:
4918+        case MD_LINE_INDENTEDCODE:
4919+            block->type = MD_BLOCK_CODE;
4920+            break;
4921+
4922+        case MD_LINE_TEXT:
4923+            block->type = MD_BLOCK_P;
4924+            break;
4925+
4926+        case MD_LINE_HTML:
4927+            block->type = MD_BLOCK_HTML;
4928+            break;
4929+
4930+        case MD_LINE_BLANK:
4931+        case MD_LINE_SETEXTUNDERLINE:
4932+        case MD_LINE_TABLEUNDERLINE:
4933+        default:  /* These line types are not expected to start a block. */
4934+            MD_UNREACHABLE();
4935+            break;
4936+    }
4937+
4938+    block->flags = 0;
4939+    block->data = line->data;
4940+    block->n_lines = 0;  /* Lines are appended later by md_add_line_into_current_block(). */
4941+
4942+    ctx->current_block = block;
4943+    return 0;
4944+}
4945+
4946+/* Eat any reference definitions from the start of the current (textual)
4947+ * block and remember them so we can resolve any links referring to them.
4948+ *
4949+ * (Reference definitions can only be at the start of it as they cannot
4950+ * break a paragraph.) Returns 0 on success, -1 on failure.
4951+ */
4952+static int
4953+md_consume_link_reference_definitions(MD_CTX* ctx)
4954+{
4955+    MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4956+    int n_lines = ctx->current_block->n_lines;
4957+    int n = 0;
4958+
4959+    /* Compute how many lines at the start of the block form one or more
4960+     * reference definitions. */
4961+    while(n < n_lines) {
4962+        int n_link_ref_lines;
4963+
4964+        n_link_ref_lines = md_is_link_reference_definition(ctx,
4965+                                    lines + n, n_lines - n);
4966+        /* Not a reference definition? */
4967+        if(n_link_ref_lines == 0)
4968+            break;
4969+
4970+        /* We fail if it is a ref. def. but it could not be stored due to
4971+         * a memory allocation error. */
4972+        if(n_link_ref_lines < 0)
4973+            return -1;
4974+
4975+        n += n_link_ref_lines;
4976+    }
4977+
4978+    /* If there was at least one reference definition, we need to remove
4979+     * its lines from the block, or perhaps even the whole block. */
4980+    if(n > 0) {
4981+        if(n == n_lines) {
4982+            /* Remove complete block. */
4983+            ctx->n_block_bytes -= n * sizeof(MD_LINE);
4984+            ctx->n_block_bytes -= sizeof(MD_BLOCK);
4985+            ctx->current_block = NULL;
4986+        } else {
4987+            /* Remove just some initial lines from the block. */
4988+            memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
4989+            ctx->current_block->n_lines -= n;
4990+            ctx->n_block_bytes -= n * sizeof(MD_LINE);
4991+        }
4992+    }
4993+
4994+    return 0;
4995+}
4996+
4997+static int /* Finish ctx->current_block (if any): strip leading reference definitions and the setext underline. */
4998+md_end_current_block(MD_CTX* ctx)
4999+{
5000+    int ret = 0;
5001+
5002+    if(ctx->current_block == NULL)
5003+        return ret;
5004+
5005+    /* Check whether there is a reference definition. (We do this here instead
5006+     * of in md_analyze_line() because reference definition can take multiple
5007+     * lines.) */
5008+    if(ctx->current_block->type == MD_BLOCK_P  ||
5009+       (ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
5010+    {
5011+        MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
5012+        if(CH(lines[0].beg) == _T('[')) {  /* Cheap pre-check: only a block starting with '[' is examined. */
5013+            MD_CHECK(md_consume_link_reference_definitions(ctx));
5014+            if(ctx->current_block == NULL)  /* The whole block consisted of reference definitions. */
5015+                return ret;
5016+        }
5017+    }
5018+
5019+    if(ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
5020+        int n_lines = ctx->current_block->n_lines;
5021+
5022+        if(n_lines > 1) {
5023+            /* Get rid of the underline. */
5024+            ctx->current_block->n_lines--;
5025+            ctx->n_block_bytes -= sizeof(MD_LINE);
5026+        } else {
5027+            /* Only the underline is left after eating the ref. defs.
5028+             * Keep the line as beginning of a new ordinary paragraph. */
5029+            ctx->current_block->type = MD_BLOCK_P;
5030+            return 0;
5031+        }
5032+    }
5033+
5034+    /* Mark we are not building any block anymore. */
5035+    ctx->current_block = NULL;
5036+
5037+abort:
5038+    return ret;
5039+}
5040+
5041+static int /* Append the analyzed line to ctx->current_block as a line record. Returns 0, or -1 on allocation failure. */
5042+md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5043+{
5044+    MD_ASSERT(ctx->current_block != NULL);
5045+
5046+    if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {  /* Verbatim blocks also keep the indentation. */
5047+        MD_VERBATIMLINE* line;
5048+
5049+        line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
5050+        if(line == NULL)
5051+            return -1;
5052+
5053+        line->indent = analysis->indent;
5054+        line->beg = analysis->beg;
5055+        line->end = analysis->end;
5056+    } else {
5057+        MD_LINE* line;
5058+
5059+        line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
5060+        if(line == NULL)
5061+            return -1;
5062+
5063+        line->beg = analysis->beg;
5064+        line->end = analysis->end;
5065+    }
5066+    ctx->current_block->n_lines++;
5067+
5068+    return 0;
5069+}
5070+
5071+static int /* End the current block and push a container record of the given type. Returns 0 on success, negative on failure. */
5072+md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5073+                        unsigned data, unsigned flags)
5074+{
5075+    MD_BLOCK* block;
5076+    int ret = 0;
5077+
5078+    MD_CHECK(md_end_current_block(ctx));
5079+
5080+    block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
5081+    if(block == NULL)
5082+        return -1;
5083+
5084+    block->type = type;
5085+    block->flags = flags;
5086+    block->data = data;
5087+    block->n_lines = start;  /* Container records reuse n_lines to carry the start value. */
5088+
5089+abort:
5090+    return ret;
5091+}
5092+
5093+
5094+
5095+/***********************
5096+ ***  Line Analysis  ***
5097+ ***********************/
5098+
5099+static int /* TRUE if the line at beg has at least three CH(beg) characters and otherwise only spaces/tabs. */
5100+md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5101+{
5102+    OFF off = beg + 1;
5103+    int n = 1;  /* Count of CH(beg) characters seen, including the one at beg. */
5104+
5105+    while(off < ctx->size  &&  (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
5106+        if(CH(off) == CH(beg))
5107+            n++;
5108+        off++;
5109+    }
5110+
5111+    if(n < 3) {
5112+        *p_killer = off;  /* On failure, report the offset where the scan stopped. */
5113+        return FALSE;
5114+    }
5115+
5116+    /* Nothing else can be present on the line. */
5117+    if(off < ctx->size  &&  !ISNEWLINE(off)) {
5118+        *p_killer = off;
5119+        return FALSE;
5120+    }
5121+
5122+    *p_end = off;  /* On success, report the end of the line contents. */
5123+    return TRUE;
5124+}
5125+
5126+static int /* TRUE if a run of 1-6 '#' starts at beg; stores the level and the offset where the header text begins. */
5127+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5128+{
5129+    int n;
5130+    OFF off = beg + 1;  /* The character at beg itself is not re-checked here. */
5131+
5132+    while(off < ctx->size  &&  CH(off) == _T('#')  &&  off - beg < 7)
5133+        off++;
5134+    n = off - beg;
5135+
5136+    if(n > 6)  /* Seven or more '#' is not a header. */
5137+        return FALSE;
5138+    *p_level = n;
5139+
5140+    if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx->size  &&  /* Unless permissive, blank or end of line must follow. */
5141+       CH(off) != _T(' ')  &&  CH(off) != _T('\t')  &&  !ISNEWLINE(off))
5142+        return FALSE;
5143+
5144+    while(off < ctx->size  &&  CH(off) == _T(' '))
5145+        off++;
5146+    *p_beg = off;
5147+    *p_end = off;  /* Same as *p_beg; presumably the caller locates the real line end -- confirm. */
5148+    return TRUE;
5149+}
5150+
5151+static int /* TRUE if the line at beg is only a run of CH(beg) optionally followed by spaces; level is 1 for '=', else 2. */
5152+md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5153+{
5154+    OFF off = beg + 1;
5155+
5156+    while(off < ctx->size  &&  CH(off) == CH(beg))
5157+        off++;
5158+
5159+    /* Optionally, space(s) can follow. */
5160+    while(off < ctx->size  &&  CH(off) == _T(' '))
5161+        off++;
5162+
5163+    /* But nothing more is allowed on the line. */
5164+    if(off < ctx->size  &&  !ISNEWLINE(off))
5165+        return FALSE;
5166+
5167+    *p_level = (CH(beg) == _T('=') ? 1 : 2);
5168+    *p_end = off;
5169+    return TRUE;
5170+}
5171+
5172+static int /* TRUE if the line at beg is a table underline; stores its end offset and the number of columns. */
5173+md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5174+{
5175+    OFF off = beg;
5176+    int found_pipe = FALSE;  /* At least one '|' is required for the line to count as an underline. */
5177+    unsigned col_count = 0;
5178+
5179+    if(off < ctx->size  &&  CH(off) == _T('|')) {  /* Optional leading pipe. */
5180+        found_pipe = TRUE;
5181+        off++;
5182+        while(off < ctx->size  &&  ISWHITESPACE(off))
5183+            off++;
5184+    }
5185+
5186+    while(1) {
5187+        OFF cell_beg;
5188+        int delimited = FALSE;
5189+
5190+        /* Cell underline ("-----", ":----", "----:" or ":----:") */
5191+        cell_beg = off;
5192+        if(off < ctx->size  &&  CH(off) == _T(':'))
5193+            off++;
5194+        while(off < ctx->size  &&  CH(off) == _T('-'))
5195+            off++;
5196+        if(off < ctx->size  &&  CH(off) == _T(':'))
5197+            off++;
5198+        if(off - cell_beg < 3)  /* The minimum of three characters counts the ':' markers too. */
5199+            return FALSE;
5200+
5201+        col_count++;
5202+
5203+        /* Pipe delimiter (optional at the end of line). */
5204+        while(off < ctx->size  &&  ISWHITESPACE(off))
5205+            off++;
5206+        if(off < ctx->size  &&  CH(off) == _T('|')) {
5207+            delimited = TRUE;
5208+            found_pipe =  TRUE;
5209+            off++;
5210+            while(off < ctx->size  &&  ISWHITESPACE(off))
5211+                off++;
5212+        }
5213+
5214+        /* Success, if we reach end of line. */
5215+        if(off >= ctx->size  ||  ISNEWLINE(off))
5216+            break;
5217+
5218+        if(!delimited)  /* Another cell may only follow a pipe. */
5219+            return FALSE;
5220+    }
5221+
5222+    if(!found_pipe)
5223+        return FALSE;
5224+
5225+    *p_end = off;
5226+    *p_col_count = col_count;
5227+    return TRUE;
5228+}
5229+
5230+static int
5231+md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5232+{
5233+    OFF off = beg;
5234+
5235+    while(off < ctx->size && CH(off) == CH(beg))
5236+        off++;
5237+
5238+    /* Fence must have at least three characters. */
5239+    if(off - beg < 3)
5240+        return FALSE;
5241+
5242+    ctx->code_fence_length = off - beg;
5243+
5244+    /* Optionally, space(s) can follow. */
5245+    while(off < ctx->size  &&  CH(off) == _T(' '))
5246+        off++;
5247+
5248+    /* Optionally, an info string can follow. */
5249+    while(off < ctx->size  &&  !ISNEWLINE(off)) {
5250+        /* Backtick-based fence must not contain '`' in the info string. */
5251+        if(CH(beg) == _T('`')  &&  CH(off) == _T('`'))
5252+            return FALSE;
5253+        off++;
5254+    }
5255+
5256+    *p_end = off;
5257+    return TRUE;
5258+}
5259+
5260+static int
5261+md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5262+{
5263+    OFF off = beg;
5264+    int ret = FALSE;
5265+
5266+    /* Closing fence must have at least the same length and use same char as
5267+     * opening one. */
5268+    while(off < ctx->size  &&  CH(off) == ch)
5269+        off++;
5270+    if(off - beg < ctx->code_fence_length)
5271+        goto out;
5272+
5273+    /* Optionally, space(s) can follow */
5274+    while(off < ctx->size  &&  CH(off) == _T(' '))
5275+        off++;
5276+
5277+    /* But nothing more is allowed on the line. */
5278+    if(off < ctx->size  &&  !ISNEWLINE(off))
5279+        goto out;
5280+
5281+    ret = TRUE;
5282+
5283+out:
5284+    /* Note we set *p_end even on failure: If we are not closing fence, caller
5285+     * would eat the line anyway without any parsing. */
5286+    *p_end = off;
5287+    return ret;
5288+}
5289+
5290+/* Returns type of the raw HTML block, or FALSE if it is not HTML block.
5291+ * (Refer to CommonMark specification for details about the types.)
5292+ */
5293+static int
5294+md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5295+{
5296+    typedef struct TAG_tag TAG;
5297+    struct TAG_tag {
5298+        const CHAR* name;
5299+        unsigned len    : 8;
5300+    };
5301+
5302+    /* Type 6 is started by a long list of allowed tags. We use two-level
5303+     * tree to speed-up the search. */
5304+#ifdef X
5305+    #undef X
5306+#endif
5307+#define X(name)     { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5308+#define Xend        { NULL, 0 }
5309+    static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
5310+
5311+    static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5312+    static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5313+    static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5314+    static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5315+                              X("div"), X("dl"), X("dt"), Xend };
5316+    static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5317+                              X("form"), X("frame"), X("frameset"), Xend };
5318+    static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
5319+    static const TAG i6[] = { X("iframe"), Xend };
5320+    static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5321+    static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5322+    static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5323+    static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5324+    static const TAG p6[] = { X("p"), X("param"), Xend };
5325+    static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
5326+    static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5327+                              X("thead"), X("title"), X("tr"), X("track"), Xend };
5328+    static const TAG u6[] = { X("ul"), Xend };
5329+    static const TAG xx[] = { Xend };
5330+#undef X
5331+
5332+    static const TAG* map6[26] = {
5333+        a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5334+        n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5335+    };
5336+    OFF off = beg + 1;
5337+    int i;
5338+
5339+    /* Check for type 1: <script, <pre, or <style */
5340+    for(i = 0; t1[i].name != NULL; i++) {
5341+        if(off + t1[i].len <= ctx->size) {
5342+            if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
5343+                return 1;
5344+        }
5345+    }
5346+
5347+    /* Check for type 2: <!-- */
5348+    if(off + 3 < ctx->size  &&  CH(off) == _T('!')  &&  CH(off+1) == _T('-')  &&  CH(off+2) == _T('-'))
5349+        return 2;
5350+
5351+    /* Check for type 3: <? */
5352+    if(off < ctx->size  &&  CH(off) == _T('?'))
5353+        return 3;
5354+
5355+    /* Check for type 4 or 5: <! */
5356+    if(off < ctx->size  &&  CH(off) == _T('!')) {
5357+        /* Check for type 4: <! followed by uppercase letter. */
5358+        if(off + 1 < ctx->size  &&  ISUPPER(off+1))
5359+            return 4;
5360+
5361+        /* Check for type 5: <![CDATA[ */
5362+        if(off + 8 < ctx->size) {
5363+            if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
5364+                return 5;
5365+        }
5366+    }
5367+
5368+    /* Check for type 6: Many possible starting tags listed above. */
5369+    if(off + 1 < ctx->size  &&  (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
5370+        int slot;
5371+        const TAG* tags;
5372+
5373+        if(CH(off) == _T('/'))
5374+            off++;
5375+
5376+        slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
5377+        tags = map6[slot];
5378+
5379+        for(i = 0; tags[i].name != NULL; i++) {
5380+            if(off + tags[i].len <= ctx->size) {
5381+                if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
5382+                    OFF tmp = off + tags[i].len;
5383+                    if(tmp >= ctx->size)
5384+                        return 6;
5385+                    if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
5386+                        return 6;
5387+                    if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
5388+                        return 6;
5389+                    break;
5390+                }
5391+            }
5392+        }
5393+    }
5394+
5395+    /* Check for type 7: any COMPLETE other opening or closing tag. */
5396+    if(off + 1 < ctx->size) {
5397+        OFF end;
5398+
5399+        if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
5400+            /* Only optional whitespace and new line may follow. */
5401+            while(end < ctx->size  &&  ISWHITESPACE(end))
5402+                end++;
5403+            if(end >= ctx->size  ||  ISNEWLINE(end))
5404+                return 7;
5405+        }
5406+    }
5407+
5408+    return FALSE;
5409+}
5410+
5411+/* Case sensitive check whether there is a substring 'what' between 'beg'
5412+ * and end of line. */
5413+static int
5414+md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5415+{
5416+    OFF i;
5417+    for(i = beg; i + what_len < ctx->size; i++) {
5418+        if(ISNEWLINE(i))
5419+            break;
5420+        if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
5421+            *p_end = i + what_len;
5422+            return TRUE;
5423+        }
5424+    }
5425+
5426+    *p_end = i;
5427+    return FALSE;
5428+}
5429+
5430+/* Returns type of HTML block end condition or FALSE if not an end condition.
5431+ *
5432+ * Note it fills p_end even when it is not end condition as the caller
5433+ * does not need to analyze contents of a raw HTML block.
5434+ */
5435+static int
5436+md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5437+{
5438+    switch(ctx->html_block_type) {
5439+        case 1:
5440+        {
5441+            OFF off = beg;
5442+
5443+            while(off < ctx->size  &&  !ISNEWLINE(off)) {
5444+                if(CH(off) == _T('<')) {
5445+                    if(md_ascii_case_eq(STR(off), _T("</script>"), 9)) {
5446+                        *p_end = off + 9;
5447+                        return TRUE;
5448+                    }
5449+
5450+                    if(md_ascii_case_eq(STR(off), _T("</style>"), 8)) {
5451+                        *p_end = off + 8;
5452+                        return TRUE;
5453+                    }
5454+
5455+                    if(md_ascii_case_eq(STR(off), _T("</pre>"), 6)) {
5456+                        *p_end = off + 6;
5457+                        return TRUE;
5458+                    }
5459+                }
5460+
5461+                off++;
5462+            }
5463+            *p_end = off;
5464+            return FALSE;
5465+        }
5466+
5467+        case 2:
5468+            return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
5469+
5470+        case 3:
5471+            return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
5472+
5473+        case 4:
5474+            return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
5475+
5476+        case 5:
5477+            return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
5478+
5479+        case 6:     /* Pass through */
5480+        case 7:
5481+            *p_end = beg;
5482+            return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
5483+
5484+        default:
5485+            MD_UNREACHABLE();
5486+    }
5487+    return FALSE;
5488+}
5489+
5490+
5491+static int
5492+md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5493+{
5494+    /* Block quote has no "items" like lists. */
5495+    if(container->ch == _T('>'))
5496+        return FALSE;
5497+
5498+    if(container->ch != pivot->ch)
5499+        return FALSE;
5500+    if(container->mark_indent > pivot->contents_indent)
5501+        return FALSE;
5502+
5503+    return TRUE;
5504+}
5505+
5506+static int
5507+md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5508+{
5509+    if(ctx->n_containers >= ctx->alloc_containers) {
5510+        MD_CONTAINER* new_containers;
5511+
5512+        ctx->alloc_containers = (ctx->alloc_containers > 0
5513+                ? ctx->alloc_containers + ctx->alloc_containers / 2
5514+                : 16);
5515+        new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5516+        if(new_containers == NULL) {
5517+            MD_LOG("realloc() failed.");
5518+            return -1;
5519+        }
5520+
5521+        ctx->containers = new_containers;
5522+    }
5523+
5524+    memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5525+    return 0;
5526+}
5527+
5528+static int
5529+md_enter_child_containers(MD_CTX* ctx, int n_children, unsigned data)
5530+{
5531+    int i;
5532+    int ret = 0;
5533+
5534+    for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5535+        MD_CONTAINER* c = &ctx->containers[i];
5536+        int is_ordered_list = FALSE;
5537+
5538+        switch(c->ch) {
5539+            case _T(')'):
5540+            case _T('.'):
5541+                is_ordered_list = TRUE;
5542+                MD_FALLTHROUGH();
5543+
5544+            case _T('-'):
5545+            case _T('+'):
5546+            case _T('*'):
5547+                /* Remember offset in ctx->block_bytes so we can revisit the
5548+                 * block if we detect it is a loose list. */
5549+                md_end_current_block(ctx);
5550+                c->block_byte_off = ctx->n_block_bytes;
5551+
5552+                MD_CHECK(md_push_container_bytes(ctx,
5553+                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5554+                                c->start, data, MD_BLOCK_CONTAINER_OPENER));
5555+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5556+                                c->task_mark_off,
5557+                                (c->is_task ? CH(c->task_mark_off) : 0),
5558+                                MD_BLOCK_CONTAINER_OPENER));
5559+                break;
5560+
5561+            case _T('>'):
5562+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
5563+                break;
5564+
5565+            default:
5566+                MD_UNREACHABLE();
5567+                break;
5568+        }
5569+    }
5570+
5571+abort:
5572+    return ret;
5573+}
5574+
5575+static int
5576+md_leave_child_containers(MD_CTX* ctx, int n_keep)
5577+{
5578+    int ret = 0;
5579+
5580+    while(ctx->n_containers > n_keep) {
5581+        MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
5582+        int is_ordered_list = FALSE;
5583+
5584+        switch(c->ch) {
5585+            case _T(')'):
5586+            case _T('.'):
5587+                is_ordered_list = TRUE;
5588+                MD_FALLTHROUGH();
5589+
5590+            case _T('-'):
5591+            case _T('+'):
5592+            case _T('*'):
5593+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5594+                                c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
5595+                                MD_BLOCK_CONTAINER_CLOSER));
5596+                MD_CHECK(md_push_container_bytes(ctx,
5597+                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
5598+                                c->ch, MD_BLOCK_CONTAINER_CLOSER));
5599+                break;
5600+
5601+            case _T('>'):
5602+                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
5603+                                0, MD_BLOCK_CONTAINER_CLOSER));
5604+                break;
5605+
5606+            default:
5607+                MD_UNREACHABLE();
5608+                break;
5609+        }
5610+
5611+        ctx->n_containers--;
5612+    }
5613+
5614+abort:
5615+    return ret;
5616+}
5617+
5618+static int
5619+md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5620+{
5621+    OFF off = beg;
5622+    OFF max_end;
5623+
5624+    if(off >= ctx->size  ||  indent >= ctx->code_indent_offset)
5625+        return FALSE;
5626+
5627+    /* Check for block quote mark. */
5628+    if(CH(off) == _T('>')) {
5629+        off++;
5630+        p_container->ch = _T('>');
5631+        p_container->is_loose = FALSE;
5632+        p_container->is_task = FALSE;
5633+        p_container->mark_indent = indent;
5634+        p_container->contents_indent = indent + 1;
5635+        *p_end = off;
5636+        return TRUE;
5637+    }
5638+
5639+    /* Check for list item bullet mark. */
5640+    if(ISANYOF(off, _T("-+*"))  &&  (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
5641+        p_container->ch = CH(off);
5642+        p_container->is_loose = FALSE;
5643+        p_container->is_task = FALSE;
5644+        p_container->mark_indent = indent;
5645+        p_container->contents_indent = indent + 1;
5646+        *p_end = off+1;
5647+        return TRUE;
5648+    }
5649+
5650+    /* Check for ordered list item marks. */
5651+    max_end = off + 9;
5652+    if(max_end > ctx->size)
5653+        max_end = ctx->size;
5654+    p_container->start = 0;
5655+    while(off < max_end  &&  ISDIGIT(off)) {
5656+        p_container->start = p_container->start * 10 + CH(off) - _T('0');
5657+        off++;
5658+    }
5659+    if(off > beg  &&
5660+       (CH(off) == _T('.') || CH(off) == _T(')'))  &&
5661+       (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
5662+    {
5663+        p_container->ch = CH(off);
5664+        p_container->is_loose = FALSE;
5665+        p_container->is_task = FALSE;
5666+        p_container->mark_indent = indent;
5667+        p_container->contents_indent = indent + off - beg + 1;
5668+        *p_end = off+1;
5669+        return TRUE;
5670+    }
5671+
5672+    return FALSE;
5673+}
5674+
5675+static unsigned
5676+md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5677+{
5678+    OFF off = beg;
5679+    unsigned indent = total_indent;
5680+
5681+    while(off < ctx->size  &&  ISBLANK(off)) {
5682+        if(CH(off) == _T('\t'))
5683+            indent = (indent + 4) & ~3;
5684+        else
5685+            indent++;
5686+        off++;
5687+    }
5688+
5689+    *p_end = off;
5690+    return indent - total_indent;
5691+}
5692+
5693+static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
5694+
5695+/* Analyze type of the line and find some its properties. This serves as a
5696+ * main input for determining type and boundaries of a block. */
5697+static int
5698+md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5699+                const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5700+{
5701+    unsigned total_indent = 0;
5702+    int n_parents = 0;
5703+    int n_brothers = 0;
5704+    int n_children = 0;
5705+    MD_CONTAINER container = { 0 };
5706+    int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5707+    OFF off = beg;
5708+    OFF hr_killer = 0;
5709+    int ret = 0;
5710+
5711+    line->indent = md_line_indentation(ctx, total_indent, off, &off);
5712+    total_indent += line->indent;
5713+    line->beg = off;
5714+
5715+    /* Given the indentation and block quote marks '>', determine how many of
5716+     * the current containers are our parents. */
5717+    while(n_parents < ctx->n_containers) {
5718+        MD_CONTAINER* c = &ctx->containers[n_parents];
5719+
5720+        if(c->ch == _T('>')  &&  line->indent < ctx->code_indent_offset  &&
5721+            off < ctx->size  &&  CH(off) == _T('>'))
5722+        {
5723+            /* Block quote mark. */
5724+            off++;
5725+            total_indent++;
5726+            line->indent = md_line_indentation(ctx, total_indent, off, &off);
5727+            total_indent += line->indent;
5728+
5729+            /* The optional 1st space after '>' is part of the block quote mark. */
5730+            if(line->indent > 0)
5731+                line->indent--;
5732+
5733+            line->beg = off;
5734+
5735+        } else if(c->ch != _T('>')  &&  line->indent >= c->contents_indent) {
5736+            /* List. */
5737+            line->indent -= c->contents_indent;
5738+        } else {
5739+            break;
5740+        }
5741+
5742+        n_parents++;
5743+    }
5744+
5745+    if(off >= ctx->size  ||  ISNEWLINE(off)) {
5746+        /* Blank line does not need any real indentation to be nested inside
5747+         * a list. */
5748+        if(n_brothers + n_children == 0) {
5749+            while(n_parents < ctx->n_containers  &&  ctx->containers[n_parents].ch != _T('>'))
5750+                n_parents++;
5751+        }
5752+    }
5753+
5754+    while(TRUE) {
5755+        /* Check whether we are fenced code continuation. */
5756+        if(pivot_line->type == MD_LINE_FENCEDCODE) {
5757+            line->beg = off;
5758+
5759+            /* We are another MD_LINE_FENCEDCODE unless we are closing fence
5760+             * which we transform into MD_LINE_BLANK. */
5761+            if(line->indent < ctx->code_indent_offset) {
5762+                if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5763+                    line->type = MD_LINE_BLANK;
5764+                    ctx->last_line_has_list_loosening_effect = FALSE;
5765+                    break;
5766+                }
5767+            }
5768+
5769+            /* Change indentation accordingly to the initial code fence. */
5770+            if(n_parents == ctx->n_containers) {
5771+                if(line->indent > pivot_line->indent)
5772+                    line->indent -= pivot_line->indent;
5773+                else
5774+                    line->indent = 0;
5775+
5776+                line->type = MD_LINE_FENCEDCODE;
5777+                break;
5778+            }
5779+        }
5780+
5781+        /* Check whether we are HTML block continuation. */
5782+        if(pivot_line->type == MD_LINE_HTML  &&  ctx->html_block_type > 0) {
5783+            if(n_parents < ctx->n_containers) {
5784+                /* HTML block is implicitly ended if the enclosing container
5785+                 * block ends. */
5786+                ctx->html_block_type = 0;
5787+            } else {
5788+                int html_block_type;
5789+
5790+                html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5791+                if(html_block_type > 0) {
5792+                    MD_ASSERT(html_block_type == ctx->html_block_type);
5793+
5794+                    /* Make sure this is the last line of the block. */
5795+                    ctx->html_block_type = 0;
5796+
5797+                    /* Some end conditions serve as blank lines at the same time. */
5798+                    if(html_block_type == 6 || html_block_type == 7) {
5799+                        line->type = MD_LINE_BLANK;
5800+                        line->indent = 0;
5801+                        break;
5802+                    }
5803+                }
5804+
5805+                line->type = MD_LINE_HTML;
5806+                n_parents = ctx->n_containers;
5807+                break;
5808+            }
5809+        }
5810+
5811+        /* Check for blank line. */
5812+        if(off >= ctx->size  ||  ISNEWLINE(off)) {
5813+            if(pivot_line->type == MD_LINE_INDENTEDCODE  &&  n_parents == ctx->n_containers) {
5814+                line->type = MD_LINE_INDENTEDCODE;
5815+                if(line->indent > ctx->code_indent_offset)
5816+                    line->indent -= ctx->code_indent_offset;
5817+                else
5818+                    line->indent = 0;
5819+                ctx->last_line_has_list_loosening_effect = FALSE;
5820+            } else {
5821+                line->type = MD_LINE_BLANK;
5822+                ctx->last_line_has_list_loosening_effect = (n_parents > 0  &&
5823+                        n_brothers + n_children == 0  &&
5824+                        ctx->containers[n_parents-1].ch != _T('>'));
5825+
5826+    #if 1
5827+                /* See https://github.com/mity/md4c/issues/6
5828+                 *
5829+                 * This ugly checking tests we are in (yet empty) list item but
5830+                 * not its very first line (i.e. not the line with the list
5831+                 * item mark).
5832+                 *
5833+                 * If we are such a blank line, then any following non-blank
5834+                 * line which would be part of the list item actually has to
5835+                 * end the list because according to the specification, "a list
5836+                 * item can begin with at most one blank line."
5837+                 */
5838+                if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
5839+                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
5840+                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5841+                {
5842+                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5843+                    if(top_block->type == MD_BLOCK_LI)
5844+                        ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5845+                }
5846+    #endif
5847+            }
5848+            break;
5849+        } else {
5850+    #if 1
5851+            /* This is the 2nd half of the hack. If the flag is set (i.e. there
5852+             * was a 2nd blank line at the beginning of the list item) and if
5853+             * we would otherwise still belong to the list item, we enforce
5854+             * the end of the list. */
5855+            ctx->last_line_has_list_loosening_effect = FALSE;
5856+            if(ctx->last_list_item_starts_with_two_blank_lines) {
5857+                if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
5858+                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
5859+                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5860+                {
5861+                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5862+                    if(top_block->type == MD_BLOCK_LI)
5863+                        n_parents--;
5864+                }
5865+
5866+                ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5867+            }
5868+    #endif
5869+        }
5870+
5871+        /* Check whether we are Setext underline. */
5872+        if(line->indent < ctx->code_indent_offset  &&  pivot_line->type == MD_LINE_TEXT
5873+            &&  (CH(off) == _T('=') || CH(off) == _T('-'))
5874+            &&  (n_parents == ctx->n_containers))
5875+        {
5876+            unsigned level;
5877+
5878+            if(md_is_setext_underline(ctx, off, &off, &level)) {
5879+                line->type = MD_LINE_SETEXTUNDERLINE;
5880+                line->data = level;
5881+                break;
5882+            }
5883+        }
5884+
5885+        /* Check for thematic break line. */
5886+        if(line->indent < ctx->code_indent_offset  &&  ISANYOF(off, _T("-_*"))  &&  off >= hr_killer) {
5887+            if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5888+                line->type = MD_LINE_HR;
5889+                break;
5890+            }
5891+        }
5892+
5893+        /* Check for "brother" container. I.e. whether we are another list item
5894+         * in already started list. */
5895+        if(n_parents < ctx->n_containers  &&  n_brothers + n_children == 0) {
5896+            OFF tmp;
5897+
5898+            if(md_is_container_mark(ctx, line->indent, off, &tmp, &container)  &&
5899+               md_is_container_compatible(&ctx->containers[n_parents], &container))
5900+            {
5901+                pivot_line = &md_dummy_blank_line;
5902+
5903+                off = tmp;
5904+
5905+                total_indent += container.contents_indent - container.mark_indent;
5906+                line->indent = md_line_indentation(ctx, total_indent, off, &off);
5907+                total_indent += line->indent;
5908+                line->beg = off;
5909+
5910+                /* Some of the following whitespace actually still belongs to the mark. */
5911+                if(off >= ctx->size || ISNEWLINE(off)) {
5912+                    container.contents_indent++;
5913+                } else if(line->indent <= ctx->code_indent_offset) {
5914+                    container.contents_indent += line->indent;
5915+                    line->indent = 0;
5916+                } else {
5917+                    container.contents_indent += 1;
5918+                    line->indent--;
5919+                }
5920+
5921+                ctx->containers[n_parents].mark_indent = container.mark_indent;
5922+                ctx->containers[n_parents].contents_indent = container.contents_indent;
5923+
5924+                n_brothers++;
5925+                continue;
5926+            }
5927+        }
5928+
5929+        /* Check for indented code.
5930+         * Note indented code block cannot interrupt a paragraph. */
5931+        if(line->indent >= ctx->code_indent_offset  &&
5932+            (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE))
5933+        {
5934+            line->type = MD_LINE_INDENTEDCODE;
5935+            MD_ASSERT(line->indent >= ctx->code_indent_offset);
5936+            line->indent -= ctx->code_indent_offset;
5937+            line->data = 0;
5938+            break;
5939+        }
5940+
5941+        /* Check for start of a new container block. */
5942+        if(line->indent < ctx->code_indent_offset  &&
5943+           md_is_container_mark(ctx, line->indent, off, &off, &container))
5944+        {
5945+            if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
5946+                        (off >= ctx->size || ISNEWLINE(off))  &&  container.ch != _T('>'))
5947+            {
5948+                /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
5949+            } else if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
5950+                        (container.ch == _T('.') || container.ch == _T(')'))  &&  container.start != 1)
5951+            {
5952+                /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
5953+            } else {
5954+                total_indent += container.contents_indent - container.mark_indent;
5955+                line->indent = md_line_indentation(ctx, total_indent, off, &off);
5956+                total_indent += line->indent;
5957+
5958+                line->beg = off;
5959+                line->data = container.ch;
5960+
5961+                /* Some of the following whitespace actually still belongs to the mark. */
5962+                if(off >= ctx->size || ISNEWLINE(off)) {
5963+                    container.contents_indent++;
5964+                } else if(line->indent <= ctx->code_indent_offset) {
5965+                    container.contents_indent += line->indent;
5966+                    line->indent = 0;
5967+                } else {
5968+                    container.contents_indent += 1;
5969+                    line->indent--;
5970+                }
5971+
5972+                if(n_brothers + n_children == 0)
5973+                    pivot_line = &md_dummy_blank_line;
5974+
5975+                if(n_children == 0)
5976+                    MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5977+
5978+                n_children++;
5979+                MD_CHECK(md_push_container(ctx, &container));
5980+                continue;
5981+            }
5982+        }
5983+
5984+        /* Check whether we are table continuation. */
5985+        if(pivot_line->type == MD_LINE_TABLE  &&  n_parents == ctx->n_containers) {
5986+            line->type = MD_LINE_TABLE;
5987+            break;
5988+        }
5989+
5990+        /* Check for ATX header. */
5991+        if(line->indent < ctx->code_indent_offset  &&  CH(off) == _T('#')) {
5992+            unsigned level;
5993+
5994+            if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
5995+                line->type = MD_LINE_ATXHEADER;
5996+                line->data = level;
5997+                break;
5998+            }
5999+        }
6000+
6001+        /* Check whether we are starting code fence. */
6002+        if(CH(off) == _T('`') || CH(off) == _T('~')) {
6003+            if(md_is_opening_code_fence(ctx, off, &off)) {
6004+                line->type = MD_LINE_FENCEDCODE;
6005+                line->data = 1;
6006+                break;
6007+            }
6008+        }
6009+
6010+        /* Check for start of raw HTML block. */
6011+        if(CH(off) == _T('<')  &&  !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
6012+        {
6013+            ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
6014+
6015+            /* HTML block type 7 cannot interrupt paragraph. */
6016+            if(ctx->html_block_type == 7  &&  pivot_line->type == MD_LINE_TEXT)
6017+                ctx->html_block_type = 0;
6018+
6019+            if(ctx->html_block_type > 0) {
6020+                /* The line itself also may immediately close the block. */
6021+                if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
6022+                    /* Make sure this is the last line of the block. */
6023+                    ctx->html_block_type = 0;
6024+                }
6025+
6026+                line->type = MD_LINE_HTML;
6027+                break;
6028+            }
6029+        }
6030+
6031+        /* Check for table underline. */
6032+        if((ctx->parser.flags & MD_FLAG_TABLES)  &&  pivot_line->type == MD_LINE_TEXT  &&
6033+           (CH(off) == _T('|') || CH(off) == _T('-') || CH(off) == _T(':'))  &&
6034+           n_parents == ctx->n_containers)
6035+        {
6036+            unsigned col_count;
6037+
6038+            if(ctx->current_block != NULL  &&  ctx->current_block->n_lines == 1  &&
6039+                md_is_table_underline(ctx, off, &off, &col_count))
6040+            {
6041+                line->data = col_count;
6042+                line->type = MD_LINE_TABLEUNDERLINE;
6043+                break;
6044+            }
6045+        }
6046+
6047+        /* By default, we are normal text line. */
6048+        line->type = MD_LINE_TEXT;
6049+        if(pivot_line->type == MD_LINE_TEXT  &&  n_brothers + n_children == 0) {
6050+            /* Lazy continuation. */
6051+            n_parents = ctx->n_containers;
6052+        }
6053+
6054+        /* Check for task mark. */
6055+        if((ctx->parser.flags & MD_FLAG_TASKLISTS)  &&  n_brothers + n_children > 0  &&
6056+           ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
6057+        {
6058+            OFF tmp = off;
6059+
6060+            while(tmp < ctx->size  &&  tmp < off + 3  &&  ISBLANK(tmp))
6061+                tmp++;
6062+            if(tmp + 2 < ctx->size  &&  CH(tmp) == _T('[')  &&
6063+               ISANYOF(tmp+1, _T("xX "))  &&  CH(tmp+2) == _T(']')  &&
6064+               (tmp + 3 == ctx->size  ||  ISBLANK(tmp+3)  ||  ISNEWLINE(tmp+3)))
6065+            {
6066+                MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
6067+                task_container->is_task = TRUE;
6068+                task_container->task_mark_off = tmp + 1;
6069+                off = tmp + 3;
6070+                while(ISWHITESPACE(off))
6071+                    off++;
6072+                line->beg = off;
6073+            }
6074+        }
6075+
6076+        break;
6077+    }
6078+
6079+    /* Scan for end of the line.
6080+     *
6081+     * Note this is quite a bottleneck of the parsing as we here iterate almost
6082+     * over complete document.
6083+     */
6084+#if defined __linux__ && !defined MD4C_USE_UTF16
6085+    /* Recent glibc versions have superbly optimized strcspn(), even using
6086+     * vectorization if available. */
6087+    if(ctx->doc_ends_with_newline  &&  off < ctx->size) {
6088+        while(TRUE) {
6089+            off += (OFF) strcspn(STR(off), "\r\n");
6090+
6091+            /* strcspn() can stop on zero terminator; but that can appear
6092+             * anywhere in the Markdown input... */
6093+            if(CH(off) == _T('\0'))
6094+                off++;
6095+            else
6096+                break;
6097+        }
6098+    } else
6099+#endif
6100+    {
6101+        /* Optimization: Use some loop unrolling. */
6102+        while(off + 3 < ctx->size  &&  !ISNEWLINE(off+0)  &&  !ISNEWLINE(off+1)
6103+                                   &&  !ISNEWLINE(off+2)  &&  !ISNEWLINE(off+3))
6104+            off += 4;
6105+        while(off < ctx->size  &&  !ISNEWLINE(off))
6106+            off++;
6107+    }
6108+
6109+    /* Set end of the line. */
6110+    line->end = off;
6111+
6112+    /* But for ATX header, we should exclude the optional trailing mark. */
6113+    if(line->type == MD_LINE_ATXHEADER) {
6114+        OFF tmp = line->end;
6115+        while(tmp > line->beg && CH(tmp-1) == _T(' '))
6116+            tmp--;
6117+        while(tmp > line->beg && CH(tmp-1) == _T('#'))
6118+            tmp--;
6119+        if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6120+            line->end = tmp;
6121+    }
6122+
6123+    /* Trim trailing spaces. */
6124+    if(line->type != MD_LINE_INDENTEDCODE  &&  line->type != MD_LINE_FENCEDCODE) {
6125+        while(line->end > line->beg && CH(line->end-1) == _T(' '))
6126+            line->end--;
6127+    }
6128+
6129+    /* Eat also the new line. */
6130+    if(off < ctx->size && CH(off) == _T('\r'))
6131+        off++;
6132+    if(off < ctx->size && CH(off) == _T('\n'))
6133+        off++;
6134+
6135+    *p_end = off;
6136+
6137+    /* If we belong to a list after seeing a blank line, the list is loose. */
6138+    if(prev_line_has_list_loosening_effect  &&  line->type != MD_LINE_BLANK  &&  n_parents + n_brothers > 0) {
6139+        MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
6140+        if(c->ch != _T('>')) {
6141+            MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
6142+            block->flags |= MD_BLOCK_LOOSE_LIST;
6143+        }
6144+    }
6145+
6146+    /* Leave any containers we are not part of anymore. */
6147+    if(n_children == 0  &&  n_parents + n_brothers < ctx->n_containers)
6148+        MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6149+
6150+    /* Enter any container we found a mark for. */
6151+    if(n_brothers > 0) {
6152+        MD_ASSERT(n_brothers == 1);
6153+        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6154+                    ctx->containers[n_parents].task_mark_off,
6155+                    (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
6156+                    MD_BLOCK_CONTAINER_CLOSER));
6157+        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6158+                    container.task_mark_off,
6159+                    (container.is_task ? CH(container.task_mark_off) : 0),
6160+                    MD_BLOCK_CONTAINER_OPENER));
6161+        ctx->containers[n_parents].is_task = container.is_task;
6162+        ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6163+    }
6164+
6165+    if(n_children > 0)
6166+        MD_CHECK(md_enter_child_containers(ctx, n_children, line->data));
6167+
6168+abort:
6169+    return ret;
6170+}
6171+
6172+static int
6173+md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6174+{
6175+    const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6176+    int ret = 0;
6177+
6178+    /* Blank line ends current leaf block. */
6179+    if(line->type == MD_LINE_BLANK) {
6180+        MD_CHECK(md_end_current_block(ctx));
6181+        *p_pivot_line = &md_dummy_blank_line;
6182+        return 0;
6183+    }
6184+
6185+    /* Some line types form block on their own. */
6186+    if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
6187+        MD_CHECK(md_end_current_block(ctx));
6188+
6189+        /* Add our single-line block. */
6190+        MD_CHECK(md_start_new_block(ctx, line));
6191+        MD_CHECK(md_add_line_into_current_block(ctx, line));
6192+        MD_CHECK(md_end_current_block(ctx));
6193+        *p_pivot_line = &md_dummy_blank_line;
6194+        return 0;
6195+    }
6196+
6197+    /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
6198+    if(line->type == MD_LINE_SETEXTUNDERLINE) {
6199+        MD_ASSERT(ctx->current_block != NULL);
6200+        ctx->current_block->type = MD_BLOCK_H;
6201+        ctx->current_block->data = line->data;
6202+        ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
6203+        MD_CHECK(md_add_line_into_current_block(ctx, line));
6204+        MD_CHECK(md_end_current_block(ctx));
6205+        if(ctx->current_block == NULL) {
6206+            *p_pivot_line = &md_dummy_blank_line;
6207+        } else {
6208+            /* This happens if we have consumed all the body as link ref. defs.
6209+             * and downgraded the underline into start of a new paragraph block. */
6210+            line->type = MD_LINE_TEXT;
6211+            *p_pivot_line = line;
6212+        }
6213+        return 0;
6214+    }
6215+
6216+    /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
6217+    if(line->type == MD_LINE_TABLEUNDERLINE) {
6218+        MD_ASSERT(ctx->current_block != NULL);
6219+        MD_ASSERT(ctx->current_block->n_lines == 1);
6220+        ctx->current_block->type = MD_BLOCK_TABLE;
6221+        ctx->current_block->data = line->data;
6222+        MD_ASSERT(pivot_line != &md_dummy_blank_line);
6223+        ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6224+        MD_CHECK(md_add_line_into_current_block(ctx, line));
6225+        return 0;
6226+    }
6227+
6228+    /* The current block also ends if the line has different type. */
6229+    if(line->type != pivot_line->type)
6230+        MD_CHECK(md_end_current_block(ctx));
6231+
6232+    /* The current line may start a new block. */
6233+    if(ctx->current_block == NULL) {
6234+        MD_CHECK(md_start_new_block(ctx, line));
6235+        *p_pivot_line = line;
6236+    }
6237+
6238+    /* In all other cases the line is just a continuation of the current block. */
6239+    MD_CHECK(md_add_line_into_current_block(ctx, line));
6240+
6241+abort:
6242+    return ret;
6243+}
6244+
6245+static int
6246+md_process_doc(MD_CTX *ctx)
6247+{
6248+    const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6249+    MD_LINE_ANALYSIS line_buf[2];
6250+    MD_LINE_ANALYSIS* line = &line_buf[0];
6251+    OFF off = 0;
6252+    int ret = 0;
6253+
6254+    MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
6255+
6256+    while(off < ctx->size) {
6257+        if(line == pivot_line)
6258+            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
6259+
6260+        MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6261+        MD_CHECK(md_process_line(ctx, &pivot_line, line));
6262+    }
6263+
6264+    md_end_current_block(ctx);
6265+
6266+    MD_CHECK(md_build_ref_def_hashtable(ctx));
6267+
6268+    /* Process all blocks. */
6269+    MD_CHECK(md_leave_child_containers(ctx, 0));
6270+    MD_CHECK(md_process_all_blocks(ctx));
6271+
6272+    MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
6273+
6274+abort:
6275+
6276+#if 0
6277+    /* Output some memory consumption statistics. */
6278+    {
6279+        char buffer[256];
6280+        sprintf(buffer, "Alloced %u bytes for block buffer.",
6281+                    (unsigned)(ctx->alloc_block_bytes));
6282+        MD_LOG(buffer);
6283+
6284+        sprintf(buffer, "Alloced %u bytes for containers buffer.",
6285+                    (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6286+        MD_LOG(buffer);
6287+
6288+        sprintf(buffer, "Alloced %u bytes for marks buffer.",
6289+                    (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6290+        MD_LOG(buffer);
6291+
6292+        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6293+                    (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6294+        MD_LOG(buffer);
6295+    }
6296+#endif
6297+
6298+    return ret;
6299+}
6300+
6301+
6302+/********************
6303+ ***  Public API  ***
6304+ ********************/
6305+
6306+int
6307+md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6308+{
6309+    MD_CTX ctx;
6310+    int i;
6311+    int ret;
6312+
6313+    if(parser->abi_version != 0) {
6314+        if(parser->debug_log != NULL)
6315+            parser->debug_log("Unsupported abi_version.", userdata);
6316+        return -1;
6317+    }
6318+
6319+    /* Setup context structure. */
6320+    memset(&ctx, 0, sizeof(MD_CTX));
6321+    ctx.text = text;
6322+    ctx.size = size;
6323+    memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
6324+    ctx.userdata = userdata;
6325+    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
6326+    md_build_mark_char_map(&ctx);
6327+    ctx.doc_ends_with_newline = (size > 0  &&  ISNEWLINE_(text[size-1]));
6328+
6329+    /* Reset all unresolved opener mark chains. */
6330+    for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
6331+        ctx.mark_chains[i].head = -1;
6332+        ctx.mark_chains[i].tail = -1;
6333+    }
6334+    ctx.unresolved_link_head = -1;
6335+    ctx.unresolved_link_tail = -1;
6336+
6337+    /* All the work. */
6338+    ret = md_process_doc(&ctx);
6339+
6340+    /* Clean-up. */
6341+    md_free_ref_defs(&ctx);
6342+    md_free_ref_def_hashtable(&ctx);
6343+    free(ctx.buffer);
6344+    free(ctx.marks);
6345+    free(ctx.block_bytes);
6346+    free(ctx.containers);
6347+
6348+    return ret;
6349+}

A · md4c.h +405, -0

  1@@ -0,0 +1,405 @@
  2+/*
  3+ * MD4C: Markdown parser for C
  4+ * (http://github.com/mity/md4c)
  5+ *
  6+ * Copyright (c) 2016-2020 Martin Mitas
  7+ *
  8+ * Permission is hereby granted, free of charge, to any person obtaining a
  9+ * copy of this software and associated documentation files (the "Software"),
 10+ * to deal in the Software without restriction, including without limitation
 11+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 12+ * and/or sell copies of the Software, and to permit persons to whom the
 13+ * Software is furnished to do so, subject to the following conditions:
 14+ *
 15+ * The above copyright notice and this permission notice shall be included in
 16+ * all copies or substantial portions of the Software.
 17+ *
 18+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 19+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 24+ * IN THE SOFTWARE.
 25+ */
 26+
 27+#ifndef MD4C_H
 28+#define MD4C_H
 29+
 30+#ifdef __cplusplus
 31+    extern "C" {
 32+#endif
 33+
 34+#if defined MD4C_USE_UTF16
 35+    /* Magic to support UTF-16. Note that in order to use it, you have to define
 36+     * the macro MD4C_USE_UTF16 both when building MD4C as well as when
 37+     * including this header in your code. */
 38+    #ifdef _WIN32
 39+        #include <windows.h>
 40+        typedef WCHAR       MD_CHAR;
 41+    #else
 42+        #error MD4C_USE_UTF16 is only supported on Windows.
 43+    #endif
 44+#else
 45+    typedef char            MD_CHAR;
 46+#endif
 47+
 48+typedef unsigned MD_SIZE;
 49+typedef unsigned MD_OFFSET;
 50+
 51+
 52+/* Block represents a part of document hierarchy structure like a paragraph
 53+ * or list item.
 54+ */
 55+typedef enum MD_BLOCKTYPE {
 56+    /* <body>...</body> */
 57+    MD_BLOCK_DOC = 0,
 58+
 59+    /* <blockquote>...</blockquote> */
 60+    MD_BLOCK_QUOTE,
 61+
 62+    /* <ul>...</ul>
 63+     * Detail: Structure MD_BLOCK_UL_DETAIL. */
 64+    MD_BLOCK_UL,
 65+
 66+    /* <ol>...</ol>
 67+     * Detail: Structure MD_BLOCK_OL_DETAIL. */
 68+    MD_BLOCK_OL,
 69+
 70+    /* <li>...</li>
 71+     * Detail: Structure MD_BLOCK_LI_DETAIL. */
 72+    MD_BLOCK_LI,
 73+
 74+    /* <hr> */
 75+    MD_BLOCK_HR,
 76+
 77+    /* <h1>...</h1> (for levels up to 6)
 78+     * Detail: Structure MD_BLOCK_H_DETAIL. */
 79+    MD_BLOCK_H,
 80+
 81+    /* <pre><code>...</code></pre>
 82+     * Note the text lines within code blocks are terminated with '\n'
 83+     * instead of explicit MD_TEXT_BR. */
 84+    MD_BLOCK_CODE,
 85+
 86+    /* Raw HTML block. This itself does not correspond to any particular HTML
 87+     * tag. The contents of it _is_ raw HTML source intended to be put
 88+     * in verbatim form to the HTML output. */
 89+    MD_BLOCK_HTML,
 90+
 91+    /* <p>...</p> */
 92+    MD_BLOCK_P,
 93+
 94+    /* <table>...</table> and its contents.
 95+     * Detail: Structure MD_BLOCK_TABLE_DETAIL (for MD_BLOCK_TABLE),
 96+     *         structure MD_BLOCK_TD_DETAIL (for MD_BLOCK_TH and MD_BLOCK_TD)
 97+     * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
 98+    MD_BLOCK_TABLE,
 99+    MD_BLOCK_THEAD,
100+    MD_BLOCK_TBODY,
101+    MD_BLOCK_TR,
102+    MD_BLOCK_TH,
103+    MD_BLOCK_TD
104+} MD_BLOCKTYPE;
105+
106+/* Span represents an in-line piece of a document which should be rendered with
107+ * the same font, color and other attributes. A sequence of spans forms a block
108+ * like paragraph or list item. */
109+typedef enum MD_SPANTYPE {
110+    /* <em>...</em> */
111+    MD_SPAN_EM,
112+
113+    /* <strong>...</strong> */
114+    MD_SPAN_STRONG,
115+
116+    /* <a href="xxx">...</a>
117+     * Detail: Structure MD_SPAN_A_DETAIL. */
118+    MD_SPAN_A,
119+
120+    /* <img src="xxx">...</a>
121+     * Detail: Structure MD_SPAN_IMG_DETAIL.
122+     * Note: Image text can contain nested spans and even nested images.
 123+     * If rendered into ALT attribute of HTML <IMG> tag, it's the responsibility
124+     * of the parser to deal with it.
125+     */
126+    MD_SPAN_IMG,
127+
128+    /* <code>...</code> */
129+    MD_SPAN_CODE,
130+
131+    /* <del>...</del>
132+     * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
133+     */
134+    MD_SPAN_DEL,
135+
136+    /* For recognizing inline ($) and display ($$) equations
137+     * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
138+     */
139+    MD_SPAN_LATEXMATH,
140+    MD_SPAN_LATEXMATH_DISPLAY,
141+
142+    /* Wiki links
143+     * Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
144+     */
145+    MD_SPAN_WIKILINK,
146+
147+    /* <u>...</u>
148+     * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
149+    MD_SPAN_U
150+} MD_SPANTYPE;
151+
152+/* Text is the actual textual contents of span. */
153+typedef enum MD_TEXTTYPE {
154+    /* Normal text. */
155+    MD_TEXT_NORMAL = 0,
156+
157+    /* NULL character. CommonMark requires replacing NULL character with
158+     * the replacement char U+FFFD, so this allows caller to do that easily. */
159+    MD_TEXT_NULLCHAR,
160+
161+    /* Line breaks.
162+     * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
163+     * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
164+    MD_TEXT_BR,         /* <br> (hard break) */
165+    MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */
166+
167+    /* Entity.
168+     * (a) Named entity, e.g. &nbsp; 
169+     *     (Note MD4C does not have a list of known entities.
170+     *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
171+     *     treated as a named entity.)
172+     * (b) Numerical entity, e.g. &#1234;
173+     * (c) Hexadecimal entity, e.g. &#x12AB;
174+     *
175+     * As MD4C is mostly encoding agnostic, application gets the verbatim
176+     * entity text into the MD_PARSER::text_callback(). */
177+    MD_TEXT_ENTITY,
178+
179+    /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
180+     * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
181+     * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
182+     * kind of text. */
183+    MD_TEXT_CODE,
184+
185+    /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
186+     * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
187+     * The text contains verbatim '\n' for the new lines. */
188+    MD_TEXT_HTML,
189+
190+    /* Text is inside an equation. This is processed the same way as inlined code
191+     * spans (`code`). */
192+    MD_TEXT_LATEXMATH
193+} MD_TEXTTYPE;
194+
195+
196+/* Alignment enumeration. */
197+typedef enum MD_ALIGN {
198+    MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
199+    MD_ALIGN_LEFT,
200+    MD_ALIGN_CENTER,
201+    MD_ALIGN_RIGHT
202+} MD_ALIGN;
203+
204+
205+/* String attribute.
206+ *
207+ * This wraps strings which are outside of a normal text flow and which are
208+ * propagated within various detailed structures, but which still may contain
209+ * string portions of different types like e.g. entities.
210+ *
 211+ * So, for example, let's consider this image:
212+ *
213+ *     ![image alt text](http://example.org/image.png 'foo &quot; bar')
214+ *
215+ * The image alt text is propagated as a normal text via the MD_PARSER::text()
216+ * callback. However, the image title ('foo &quot; bar') is propagated as
217+ * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
218+ *
219+ * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
220+ *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
221+ *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
222+ *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
223+ *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
224+ *
225+ * Note that these invariants are always guaranteed:
226+ *  -- substr_offsets[0] == 0
227+ *  -- substr_offsets[LAST+1] == size
228+ *  -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
229+ *     substrings can appear. This could change only of the specification
230+ *     changes.
231+ */
232+typedef struct MD_ATTRIBUTE {
233+    const MD_CHAR* text;
234+    MD_SIZE size;
235+    const MD_TEXTTYPE* substr_types;
236+    const MD_OFFSET* substr_offsets;
237+} MD_ATTRIBUTE;
238+
239+
240+/* Detailed info for MD_BLOCK_UL. */
241+typedef struct MD_BLOCK_UL_DETAIL {
242+    int is_tight;           /* Non-zero if tight list, zero if loose. */
243+    MD_CHAR mark;           /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
244+} MD_BLOCK_UL_DETAIL;
245+
246+/* Detailed info for MD_BLOCK_OL. */
247+typedef struct MD_BLOCK_OL_DETAIL {
248+    unsigned start;         /* Start index of the ordered list. */
249+    int is_tight;           /* Non-zero if tight list, zero if loose. */
250+    MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
251+} MD_BLOCK_OL_DETAIL;
252+
253+/* Detailed info for MD_BLOCK_LI. */
254+typedef struct MD_BLOCK_LI_DETAIL {
255+    int is_task;            /* Can be non-zero only with MD_FLAG_TASKLISTS */
256+    MD_CHAR task_mark;      /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
257+    MD_OFFSET task_mark_offset;  /* If is_task, then offset in the input of the char between '[' and ']'. */
258+} MD_BLOCK_LI_DETAIL;
259+
260+/* Detailed info for MD_BLOCK_H. */
261+typedef struct MD_BLOCK_H_DETAIL {
262+    unsigned level;         /* Header level (1 - 6) */
263+} MD_BLOCK_H_DETAIL;
264+
265+/* Detailed info for MD_BLOCK_CODE. */
266+typedef struct MD_BLOCK_CODE_DETAIL {
267+    MD_ATTRIBUTE info;
268+    MD_ATTRIBUTE lang;
269+    MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
270+} MD_BLOCK_CODE_DETAIL;
271+
272+/* Detailed info for MD_BLOCK_TABLE. */
273+typedef struct MD_BLOCK_TABLE_DETAIL {
274+    unsigned col_count;         /* Count of columns in the table. */
275+    unsigned head_row_count;    /* Count of rows in the table header (currently always 1) */
276+    unsigned body_row_count;    /* Count of rows in the table body */
277+} MD_BLOCK_TABLE_DETAIL;
278+
279+/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
280+typedef struct MD_BLOCK_TD_DETAIL {
281+    MD_ALIGN align;
282+} MD_BLOCK_TD_DETAIL;
283+
284+/* Detailed info for MD_SPAN_A. */
285+typedef struct MD_SPAN_A_DETAIL {
286+    MD_ATTRIBUTE href;
287+    MD_ATTRIBUTE title;
288+} MD_SPAN_A_DETAIL;
289+
290+/* Detailed info for MD_SPAN_IMG. */
291+typedef struct MD_SPAN_IMG_DETAIL {
292+    MD_ATTRIBUTE src;
293+    MD_ATTRIBUTE title;
294+} MD_SPAN_IMG_DETAIL;
295+
296+/* Detailed info for MD_SPAN_WIKILINK. */
297+typedef struct MD_SPAN_WIKILINK {
298+    MD_ATTRIBUTE target;
299+} MD_SPAN_WIKILINK_DETAIL;
300+
301+/* Flags specifying extensions/deviations from CommonMark specification.
302+ *
303+ * By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
304+ * The following flags may allow some extensions or deviations from it.
305+ */
306+#define MD_FLAG_COLLAPSEWHITESPACE          0x0001  /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
307+#define MD_FLAG_PERMISSIVEATXHEADERS        0x0002  /* Do not require space in ATX headers ( ###header ) */
308+#define MD_FLAG_PERMISSIVEURLAUTOLINKS      0x0004  /* Recognize URLs as autolinks even without '<', '>' */
309+#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS    0x0008  /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
310+#define MD_FLAG_NOINDENTEDCODEBLOCKS        0x0010  /* Disable indented code blocks. (Only fenced code works.) */
311+#define MD_FLAG_NOHTMLBLOCKS                0x0020  /* Disable raw HTML blocks. */
312+#define MD_FLAG_NOHTMLSPANS                 0x0040  /* Disable raw HTML (inline). */
313+#define MD_FLAG_TABLES                      0x0100  /* Enable tables extension. */
314+#define MD_FLAG_STRIKETHROUGH               0x0200  /* Enable strikethrough extension. */
315+#define MD_FLAG_PERMISSIVEWWWAUTOLINKS      0x0400  /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
316+#define MD_FLAG_TASKLISTS                   0x0800  /* Enable task list extension. */
317+#define MD_FLAG_LATEXMATHSPANS              0x1000  /* Enable $ and $$ containing LaTeX equations. */
318+#define MD_FLAG_WIKILINKS                   0x2000  /* Enable wiki links extension. */
319+#define MD_FLAG_UNDERLINE                   0x4000  /* Enable underline extension (and disables '_' for normal emphasis). */
320+
321+#define MD_FLAG_PERMISSIVEAUTOLINKS         (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
322+#define MD_FLAG_NOHTML                      (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
323+
324+/* Convenient sets of flags corresponding to well-known Markdown dialects.
325+ *
326+ * Note we may only support subset of features of the referred dialect.
327+ * The constant just enables those extensions which bring us as close as
328+ * possible given what features we implement.
329+ *
330+ * ABI compatibility note: Meaning of these can change in time as new
331+ * extensions, bringing the dialect closer to the original, are implemented.
332+ */
333+#define MD_DIALECT_COMMONMARK               0
334+#define MD_DIALECT_GITHUB                   (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
335+
336+/* Parser structure.
337+ */
338+typedef struct MD_PARSER {
339+    /* Reserved. Set to zero.
340+     */
341+    unsigned abi_version;
342+
343+    /* Dialect options. Bitmask of MD_FLAG_xxxx values.
344+     */
345+    unsigned flags;
346+
347+    /* Caller-provided rendering callbacks.
348+     *
349+     * For some block/span types, more detailed information is provided in a
350+     * type-specific structure pointed by the argument 'detail'.
351+     *
352+     * The last argument of all callbacks, 'userdata', is just propagated from
353+     * md_parse() and is available for any use by the application.
354+     *
355+     * Note any strings provided to the callbacks as their arguments or as
356+     * members of any detail structure are generally not zero-terminated.
357+     * Application has to take the respective size information into account.
358+     *
359+     * Any rendering callback may abort further parsing of the document by
360+     * returning non-zero.
361+     */
362+    int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
363+    int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
364+
365+    int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
366+    int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
367+
368+    int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
369+
370+    /* Debug callback. Optional (may be NULL).
371+     *
372+     * If provided and something goes wrong, this function gets called.
373+     * This is intended for debugging and problem diagnosis for developers;
374+     * it is not intended to provide any errors suitable for displaying to an
375+     * end user.
376+     */
377+    void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
378+
379+    /* Reserved. Set to NULL.
380+     */
381+    void (*syntax)(void);
382+} MD_PARSER;
383+
384+
385+/* For backward compatibility. Do not use in new code.
386+ */
387+typedef MD_PARSER MD_RENDERER;
388+
389+
390+/* Parse the Markdown document stored in the string 'text' of size 'size'.
391+ * The parser provides callbacks to be called during the parsing so the
392+ * caller can render the document on the screen or convert the Markdown
393+ * to another format.
394+ *
395+ * Zero is returned on success. If a runtime error occurs (e.g. a memory
396+ * fails), -1 is returned. If the processing is aborted due any callback
397+ * returning non-zero, the return value of the callback is returned.
398+ */
399+int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
400+
401+
402+#ifdef __cplusplus
403+    }  /* extern "C" { */
404+#endif
405+
406+#endif  /* MD4C_H */

A · md4c.o +0, -0

M · stagit +0, -0

M · stagit-index +0, -0

M · stagit-index.c +1, -1

1@@ -149,7 +149,7 @@ writelog(FILE *fp)
2 
3 	fputs("<tr><td><a href=\"", fp);
4 	percentencode(fp, stripped_name, strlen(stripped_name));
5-	fputs("/log.html\">", fp);
6+	fputs("/file/README.md.html\">", fp);
7 	xmlencode(fp, stripped_name, strlen(stripped_name));
8 	fputs("</a></td><td>", fp);
9 	xmlencode(fp, description, strlen(description));

M · stagit-index.o +0, -0

M · stagit.c +56, -2

 1@@ -14,6 +14,8 @@
 2 
 3 #include <git2.h>
 4 
 5+#include "md4c-html.h"
 6+
 7 #include "compat.h"
 8 
 9 #define LEN(s)    (sizeof(s)/sizeof(*s))
10@@ -558,6 +560,38 @@ writefooter(FILE *fp)
11 	fputs("</div>\n</div>\n</div>\n</body>\n</html>\n", fp);
12 }
13 
14+void
15+processmd(const char* output, unsigned int len, void *fp)
16+{
17+    fprintf((FILE *)fp, "%.*s", len, output);
18+}
19+
20+size_t
21+writeblobmd(FILE *fp, const git_blob *blob)
22+{
23+    size_t n = 0, i, len, prev, ret;
24+    const char *s = git_blob_rawcontent(blob);
25+    len = git_blob_rawsize(blob);
26+    fputs("<div id=\"md\">\n", fp);
27+    /* Counting lines in the file*/
28+    if (len > 0) {
29+        for (i = 0, prev = 0; i < len; i++) {
30+            if (s[i] != '\n')
31+                continue;
32+            n++;
33+            prev = i + 1;
34+        }
35+        if ((len - prev) > 0) {
36+            n++;
37+        }
38+        ret = md_html(s, len, processmd, fp, MD_FLAG_TABLES | MD_FLAG_TASKLISTS | 
39+                MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS, 0);
40+    }
41+
42+    fputs("</div>\n", fp);
43+    return n;
44+}
45+
46 size_t
47 writeblobhtml(FILE *fp, const git_blob *blob)
48 {
49@@ -949,6 +983,18 @@ writeatom(FILE *fp, int all)
50 	return 0;
51 }
52 
53+int
54+file_is_md(const char *filename)
55+{
56+    int i = strlen(filename) - 3;
57+    if (filename[i++] == '.' &&
58+            filename[i++] == 'm' &&
59+            filename[i] == 'd')
60+        return 1;
61+    return 0;
62+    
63+}
64+
65 size_t
66 writeblob(git_object *obj, const char *fpath, const char *filename, size_t filesize)
67 {
68@@ -975,10 +1021,18 @@ writeblob(git_object *obj, const char *fpath, const char *filename, size_t files
69 	fputs("<p> ", fp);
70 	xmlencode(fp, filename, strlen(filename));
71 	fprintf(fp, " (%zuB)", filesize);
72-	if (git_blob_is_binary((git_blob *)obj))
73+	
74+	if (git_blob_is_binary((git_blob *)obj)) {
75 		fputs("<p>Binary file.</p>\n", fp);
76-	else
77+    } else if (file_is_md(filename)) {
78+        lc = writeblobmd(fp, (git_blob *)obj);
79+        if (ferror(fp))
80+            err(1, "md parse fail");
81+	} else {
82 		lc = writeblobhtml(fp, (git_blob *)obj);
83+		if (ferror(fp))
84+			err(1, "fwrite");
85+	}
86 
87 	writefooter(fp);
88 	checkfileerror(fp, fpath, 'w');

M · stagit.o +0, -0