#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "hweavm.h" extern int OptimizeUseMinus; extern int OptimizeUsePrime; extern int OptimizeTableNbsp; extern int DebugTables; extern void HWEB_puts(const char *s); extern void HWEB_sendI(void); extern void HWEB_send_I(void); extern void do_warn(const char *fmat, ...); extern void do_error(const char *fmat, ...); #define NELEM(arr) ((int)(sizeof(arr)/sizeof *(arr)))
The flags data member requires a little more explanation. Some terms need to be tagged with "hints" about formatting; for example, a term representing an equality operator ought to be formatted to have a little extra space around it, or a summation sign ought to have its superscript and subscript (if any) positioned directly above and below it, rather than to its right. We use bitwise flags to represent these little hints.
#define SUM_OR_PROD 0x0001 #define XTRA_SPACE_L 0x0002 #define XTRA_SPACE_R 0x0004 #define XTRA_SPACE 0x0006 #define TEXT_ITALIC 0x0008 #define TEXT_CENTER 0x0010 #define TEXT_SMALL 0x0020 #define FRACTION_BAR 0x0040
Here is the main function. It takes a string and returns the formula (as a tree of MathNode structures) obtained from it.
MathNode *parseMathLine(const char *line, int len, int *idx) { MathNode *ret = NULL; const char *p=line, *end = line+len; const char *q; for (q = line; q < end; ) { if (q > p) { MathNode *thisNode = newMathNode(p, q-p); int tmp; p = q; thisNode->right = parseMathTerm(p, end-p, &tmp, 0, 0); q = p += tmp; appendMathNode(&ret, thisNode); } else { MathNode *thisNode; int tmp; thisNode = parseMathTerm(p, end-p, &tmp, 0, 0); q = p += tmp; appendMathNode(&ret, thisNode); } } if (p < q) appendMathNode(&ret, newMathNode(p, q-p)); if (idx != NULL) *idx = (q-line); return ret; }
A term is just a single identifier or a braced construct, possibly with super- or subscripts attached. We (somewhat naively) pass around two flags representing "are we in a subscript? a superscript?", which means that abc is parsed properly, but a^b^c_d is parsed with the d subscript belonging to the a, and not the b or the c as one might at first expect. (Those effects can be achieved with a^{b^c_d} and a^b^{c_d}, respectively.)
a b c d
a b c d
a b c d
#define parseMathSuperTerm(x,y,z) parseMathTerm(x,y,z,IsSubTerm,1) #define parseMathSubTerm(x,y,z) parseMathTerm(x,y,z,1,IsSuperTerm) MathNode *parseMathTerm(const char *line, int len, int *idx, int IsSubTerm, int IsSuperTerm) { MathNode *ret = NULL; const char *p=line, *end = line+len; const char *q; while (p < end && isspace(*p)) ++p; q = p; if (*q == '{') { if (q > p) { MathNode *thisNode = newMathNode(p, q-p); int tmp; p = q+1; for (tmp=1, ++q; q < end; ++q) { tmp += (*q == '{') - (*q == '}'); if (tmp == 0) break; } if (q >= end) do_error("'{' without matching '}'"); thisNode->right = parseMathLine(p, q-p, &tmp); if (tmp != q-p) do_error("Malformed braced term"); p = q = q+1; appendMathNode(&ret, thisNode); } else { MathNode *thisNode; int tmp; p = q+1; for (tmp=1, ++q; q < end; ++q) { tmp += (*q == '{') - (*q == '}'); if (tmp == 0) break; } if (q >= end) do_error("'{' without matching '}'"); thisNode = parseMathLine(p, q-p, &tmp); if (tmp != q-p) do_error("Malformed braced term"); p = q = q+1; appendMathNode(&ret, thisNode); } } else if (*q == '\\') { int tmp; appendMathNode(&ret, parseEntity(q, end-q, &tmp)); p = q += tmp; } else if (isalpha(*q)) { int tmp; appendMathNode(&ret, parseIdent(q, end-q, &tmp)); p = q += tmp; } else if (isdigit(*q)) { while (q < end && isdigit(*q)) ++q; if (q < end && *q == '.' && isdigit(q[1])) { ++q; while (q < end && isdigit(*q)) ++q; } if (q < end-1 && toupper(*q) == 'E') { const char *oldq = q; ++q; if (*q == '+' || *q == '-') ++q; while (q < end && isdigit(*q)) ++q; if (!isdigit(q[-1])) q = oldq; } } else if (!strchr("^_", *q)) { int tmp; appendMathNode(&ret, parseSymbol(q, end-q, &tmp)); p = q += tmp; } if (q < end && *q == '_') { if (!IsSuperTerm) { MathNode *thisNode = newMathNode(p, q-p); int tmp; p = q+1; thisNode->sub = parseMathSubTerm(p, end-p, &tmp); q = p += tmp; appendMathNode(&ret, thisNode); } } if (q < end && *q == '^') { if (!IsSubTerm) { MathNode *thisNode = newMathNode(p, q-p); int tmp; p = q+1; thisNode->super = parseMathSuperTerm(p, end-p, &tmp); q = p += tmp; appendMathNode(&ret, thisNode); } } if (p < q) appendMathNode(&ret, newMathNode(p, q-p)); if (idx != NULL) *idx = (q-line); return ret; }
This function is in charge of parsing "special" constructs such as implicit sub- and superscripts. For example, it is charged with recognizing that Ai denotes the subscripted term Ai, and with determining that s32b might well be intended to read s32b while the superficially similar x1y1 would be read as x1y1. This function must only be passed a single alphanumeric string at a time.
We have a sequence consisting of alternating letter-sequences, digit-sequences, and ij-sequences (which might be letters or subscripts, depending on context). We try to match the entire sequence onto a repeating pattern of [alnum-sequence][subscript]. If it matches a repeating pattern, then we take that pattern as correct. If it matches no repeating pattern, then if it ends with a potential subscript, we take it; otherwise we treat the whole thing as one identifier.
For example, the input xiy1 matches two repetitions of the pattern "one letter, one subscript." The input s32b matches no repeated pattern, nor does it end in a potential subscript; thus it is treated as s32b, all one identifier. The input s32bi matches no repeating pattern, but does end in a subscript; thus it is recorded as s32bi.
MathNode *parseIdent(const char *line, int len, int *idx) { const char *end = line+len; const char *q; int idlen; int i,j,k,m,t; int InSubscr; MathNode *ret = NULL; MathNode *thisNode; #define ALPHA 0x01 #define DIGIT 0x02 #define IJ 0x04 #define COULD_IDENT 0x10 #define COULD_SUBSCR 0x20
For each character in the potential identifier string, we record its essential characteristics: can it be a subscript?
unsigned char *pattern = calloc(len, sizeof *pattern); for (q=line, idlen=0; q < end && isalnum(*q); ++q, ++idlen) { if (isalpha(*q)) pattern[idlen] |= ALPHA; if (isdigit(*q)) pattern[idlen] |= DIGIT; if (strchr("ij",*q)) pattern[idlen] |= IJ; }
If we have found a single-letter identifier such as i, just go ahead and return it. Likewise, input such as Frobini_us ought to be handled correctly, as Frobinius and not as Frobinius or, worse, Frobinius! So if idlen is 1, or if our identifier is followed by an explicit subscript, then return immediately.
if (idlen == 1 || (idlen < len && line[idlen]=='_')) { thisNode = newMathNode(line, idlen); thisNode->flags |= TEXT_ITALIC; goto done; }
Now we construct the repeated sequence of identifiers and subscripts by brute force. For each factor k of idlen, we break the string into parts of length k and see whether their patterns could be made to match.
for (k=2; k <= idlen; ++k) { if (idlen % k != 0) { if (k > idlen/2) k = idlen-1; continue; } for (t=0; t < k; ++t) { pattern[t] |= (COULD_IDENT | COULD_SUBSCR); for (m=t; m < idlen; m += k) { if (t==0) {/* Identifiers must start with an alphabetic */if (!(pattern[m] & ALPHA)) goto next_k; else pattern[t] &= ~COULD_SUBSCR; } else { if (!(pattern[m] & (ALPHA | DIGIT))) pattern[t] &= ~COULD_IDENT; if (!(pattern[m] & (IJ | DIGIT))) pattern[t] &= ~COULD_SUBSCR; } } if (!(pattern[t] & (COULD_IDENT | COULD_SUBSCR))) goto next_k; }
At this point we have produced a match. We have set pattern[t] for each t between 0 and k−1 to tell us whether the characters equal to t modulo k are supposed to be formatted as regular identifiers or as subscripts. Now we break out of the k loop in order to record the formatting in the tree.
break; next_k: continue; } if (k == idlen && !(pattern[idlen-1] & COULD_SUBSCR)) {
The pattern does not repeat, nor does it end with a subscript. This is the s32b case; we return the identifier unaltered.
thisNode = newMathNode(line, idlen); thisNode->flags |= TEXT_ITALIC; goto done; } InSubscr = 0; thisNode = NULL; for (i=j=0; j < idlen; ++j) { if ((pattern[j%k] & COULD_SUBSCR) && !InSubscr) { InSubscr = 1; thisNode = newMathNode(line+i, j-i); thisNode->flags |= TEXT_ITALIC; i = j; } else if (!(pattern[j%k] & COULD_SUBSCR) && InSubscr) { InSubscr = 0; thisNode->sub = newMathNode(line+i, j-i); if (isalpha(line[i])) thisNode->sub->flags |= TEXT_ITALIC; appendMathNode(&ret, thisNode); thisNode = NULL; i = j; } } if (InSubscr) { thisNode->sub = newMathNode(line+i, j-i); if (isalpha(line[i])) thisNode->sub->flags |= TEXT_ITALIC; } else { thisNode = newMathNode(line+i, j-i); thisNode->flags |= TEXT_ITALIC; } done: appendMathNode(&ret, thisNode); free(pattern); if (idx != NULL) *idx = idlen; return ret; #undef ALPHA #undef DIGIT #undef IJ #undef COULD_IDENT #undef COULD_SUBSCR }
The function parseEntity is in charge of parsing entities starting with backslashes, such as \sum and \times. This function receives the entire string, starting at the backslash, and can parse as much of it as it needs, as long as it stays within a "term" boundary. For example, a TEX user might type 3\div2 instead of 3\div 2; we need to recognize that and produce the correct tree in either case. Similarly but even more weirdly, we need to parse entities like \not= for ≠, and of course "\" for the backslash itself. We use the maximal-munch principle, so for instance \not\in matches the "slashed inclusion" sign, not the "negation" sign followed by the "inclusion" sign.
A∉B
A¬∈B
We set up a table of all the allowed entities, and store for each one of them: its name; its replacement text; whether it can be followed immediately by text; whether it can be followed by a number; and any flags that always apply to this operator.
The replacement text repl can be set to NULL if the appropriate HTML entity is just the original text surrounded by & and ;. For example, the input \times produces the output ×.
MathNode *parseEntity(const char *line, int len, int *idx) { struct foo { char *text, *repl; int yestext, yesnum; unsigned flags; } escs[] = { { "not", "¬", 0, 0, 0 }, { "not=", "≠", 1, 1, XTRA_SPACE }, { "div", NULL, 0, 1, 0 }, { "times", NULL, 0, 1, 0 }, { "sum", "<big>∑</big>", 0, 0, SUM_OR_PROD | XTRA_SPACE_R }, { "prod", "<big>∏</big>", 0, 0, SUM_OR_PROD | XTRA_SPACE_R }, { "inf", "∞", 0, 0, 0 }, { "in", "∈", 0, 0, 0 }, { "not\\in", "∉", 0, 0, 0 }, #define G(letter) { #letter, NULL, 0,0,0 }, G(Alpha)G(alpha)G(Beta)G(beta)G(Gamma)G(gamma) G(Delta)G(delta)G(Epsilon)G(epsilon)G(Zeta)G(zeta) G(Eta)G(eta)G(Theta)G(theta)G(Iota)G(iota) G(Kappa)G(kappa)G(Lambda)G(lambda)G(Mu)G(mu) G(Nu)G(nu)G(Xi)G(xi)G(Omicron)G(omicron)G(Pi)G(pi) G(Rho)G(rho)G(Sigma)G(sigma)G(sigmaf)G(Tau)G(tau) G(Upsilon)G(upsilon)G(Phi)G(phi)G(Chi)G(chi) G(Psi)G(psi)G(Omega)G(omega) #undef G { "forall", "∀", 0, 0, 0 }, { "exists", "∃", 0, 0, 0 }, { "therefore", "∴", 0, 0, 0 }, { "prop", NULL, 0, 1, XTRA_SPACE }, { "sim", NULL, 0, 1, XTRA_SPACE }, { "equiv", NULL, 0, 1, XTRA_SPACE }, { "and", NULL, 0, 0, XTRA_SPACE }, { "or", NULL, 0, 0, XTRA_SPACE }, { "cap", NULL, 0, 0, 0 }, { "cup", NULL, 0, 0, 0 }, { "oplus", NULL, 0, 0, 0 }, { "otimes", NULL, 0, 0, 0 }, { "perp", NULL, 0, 0, 0 }, }; struct foo *best = NULL; int i, bestlen = 0; for (i=0; i < NELEM(escs); ++i) { int n = strlen(escs[i].text); if (n <= bestlen) continue; if (n+1 > len) continue; if (strncmp(escs[i].text, line+1, n) != 0) continue; if (!escs[i].yestext && isalpha(line[n+1])) continue; if (!escs[i].yesnum && isdigit(line[n+1])) continue; best = &escs[i]; bestlen = n; } if (!best) { MathNode *ret; do_warn("Unknown entity in line '%.*s'", len, line);
We handle unknown entities in two ways: If the entity is alphabetic or numeric, we treat it as an operator in its own right, without italics (e.g., \cos becomes "cos"). Otherwise, we treat the backslash literally.
if (isalpha(line[1])) { ++line; for (i=0; isalpha(line[i]); ++i) continue; if (idx) *idx = i+1; } else if (isdigit(line[1])) { ++line; for (i=1; isdigit(line[i]); ++i) continue; if (idx) *idx = i+1; } else { i = 1; if (idx) *idx = i; } ret = newMathNode(line, i); return ret; } if (idx) *idx = strlen(best->text)+1; if (best->repl) { MathNode *ret = newMathNode(best->repl, strlen(best->repl)); ret->flags = best->flags; return ret; } else { int n = strlen(best->text)+2; char *buffer = malloc(n+1); MathNode *ret; sprintf(buffer, "&%s;", best->text); ret = newMathNode(buffer, n); free(buffer); ret->flags = best->flags; return ret; } }
The function parseSymbol is in charge of parsing operators not starting with backslashes, including * and -->. This function receives the entire string, starting at the backslash, and can parse as much of it as it needs, as long as it stays within a "term" boundary. We use the maximal-munch principle, so for instance --> matches the "right-arrow" sign, not two "minus" signs followed by a "greater-than." The major difference between this function and parseEntity is that we do not complain loudly if we do not recognize an operator; we just shut up and include it literally in the output.
MathNode *parseSymbol(const char *line, int len, int *idx) { struct foo { char *text, *repl; int yestext, yesnum; unsigned flags; } escs[] = { { "-->", "→", 1, 1, XTRA_SPACE }, { "<--", "←", 1, 1, XTRA_SPACE }, { "<->", "&lrarrow;",1, 1, XTRA_SPACE }, { "<==", "&larrow;", 1, 1, XTRA_SPACE }, { "==>", "⇒", 1, 1, XTRA_SPACE }, { "<=>", "⇔", 1, 1, XTRA_SPACE }, { "~", "¬", 1, 1, 0 }, { "<=", "≤", 1, 1, XTRA_SPACE }, { ">=", "≥", 1, 1, XTRA_SPACE }, { "!=", "≠", 1, 1, XTRA_SPACE }, { "<>", "≠", 1, 1, XTRA_SPACE }, { "<", "<", 1, 1, XTRA_SPACE }, { ">", ">", 1, 1, XTRA_SPACE }, { "=", "=", 1, 1, XTRA_SPACE }, { "&", "&", 1, 1, 0 }, { ",", ",", 1, 1, XTRA_SPACE_R }, }; struct foo *best = NULL; int i, bestlen = 0; for (i=0; i < NELEM(escs); ++i) { int n = strlen(escs[i].text); if (n <= bestlen) continue; if (n > len) continue; if (strncmp(escs[i].text, line, n) != 0) continue; if (!escs[i].yestext && isalpha(line[n+1])) continue; if (!escs[i].yesnum && isdigit(line[n+1])) continue; best = &escs[i]; bestlen = n; } if (idx) *idx = (best? strlen(best->text): 1); if (!best) { if (line[0] == '-' && OptimizeUseMinus) return newMathNode("−", 7); if (!strncmp(line, "''", 2) && OptimizeUsePrime) return newMathNode("″", 7); if (line[0] == '\'' && OptimizeUsePrime) return newMathNode("′", 7); if (line[0] == '*') return newMathNode("·", 8); return newMathNode(line, 1); } else { MathNode *ret = newMathNode(best->repl, strlen(best->repl)); ret->flags = best->flags; return ret; } }
The function newMathNode is a constructor for the MathNode structure. It returns a newly allocated structure with a copy of the provided text in its text field, and all the pointer fields set to NULL.
The killMathNode function does exactly the opposite: it takes an existing MathNode structure and frees it and all its child nodes.
MathNode *newMathNode(const char *text, int ntext) { MathNode *p = malloc(sizeof *p); if (!p) do_error("Out of memory!"); p->text = malloc(ntext+1); if (!p->text) do_error("Out of memory!"); sprintf(p->text, "%.*s", ntext, text); p->right = p->super = p->sub = NULL; p->flags = 0; return p; } void killMathNode(MathNode *p) { if (p == NULL) return; free(p->text); killMathNode(p->super); killMathNode(p->sub); killMathNode(p->right); free(p); }
appendMathNode appends the node given by its second argument, onto the end of the list of terms given by its first. If the list is empty (i.e., if *root is NULL), then the list is created.
void appendMathNode(MathNode **root, MathNode *right) { while (*root != NULL) root = &(*root)->right; *root = right; }
The function compressMathTree takes a tree and "cleans it up," removing empty nodes to the best of its ability. Essentially, this is the piece of the program that handles the aesthetics of the formula. Splitting the responsibilities between this function and parseMathLine lets parseMathLine be a lot simpler and more straightforward.
MathNode *compressMathTree_helper(MathNode *root); void compressMathTree(MathNode **root) { *root = compressMathTree_helper(*root); } MathNode *compressMathTree_helper(MathNode *A) { if (A == NULL) return NULL; A->sub = compressMathTree_helper(A->sub); A->super = compressMathTree_helper(A->super); A->right = compressMathTree_helper(A->right);
We must take care of the following situation:
Compacting super- and subscripts: an2. A->right is B, and B->text is empty, and !(A->sub && B->sub) and !(A->super && B->super). If all those are the case, then we make A->super the old value of B->super, and A->sub the old value of B->sub, and A->right the value of B->right. Then we kill the node B.
Finally, we do the whole thing over again, so that we properly stack superscripts on top of subscripts. (The first pass compresses [a][][_n][][^2] to [a][_n][][^2], and the second pass compresses that to [a][_n][^2].)
if (A->right == NULL) { if (A->super || A->sub) goto next; if (A->text && A->text[0]) goto next; free(A); return NULL; } else { MathNode *B = A->right; if (B->text && B->text[0]) goto next; if (A->super && B->super) goto next; if (A->sub && B->sub) goto next;/* Okay, it's time to kill B. */if (!A->super) A->super = B->super; if (!A->sub) A->sub = B->sub; A->right = B->right; free(B->text); free(B); }
Compacting adjacent texts: foobar. A->right is B, and A->super and A->sub are both empty. Reallocate A->text to hold the concatenation of A->text and B->text.
Note that this has interactions with SUM_OR_PROD (we certainly don't want to merge two sums!), and with XTRA_SPACE (we may need to insert some of that extra space at this point), and also with TEXT_ITALIC (we don't want to mess with explicit HTML formatting tags at this stage, so don't merge unlike TEXT_ITALIC texts, either).
next: if (A->right && !A->sub && !A->super) { MathNode *B = A->right; int ntext; if ((A->flags | B->flags) & SUM_OR_PROD) goto last; if ((A->flags ^ B->flags) & TEXT_ITALIC) goto last; ntext = (A->text? strlen(A->text): 0) + (B->text? strlen(B->text): 0) + 1; if ((A->flags & XTRA_SPACE_R) || (B->flags & XTRA_SPACE_L)) ntext += strlen(" "); A->text = realloc(A->text, ntext); if ((A->flags & XTRA_SPACE_R) || (B->flags & XTRA_SPACE_L)) strcat(A->text, " "); strcat(A->text, B->text); A->super = B->super; A->sub = B->sub; A->right = B->right; A->flags &= ~XTRA_SPACE_R; A->flags |= (B->flags & XTRA_SPACE_R); free(B->text); free(B); } last: return A; } void dumpMathHTML_helper(MathNode *root); void dumpMathHTML(MathNode **root) { compressMathTree(root); dumpMathHTML_helper(*root); } void dumpMathHTML_helper(MathNode *root) { if (root == NULL) return; if (root->flags & SUM_OR_PROD) { if (root->flags & TEXT_ITALIC) HWEB_sendI(); HWEB_puts(root->text); if (root->flags & TEXT_ITALIC) HWEB_send_I(); if (root->sub) { HWEB_puts("<sub><small>"); dumpMathHTML_helper(root->sub); if (root->super) { HWEB_puts(".."); dumpMathHTML_helper(root->super); } HWEB_puts("</small></sub>"); } else if (root->super) { HWEB_puts("<sup><small>"); dumpMathHTML_helper(root->super); HWEB_puts("</small></sup>"); } } else if (root->flags & FRACTION_BAR) { if (root->super) { HWEB_puts("<sup>"); dumpMathHTML_helper(root->super); HWEB_puts("</sup>"); } if (root->sub || root->super) { HWEB_puts(" / "); } if (root->sub) { HWEB_puts("<sub>"); dumpMathHTML_helper(root->sub); HWEB_puts("</sub>"); } } else { if (root->flags & TEXT_ITALIC) HWEB_sendI(); HWEB_puts(root->text); if (root->flags & TEXT_ITALIC) HWEB_send_I(); if (root->sub) { HWEB_puts("<sub><small>"); dumpMathHTML_helper(root->sub); HWEB_puts("</small></sub>"); } if (root->super) { HWEB_puts("<sup><small>"); dumpMathHTML_helper(root->super); HWEB_puts("</small></sup>"); } } if (root->right && ((root->flags & XTRA_SPACE_R) || (root->right->flags & XTRA_SPACE_L))) { HWEB_puts(" "); } dumpMathHTML_helper(root->right); }
The following functions are related to out-of-line math mode.
The function textcompressMathTree does exactly the same thing as compressMathTree, except that it also handles italics and trivial sub- and superscripts. It is intended for use in out-of-line (table-based) math mode.
MathNode *textcompressMathTree_helper(MathNode *root); void textcompressMathTree(MathNode **root) { *root = textcompressMathTree_helper(*root); } MathNode *textcompressMathTree_helper(MathNode *A) { if (A == NULL) return NULL; A->sub = textcompressMathTree_helper(A->sub); A->super = textcompressMathTree_helper(A->super); A->right = textcompressMathTree_helper(A->right);
We must take care of the following situation:
Compacting super- and subscripts: an2. A->right is B, and B->text is empty, and !(A->sub && B->sub) and !(A->super && B->super). If all those are the case, then we make A->super the old value of B->super, and A->sub the old value of B->sub, and A->right the value of B->right. Then we kill the node B.
Finally, we do the whole thing over again, so that we properly stack superscripts on top of subscripts. (The first pass compresses [a][][_n][][^2] to [a][_n][][^2], and the second pass compresses that to [a][_n][^2].)
while (A->right != NULL) { MathNode *B = A->right; if (B->text && B->text[0]) goto next; if (A->super && B->super) goto next; if (A->sub && B->sub) goto next;/* Okay, it's time to kill B. */if (!A->super) A->super = B->super; if (!A->sub) A->sub = B->sub; A->right = B->right; free(B->text); free(B); } if (!A->super && !A->sub && (!A->text || !A->text[0])) { killMathNode(A); return NULL; } next: #if 0
Compacting trivial sub- and superscripts: an. A->super contains only one term, or A->sub does, and there are not both a superscript and a subscript. Merge the sub- or superscript right into this text; unless this text is flagged either FRACTION_BAR or SUM_OR_PROD.
Tagging small text in sums and products: ∑i. In the case of sums or products, we tag the subscripts and superscripts as "small" text.
if (A->flags & SUM_OR_PROD) { MathNode *cur; for (cur = A->sub; cur != NULL; cur = cur->right) cur->flags |= TEXT_SMALL; for (cur = A->super; cur != NULL; cur = cur->right) cur->flags |= TEXT_SMALL; } else if (A->flags & FRACTION_BAR) {/* do nothing */} else { if (A->sub && !A->super && !strstr(A->sub->text, "<sub>") && !strstr(A->sub->text, "<sup>") && !A->sub->right && !A->sub->sub && !A->sub->super) { int ntext = strlen(A->text) + strlen(A->sub->text) + 1; ntext += strlen("<sub></sub>"); if (A->sub->flags & TEXT_ITALIC) ntext += strlen("<i></i>"); A->text = realloc(A->text, ntext); strcat(A->text, "<sub>"); if (A->sub->flags & TEXT_ITALIC) strcat(A->text, "<i>"); strcat(A->text, A->sub->text); if (A->sub->flags & TEXT_ITALIC) strcat(A->text, "</i>"); strcat(A->text, "</sub>"); killMathNode(A->sub); A->sub = NULL; } else if (A->super && !A->sub && !strstr(A->super->text, "<sub>") && !strstr(A->super->text, "<sup>") && !A->super->right && !A->super->sub && !A->super->super) { int ntext = strlen(A->text) + strlen(A->super->text) + 1; ntext += strlen("<sup></sup>"); if (A->super->flags & TEXT_ITALIC) ntext += strlen("<i></i>"); A->text = realloc(A->text, ntext); strcat(A->text, "<sup>"); if (A->super->flags & TEXT_ITALIC) strcat(A->text, "<i>"); strcat(A->text, A->super->text); if (A->super->flags & TEXT_ITALIC) strcat(A->text, "</i>"); strcat(A->text, "</sup>"); killMathNode(A->super); A->super = NULL; } } #endif
Compacting adjacent texts: foobar. A->right is B, and A->super and A->sub are both empty. Reallocate A->text to hold the concatenation of A->text and B->text.
Note that this has interactions with SUM_OR_PROD (we certainly don't want to merge two sums!), and with XTRA_SPACE (we may need to insert some of that extra space at this point), and also with TEXT_ITALIC (we may have to insert explicit <i> and </i> tags at this stage).
if (A->right && !A->sub && !A->super) { MathNode *B = A->right; int ntext; if ((A->flags | B->flags) & (SUM_OR_PROD | FRACTION_BAR)) goto last; ntext = (A->text? strlen(A->text): 0) + (B->text? strlen(B->text): 0) + 1; if ((A->flags ^ B->flags) & TEXT_ITALIC) ntext += strlen("<i></i>"); if ((A->flags & XTRA_SPACE_R) || (B->flags & XTRA_SPACE_L)) ntext += strlen(" "); A->text = realloc(A->text, ntext); if ((A->flags & TEXT_ITALIC) && !(B->flags & TEXT_ITALIC)) { memmove(A->text+3, A->text, strlen(A->text)+1); strncpy(A->text, "<i>", 3); strcat(A->text, "</i>"); } if ((A->flags & XTRA_SPACE_R) || (B->flags & XTRA_SPACE_L)) strcat(A->text, " "); if ((B->flags & TEXT_ITALIC) && !(A->flags & TEXT_ITALIC)) { strcat(A->text, "<i>"); } strcat(A->text, B->text); if ((B->flags & TEXT_ITALIC) && !(A->flags & TEXT_ITALIC)) { strcat(A->text, "</i>"); } A->super = B->super; A->sub = B->sub; A->right = B->right; A->flags &= ~XTRA_SPACE_R; A->flags |= (B->flags & XTRA_SPACE_R); if ((A->flags ^ B->flags) & TEXT_ITALIC) A->flags &= ~TEXT_ITALIC; free(B->text); free(B); } last: return A; }
The function tagMathTree takes a tree and tags each node with two pieces of information: the height and width of the node and its vertical and horizontal offset. The routine tagMathTree_fix does this for a single subtree, and returns the height and width of the whole subtree (not just its first node). The routine tagMathTree_percolate "percolates" scaling and displacement information down into the branches of a tree.
void tagMathTree_fix(MathNode *root, int *w, int *h); void tagMathTree_percolate(MathNode *root, int wsc, int hsc, int wofs, int hofs); int lcm(int x, int y); int max(int x, int y); void tagMathTree(MathNode *root, int *w, int *h) { tagMathTree_fix(root, w, h); } void tagMathTree_fix(MathNode *root, int *w, int *h) { int subw, subh; int supw, suph; int rightw, righth; if (root == NULL) { *w = 0; *h = 0; return; } tagMathTree_fix(root->sub, &subw, &subh); tagMathTree_fix(root->super, &supw, &suph); tagMathTree_fix(root->right, &rightw, &righth); root->rowstart = root->colstart = 0; root->rowspan = root->colspan = 0; if (root->flags & SUM_OR_PROD) {
This node is at least as high as its superscript and subscript trees, and as high as its right neighbor node. It is also as wide as its superscript and subscript trees.
int mywidth = lcm(supw, subw); int myheight = (root->right)? root->right->rowspan: 1; if (root->right) tagMathTree_percolate(root->right, 1, myheight/root->right->rowspan, mywidth, 0); if (supw) tagMathTree_percolate(root->super, mywidth/supw, 1, 0, -suph); if (subw) tagMathTree_percolate(root->sub, mywidth/subw, 1, 0, myheight); *w = mywidth + rightw; *h = max(suph+myheight+subh, righth); root->rowspan = myheight; root->colspan = mywidth;
We normally "shrink" expressions to the right of sums so that they fit entirely within the vertical range of the summation sign. But if the expression to the right is also a summation, then we don't shrink it—we have to offset everything in our own column vertically without touching the expression to the right.
if (root->right && (root->right->flags & SUM_OR_PROD)) { tagMathTree_percolate(root->sub, 1, 1, 0, suph); root->rowstart = suph; tagMathTree_percolate(root->super, 1, 1, 0, suph); } else { tagMathTree_percolate(root, 1, 1, 0, suph); }
There is one other thing we must do to sums and products: we must set their "center" attributes and the "center" attributes of their sub- and superscripts, if at all possible.
root->flags |= TEXT_CENTER; if (root->sub && !root->sub->right && !root->sub->sub && !root->sub->super) root->sub->flags |= TEXT_CENTER; if (root->super && !root->super->right && !root->super->sub && !root->super->super) root->super->flags |= TEXT_CENTER; return; } else if (root->flags & FRACTION_BAR) {
This node type is not yet fully supported, mostly because it just doesn't "fit in" with the other types. I have not yet found a nice way to display fraction bars, either. So as a temporary fix, this clause treats FRACTION_BAR pretty much the same as SUM_OR_PROD.
int mywidth = max(subw, supw); int myheight = (root->right)? root->right->rowspan: 1; int expw = 1 + (supw && subw && (supw%2 != subw%2)); mywidth = expw*max(supw, subw); tagMathTree_percolate(root->super, expw, 1, (mywidth-expw*supw)/2, -suph); tagMathTree_percolate(root->sub, expw, 1, (mywidth-expw*subw)/2, myheight); if (root->right) tagMathTree_percolate(root->right, expw, myheight/root->right->rowspan, mywidth, 0); *w = mywidth + rightw; *h = max(suph+myheight+subh, righth); root->rowspan = myheight; root->colspan = mywidth; tagMathTree_percolate(root, 1, 1, 0, suph);
There is one other thing we must do to fractions: we must set their "center" attributes and the "center" attributes of their sub- and superscripts.
root->flags |= TEXT_CENTER; if (root->sub && !root->sub->right && !root->sub->sub && !root->sub->super) root->sub->flags |= TEXT_CENTER; if (root->super && !root->super->right && !root->super->sub && !root->super->super) root->super->flags |= TEXT_CENTER; return; } else {
This node is twice as high as its superscript or subscript trees, and as high as its right neighbor node.
int mywidth = 1; int sswidth = max(supw, subw); int myheight = 1; if (subh > 0) myheight = lcm(myheight, 2*subh); if (suph > 0) myheight = lcm(myheight, 2*suph); if (root->right) { myheight = lcm(myheight, root->right->rowspan); tagMathTree_percolate(root->right, 1, myheight/root->right->rowspan, mywidth+sswidth, 0); } if (subh) tagMathTree_percolate(root->sub, 1, myheight/(2*subh), mywidth, myheight/2); if (suph) tagMathTree_percolate(root->super, 1, myheight/(2*suph), mywidth, 0); *w = mywidth + sswidth + rightw; *h = max(myheight, righth); root->rowspan = myheight; root->colspan = mywidth; root->rowstart = (root->right)? root->right->rowstart: 0; return; } }
tagMathTree_percolate scales and displaces all the coordinates in a given subtree.
void tagMathTree_percolate(MathNode *root, int wsc, int hsc, int wofs, int hofs) { if (root == NULL) return; tagMathTree_percolate(root->sub, wsc, hsc, wofs, hofs); tagMathTree_percolate(root->super, wsc, hsc, wofs, hofs); tagMathTree_percolate(root->right, wsc, hsc, wofs, hofs); root->rowspan *= hsc; root->colspan *= wsc; root->rowstart *= hsc; root->colstart *= wsc; root->rowstart += hofs; root->colstart += wofs; } int gcd(int x, int y) { return (y==0)? x: gcd(y, x%y); } int lcm(int x, int y) { if (x==0 && y==0) return 1; if (x==0) return y; if (y==0) return x; return (x*y) / gcd(x,y); } int max(int x, int y) { return (x < y)? y: x; }
The routine dumpMathTable is rather naive at present, but it does what it needs to do. Namely, it searches the given tree for nodes corresponding to each cell, and prints their contents.
MathNode *dumpMathTable_helper(MathNode *tree, int i, int j); void dumpMathTable(MathNode **tree) { int w, h; int i, j; textcompressMathTree(tree); tagMathTree(*tree, &w, &h); #if 1 if (h < 2) { dumpMathHTML(tree); HWEB_puts("<br>"); return; } #endif HWEB_puts("<table"); if (DebugTables) HWEB_puts(" border=3"); HWEB_puts(">"); for (j=0; j < h; ++j) { HWEB_puts("<tr>"); for (i=0; i < w; ++i) { MathNode *p = dumpMathTable_helper(*tree, i, j); if (p == NULL) { HWEB_puts("<td>"); if (OptimizeTableNbsp) HWEB_puts(" "); HWEB_puts("</td>"); } else if (p->rowstart == j && p->colstart == i) { char buffer[200], *b = buffer; b += sprintf(b, "<td"); if (p->rowspan > 1) b += sprintf(b, " rowspan=%d", p->rowspan); if (p->colspan > 1) b += sprintf(b, " colspan=%d", p->colspan); if (p->flags & TEXT_CENTER) b += sprintf(b, " align=\"center\""); b += sprintf(b, " valign=\"middle\">"); if (p->flags & TEXT_SMALL) b += sprintf(b, "<small>"); HWEB_puts(buffer); if (p->flags & TEXT_ITALIC) HWEB_sendI(); HWEB_puts(p->text); if (p->flags & TEXT_ITALIC) HWEB_send_I(); if (p->flags & TEXT_SMALL) HWEB_puts("</small>"); HWEB_puts("</td>"); } else {/* do nothing at all */} } HWEB_puts("</tr>"); } HWEB_puts("</table>"); } MathNode *dumpMathTable_helper(MathNode *tree, int i, int j) { MathNode *p; if (tree == NULL) return NULL; if (tree->rowstart <= j && j < (tree->rowstart + tree->rowspan)) if (tree->colstart <= i && i < (tree->colstart + tree->colspan)) return tree; if ((p = dumpMathTable_helper(tree->super, i, j))) return p; if ((p = dumpMathTable_helper(tree->sub, i, j))) return p; return dumpMathTable_helper(tree->right, i, j); }