Documentation autogenerated by HWEB


#include "hweavm.h"

/*
 * Forward declarations for the routines in this file.  The indentation
 * roughly mirrors the call hierarchy: an indented routine is called by
 * the less-indented routine above it.
 */
void HWEB_attach(FILE *fp);
int HWEB_release(FILE *fp);
int HWEB_process(int k);
  int HWEB_increase(void);
int HWEB_break(int paraAllowed);
  int HWEB_parse_and_dump(void);
    void HWEB_dumpPlain(const char *s, int begin, int end);
    void HWEB_dumpItal(const char *s, int begin, int end);
    void HWEB_dumpPipe(const char *s, int begin, int end);
    int HWEB_dumpMath(const char *s, int begin, int end);
    int HWEB_dumpNmath(const char *s, int begin, int end);
      int HWEB_mathterm(const char *s, int *begin, int end);
      int HWEB_subsup(const char *s, int *begin, int end);
      int HWEB_mathescape(const char *s, int *begin, int end);

void HWEB_putc(int c);
  void HWEB_puts(const char *s);
    /* The pending-tag flushers that HWEB_puts actually calls. */
    void HWEB_sendPutOff(void);
    void HWEB_sendPutOn(void);
      void HWEB_puts_(const char *s);
    void HWEB_compress(int k);

/* Pending-tag requests; each pair queues an opening or a closing tag. */
void HWEB_sendClear(void);
void HWEB_sendP(void); void HWEB_send_P(void);
void HWEB_sendEM(void); void HWEB_send_EM(void);
void HWEB_sendI(void); void HWEB_send_I(void);
void HWEB_sendTT(void); void HWEB_send_TT(void);


/* Stream that receives the generated HTML; set by HWEB_attach and reset
   to NULL by HWEB_release. */
static FILE *HWEB_outfile = NULL;
/* Growable buffer holding the HWEB text collected by HWEB_process;
   NULL while no text is pending. */
static char *HWEB_inbuffer = NULL;
/* Number of characters currently stored in HWEB_inbuffer. */
static size_t HWEB_inbufferlen = 0;
/* Number of characters HWEB_inbuffer has room for. */
static size_t HWEB_inbuffercap = 0;

/*
 * Remember fp as the output stream and write the opening HTML of the
 * page (title and top-level heading) to it.
 */
void HWEB_attach(FILE *fp)
{
    static const char *const preamble[] = {
        "<html><head><title>",
        "\nDocumentation autogenerated by HWEB\n",
        "</title></head><body><h1>",
        "\nDocumentation autogenerated by HWEB\n</h1>",
    };
    size_t piece;

    HWEB_outfile = fp;
    for (piece = 0; piece < sizeof preamble / sizeof preamble[0]; ++piece)
        HWEB_puts_(preamble[piece]);
}


/*
 * Write the closing credit line and the end of the HTML page to the
 * attached output stream, then detach from it.
 *
 * fp is accepted for symmetry with HWEB_attach but is not used; output
 * goes to the stream given to HWEB_attach.  Always returns 0.
 */
int HWEB_release(FILE *fp)
{
    (void)fp;
    HWEB_puts_("<small><tt>This documentation was generated by HWEB, ");
    /* Close the elements in the reverse of the order they were opened. */
    HWEB_puts_("version " HWEB_VERSION ", by Arthur O'Dwyer.</tt></small>\n");
    HWEB_puts_("</body></html>\n");
    HWEB_outfile = NULL;
    return 0;
}

A very important note: HWEB_process may modify the value of HWEB_inbuffer via realloc, thus invalidating all pointers into that array.


/*
 * Append the character k to the pending HWEB text, growing the buffer
 * first when it is full.  Returns 0 on success, or -1 if the buffer
 * could not be grown (in which case nothing is stored).
 */
int HWEB_process(int k)
{
    const int full = (HWEB_inbufferlen >= HWEB_inbuffercap);

    if (full && HWEB_increase() != 0)
        return -1;

    HWEB_inbuffer[HWEB_inbufferlen] = k;
    HWEB_inbufferlen += 1;
    return 0;
}


/*
 * Enlarge the input buffer to one and a half times its capacity plus
 * 100 characters.  Returns 0 on success; returns -1, leaving the buffer
 * untouched, if the new capacity is not larger than the old one or if
 * realloc fails.
 */
int HWEB_increase(void)
{
    const size_t old_cap = HWEB_inbuffercap;
    const size_t grown = 3*old_cap/2 + 100;
    char *bigger;

    if (grown <= old_cap)
        return -1;

    bigger = realloc(HWEB_inbuffer, grown);
    if (bigger == NULL)
        return -1;

    HWEB_inbuffer = bigger;
    HWEB_inbuffercap = grown;
    return 0;
}


/*
 * Finish the current block of HWEB text: parse and emit whatever has
 * been collected by HWEB_process, then discard it.
 *
 * paraAllowed is stored in the global ParaAllowed before parsing.
 * Returns 0 if there was no pending text or the text was emitted, and
 * -1 if parsing failed.
 */
int HWEB_break(int paraAllowed)
{
    int rc;

    if (HWEB_inbuffer == NULL) return 0;
    ParaAllowed = paraAllowed;
    rc = HWEB_parse_and_dump() ? -1 : 0;

    /* Discard the text even when parsing failed.  Otherwise the failed
       text (now containing a terminator) stays at the front of the
       buffer and is parsed again on every later call. */
    free(HWEB_inbuffer);
    HWEB_inbuffer = NULL;
    HWEB_inbufferlen = 0;
    HWEB_inbuffercap = 0;
    return rc;
}

We have three "competing" modes in HWEB documentation: pipe-mode, italics-mode, and math-mode. They can "nest" as follows: italics can contain math, and italics can contain piped text. However, piped text and mathematical formulas are distinct classes from italics and regular text; they cannot contain italics, nor can they contain each other.

Italics mode is triggered by the slash delimiter followed by a non-whitespace character. This slash: / does not fit the bill. /This slash/ does. Italics mode is ended by the first non-escaped slash seen (outside any inner, nested mode environments, that is).

Piped mode is triggered by the pipe delimiter followed by a non-whitespace character. This pipe: | does not fit the bill. |This pipe| does. Piped mode is ended by the first non-escaped pipe.

Math mode is triggered by the dollar-sign delimiter followed by either a non-whitespace character or a (possibly empty) sequence of whitespace characters terminated by a newline. The former case introduces a mathematical formula on one line: $x+y$. This mode is ended by the first non-escaped dollar sign.

The latter case, in which the dollar sign is followed by a newline, introduces a math mode environment in which newlines are respected:

x+y
y+z
z+w

It is terminated by a dollar sign at the beginning of a line. Any dollar sign encountered in this mode that is not at the beginning of a line, is an error and will be flagged accordingly.

A completely blank line, or one containing only whitespace, ends the current paragraph and begins a new one. No mode except newline-respecting math mode may span more than one paragraph; if a blank line is found inside an italics, piped or math environment, that is flagged as an error.


int HWEB_parse_and_dump(void)
{
    int ItalMode = 0;
    int PipeMode = 0;
    int MathMode = 0;
    int NmathMode = 0;
    int BlankLineSoFar = 1;
    int end;
    int begin = 0;
    const char *s;

/* Null-terminate the HWEB text for easier manipulation. */

    if (HWEB_process('\0'))
      return -1;

    HWEB_sendClear();
    HWEB_sendP();

    s = HWEB_inbuffer;

    for (end=0; s[end]; BlankLineSoFar = 0)
    {

Our algorithm is to go down the line until we find some "special" situation (a new environment or a new paragraph). Then we output the whole chunk according to our flags, by calling the appropriate parsing function if necessary (e.g. to handle sub_scripts in math mode).

The flag BlankLineSoFar is set whenever we are positioned at the first nonblank character of this line. Thus it is reset to 0 at the bottom of this loop, and reset to 1 whenever we hit a newline.


        if (s[end] == '\n') {
            do { ++end; } while (isspace(s[end]) && s[end] != '\n');
            if (s[end] != '\n') {
                BlankLineSoFar = 1;
                if (s[end] == '\0') break;
            }
            else
            {

A completely blank line indicates a paragraph break. This is a special event.


                if (NmathMode) {

/* don't do anything special */

                }
                else if (ItalMode || PipeMode || MathMode) {
                    do_warn("Paragraph breaks not allowed in non-plain modes,"
                        " line %d", lineno);
                }
                else {
                    HWEB_dumpPlain(s, begin, end);
                    HWEB_send_P(); HWEB_sendP();
                    begin = ++end;
                    continue;
                }
            }
        }

We have something printable. Is it a special character indicating a new environment? Or — first — is it a backslash-escaped character?


        if (s[end] == '\\') {
            if (ItalMode || MathMode || NmathMode)
              end += 2;
            else if (PipeMode && s[end+1] == '|')
              end += 2;
            else ++end;
            continue;
        }
        else if (s[end] == '/') {
            if (PipeMode || MathMode || NmathMode) {
                ++end; continue;
            }
            else if (ItalMode && !isspace(s[end-1]) &&
                strchr(" \n\t'\".,:;?!-()[]{}", s[end+1]))
            {
                HWEB_dumpItal(s, begin, end);
                HWEB_send_I();
                ItalMode = 0;
                begin = ++end;
            }
            else if (!ItalMode && !isspace(s[end+1]) &&
                (!end || strchr(" \n\t-([{", s[end-1])))
            {
                HWEB_dumpPlain(s, begin, end);
                HWEB_sendI();
                ItalMode = 1;
                begin = ++end;
            }
            else ++end;
            continue;
        }
        else if (s[end] == '|') {
            if (PipeMode) {
                if (!isspace(s[end-1])) {
                    HWEB_dumpPipe(s, begin, end);
                    PipeMode = 0;
                    begin = ++end;
                }
            }
            else if (!MathMode && !NmathMode && !isspace(s[end+1])) {
                if (ItalMode) HWEB_dumpItal(s, begin, end);
                else HWEB_dumpPlain(s, begin, end);
                PipeMode = 1;
                begin = ++end;
            }
            else ++end;
            continue;
        }
        else if (s[end] == '$') {
            if (MathMode) {
                int rc = HWEB_dumpMath(s, begin, end);
                if (rc) return rc;
                MathMode = 0;
                if (ItalMode) HWEB_sendI();
                begin = ++end;
            }
            else if (NmathMode) {
                int rc;

Unescaped dollar signs in the interior of a multi-line formula are a sign of a mistake. Flag them.


                if (!BlankLineSoFar)
                  do_warn("Unescaped $ inside multiline formula needs to be"
                      " on a line by itself, line %d", lineno);
                if ((rc = HWEB_dumpNmath(s, begin, end)))
                  return rc;
                NmathMode = 0;
                if (ItalMode) HWEB_sendI();
                begin = ++end;
            }
            else if (!PipeMode) {
                int i;
                if (ItalMode) {
                    HWEB_dumpItal(s, begin, end);
                    HWEB_send_I();
                }
                else HWEB_dumpPlain(s, begin, end);

A dollar sign at the end of a line indicates the beginning of what we are calling NmathMode. So check whether there is nothing but whitespace before the next newline.


                i = end+1;
                while (isspace(s[i])) {
                    if (s[i] == '\n') break;
                    ++i;
                }
                if (s[i] == '\n') {

/* Discard that whitespace and newline! */

                    end = i;
                    NmathMode = 1;
                }
                else MathMode = 1;
                begin = ++end;
            }
            else ++end;
            continue;
        }
        else {

/* It's just a regular old character */

            ++end;
        }
    }
    if (ItalMode || PipeMode || MathMode || NmathMode)
      return -6;
    HWEB_dumpPlain(s, begin, end);
    HWEB_send_P();
    HWEB_puts(0);
    return 0;
}


void HWEB_dumpPlain(const char *s, int begin, int end)
{
    int i;
    for (i=begin; i < end; ++i)
    {
        if (s[i] == '\\') HWEB_putc(s[++i]);
        else if (!strncmp(&s[i], "---", 3)) {
            if (OptimizeUseDashes) HWEB_puts("&mdash;");
            else HWEB_puts("---");
            i += 2;
        }
        else if (!strncmp(&s[i], "--", 2)) {
            if (OptimizeUseDashes) HWEB_puts("&ndash;");
            else HWEB_puts("-");
            i += 1;
        }
        else if (!strncmp(&s[i], "(C)", 3) || !strncmp(&s[i], "(c)", 3)) {
            HWEB_puts("&#169;");
            i += 2;
        }
        else if (s[i] == '&') HWEB_puts("&amp;");
        else if (s[i] == '<') HWEB_puts("&lt;");
        else if (s[i] == '>') HWEB_puts("&gt;");
        else if (!strncmp(s+i, "http://", 7) ||
                 !strncmp(s+i, "https://", 8) ||
                 !strncmp(s+i, "ftp://", 6) ||
                 !strncmp(s+i, "ftps://", 7) ||
                 !strncmp(s+i, "gopher://", 9) ||
                 !strncmp(s+i, "file://", 7) ||
                 !strncmp(s+i, "nntp://", 7) ||
                 !strncmp(s+i, "mailto://", 9) ||
                 !strncmp(s+i, "news://", 7)) {

We give special treatment to URLs in plain text, turning them into hyperlinked copies of themselves. We assume that a link will never end in a mark of English punctuation.

Note that URL characters in the href part of the tag are output using the raw HWEB_compress output routine, rather than the "cooked" HWEB_putc routine.


            int origend = (strstr(s+i, "//") - s) + 2;
            int urlend = origend;
            while (urlend <= end && !isspace(s[urlend])) ++urlend;
            while (strchr(".,:;'\"", s[urlend-1])) --urlend;
            if (urlend > origend) {
                int j;
                HWEB_puts(NULL);
                HWEB_puts_("<a href=\"");
                for (j=i; j < urlend; ++j) HWEB_compress(s[j]);
                HWEB_puts_("\">");
                for (j=i; j < urlend; ++j) HWEB_putc(s[j]);
                HWEB_puts_("</a>");
                i = urlend-1;
            }
            else {
                HWEB_putc(s[i]);
            }
        }
        else if (isupper(s[i]) &&
                 (i==0 || strchr("\"'- \n\t", s[i-1])))
        {

We give special treatment to names that are in all upper case in documentation: we make them small-caps. This is the international practice in publication; for example, Donald KNUTH, CHOW Yun-Fat. Some false positives, e.g. on UNIX, aren't too awful-looking, either! And note that other false positives can be escaped, e.g. compare "MS-DOS" and "MS-DOS". A special case is "T_EX"/"T_EX", which gets smallcaps on the "E" but not on the "X".


            int is_really_a_name;
            int scend = i+1;
            if ((!strncmp(s+i, "TEX", 3) || !strncmp(s+i, "TeX", 3)) &&
                !isalnum(s[i+3]))
            {

Note that Mozilla Firefox handles subscripts beautifully, but some other browsers do not. We have a special case for the former, and a fallback case for the latter.


                if (OptimizeMozillaSubscripts)
                  HWEB_puts("T<big><sub>E</sub></big>X");
                else
                  HWEB_puts("T<small>E</small>X");
                i += 2;
                continue;
            }

Note the procedure for determining if a string is really a name, or just an acronym or some such. It must be at least two letters long, and be a complete word, and contain at least one vowel or nasal ("N"). Thus we have a true positive on "Ana NG," true negatives on "POP3" and "HTML," and false positives on "NASA" and "The XYZ Affair."

We could do more to prevent false positives, but note that the programmer can still backslash-escape anything he really needs.


            while (isupper(s[scend])) ++scend;
            is_really_a_name = (scend-i > 1) && !isalnum(s[scend]);
            if (is_really_a_name) {
                int j;
                for (j = i; j < scend; ++j)
                  if (strchr("AEINOUY", s[j])) break;
                is_really_a_name = (j < scend);
            }

            if (is_really_a_name) {
                HWEB_putc(s[i]);
                HWEB_puts("<small>");
                while (++i < scend) HWEB_putc(s[i]);
                --i;
                HWEB_puts("</small>");
            }
            else {
                HWEB_putc(s[i]);
            }
        }
        else HWEB_putc(s[i]);
    }
}

/*
 * Emit the italic chunk s[begin..end).  Italic text gets exactly the
 * same character-level treatment as plain text, so this simply forwards
 * to HWEB_dumpPlain; the surrounding italics tags are requested by the
 * caller.
 */
void HWEB_dumpItal(const char *s, int begin, int end)
{
    HWEB_dumpPlain(s, begin, end);
}

/*
 * Emit the piped chunk s[begin..end) in typewriter type.  Only &, < and
 * > are escaped for HTML.  A backslash is dropped when it escapes a
 * pipe and is printed literally everywhere else.
 */
void HWEB_dumpPipe(const char *s, int begin, int end)
{
    int pos = begin;

    HWEB_sendTT();
    while (pos < end)
    {
        const char ch = s[pos];

        switch (ch) {
        case '\\':
            if (s[pos+1] == '|') {
                /* Escaped pipe: skip the backslash, print the pipe. */
                ++pos;
                HWEB_putc(s[pos]);
            }
            else {
                HWEB_putc('\\');
            }
            break;
        case '&':
            HWEB_puts("&amp;");
            break;
        case '<':
            HWEB_puts("&lt;");
            break;
        case '>':
            HWEB_puts("&gt;");
            break;
        default:
            HWEB_putc(ch);
            break;
        }
        ++pos;
    }
    HWEB_send_TT();
}

The treatment of mathematical formulas is taken care of in the routines described in file hweavm.c.


/*
 * Emit the one-line formula s[begin..end) through the math routines.
 * The formula is parsed, dumped as HTML and released; the result is 0
 * when the value reported by parseMathLine equals the length of the
 * formula and -1 otherwise.
 * NOTE(review): the reported value is presumably the number of
 * characters parsed; confirm against hweavm.c.
 */
int HWEB_dumpMath(const char *s, int begin, int end)
{
    const int length = end - begin;
    int consumed;
    MathNode *formula = parseMathLine(s + begin, length, &consumed);

    dumpMathHTML(&formula);
    killMathNode(formula);

    return (consumed == length) ? 0 : -1;
}


/*
 * Emit the multi-line formula s[begin..end) inside a <blockquote>.
 *
 * Each newline-terminated line is parsed and dumped on its own, with
 * leading whitespace skipped; an empty line becomes <br>.  Text after
 * the last newline is not emitted.
 *
 * Returns 0 on success, or -1 as soon as parseMathLine reports a value
 * different from the length of the line it was given.  The blockquote
 * is closed in either case so that the emitted HTML stays balanced.
 */
int HWEB_dumpNmath(const char *s, int begin, int end)
{
    int i, j;
    int rc = 0;

    HWEB_puts("<blockquote>");
    /* i marks the start of the current line's content, j scans ahead. */
    for (i=j=begin; j < end; )
    {
        if (s[j] == '\n') {
            if (i < j) {
                int tmp;
                MathNode *tree = parseMathLine(s+i, j-i, &tmp);
                dumpMathTable(&tree);
                killMathNode(tree);
                if (tmp != j-i) {
                    rc = -1;
                    break;
                }
            }
            else {
                HWEB_puts("<br>");
            }
            i = ++j;
        }
        else if (isspace((unsigned char)s[i])) i = ++j;
        else ++j;
    }
    HWEB_puts("</blockquote>");
    return rc;
}


/*
 * Tags waiting to be written before the next piece of output.
 * Opening tags: 0x02 <p>, 0x08 <em>, 0x20 <i>, 0x80 <tt>.
 * Closing tags: 0x01 </p>, 0x04 </em>, 0x10 </i>, 0x40 </tt>.
 */
static unsigned HWEB_flags = 0;
/* Forget every pending tag. */
void HWEB_sendClear() { HWEB_flags = 0; }
/* Queue an opening <p>, provided paragraphs are allowed. */
void HWEB_sendP()
{
    if (!ParaAllowed)
        return;
    HWEB_flags |= 0x02;
}
/*
 * Request a closing </p>, provided paragraphs are allowed.  A <p> that
 * is still pending is simply cancelled instead.
 */
void HWEB_send_P()
{
    if (!ParaAllowed)
        return;

    if ((HWEB_flags & 0x02) != 0)
        HWEB_flags &= ~0x02u;
    else
        HWEB_flags |= 0x01;
}
/* Request an opening <em>; a pending </em> is cancelled instead. */
void HWEB_sendEM()
{
    const int close_pending = (HWEB_flags & 0x04) != 0;

    if (close_pending)
        HWEB_flags &= ~0x04u;
    else
        HWEB_flags |= 0x08;
}
/* Request a closing </em>; a pending <em> is cancelled instead. */
void HWEB_send_EM()
{
    const int open_pending = (HWEB_flags & 0x08) != 0;

    if (open_pending)
        HWEB_flags &= ~0x08u;
    else
        HWEB_flags |= 0x04;
}
/* Request an opening <i>; a pending </i> is cancelled instead. */
void HWEB_sendI()
{
    const int close_pending = (HWEB_flags & 0x10) != 0;

    if (close_pending)
        HWEB_flags &= ~0x10u;
    else
        HWEB_flags |= 0x20;
}
/* Request a closing </i>; a pending <i> is cancelled instead. */
void HWEB_send_I()
{
    const int open_pending = (HWEB_flags & 0x20) != 0;

    if (open_pending)
        HWEB_flags &= ~0x20u;
    else
        HWEB_flags |= 0x10;
}
/* Request an opening <tt>; a pending </tt> is cancelled instead. */
void HWEB_sendTT()
{
    const int close_pending = (HWEB_flags & 0x40) != 0;

    if (close_pending)
        HWEB_flags &= ~0x40u;
    else
        HWEB_flags |= 0x80;
}
/* Request a closing </tt>; a pending <tt> is cancelled instead. */
void HWEB_send_TT()
{
    const int open_pending = (HWEB_flags & 0x80) != 0;

    if (open_pending)
        HWEB_flags &= ~0x80u;
    else
        HWEB_flags |= 0x40;
}
/*
 * Write every pending opening tag, outermost first (<p>, <em>, <i>,
 * <tt>), and mark them all as no longer pending.
 */
void HWEB_sendPutOn()
{
    static const struct { unsigned bit; const char *tag; } opening[] = {
        { 0x02, "<p>" },
        { 0x08, "<em>" },
        { 0x20, "<i>" },
        { 0x80, "<tt>" },
    };
    const unsigned pending = HWEB_flags;
    unsigned n;

    /* Clear the bits before writing anything. */
    HWEB_flags &= ~0xAA;
    for (n = 0; n < sizeof opening / sizeof opening[0]; ++n) {
        if (pending & opening[n].bit)
            HWEB_puts_(opening[n].tag);
    }
}
/*
 * Write every pending closing tag, innermost first (</tt>, </i>, </em>,
 * </p>), and mark them all as no longer pending.
 */
void HWEB_sendPutOff()
{
    static const struct { unsigned bit; const char *tag; } closing[] = {
        { 0x40, "</tt>" },
        { 0x10, "</i>" },
        { 0x04, "</em>" },
        { 0x01, "</p>" },
    };
    const unsigned pending = HWEB_flags;
    unsigned n;

    /* Clear the bits before writing anything. */
    HWEB_flags &= ~0x55;
    for (n = 0; n < sizeof closing / sizeof closing[0]; ++n) {
        if (pending & closing[n].bit)
            HWEB_puts_(closing[n].tag);
    }
}

/*
 * Write the string s one character at a time through HWEB_compress,
 * without touching the pending-tag state.
 */
void HWEB_puts_(const char *s)
{
    const char *p;

    for (p = s; *p != '\0'; ++p)
        HWEB_compress(*p);
}

The output routines are a little complicated. Essentially, each routine takes care of a particular "layer" of the output transaction. HWEB_putc is a convenient wrapper around HWEB_puts. The latter takes care of a few trimming tasks such as whitespace compression and recognition of HTML elements such as <br>. Then it splits the passed string back into individual characters to hand them to HWEB_compress. HWEB_compress handles the packing of tokens into 70-character lines. And HWEB_puts_ is just like HWEB_puts (with no trailing underscore) except that it bypasses the calls to HWEB_sendPutOff and HWEB_sendPutOn, so it's just a convenient wrapper around HWEB_compress.



/* Write the single character c by passing it to HWEB_puts as a
   one-character string. */
void HWEB_putc(int c)
{
    char text[2] = { (char)c, '\0' };

    HWEB_puts(text);
}

/*
 * Write the string s as "cooked" output.
 *
 * Leading whitespace in s is not written at once; it is remembered and
 * collapsed into a single space in front of the next text.  A "<br>" is
 * likewise remembered rather than written.  Before any text goes out,
 * pending closing tags, the remembered break or space, and pending
 * opening tags are written, in that order.
 *
 * Passing NULL writes whatever is pending without adding any text.
 */
void HWEB_puts(const char *s)
{
    static int space=0;
    static int br=0;

    if (s != NULL) {
        /* Cast before isspace: a negative char value is not a valid
           argument to the <ctype.h> functions. */
        while (*s && isspace((unsigned char)*s)) space=1, ++s;
        if (steq(s, "<br>")) br=1, s="";
        else if (steq(s, "<blockquote>")) space=0;
        else if (steq(s, "</blockquote>")) space=0, br=0;
        if (*s == '\0') return;
    }

    /*
     * If s is NULL, then we've reached the end of the output and we
     * just have to flush it all to the output stream, including
     * end-of-paragraph tags and suchlike.
     */
    HWEB_sendPutOff();
    if (br) HWEB_puts_("<br>"), space=0, br=0;
    else if (space) HWEB_compress(' '), space=0;
    HWEB_sendPutOn();

    if (s != NULL)
      while (*s) HWEB_compress(*s++);
}

HWEB_compress takes individual characters and packs them into lines of roughly 70 characters. Once a line is more than 65 characters long, it is broken at the next whitespace character, or immediately before the next '>'.


/*
 * Write the character k to the output stream, breaking long lines.
 *
 * Once the current line holds more than 65 characters, a whitespace
 * character is replaced by a newline, and a '>' is written at the start
 * of a fresh line.  Any other character is written unchanged, however
 * long the line already is.
 */
void HWEB_compress(int k)
{
    static int linelen=0;

    if (linelen > 65) {
        /* k may come from a plain char and be negative; convert it
           before calling isspace. */
        if (isspace((unsigned char)k)) {
            putc('\n', HWEB_outfile);
            linelen = 0;
            return;
        }
        if (k == '>') {
            fputs("\n>", HWEB_outfile);
            linelen = 1;
            return;
        }
    }

    putc(k, HWEB_outfile);
    ++linelen;
    if (k == '\n') linelen = 0;
}

This documentation was generated by HWEB, version 1.1α, by Arthur O'Dwyer.