/* $Id: scan.c,v 1.3 1998/04/05 10:33:48 tonyg Exp $ */

#include "memory.h"
#include "buffer.h"
#include "scan.h"

#include "symbol.h"
#include "string.h"

#include <string.h>
#include <ctype.h>

/*
 * Create a scanner.
 *
 *   source - opaque pointer stored in the scanner record; this file never
 *            dereferences it (presumably it is for the callbacks' use).
 *   peek   - callback returning the current input character.
 *   drop   - callback discarding the current input character.
 *
 * The record comes from getmem() and is given a fresh token buffer from
 * newbuf(0).  Pair with killscanner().
 */
SCANSTATE newscanner(void *source,
                     char (*peek)(SCANSTATE),
                     void (*drop)(SCANSTATE)) {
    SCANSTATE scanner = getmem(sizeof(Scanstate));

    scanner->source = source;
    scanner->peek = peek;
    scanner->drop = drop;
    scanner->buf = newbuf(0);

    return scanner;
}

/*
 * Tear down a scanner created by newscanner(): its token buffer is handed
 * to killbuf() and then the scanner record itself to freemem().  The
 * `source` pointer stored in the record is not touched here.
 */
void killscanner(SCANSTATE state) {
    killbuf(state->buf);
    freemem(state);
}

/* Append `ch` to the scanner's token buffer via buf_append(). */
PRIVATE void scan_insert(SCANSTATE state, char ch) {
    buf_append(state->buf, ch);
}

/*
 * Finish the token being accumulated: append a terminating '\0', reset the
 * buffer's `pos` field to 0, and return the buffer's character storage.
 *
 * The returned pointer is the scanner's own storage, not a copy.
 * NOTE(review): presumably buf_append() writes at `pos`, so the reset makes
 * the next token overwrite this text -- confirm against the buffer module.
 */
PRIVATE char *scan_buf(SCANSTATE state) {
    scan_insert(state, '\0');
    state->buf->pos = 0;
    return state->buf->buf;
}

/*
 * Call the scanner's input callbacks on the scanner itself:
 *   scan_peek(st) - value of st->peek(st), the current input character.
 *   scan_drop(st) - calls st->drop(st) to discard the current character.
 *
 * The argument is parenthesised so that any pointer expression (a cast,
 * `p + 1`, ...) binds correctly.  It is still evaluated twice, so do not
 * pass an expression with side effects.
 */
#define scan_peek(st)   (((st)->peek)(st))
#define scan_drop(st)   (((st)->drop)(st))

/*
 * Move the current input character into the token buffer: it is appended
 * with scan_insert() and then discarded from the input with scan_drop().
 * Does nothing when the peeked character equals EOF.
 */
PRIVATE void scan_shift(SCANSTATE state) {
    int next = scan_peek(state);

    if (next != EOF) {
        scan_insert(state, next);
        scan_drop(state);
    }
}

/*
 * States of the lexer state machine in scan().
 */
/* Between tokens: skips whitespace and dispatches on the next character. */
#define START           0
/* Accumulating the characters of a symbol. */
#define ID              1
/* Accumulating the digits (and optional sign) of an integer. */
#define NUMBER          2
/* Inside a double-quoted string; the opening quote is already dropped. */
#define STRING          3
/* Just after a '#', which is already dropped. */
#define HASH            4
/* Skipping the rest of a line. */
#define COMMENT         5

/*
 * GO(st) switches the machine to state `st` and restarts the enclosing
 * loop.  It assigns a variable named `state` and uses `continue`, so it is
 * only usable inside a loop with such a variable in scope.  Call sites
 * follow it with a semicolon.
 */
#define GO(st)          { state = st; continue; }
/*
 * EMIT(val) returns `val` from the enclosing function.  The expansion
 * carries its own semicolon, so call sites do not add one.
 */
#define EMIT(val)       return val;

/*
 * Numeric value of `ch` when it is a hexadecimal digit (letters in either
 * case), or -1 when it is not.  Callers compare the result with the radix
 * in force to decide whether the character belongs to the number.
 */
static int scan_digit_value(char ch) {
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'a' && ch <= 'f')
        return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F')
        return ch - 'A' + 10;
    return -1;
}

/*
 * isspace() for a plain char.  <ctype.h> functions are only defined for
 * EOF and values representable as unsigned char, so the character is
 * converted first; EOF is never whitespace.
 */
static int scan_is_space(char ch) {
    return (ch != EOF) && isspace((unsigned char) ch);
}

/*
 * Read and return the next token from scanner `s`.
 *
 * Returns:
 *   - `undefined` at end of input (including end of input inside a comment);
 *   - a symbol (newsym) for each of ( ) ' ` , ,@ and #( and for identifiers;
 *   - MKNUM(n) for integers: optional leading '-', digits valid in the
 *     current radix (10 by default; #b #o #d #x select 2, 8, 10, 16);
 *   - a string (newstring) for "..." literals, ended by the closing quote,
 *     an unescaped newline, or end of input;
 *   - `true` / `false` for #t / #f.
 *
 * ';' and "#!" start a comment running to end of line.  An unrecognised
 * character after '#' is discarded and scanning restarts.
 *
 * A number ends at the first character that is not a digit of the current
 * radix; a '-' is a sign only when it is the first character of the
 * number.  So "5-3" yields 5 then -3, and "#b12" yields 1 then 2.
 *
 * NOTE(review): `num` is accumulated without an overflow check.
 * NOTE(review): a NUL character reaches the ID state, where strchr()
 * matches the terminator of the delimiter string, so an empty symbol is
 * emitted and the NUL is never consumed.
 */
OBJECT scan(SCANSTATE s) {
    int state = START;
    char ch;
    long num = 0;
    int radix = 10;
    int sign = 1;
    int seen_digit = 0;         /* nonzero once NUMBER has consumed a digit */
    int digit;

    while (1) {
        ch = scan_peek(s);

        if (ch == '\r')
            ch = '\n';          /* Treat a bare CR (MacOS line ending) as newline */

        switch (state) {
            case START:
                /* Every token starts from a clean numeric context. */
                radix = 10;
                num = 0;
                sign = 1;
                seen_digit = 0;

                if (scan_is_space(ch)) {
                    scan_drop(s);
                    GO(START);
                }

                if ((ch >= '0' && ch <= '9') || (ch == '-'))
                    GO(NUMBER);

                if (ch == EOF)
                    EMIT(undefined)

                switch (ch) {
                    case ';': scan_drop(s); GO(COMMENT);
                    case '"': scan_drop(s); GO(STRING);
                    case '#': scan_drop(s); GO(HASH);
                    case '(': scan_drop(s); EMIT(newsym("("))
                    case ')': scan_drop(s); EMIT(newsym(")"))
                    case '\'': scan_drop(s); EMIT(newsym("'"))
                    case '`': scan_drop(s); EMIT(newsym("`"))
                    case ',':
                        scan_drop(s);
                        if (scan_peek(s) == '@') {
                            scan_drop(s);
                            EMIT(newsym(",@"))
                        } else
                            EMIT(newsym(","))
                    default: GO(ID);
                }

            case ID:
                /* A symbol runs up to a delimiter, whitespace or EOF. */
                if ((strchr("();\"", ch) == NULL) && !scan_is_space(ch) &&
                    (ch != EOF)) {
                    scan_shift(s);
                    GO(ID);
                }

                EMIT(newsym(scan_buf(s)))

            case NUMBER:
                /*
                 * A '-' is a sign only as the very first character of the
                 * number.  Anywhere else it ends the number and is left in
                 * the input to start the next token.
                 */
                if (ch == '-' && sign == 1 && !seen_digit) {
                    scan_drop(s);

                    ch = scan_peek(s);
                    digit = scan_digit_value(ch);
                    if (digit < 0 || digit >= radix) {
                        /* Not a number after all: the '-' begins a symbol. */
                        scan_insert(s, '-');
                        GO(ID);
                    }

                    sign = -1;
                    GO(NUMBER);
                }

                /* Accept only digits that are valid in the current radix. */
                digit = scan_digit_value(ch);
                if (digit >= 0 && digit < radix) {
                    scan_drop(s);

                    num = num * radix + digit;
                    seen_digit = 1;

                    GO(NUMBER);
                }

                EMIT(MKNUM(sign * num))

            case STRING:
                if (ch == EOF)
                    EMIT(newstring(scan_buf(s)))

                if (ch == '"') {
                    scan_drop(s);
                    EMIT(newstring(scan_buf(s)))
                }

                if (ch == '\\') {
                    scan_drop(s);

                    /* Re-peek so the escaped character is seen raw (no CR mapping). */
                    ch = scan_peek(s);
                    scan_drop(s);

                    switch (ch) {
                        case 'r': scan_insert(s, '\r'); break;
                        case 'n': scan_insert(s, '\n'); break;
                        case 't': scan_insert(s, '\t'); break;
                        case 'b': scan_insert(s, '\b'); break;
                        case 'a': scan_insert(s, '\a'); break;
                        case '\r':      /* Backslash before an MS-DOS CRLF: swallow the LF too */
                            if (scan_peek(s) == '\n')
                                scan_drop(s);
                            break;
                        case '"':
                        case '\\': scan_insert(s, ch); break;
                        /* Any other escaped character is dropped, which makes
                           backslash-newline a line continuation. */
                    }
                } else
                    if (ch == '\n') {
                        /* String split across lines */
                        scan_drop(s);
                        EMIT(newstring(scan_buf(s)))
                    } else
                        scan_shift(s);

                GO(STRING);

            case HASH:
                /* Consume the character following '#', then dispatch on it. */
                scan_drop(s);

                switch (ch) {
                    case '!': GO(COMMENT);      /* Deal with #!/usr/local/bin/moof */
                    case 'b': radix = 2; GO(NUMBER);
                    case 'o': radix = 8; GO(NUMBER);
                    case 'd': radix = 10; GO(NUMBER);
                    case 'x': radix = 16; GO(NUMBER);
                    case 't': EMIT(true)
                    case 'f': EMIT(false)
                    case '(': EMIT(newsym("#("))
                    default: GO(START); /* Invalid hash prefix */
                }

            case COMMENT:
                if (ch == EOF)
                    EMIT(undefined)

                if (ch != '\n') {
                    scan_drop(s);
                    GO(COMMENT);
                }

                /* The newline itself is left for START to skip as whitespace. */
                GO(START);

            default:
                error("Invalid scanner state (%d)", state);
        }
    }

    return undefined;
}

