From 495545ef220a2f13fd45859b3faba9f3ba1b1987 Mon Sep 17 00:00:00 2001
From: Daniel Henry
Date: Fri, 22 Aug 2025 03:43:08 -0500
Subject: [PATCH] Finish Chapter 16

Implement basic parsing / scanner

Signed-off-by: Daniel Henry
---
 README.md          |  25 +---
 include/compiler.h |   8 ++
 include/scanner.h  |  67 +++++++++++
 include/vm.h       |   2 +-
 src/compiler.c     |  27 +++++
 src/main.c         | 104 ++++++++++++----
 src/scanner.c      | 290 +++++++++++++++++++++++++++++++++++++++++++++
 src/vm.c           |  10 +-
 8 files changed, 485 insertions(+), 48 deletions(-)
 create mode 100644 include/compiler.h
 create mode 100644 include/scanner.h
 create mode 100644 src/compiler.c
 create mode 100644 src/scanner.c

diff --git a/README.md b/README.md
index 192db60..ff3bf87 100644
--- a/README.md
+++ b/README.md
@@ -22,20 +22,16 @@ make debug
 make bear
 ```
 
-## Run (current behavior)
-
-The current `main` builds a tiny test chunk and disassembles it—there’s no REPL or script runner yet.
+## Usage
+Run a file
 
 ```sh
-./bin/clox
+clox [file]
 ```
 
-Example output:
-
-```
-== test chunk ==
-0000 123 OP_CONSTANT 0 '1.2'
-0002 | OP_RETURN
+Run the REPL
+```sh
+clox
 ```
 
 ## Layout
@@ -47,12 +43,3 @@ obj/ # build objects (generated)
 bin/ # executable output (generated)
 ```
 
-## Notes
-
-- Tracks the book’s **clox** implementation; commits may follow chapters.
-- `compile_commands.json` can be generated via `make bear` for editor tooling.
-
-## License
-
-Private / for learning.
-
diff --git a/include/compiler.h b/include/compiler.h
new file mode 100644
index 0000000..55cd189
--- /dev/null
+++ b/include/compiler.h
@@ -0,0 +1,8 @@
+#ifndef clox_compiler_h
+#define clox_compiler_h
+
+// Tokenizes `source` and prints every token to stdout, one per line,
+// stopping after TOKEN_EOF has been printed.
+void compile(const char *source);
+
+#endif /* clox_compiler_h */
diff --git a/include/scanner.h b/include/scanner.h
new file mode 100644
index 0000000..965949b
--- /dev/null
+++ b/include/scanner.h
@@ -0,0 +1,67 @@
+#ifndef clox_scanner_h
+#define clox_scanner_h
+
+// Kinds of token the scanner can produce.
+typedef enum {
+  // Single-character tokens.
+  TOKEN_LEFT_PAREN,
+  TOKEN_RIGHT_PAREN,
+  TOKEN_LEFT_BRACE,
+  TOKEN_RIGHT_BRACE,
+  TOKEN_COMMA,
+  TOKEN_DOT,
+  TOKEN_MINUS,
+  TOKEN_PLUS,
+  TOKEN_SEMICOLON,
+  TOKEN_SLASH,
+  TOKEN_STAR,
+  // One or two character tokens.
+  TOKEN_BANG,
+  TOKEN_BANG_EQUAL,
+  TOKEN_EQUAL,
+  TOKEN_EQUAL_EQUAL,
+  TOKEN_GREATER,
+  TOKEN_GREATER_EQUAL,
+  TOKEN_LESS,
+  TOKEN_LESS_EQUAL,
+  // Literals.
+  TOKEN_IDENTIFIER,
+  TOKEN_STRING,
+  TOKEN_NUMBER,
+  // Keywords.
+  TOKEN_AND,
+  TOKEN_CLASS,
+  TOKEN_ELSE,
+  TOKEN_FALSE,
+  TOKEN_FOR,
+  TOKEN_FUN,
+  TOKEN_IF,
+  TOKEN_NIL,
+  TOKEN_OR,
+  TOKEN_PRINT,
+  TOKEN_RETURN,
+  TOKEN_SUPER,
+  TOKEN_THIS,
+  TOKEN_TRUE,
+  TOKEN_VAR,
+  TOKEN_WHILE,
+
+  TOKEN_ERROR,
+  TOKEN_EOF
+} TokenType;
+
+// A scanned token. `start`/`length` delimit the lexeme inside the scanned
+// source; for TOKEN_ERROR they delimit the error message instead.
+typedef struct {
+  TokenType type;
+  const char *start;
+  int length;
+  int line;
+} Token;
+
+// Points the scanner at the beginning of `source` and resets the line to 1.
+void initScanner(const char *source);
+// Scans and returns the next token; yields TOKEN_EOF at end of input.
+Token scanToken();
+
+#endif /* clox_scanner_h */
diff --git a/include/vm.h b/include/vm.h
index 460cc1c..8147f33 100644
--- a/include/vm.h
+++ b/include/vm.h
@@ -21,7 +21,7 @@ typedef enum {
 
 void initVM();
 void freeVM();
-InterpretResult interpret(Chunk *chunk);
+InterpretResult interpret(const char *source);
 void push(Value value);
 Value pop();
 
diff --git a/src/compiler.c b/src/compiler.c
new file mode 100644
index 0000000..54f0353
--- /dev/null
+++ b/src/compiler.c
@@ -0,0 +1,27 @@
+#include "compiler.h"
+#include "common.h"
+#include "scanner.h"
+#include <stdio.h>
+
+// Tokenizes `source` and prints every token to stdout, one per line,
+// stopping after TOKEN_EOF has been printed.
+void compile(const char *source) {
+  initScanner(source);
+
+  int line = -1; // Sentinel: guarantees the first token prints its line.
+  for (;;) {
+    Token token = scanToken();
+    if (token.line != line) {
+      printf("%4d ", token.line);
+      line = token.line;
+    } else {
+      // Same line as the previous token; pad to the width of "%4d ".
+      printf("   | ");
+    }
+    printf("%2d '%.*s'\n", token.type, token.length, token.start);
+
+    if (token.type == TOKEN_EOF) {
+      break;
+    }
+  }
+}
diff --git a/src/main.c b/src/main.c
index f94ed66..bc1a394 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,34 +1,90 @@
-#include "chunk.h"
 #include "common.h"
-#include "debug.h"
 #include "vm.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+// Interactive prompt: interprets stdin one line at a time until fgets()
+// reports end of input or an error.
+static void repl() {
+  char line[1024];
+  for (;;) {
+    printf("> ");
+
+    if (!fgets(line, sizeof(line), stdin)) {
+      printf("\n");
+      break;
+    }
+
+    interpret(line);
+  }
+}
+
+// Reads the whole file at `path` into a NUL-terminated heap buffer that the
+// caller must free(). Exits with status 74 if the file cannot be opened,
+// sized, or read completely, or if the buffer cannot be allocated.
+static char *readFile(const char *path) {
+  FILE *file = fopen(path, "rb");
+  if (file == NULL) {
+    fprintf(stderr, "Could not open file \"%s\".\n", path);
+    exit(74);
+  }
+
+  // ftell() reports failure as -1, so validate the offset before it is
+  // converted to size_t and used as an allocation size.
+  long endOffset = -1L;
+  if (fseek(file, 0L, SEEK_END) == 0) {
+    endOffset = ftell(file);
+  }
+  if (endOffset < 0) {
+    fprintf(stderr, "Could not read file \"%s\".\n", path);
+    exit(74);
+  }
+  size_t fileSize = (size_t)endOffset;
+  rewind(file);
+
+  char *buffer = (char *)malloc(fileSize + 1);
+  if (buffer == NULL) {
+    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
+    exit(74);
+  }
+  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
+  if (bytesRead < fileSize) {
+    fprintf(stderr, "Could not read file \"%s\".\n", path);
+    exit(74);
+  }
+  buffer[bytesRead] = '\0';
+
+  fclose(file);
+  return buffer;
+}
+
+// Interprets the script at `path`, then exits with status 65 on a compile
+// error or 70 on a runtime error.
+static void runFile(const char *path) {
+  char *source = readFile(path);
+  InterpretResult result = interpret(source);
+  free(source);
+
+  if (result == INTERPRET_COMPILE_ERROR) {
+    exit(65);
+  }
+  if (result == INTERPRET_RUNTIME_ERROR) {
+    exit(70);
+  }
+}
 
 int main(int argc, const char *argv[]) {
   initVM();
 
-  Chunk chunk;
-  initChunk(&chunk);
-  int constant = addConstant(&chunk, 1.2);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
+  if (argc == 1) {
+    repl();
+  } else if (argc == 2) {
+    runFile(argv[1]);
+  } else {
+    fprintf(stderr, "Usage: clox [path]\n");
+    exit(64);
+  }
 
-  constant = addConstant(&chunk, 3.4);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
-
-  writeChunk(&chunk, OP_ADD, 123);
-
-  constant = addConstant(&chunk, 5.6);
-  writeChunk(&chunk, OP_CONSTANT, 123);
-  writeChunk(&chunk, constant, 123);
-
-  writeChunk(&chunk, OP_DIVIDE, 123);
-  writeChunk(&chunk, OP_NEGATE, 123);
-  writeChunk(&chunk, OP_RETURN, 123);
-
-  disassembleChunk(&chunk, "test chunk");
-  interpret(&chunk);
   freeVM();
-  freeChunk(&chunk);
   return 0;
 }
diff --git a/src/scanner.c b/src/scanner.c
new file mode 100644
index 0000000..e46c9c6
--- /dev/null
+++ b/src/scanner.c
@@ -0,0 +1,290 @@
+#include "scanner.h"
+#include "common.h"
+#include <stdio.h>
+#include <string.h>
+
+// Scanner state: `start` is the first character of the token being scanned,
+// `current` is the next character to consume, `line` is the current line.
+typedef struct {
+  const char *start;
+  const char *current;
+  int line;
+} Scanner;
+
+Scanner scanner;
+
+// Points the scanner at the beginning of `source` and resets the line to 1.
+void initScanner(const char *source) {
+  scanner.start = source;
+  scanner.current = source;
+  scanner.line = 1;
+}
+
+// ASCII letters and '_' are the characters accepted as identifier letters.
+static bool isAlpha(char c) {
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
+}
+
+static bool isDigit(char c) {
+  return c >= '0' && c <= '9';
+}
+
+// True once `current` sits on the NUL terminator of the source.
+static bool isAtEnd() {
+  return *scanner.current == '\0';
+}
+
+// Consumes the current character and returns it.
+static char advance() {
+  scanner.current++;
+  return scanner.current[-1];
+}
+
+// Returns the current character without consuming it.
+static char peek() {
+  return *scanner.current;
+}
+
+// Returns the character after the current one, or '\0' at end of input.
+static char peekNext() {
+  if (isAtEnd()) {
+    return '\0';
+  }
+  return scanner.current[1];
+}
+
+// Consumes the current character only when it equals `expected`.
+static bool match(char expected) {
+  if (isAtEnd()) {
+    return false;
+  }
+
+  if (*scanner.current != expected) {
+    return false;
+  }
+
+  scanner.current++;
+  return true;
+}
+
+// Builds a token of `type` whose lexeme runs from `start` up to `current`.
+static Token makeToken(TokenType type) {
+  Token token;
+  token.type = type;
+  token.start = scanner.start;
+  token.length = (int)(scanner.current - scanner.start);
+  token.line = scanner.line;
+  return token;
+}
+
+// Builds a TOKEN_ERROR whose lexeme is `message`. Only the pointer is
+// stored, so `message` must outlive the returned token.
+static Token errorToken(const char *message) {
+  Token token;
+  token.type = TOKEN_ERROR;
+  token.start = message;
+  token.length = (int)strlen(message);
+  token.line = scanner.line;
+  return token;
+}
+
+// Skips spaces, tabs, carriage returns, newlines (bumping the line counter)
+// and `//` comments. A comment's newline is left for the next iteration.
+static void skipWhitespace() {
+  for (;;) {
+    char c = peek();
+    switch (c) {
+    case ' ':
+    case '\r':
+    case '\t':
+      advance();
+      break;
+    case '\n':
+      scanner.line++;
+      advance();
+      break;
+    case '/':
+      if (peekNext() == '/') {
+        while (peek() != '\n' && !isAtEnd()) {
+          advance();
+        }
+      } else {
+        // A lone '/' is a token, not whitespace.
+        return;
+      }
+      break;
+    default:
+      return;
+    }
+  }
+}
+
+// Returns `type` when the current lexeme is exactly `start + length`
+// characters long and its tail, beginning at offset `start`, equals `rest`;
+// otherwise the lexeme is an ordinary identifier.
+static TokenType checkKeyword(int start, int length, const char *rest,
+                              TokenType type) {
+  if (scanner.current - scanner.start == start + length &&
+      memcmp(scanner.start + start, rest, length) == 0) {
+    return type;
+  }
+
+  return TOKEN_IDENTIFIER;
+}
+
+// Classifies the current lexeme as a keyword or TOKEN_IDENTIFIER by
+// branching on its leading character(s) and comparing the remainder.
+static TokenType identifierType() {
+  switch (scanner.start[0]) {
+  case 'a':
+    return checkKeyword(1, 2, "nd", TOKEN_AND);
+  case 'c':
+    return checkKeyword(1, 4, "lass", TOKEN_CLASS);
+  case 'e':
+    return checkKeyword(1, 3, "lse", TOKEN_ELSE);
+  case 'f':
+    if (scanner.current - scanner.start > 1) {
+      switch (scanner.start[1]) {
+      case 'a':
+        return checkKeyword(2, 3, "lse", TOKEN_FALSE);
+      case 'o':
+        return checkKeyword(2, 1, "r", TOKEN_FOR);
+      case 'u':
+        return checkKeyword(2, 1, "n", TOKEN_FUN);
+      }
+    }
+    break;
+  case 'i':
+    return checkKeyword(1, 1, "f", TOKEN_IF);
+  case 'n':
+    return checkKeyword(1, 2, "il", TOKEN_NIL);
+  case 'o':
+    return checkKeyword(1, 1, "r", TOKEN_OR);
+  case 'p':
+    return checkKeyword(1, 4, "rint", TOKEN_PRINT);
+  case 'r':
+    return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
+  case 's':
+    return checkKeyword(1, 4, "uper", TOKEN_SUPER);
+  case 't':
+    if (scanner.current - scanner.start > 1) {
+      switch (scanner.start[1]) {
+      case 'h':
+        return checkKeyword(2, 2, "is", TOKEN_THIS);
+      case 'r':
+        return checkKeyword(2, 2, "ue", TOKEN_TRUE);
+      }
+    }
+    break;
+  case 'v':
+    return checkKeyword(1, 2, "ar", TOKEN_VAR);
+  case 'w':
+    return checkKeyword(1, 4, "hile", TOKEN_WHILE);
+  }
+
+  return TOKEN_IDENTIFIER;
+}
+
+// Consumes the rest of an identifier or keyword and returns its token.
+static Token identifier() {
+  while (isAlpha(peek()) || isDigit(peek())) {
+    advance();
+  }
+
+  return makeToken(identifierType());
+}
+
+// Scans an integer or decimal literal; the first digit is already consumed.
+static Token number() {
+  while (isDigit(peek())) {
+    advance();
+  }
+
+  // A '.' belongs to the number only when a digit follows it.
+  if (peek() == '.' && isDigit(peekNext())) {
+    advance();
+
+    while (isDigit(peek())) {
+      advance();
+    }
+  }
+
+  return makeToken(TOKEN_NUMBER);
+}
+
+// Scans a string literal whose opening quote is already consumed. Newlines
+// inside the literal bump the line counter. Returns TOKEN_ERROR when the
+// input ends before the closing quote.
+static Token string() {
+  while (peek() != '"' && !isAtEnd()) {
+    if (peek() == '\n') {
+      scanner.line++;
+    }
+    advance();
+  }
+
+  if (isAtEnd()) {
+    return errorToken("Unterminated String.");
+  }
+
+  // Consume the closing quote so it is part of the lexeme.
+  advance();
+  return makeToken(TOKEN_STRING);
+}
+
+// Skips leading whitespace/comments, then scans and returns the next token:
+// TOKEN_EOF at end of input, TOKEN_ERROR for an unexpected character.
+Token scanToken() {
+  skipWhitespace();
+  scanner.start = scanner.current;
+  if (isAtEnd()) {
+    return makeToken(TOKEN_EOF);
+  }
+
+  char c = advance();
+
+  if (isAlpha(c)) {
+    return identifier();
+  }
+
+  if (isDigit(c)) {
+    return number();
+  }
+
+  switch (c) {
+  case '(':
+    return makeToken(TOKEN_LEFT_PAREN);
+  case ')':
+    return makeToken(TOKEN_RIGHT_PAREN);
+  case '{':
+    return makeToken(TOKEN_LEFT_BRACE);
+  case '}':
+    return makeToken(TOKEN_RIGHT_BRACE);
+  case ';':
+    return makeToken(TOKEN_SEMICOLON);
+  case ',':
+    return makeToken(TOKEN_COMMA);
+  case '.':
+    return makeToken(TOKEN_DOT);
+  case '-':
+    return makeToken(TOKEN_MINUS);
+  case '+':
+    return makeToken(TOKEN_PLUS);
+  case '/':
+    return makeToken(TOKEN_SLASH);
+  case '*':
+    return makeToken(TOKEN_STAR);
+  case '!':
+    return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
+  case '=':
+    return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
+  case '<':
+    return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
+  case '>':
+    return makeToken(match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
+  case '"':
+    return string();
+  }
+
+  return errorToken("Unexpected character.");
+}
diff --git a/src/vm.c b/src/vm.c
index 1c52d2f..3b69e84 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -1,6 +1,7 @@
 #include "vm.h"
 #include "chunk.h"
 #include "common.h"
+#include "compiler.h"
 #include "debug.h"
 #include "value.h"
 #include <stdio.h>
@@ -82,8 +83,9 @@ static InterpretResult run() {
 #undef BINARY_OP
 }
 
-InterpretResult interpret(Chunk *chunk) {
-  vm.chunk = chunk;
-  vm.ip = vm.chunk->code;
-  return run();
+// Hands `source` to compile(), which in this commit only prints its tokens,
+// and reports INTERPRET_OK unconditionally.
+InterpretResult interpret(const char *source) {
+  compile(source);
+  return INTERPRET_OK;
 }