Finish Chapter 16

Implement basic parsing / scanner

Signed-off-by: Daniel Henry <iamdanhenry@gmail.com>
This commit is contained in:
2025-08-22 03:43:08 -05:00
parent 6029f876ac
commit 495545ef22
8 changed files with 427 additions and 48 deletions

View File

@@ -22,20 +22,16 @@ make debug
make bear make bear
``` ```
## Run (current behavior) ## Usage
The current `main` builds a tiny test chunk and disassembles it—there's no REPL or script runner yet.
Run a file
```sh ```sh
./bin/clox clox [file]
``` ```
Example output: Run Repl
```sh
``` clox
== test chunk ==
0000 123 OP_CONSTANT 0 '1.2'
0002 | OP_RETURN
``` ```
## Layout ## Layout
@@ -47,12 +43,3 @@ obj/ # build objects (generated)
bin/ # executable output (generated) bin/ # executable output (generated)
``` ```
## Notes
- Tracks the book's **clox** implementation; commits may follow chapters.
- `compile_commands.json` can be generated via `make bear` for editor tooling.
## License
Private / for learning.

6
include/compiler.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef clox_compiler_h
#define clox_compiler_h
// Scans `source` (a NUL-terminated string) token by token and prints one line
// per token to stdout, stopping after the end-of-file token has been printed.
void compile(const char *source);
#endif /* clox_compiler_h */

62
include/scanner.h Normal file
View File

@@ -0,0 +1,62 @@
#ifndef clox_scanner_h
#define clox_scanner_h
// Kinds of token the scanner can produce.
// NOTE: compile() prints the numeric value of these enumerators, so reordering
// or inserting entries changes that output.
typedef enum {
// Single-character tokens.
TOKEN_LEFT_PAREN,
TOKEN_RIGHT_PAREN,
TOKEN_LEFT_BRACE,
TOKEN_RIGHT_BRACE,
TOKEN_COMMA,
TOKEN_DOT,
TOKEN_MINUS,
TOKEN_PLUS,
TOKEN_SEMICOLON,
TOKEN_SLASH,
TOKEN_STAR,
// One or two character tokens.
TOKEN_BANG,
TOKEN_BANG_EQUAL,
TOKEN_EQUAL,
TOKEN_EQUAL_EQUAL,
TOKEN_GREATER,
TOKEN_GREATER_EQUAL,
TOKEN_LESS,
TOKEN_LESS_EQUAL,
// Literals.
TOKEN_IDENTIFIER,
TOKEN_STRING,
TOKEN_NUMBER,
// Keywords.
TOKEN_AND,
TOKEN_CLASS,
TOKEN_ELSE,
TOKEN_FALSE,
TOKEN_FOR,
TOKEN_FUN,
TOKEN_IF,
TOKEN_NIL,
TOKEN_OR,
TOKEN_PRINT,
TOKEN_RETURN,
TOKEN_SUPER,
TOKEN_THIS,
TOKEN_TRUE,
TOKEN_VAR,
TOKEN_WHILE,
// Synthetic tokens: a scan error (the lexeme is the error message) and the
// end-of-input marker.
TOKEN_ERROR,
TOKEN_EOF
} TokenType;
// A single scanned token. The lexeme is described by (start, length) and is
// NOT NUL-terminated: for ordinary tokens `start` points into the source text
// handed to initScanner(); for TOKEN_ERROR it points at the error message.
typedef struct {
TokenType type;
const char *start; // first character of the lexeme (or of the error message)
int length;        // number of characters in the lexeme
int line;          // scanner line number recorded when the token was created
} Token;
// Resets the scanner to the beginning of `source` and sets the line counter
// to 1. The pointer is stored, not copied, so `source` must stay valid for as
// long as tokens are being scanned.
void initScanner(const char *source);
// Skips whitespace and `//` comments, then returns the next token. Returns a
// TOKEN_EOF token at the end of the source and a TOKEN_ERROR token for input
// it cannot scan.
Token scanToken();
#endif /* clox_scanner_h */

View File

@@ -21,7 +21,7 @@ typedef enum {
void initVM(); void initVM();
void freeVM(); void freeVM();
InterpretResult interpret(Chunk *chunk); InterpretResult interpret(const char *source);
void push(Value value); void push(Value value);
Value pop(); Value pop();

24
src/compiler.c Normal file
View File

@@ -0,0 +1,24 @@
#include "compiler.h"
#include "common.h"
#include "scanner.h"
#include <stdio.h>
// Temporary driver for the scanner: tokenizes `source` and dumps every token
// to stdout. The line number is printed only when it differs from the line of
// the previously printed token; otherwise a `|` marker is shown instead.
void compile(const char *source) {
  initScanner(source);

  int previousLine = -1; // no real token can be on line -1
  Token token;
  do {
    token = scanToken();

    if (token.line == previousLine) {
      printf(" | ");
    } else {
      printf("%4d ", token.line);
      previousLine = token.line;
    }

    // The lexeme is not NUL-terminated, hence the explicit precision.
    printf("%2d '%.*s'\n", token.type, token.length, token.start);
  } while (token.type != TOKEN_EOF); // the EOF token itself is printed too
}

View File

@@ -1,34 +1,74 @@
#include "chunk.h"
#include "common.h" #include "common.h"
#include "debug.h"
#include "vm.h" #include "vm.h"
#include <stdio.h>
#include <stdlib.h>
// Interactive prompt: reads one line at a time from stdin and hands it to
// interpret(). Ends (after printing a newline) when fgets() reports
// end-of-file or an error.
static void repl() {
  char input[1024];

  for (;;) {
    printf("> ");

    if (fgets(input, sizeof(input), stdin) == NULL) {
      printf("\n");
      return;
    }

    interpret(input);
  }
}
// Reads the whole file at `path` into a freshly allocated, NUL-terminated
// buffer and returns it. The caller owns the buffer and must free() it.
//
// On any failure (cannot open, cannot determine the size, out of memory,
// short read) an error is printed to stderr and the process exits with
// status 74.
static char *readFile(const char *path) {
  FILE *file = fopen(path, "rb");
  if (file == NULL) {
    fprintf(stderr, "Could not open file \"%s\".\n", path);
    exit(74);
  }

  // Determine the file size by seeking to the end. Both calls can fail
  // (e.g. on non-seekable streams); ftell() reports failure as -1L, which
  // must not be converted to size_t and used as an allocation size.
  long endOffset = -1L;
  if (fseek(file, 0L, SEEK_END) == 0) {
    endOffset = ftell(file);
  }
  if (endOffset < 0) {
    fprintf(stderr, "Could not read file \"%s\". \n", path);
    fclose(file);
    exit(74);
  }
  rewind(file);

  size_t fileSize = (size_t)endOffset;

  // One extra byte for the terminating NUL.
  char *buffer = malloc(fileSize + 1);
  if (buffer == NULL) {
    fprintf(stderr, "Not enough memory to read \"%s\".\n", path);
    fclose(file);
    exit(74);
  }

  size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
  if (bytesRead < fileSize) {
    fprintf(stderr, "Could not read file \"%s\". \n", path);
    free(buffer);
    fclose(file);
    exit(74);
  }

  buffer[bytesRead] = '\0';
  fclose(file);
  return buffer;
}
// Loads the script at `path`, interprets it, and releases the source buffer.
// Exits with status 65 on a compile error and 70 on a runtime error; returns
// normally otherwise.
static void runFile(const char *path) {
  char *source = readFile(path);
  const InterpretResult outcome = interpret(source);
  free(source);

  switch (outcome) {
  case INTERPRET_COMPILE_ERROR:
    exit(65);
  case INTERPRET_RUNTIME_ERROR:
    exit(70);
  default:
    return;
  }
}
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
initVM(); initVM();
Chunk chunk; if (argc == 1) {
initChunk(&chunk); repl();
int constant = addConstant(&chunk, 1.2); } else if (argc == 2) {
writeChunk(&chunk, OP_CONSTANT, 123); runFile(argv[1]);
writeChunk(&chunk, constant, 123); } else {
fprintf(stderr, "Usage: clox [path]\n");
exit(64);
}
constant = addConstant(&chunk, 3.4);
writeChunk(&chunk, OP_CONSTANT, 123);
writeChunk(&chunk, constant, 123);
writeChunk(&chunk, OP_ADD, 123);
constant = addConstant(&chunk, 5.6);
writeChunk(&chunk, OP_CONSTANT, 123);
writeChunk(&chunk, constant, 123);
writeChunk(&chunk, OP_DIVIDE, 123);
writeChunk(&chunk, OP_NEGATE, 123);
writeChunk(&chunk, OP_RETURN, 123);
disassembleChunk(&chunk, "test chunk");
interpret(&chunk);
freeVM(); freeVM();
freeChunk(&chunk);
return 0; return 0;
} }

260
src/scanner.c Normal file
View File

@@ -0,0 +1,260 @@
#include "scanner.h"
#include "common.h"
#include <stdio.h>
#include <string.h>
// Scanner state. All pointers refer to the source string passed to
// initScanner(); the scanner does not own or copy it.
typedef struct {
const char *start;   // first character of the token currently being scanned
const char *current; // next character to be consumed
int line;            // current line number; starts at 1, bumped on each '\n'
} Scanner;
// Single scanner instance used by every function in this file.
Scanner scanner;
void initScanner(const char *source) {
scanner.start = source;
scanner.current = source;
scanner.line = 1;
}
// True for ASCII letters and the underscore, i.e. the characters that may
// start an identifier.
static bool isAlpha(char c) {
  if (c == '_') {
    return true;
  }

  const bool isLower = 'a' <= c && c <= 'z';
  const bool isUpper = 'A' <= c && c <= 'Z';
  return isLower || isUpper;
}
// True for the ASCII decimal digits '0' through '9'.
static bool isDigit(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
// True once the scanner's cursor sits on the source string's terminating NUL.
static bool isAtEnd() {
  return scanner.current[0] == '\0';
}
// Consumes one character: returns the character under the cursor and moves
// the cursor one position forward.
static char advance() {
  return *scanner.current++;
}
// Returns the character under the cursor without consuming it.
static char peek() {
  return scanner.current[0];
}
// Returns the character one past the cursor without consuming anything, or
// '\0' when the cursor is already at the end (so we never read past the NUL).
static char peekNext() {
  return isAtEnd() ? '\0' : scanner.current[1];
}
// Conditional advance: if the character under the cursor equals `expected`,
// consumes it and returns true; otherwise leaves the cursor alone and returns
// false. Never matches at end of input.
static bool match(char expected) {
  if (isAtEnd() || *scanner.current != expected) {
    return false;
  }

  scanner.current++;
  return true;
}
// Builds a token of the given type whose lexeme spans from the scanner's
// `start` up to (not including) `current`, tagged with the current line.
static Token makeToken(TokenType type) {
  const Token token = {
      .type = type,
      .start = scanner.start,
      .length = (int)(scanner.current - scanner.start),
      .line = scanner.line,
  };
  return token;
}
static Token errorToken(const char *message) {
Token token;
token.type = TOKEN_ERROR;
token.start = message;
token.length = (int)strlen(message);
token.line = scanner.line;
return token;
}
// Advances the cursor past spaces, tabs, carriage returns, newlines (bumping
// the line counter for each) and `//` line comments. Stops at the first
// character that is none of these, including a lone '/'.
static void skipWhitespace() {
  for (;;) {
    const char next = peek();

    if (next == '\n') {
      scanner.line++;
      advance();
      continue;
    }

    if (next == ' ' || next == '\r' || next == '\t') {
      advance();
      continue;
    }

    if (next == '/' && peekNext() == '/') {
      // Line comment: drop everything up to, but not including, the newline
      // so the newline branch above still counts the line.
      while (!isAtEnd() && peek() != '\n') {
        advance();
      }
      continue;
    }

    return;
  }
}
// Tests whether the current lexeme, from offset `start` onward, is exactly
// the `length` characters in `rest`. Returns `type` when it is and
// TOKEN_IDENTIFIER otherwise.
static TokenType checkKeyword(int start, int length, const char *rest,
                              TokenType type) {
  const bool sameLength = (scanner.current - scanner.start) == start + length;
  if (!sameLength) {
    return TOKEN_IDENTIFIER;
  }

  return memcmp(scanner.start + start, rest, length) == 0 ? type
                                                          : TOKEN_IDENTIFIER;
}
static TokenType identifierType() {
switch (scanner.start[0]) {
case 'a':
return checkKeyword(1, 2, "nd", TOKEN_AND);
case 'c':
return checkKeyword(1, 4, "lass", TOKEN_CLASS);
case 'e':
return checkKeyword(1, 3, "lse", TOKEN_ELSE);
case 'f':
if (scanner.current - scanner.start > 1) {
switch (scanner.start[1]) {
case 'a':
return checkKeyword(2, 3, "lse", TOKEN_ELSE);
case 'o':
return checkKeyword(2, 1, "r", TOKEN_FOR);
case 'u':
return checkKeyword(2, 1, "n", TOKEN_FUN);
}
}
break;
case 'i':
return checkKeyword(1, 1, "f", TOKEN_IF);
case 'n':
return checkKeyword(1, 2, "il", TOKEN_NIL);
case 'o':
return checkKeyword(1, 1, "r", TOKEN_OR);
case 'p':
return checkKeyword(1, 4, "rint", TOKEN_PRINT);
case 'r':
return checkKeyword(1, 5, "eturn", TOKEN_RETURN);
case 's':
return checkKeyword(1, 4, "uper", TOKEN_SUPER);
case 't':
if (scanner.current - scanner.start > 1) {
switch (scanner.start[1]) {
case 'h':
return checkKeyword(2, 2, "is", TOKEN_THIS);
case 'r':
return checkKeyword(2, 2, "ue", TOKEN_TRUE);
}
}
break;
case 'v':
return checkKeyword(1, 2, "ar", TOKEN_VAR);
case 'w':
return checkKeyword(1, 4, "hile", TOKEN_WHILE);
}
return TOKEN_IDENTIFIER;
}
static Token identifier() {
while (isAlpha(peek()) || isDigit(peek())) {
advance();
}
return makeToken(identifierType());
}
// Scans the rest of a number literal (its first digit has already been
// consumed): more digits, optionally followed by a '.' and at least one
// digit. A '.' not followed by a digit is left unconsumed.
static Token number() {
  while (isDigit(peek())) {
    advance();
  }

  const bool hasFraction = peek() == '.' && isDigit(peekNext());
  if (hasFraction) {
    // First pass consumes the '.', the following passes the fraction digits.
    do {
      advance();
    } while (isDigit(peek()));
  }

  return makeToken(TOKEN_NUMBER);
}
// Scans the rest of a string literal (the opening '"' has already been
// consumed). Consumes characters up to and including the closing quote and
// returns a TOKEN_STRING whose lexeme includes both quotes. Newlines inside
// the literal are allowed and bump the line counter. If the input ends before
// a closing quote is found, returns a TOKEN_ERROR token instead.
static Token string() {
  while (peek() != '"' && !isAtEnd()) {
    if (peek() == '\n') {
      scanner.line++;
    }
    advance();
  }

  // The loop stops either on the closing quote or at end of input.
  if (isAtEnd()) {
    return errorToken("Unterminated String.");
  }

  advance(); // consume the closing quote
  return makeToken(TOKEN_STRING);
}
Token scanToken() {
skipWhitespace();
scanner.start = scanner.current;
if (isAtEnd()) {
return makeToken(TOKEN_EOF);
}
char c = advance();
if (isAlpha(c)) {
return identifier();
}
if (isDigit(c)) {
return number();
}
switch (c) {
case '(':
return makeToken(TOKEN_LEFT_PAREN);
case ')':
return makeToken(TOKEN_RIGHT_PAREN);
case '{':
return makeToken(TOKEN_LEFT_BRACE);
case '}':
return makeToken(TOKEN_RIGHT_BRACE);
case ';':
return makeToken(TOKEN_SEMICOLON);
case ',':
return makeToken(TOKEN_COMMA);
case '.':
return makeToken(TOKEN_DOT);
case '-':
return makeToken(TOKEN_MINUS);
case '+':
return makeToken(TOKEN_PLUS);
case '/':
return makeToken(TOKEN_SLASH);
case '*':
return makeToken(TOKEN_STAR);
case '!':
return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG);
case '=':
return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : TOKEN_EQUAL);
case '<':
return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS);
case '>':
return makeToken(match('=') ? TOKEN_GREATER_EQUAL : TOKEN_GREATER);
case '"':
return string();
}
return errorToken("Unexpected character.");
}

View File

@@ -1,6 +1,7 @@
#include "vm.h" #include "vm.h"
#include "chunk.h" #include "chunk.h"
#include "common.h" #include "common.h"
#include "compiler.h"
#include "debug.h" #include "debug.h"
#include "value.h" #include "value.h"
#include <stdio.h> #include <stdio.h>
@@ -82,8 +83,7 @@ static InterpretResult run() {
#undef BINARY_OP #undef BINARY_OP
} }
InterpretResult interpret(Chunk *chunk) { InterpretResult interpret(const char *source) {
vm.chunk = chunk; compile(source);
vm.ip = vm.chunk->code; return INTERPRET_OK;
return run();
} }