slc/engine/slc.h
2024-09-26 10:42:36 +02:00

414 lines
15 KiB
C

#ifndef SLC_H
#define SLC_H
#include<stdint.h> // uint8_t, uint32_t..
#include<string.h> // memcpy, strlen..
#include<stddef.h> // NULL
/* Session offset type: 64 bit unless the includer defines SLOFFS_T beforehand. */
#if !defined(SLOFFS_T)
#define SLOFFS_T uint64_t
#endif

/**
 * Locator of a word body: either a session offset or a raw pointer.
 * Both members share the same storage.
 */
typedef union word_body {
    SLOFFS_T offset;
    void *ptr;
} word_body;
/** The representations a word can be stored in. */
typedef enum SLC_WORDTYP {
    /** The word is still plain source text. */
    SLC_WORDTYP_TEXT = 0,

    /** Native code; use get_word_storage_offset to find what to run (relative pointer or array of pointers). */
    SLC_WORDTYP_NATIVE = 1,

    /** "Threaded code" (utf16-like word offsets) plus encoded parentheses, written IN-PLACE over the former text. */
    SLC_WORDTYP_THREADED_INLINE = 2,

    /** "Threaded code" that did not fit in-place and therefore lives in session storage; the word offset says where. */
    SLC_WORDTYP_THREADED_SESSION = 3
} SLC_WORDTYP;
/**
 * Computes how many bytes must be skipped from ptr to reach the next
 * address that is a multiple of alignment.
 *
 * @param ptr       The address to align (it is only inspected, never dereferenced).
 * @param alignment The required alignment in bytes. Values <= 1 need no padding.
 * @returns The padding in bytes, in the range [0, alignment - 1].
 */
static inline int get_padding(uint8_t *ptr, int alignment) {
    /* Nothing to align to (and guards the modulo below against division by zero). */
    if (alignment <= 1) {
        return 0;
    }
    /* uintptr_t is the portable integer type for inspecting an address. */
    const uintptr_t align = (uintptr_t)alignment;
    const uintptr_t misaligned_by = (uintptr_t)ptr % align;
    /* Return the byte COUNT to add, not the aligned address itself. */
    return (misaligned_by == 0) ? 0 : (int)(align - misaligned_by);
}
/**
* A word definition starts right after this. After processing it, we inline overwrite random parts of it in memory...
*
* Examples:
*
* #: just_code
* #dup
* #inc
* #swap
* #
*
* : with_vars @a; @b; @c;
* @a
* inc
* @a(.)
* ;
*
* #builtin: to_prefix
* #swap
* #dup
*
* ^^This built-in generates threaded code / text while processing in a way so that the
* to_prefix becomes prefixed with the "right" prefix and '#' exchanged with the current!
*
* ^^The above always needs starting '#builtin' at the definition and inside. That is exchanged to real prefix...
* This is used for implementing built-ins with FORTH-like code instead of native code (saves native interpreter space)
*
* #: structural (
* #parse_num
* #dup
* #inc
* #swap
* #print
* #print
* ) [ #parse_num #print ] { #parse_num #print }
* #
*/
/* Fixed header of a word definition; the variable slots and the body follow it in memory. */
typedef struct word {
    /** Tells how this word has to be interpreted (word_type() reads the type from it). */
    uint8_t flags;

    /** Unused filler that keeps the variable slots on a 4-byte boundary. */
    uint8_t reserved;

    /** Number of local variables of the word - may be zero. */
    uint16_t var_count;

    /* XXX: the name lives in the symbol table only, it is not stored here. */

    /**
     * First slot of the local-variable memory (conceptually "uint32_t vars[]"),
     * e.g. for "@a @b @c". The list can be empty!
     */
    uint32_t first_var;

    /*
     * What follows the variables - possibly overlapping first_var when
     * var_count == 0, possibly using it as padding - is EITHER
     *
     *     word_body processed_body;  where to find the body data (ptr or offset)
     *                                when it is not "inline_data[]"
     * OR
     *
     *     uint8_t inline_data[];     the "body": either text source (contains the
     *                                "ender") or inline threaded code (zero terminated)
     */
} word;
/** Gets the wordtyp from a flags field - see word */
static inline SLC_WORDTYP word_type(uint8_t flags) {
return (SLC_WORDTYP) (flags >> 6);
}
/** Returns the start of the given word's variable array, i.e. the address of its first_var slot. */
static inline uint32_t *word_vars(word *w) {
    uint32_t *first_slot = &w->first_var;
    return first_slot;
}
/**
 * Locates the "data" of the word - inline data and processed body share this address.
 *
 * The result is the address right behind the var_count variable slots, moved
 * forward by whatever get_padding() reports for an alignment of 8.
 */
static inline uint8_t *word_inline_data(word *w) {
    /* With var_count == 0 this is &first_var itself, so the data may overlap that slot. */
    uint8_t *cursor = (uint8_t *)(word_vars(w) + w->var_count);
    return cursor + get_padding(cursor, 8);
}
/**
 * Reads the processed body (pointer or offset) stored at the data location of the word.
 *
 * @param w The word whose processed body is wanted.
 * @returns A copy of the word_body found at word_inline_data(w).
 */
static inline word_body word_processed_body(word *w) {
    word_body body;
    /*
     * Copy the bytes instead of dereferencing a cast pointer: the data area is
     * otherwise handled as raw uint8_t, so memcpy keeps the read free of
     * aliasing and alignment assumptions.
     */
    memcpy(&body, word_inline_data(w), sizeof body);
    return body;
}
/** Operation selector for the symbol-table callback (see the sym function type). */
typedef enum SLC_SYM_OP {
    SLC_SYM_SET,
    SLC_SYM_GET,
    SLC_SYM_ERASE
} SLC_SYM_OP;
/** Operation selector for the stack callback (see the stack function type). */
typedef enum SLC_STACK_OP {
    SLC_STACK_PUSH,
    SLC_STACK_POP,
    SLC_STACK_AT,
    SLC_STACK_COUNT,
    SLC_STACK_ERASE
} SLC_STACK_OP;
/** Operation selector for the session-storage callback (see the session function type). */
typedef enum SLC_SESSION_OP {
    SLC_SESSION_ALLOC,
    SLC_SESSION_ERASE,
    SLC_SESSION_PUSH,
    SLC_SESSION_GET,
    SLC_SESSION_SET,
    SLC_SESSION_PROCESS
} SLC_SESSION_OP;
/** Operation selector for the io connector callback (see the ioconn function type). */
typedef enum SLC_IO_OP {
    SLC_IO_OPEN,
    SLC_IO_OPEN_TMP,
    SLC_IO_REMOVE_TMPS,
    SLC_IO_CLOSE,
    SLC_IO_READ,
    SLC_IO_WRITE,
    SLC_IO_LOCK,
    SLC_IO_UNLOCK,
    SLC_IO_CMD,
} SLC_IO_OP;
/**
 * What a symbol-table entry can point to: the memory of a variable
 * or a word definition. Both members share the same storage.
 */
typedef union symptr {
    uint32_t *varp;
    word *worp;
} symptr;
/**
 * Function-abstraction for a "symbol-table".
 *
 * Operations:
 *
 * SLC_SYM_SET   Saves a mapping from key->ptr in the symbol table. A NULL ptr removes the mapping.
 *               Returns ptr back, or NULL on errors.
 * SLC_SYM_GET   Gets the symbol at key (the ptr parameter is unused). Returns NULL if nothing is stored for the key.
 * SLC_SYM_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
 *
 * Rem.: On GET we return a word* in case the key belongs to a word and a regular uint32_t* if it is a variable name!
 *
 * @param op Defines which operation the caller wants.
 * @param key The key (both for SET and GET)
 * @param ptr When adding a found word/variable to the symbol table, the key will point to this word* or uint32_t*
 * @returns The word/var definition stored for the key, or NULL when it is not stored yet or op is SET and there was an error.
 */
typedef symptr (*sym)(SLC_SYM_OP op, char *key, symptr ptr);
/**
 * Function-abstraction for an integer "stack".
 *
 * Operations:
 *
 * SLC_STACK_PUSH  Pushes "param" onto the stack. Returns 1 if it succeeded, otherwise 0.
 * SLC_STACK_POP   Pops the stack - does not return a meaningful value, beware of underflowing!
 * SLC_STACK_AT    Returns the "param"th element down from the top of the stack.
 * SLC_STACK_COUNT Returns the number of elements in the stack.
 * SLC_STACK_ERASE Makes the stack empty. Basically as if you would POP it COUNT times.
 *
 * @param op Defines which operation the caller wants.
 * @param param On SLC_STACK_PUSH, this is the element to push onto the stack; in case of SLC_STACK_AT, it is the index.
 * @return The element at the given stack location in case of SLC_STACK_AT or the count in case of SLC_STACK_COUNT. Can show error!
 */
typedef uint32_t (*stack)(SLC_STACK_OP op, uint32_t param);
/**
 * Function-abstraction for a "session-storage".
 *
 * Operations:
 * SLC_SESSION_ALLOC   Allocates the amount of memory given in i and returns an accessor index - j unused.
 * SLC_SESSION_ERASE   Erases the session storage (all of it) - all parameters are unused.
 * SLC_SESSION_PUSH    Adds the given byte (value in i) to the end of the session storage (by growing it) - j unused.
 * SLC_SESSION_GET     Gets the byte at the ith accessor index - j unused.
 * SLC_SESSION_SET     Sets the byte at the ith accessor index to (byte)j.
 * SLC_SESSION_PROCESS Takes the last j bytes and moves them over the bytes at index i, then "shrinks" the storage by j.
 *
 * @param op Defines which operation the caller wants.
 * @param i The accessor index for SLC_SESSION_GET / SLC_SESSION_SET / SLC_SESSION_PROCESS, the byte to add for
 *          SLC_SESSION_PUSH, and the amount to allocate in case of SLC_SESSION_ALLOC.
 * @param j Used on SLC_SESSION_SET as the byte value and on SLC_SESSION_PROCESS as the amount of bytes to "process" / shrink.
 * @returns The accessor index in case of ALLOC (0xFFFFFFFF == -1 means error); on GET it returns the stored BYTE as uint32_t.
 */
typedef uint32_t (*session)(SLC_SESSION_OP op, uint32_t i, uint32_t j);
/** Result of an io connector call; which member is meaningful depends on the operation. */
typedef union iores {
    /** The terminal command result or the handle pointer. */
    const char *ptr;

    /** The character that was read. */
    char c;
} iores;
/**
 * Function-abstraction for io connectors.
 *
 * Operations:
 *
 * SLC_IO_OPEN        Opens a PERSISTENT file with the given name. Returns the handle pointer (or NULL on error).
 * SLC_IO_CLOSE       Closes a PERSISTENT file with the given handle.
 * SLC_IO_OPEN_TMP    Opens a TEMPORARY file with the given name. Returns the handle pointer.
 * SLC_IO_REMOVE_TMPS Removes the TEMPORARY file with the given handle.
 *                    NOTE(review): this entry used to be listed as SLC_IO_CLOSE_TMP, which is not a member of
 *                    SLC_IO_OP - confirm whether it removes one temporary file or all of them.
 * SLC_IO_READ        Reads a character from the given file handle. Returns pointer to the character that got read.
 * SLC_IO_WRITE       Writes a character to the given file handle. The 'param' points to the character to write (1 byte)
 * SLC_IO_LOCK        Locks the given file handle for exclusive reads and writes (others need to use lock/unlock too)
 * SLC_IO_UNLOCK      Unlocks the given file handle again (others need to use lock/unlock too)
 * SLC_IO_CMD         Runs the given command on the operating system. The 'param' is the command (+args) and returned is std output.
 *
 * @param op Defines which operation the caller wants.
 * @param param The name or temporary name or command or the handle pointer parameter depending on op.
 * @returns A handle pointer or pointer to character read / written or closed/unlocked handle (NULL on errors). Also cmd stdout.
 */
typedef iores (*ioconn)(SLC_IO_OP op, const char *param);
/**
 * Function-abstraction for reading the source code byte-by-byte.
 *
 * The callback takes no arguments and returns the next byte of the source.
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype in every C dialect and calls with stray arguments are rejected.
 */
typedef uint8_t (*coderead)(void);
/** States of the character-driven state machine inside slc(); stored as uint32_t. */
typedef enum slc_state : uint32_t {
    /** Initial state, before anything was recognized. */
    SLC_START,

    /** Inside a single-line comment. */
    SLC_COMMENT,

    /** Inside a multi-line comment. */
    SLC_MULTILINE_COMMENT,

    /** Name part of a word definition (after ':') - whitespace ends it. */
    SLC_DEF_NAME,

    /** Variable-listing part of a word definition - endline, '(', '[' or '{' ends it. */
    SLC_DEF_VAR,

    /** Raw body of a word definition - may contain local variable accesses + words, depth counted by vars. */
    SLC_DEF_BODY,

    /** Name part of a word "call" (non-definition). Ended by whitespace, '@' (for a variable) or various parentheses. */
    SLC_WORD_NAME,

    /** Variable call (MYWORD@MYVAR) - entered from SLC_WORD_NAME or from SLC_START. */
    SLC_WORD_VAR,
} slc_state;
/**
 * Hook deciding whether character c moves the state machine into a comment state.
 *
 * Not implemented yet: every argument is ignored and current_state is
 * handed back unchanged.
 */
static inline slc_state slc_comment_statechange_in(
        slc_state current_state,
        char c,
        const char *singleline_comment,
        const char *multiline_comment_opener,
        int singleline_comment_len,
        int multiline_comment_len,
        int *comment_i,
        int *multiline_i) {
    // FIXME: Implement
    (void)c;
    (void)singleline_comment;
    (void)multiline_comment_opener;
    (void)singleline_comment_len;
    (void)multiline_comment_len;
    (void)comment_i;
    (void)multiline_i;

    const slc_state next_state = current_state;
    return next_state;
}
/**
 * Hook deciding whether character c moves the state machine into the
 * name part of a word definition.
 *
 * Not implemented yet: every argument is ignored and current_state is
 * handed back unchanged.
 */
static inline slc_state slc_def_name_statechange(
        slc_state current_state,
        char c,
        const char *prefix,
        int *prefix_i) {
    // FIXME: Implement
    (void)c;
    (void)prefix;
    (void)prefix_i;

    const slc_state next_state = current_state;
    return next_state;
}
/**
 * Hook deciding whether character c moves the state machine into the
 * name part of a word call.
 *
 * Not implemented yet: every argument is ignored and current_state is
 * handed back unchanged.
 */
static inline slc_state slc_word_name_statechange(
        slc_state current_state,
        char c,
        const char *prefix) {
    // FIXME: Implement
    (void)c;
    (void)prefix;

    const slc_state next_state = current_state;
    return next_state;
}
/**
* This function runs the main slc engine over a snippet of code.
*
* The code_src is the entry of what we start interpreting, but you can do (un)buffered reads in it
* because we will use session_storage to store the source code data into memory while processing...
* Rem.: This also helps with the "include" directives using io_connector and do some kind of recursion maybe.
* Rem.: This architecture also lets us try to immediately "threaded_code" optimize the newly added word definition,
* which is possible if it relies on no forward references - this is the happiest case in my opinion!
*
* The session_storage is where we process the words data (possibly introducing threaded code as early as possible).
* This should also let the code "allocate" some random memory too and get an offset for it (there will be no other way).
*
* The symbol_table not only stores "words", but direct access offsets for:
*
* - words themselves (direct offset)
* - variables of the words (direct offset)
*
* The code stack is what the interpreter uses for return addresses, the data stack however is FORTH-style usual stack.
* The "insert_stack" collects things that we will read instead of reading the code_src AFTER a return from current word.
* A \0 value should be there at the end of it until we RETURN from the word - from when we start processing.
*
* The io_connector is needed so that the engine has a connection to temporary and real files and things simulating those.
*
* The prefix, ender and varprefix strings really just help when you use SLC to define a compiler and you need these.
*
* @param code_src The input source code to interpret / run. Code ends either with \0 or EOF.
* @param session_storage Can allocate and use arbitrary memory with this.
* @param symbol_table The symbol table to use while processing.
* @param code_stack The code stack (return addresses) to use.
* @param nesting_stack The stack used for the state-machine of the nested words.
* @param data_stack The data stack (forth-like stack) to use.
* @param insert_stack Used for temporarily expanding the input stream (one word level above current) with further words.
* @param io_connector The engine uses this to open/close pipes/files and write/read them.
* @param singleline_comment Like "//" - the character string that makes the rest of the line being comment. Can be "" (no NULL).
* @param multiline_comment_opener The character string that starts a multiline comment. Like / and * for C. Can be "" (no NULL).
* @param multiline_comment_closer The character string that ends a multiline comment. Like * and / for C. Can be "" (no NULL).
* @param prefix The prefix added to the lookup of built-ins. Useful when you write a compiler with SLC. Defaults to "" (empty).
* @param ender The character string that ends a word definition. Defaults to ";".
* @param varprefix The character string that prefixes variable declarations. Defaults to "@".
*/
static inline void slc(
        coderead code_src,
        session session_storage,
        sym symbol_table,
        stack code_stack,
        stack nesting_stack,
        stack data_stack,
        stack insert_stack,
        ioconn io_connector,
        const char *singleline_comment,
        const char *multiline_comment_opener,
        const char *multiline_comment_closer,
        const char *prefix,
        const char *ender,
        const char *varprefix) {
    slc_state state = SLC_START;
    /* The state-change helpers take int lengths, so narrow the size_t explicitly. */
    int singleline_comment_len = (int)strlen(singleline_comment);
    int multiline_comment_opener_len = (int)strlen(multiline_comment_opener);
    // TODO: Count line numbers
    // TODO: Handle/count indentation for better error messages
    int comment_i = 0;   /* progress counter handed to the comment state change */
    int multiline_i = 0; /* progress counter handed to the comment state change */
    int prefix_i = 0;    /* progress counter handed to the def-name state change */
    uint8_t c = 0;

    /* Not consumed yet by the unfinished state machine below. */
    (void)session_storage;
    (void)symbol_table;
    (void)code_stack;
    (void)nesting_stack;
    (void)data_stack;
    (void)insert_stack;
    (void)io_connector;
    (void)multiline_comment_closer;
    (void)ender;
    (void)varprefix;

    /* Only a 0 byte returned by code_src ends the processing loop. */
    while ((c = code_src()) != 0) {
process_char:
        switch (state) {
        case SLC_START:
            /* state -> comment | multiline_comment */
            state = slc_comment_statechange_in(
                    state,
                    c,
                    singleline_comment,
                    multiline_comment_opener,
                    singleline_comment_len,
                    multiline_comment_opener_len,
                    &comment_i,
                    &multiline_i);
            if (state != SLC_START) {
                goto process_char; /* new state might need 'c' too */
            }

            /* state -> def_name */
            state = slc_def_name_statechange(
                    state,
                    c,
                    prefix,
                    &prefix_i);
            if (state != SLC_START) {
                goto process_char; /* new state might need 'c' too */
            }

            /* state -> word_name */
            /* XXX: You can't START words with ':', parts of prefix or comment prefix - that makes my life simpler here */
            if ((comment_i == 0) && (multiline_i == 0) && (prefix_i == 0)) {
                state = slc_word_name_statechange(
                        state,
                        c,
                        prefix);
                if (state != SLC_START) {
                    goto process_char; /* new state might need 'c' too */
                }
            }

            /*
             * Still in SLC_START: either nothing matched or a prefix/comment
             * opener is only partially matched. 'c' is consumed in both cases,
             * so step to the next character. Jumping back to process_char here
             * would feed the very same 'c' to the same state again.
             */
            break;
        case SLC_COMMENT:
            break;
        case SLC_MULTILINE_COMMENT:
            break;
        case SLC_DEF_NAME:
            break;
        case SLC_DEF_VAR:
            break;
        case SLC_DEF_BODY:
            break;
        case SLC_WORD_NAME:
            break;
        case SLC_WORD_VAR:
            break;
        default:
            /* Unknown state value: ignore the character. */
            break;
        }
    }
}
#endif /* SLC_H */