Compare commits

...

3 Commits

Author SHA1 Message Date
Richard Thier
b5419a1a26 improvements 2024-09-16 23:18:32 +02:00
Richard Thier
ed7584a08f better engine architecture with more simplicity 2024-09-16 22:30:29 +02:00
Richard Thier
b1fa9cd9c9 further ideas 2024-09-16 21:22:44 +02:00

View File

@ -10,7 +10,7 @@ enum SLC_WORDTYP {
/** "Threaded code" (utf16-like word offsets) and encoded parentheses IN-PLACE inlined where text was before */
SLC_WORDTYP_THREADED_INLINE = 2,
/** "Threaded code" that did not fit in-place and is thus stored in session storage, word offset tells where */
SLC_WORDTYP_THREADED_SESSION = 2
SLC_WORDTYP_THREADED_SESSION = 3
};
typedef enum SLC_WORDTYP SLC_WORDTYP;
@ -19,6 +19,7 @@ static inline SLC_WORDTYP get_word_type(uint8_t flags) {
return (SLC_WORDTYP)(flags >> 6);
}
// TODO: probably should remove this?
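/** Gets the 6-bit variable count (low 6 bits of flags, so 0..63 vars per word). FIXME: the return below uses logical `&&`, which only yields 0 or 1 - masking needs bitwise `&`. */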
static inline uint8_t get_word_var_count(uint8_t flags) {
return (flags && 0x3F); /* 0011 1111 */
@ -73,6 +74,7 @@ struct wordstart {
uint8_t low_offset;
/* char ..name[]; // The REMAINS of name of the word being defined. Can be empty! */
/* char vars[]; // The local (at least 1-letter) variables of the word. Can be empty - min 4x8bit per a var, like: " @a;" */
// TODO: process vars when doing the reader->session store
/* char newline; // there is always a newline at this point! XXX: "@a; ", "@b;\n" is how we store vars */
/* char data[]; // The "body" of the word - either text source or inline threaded code (at least 1 character) */
/* char ender[]; // The ender-string that ends the word - always at least 1 character! */
@ -83,7 +85,14 @@ enum SLC_SYM_OP { SLC_SYM_SET, SLC_SYM_GET, SLC_SYM_ERASE };
typedef enum SLC_SYM_OP SLC_SYM_OP;
/** Selects which operation a call to a `stack` function-abstraction performs. */
typedef enum SLC_STACK_OP {
	SLC_STACK_PUSH,
	SLC_STACK_POP,
	SLC_STACK_AT,
	SLC_STACK_COUNT,
	SLC_STACK_ERASE
} SLC_STACK_OP;
enum SLC_SESSION_OP { SLC_SESSION_ALLOC, SLC_SESSION_ERASE, SLC_SESSION_GET };
/** Selects which operation a call to a `session` function-abstraction performs. */
enum SLC_SESSION_OP {
	SLC_SESSION_ALLOC   = 0, /* allocate memory, yields an accessor index */
	SLC_SESSION_ERASE   = 1, /* erase the whole session storage */
	SLC_SESSION_PUSH    = 2, /* append one byte to the end of the storage */
	SLC_SESSION_GET     = 3, /* read the byte at an accessor index */
	SLC_SESSION_SET     = 4, /* write a byte value at an accessor index */
	SLC_SESSION_PROCESS = 5  /* move the trailing bytes over an index, then shrink */
};
typedef enum SLC_SESSION_OP SLC_SESSION_OP;
enum SLC_IO_OP {
SLC_IO_OPEN,
@ -97,11 +106,6 @@ enum SLC_IO_OP {
SLC_IO_CMD,
};
typedef enum SLC_IO_OP SLC_IO_OP;
enum SLC_CODE_OP {
SLC_CODE_COUNT,
SLC_CODE_READ,
};
typedef enum SLC_CODE_OP SLC_CODE_OP;
/**
* Function-abstraction for a "symbol-table".
@ -112,12 +116,15 @@ typedef enum SLC_CODE_OP SLC_CODE_OP;
* SLC_SYM_GET Gets the symbol at key (the word parameter is unused). Returns NULL if there is no word for the key.
* SLC_SYM_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
*
* Rem.: On GET we return a wordstart* when the key refers to a word and a plain uint32_t* when it is a variable name!
*
* @param op Defines which operation the caller wants.
* @param key The key (both for SET and GET)
* @param word When adding a found word to the symbol table, the key will point to this word
* @returns The word definition stored for the key, or NULL when it is not stored yet or op is SET and there was an error.
* @param ptr When adding a found word/variable to the symbol table, the key will point to this wordstart* or uint32_t*
* @returns The word/var definition stored for the key, or NULL when it is not stored yet or op is SET and there was an error.
*/
typedef wordstart* (*sym)(SLC_SYM_OP op, char *key, wordstart *word);
typedef void* (*sym)(SLC_SYM_OP op, char *key, void *ptr);
// TODO: union for this?
/**
* Function-abstraction for an integer "stack".
@ -137,18 +144,22 @@ typedef wordstart* (*sym)(SLC_SYM_OP op, char *key, wordstart *word);
typedef uint32_t (*stack)(SLC_STACK_OP op, uint32_t param);
/**
* Function-abstraction for an integer "session-storage".
* Function-abstraction for a "session-storage".
*
* Operations:
* SLC_SESSION_ALLOC allocates parameter amount of memory and returns an accessor index.
* SLC_SESSION_ERASE erase the session storage (all of it)
* SLC_SESSION_GET gets byte at the ith accessor index
* SLC_SESSION_ERASE erase the session storage (all of it) - all parameters are unused
* SLC_SESSION_PUSH adds the given byte (value in i) to the end of the session storage (by growing it) - j unused
* SLC_SESSION_GET gets byte at the ith accessor index - j unused
* SLC_SESSION_SET sets the byte at the ith accessor index to (byte)j
* SLC_SESSION_PROCESS takes the last j bytes and moves them to index i (overwriting the bytes there), then "shrinks" the storage by j.
*
* @param op Defines which operation the caller wants.
* @param i Used on SESSION_GET and is the accessor index
* @param i The accessor index for SESSION_GET, SESSION_SET and SESSION_PROCESS; the amount to allocate for SESSION_ALLOC; the byte value to append for SESSION_PUSH.
* @param j Used on SESSION_SET as the byte value and on SESSION_PROCESS as the amount of bytes to "process" / shrink.
* @returns The accessor index in case of ALLOC (0xFFFFFFFF == -1 means error), on GET it returns the stored BYTE as uint32_t
*/
typedef uint32_t (*session)(SLC_SESSION_OP op, uint32_t i);
typedef uint32_t (*session)(SLC_SESSION_OP op, uint32_t i, uint32_t j);
/**
* Function-abstraction for io connectors.
@ -170,41 +181,56 @@ typedef uint32_t (*session)(SLC_SESSION_OP op, uint32_t i);
* @returns A handle pointer or pointer to character to read / written or closed/unlocked handle (NULL on errors). Also cmd stdout.
*/
typedef const char* (*ioconn)(SLC_IO_OP op, const char *param);
// TODO: union for this?
/**
* Function-abstraction for reading the source code.
*
* Operations:
*
* SLC_CODE_COUNT To get how much bytes are readable (i is unused). Returns size as uint32_t (full used)
* SLC_CODE_READ To read bytes of the source code at location i. Returns the byte char as uint32_t (low byte)
*
* @param op Defines which operation the caller wants.
* @param i In case of READ, the index of the data.
* Function-abstraction for reading the source code byte-by-byte.
*/
typedef uint32_t (*coderead)(SLC_CODE_OP op, uint32_t i);
/* Explicit (void) prototype: the reader takes no arguments, so the compiler can reject calls that pass any. */
typedef uint8_t (*coderead)(void);
/**
* This function runs the main slc engine over a snippet of code.
*
* @param code_src The input source code to interpret / run.
* @param symbol_table The symbol table to use
* @param code_stack The code stack (return addresses) to use
* @param data_stack The data stack (forth-like stack) to use
* @param insert_stack Used for temporarily expanding the input stream (one word level above) with words
* The code_src is the entry point of what we start interpreting; it may do buffered or unbuffered reads
* because we use session_storage to store the source code data in memory while processing...
* Rem.: This also helps with the "include" directives using io_connector (maybe via some kind of recursion).
* Rem.: This architecture also lets us try to immediately "threaded_code" optimize a newly added word definition,
* which is possible if it relies on no forward references - this is the happiest case in my opinion!
*
* The session_storage is where we process the words' data (possibly introducing threaded code as early as possible).
* It should also let the code "allocate" arbitrary memory and get an offset for it (there will be no other way).
*
* The symbol_table does not only store "words" - it stores direct access offsets for:
*
* - words themselves (direct offset)
* - variables of the words (direct offset)
*
* The code stack is what the interpreter uses for return addresses; the data stack, however, is the usual FORTH-style stack.
* The "insert_stack" collects things that we will read instead of reading the code_src AFTER a return from the current word.
* A \0 value should be there at the end of it until we RETURN from the word - that is when we start processing it.
*
* The io_connector is needed so that the engine has a connection to temporary and real files and to things simulating those.
*
* The prefix, ender and varprefix strings really just help when you use SLC to define a compiler and you need these.
*
* @param code_src The input source code to interpret / run. Code ends either with \0 or EOF.
* @param session_storage Can allocate and use arbitrary memory with this.
* @param symbol_table The symbol table to use while processing.
* @param code_stack The code stack (return addresses) to use.
* @param data_stack The data stack (forth-like stack) to use.
* @param insert_stack Used for temporarily expanding the input stream (one word level above current) with further words.
* @param io_connector The engine uses this to open/close pipes/files and write/read them.
* @param prefix The prefix added to the lookup of built-ins.
* @param ender The character string that ends a word definition.
* @param varprefix The character string that prefixes variable declarations.
* @param prefix The prefix added to the lookup of built-ins. Useful when you write a compiler with SLC. Defaults to "" (empty).
* @param ender The character string that ends a word definition. Defaults to ";".
* @param varprefix The character string that prefixes variable declarations. Defaults to "@".
*/
static inline void slc(
coderead code_src,
session session_storage,
sym symbol_table,
stack code_stack,
stack data_stack,
stack insert_stack,
session session_storage,
ioconn io_connector,
const char *prefix,
const char *ender,