/*
 * slc/engine/slc.h
 * 2024-09-16 21:22:44 +02:00
 *
 * 217 lines
 * 9.0 KiB
 * C
 */

#ifndef SLC_H
#define SLC_H

#include <stdint.h>
/**
 * Possible word types.
 *
 * The type is read from the top two bits of a word's flags byte (see
 * get_word_type), so the values 0..3 are available and every enumerator
 * must have its own distinct value to be distinguishable.
 */
enum SLC_WORDTYP {
    /** Still in plain text */
    SLC_WORDTYP_TEXT = 0,
    /** Native code, use get_word_storage_offset to get what to run (relative pointer or array of pointers) */
    SLC_WORDTYP_NATIVE = 1,
    /** "Threaded code" (utf16-like word offsets) and encoded parentheses IN-PLACE inlined where text was before */
    SLC_WORDTYP_THREADED_INLINE = 2,
    /** "Threaded code" that did not fit in-place and is thus stored in session storage, word offset tells where */
    SLC_WORDTYP_THREADED_SESSION = 3 /* was 2, which collided with SLC_WORDTYP_THREADED_INLINE */
};
typedef enum SLC_WORDTYP SLC_WORDTYP;
/** Gets the wordtyp from a flags field - see wordstart */
static inline SLC_WORDTYP get_word_type(uint8_t flags) {
return (SLC_WORDTYP)(flags >> 6);
}
/**
 * Gets the 6-bit variable count stored in the low bits of a flags field.
 *
 * Six bits encode the values 0..63, so at most 63 variables can be counted
 * per word.
 *
 * @param flags The flags byte of a word - see wordstart.
 * @returns The low six bits of flags (0..63).
 */
static inline uint8_t get_word_var_count(uint8_t flags) {
    /* Bitwise AND masks out the two type bits; a logical && would only ever yield 0 or 1. */
    return (uint8_t)(flags & 0x3F); /* 0011 1111 */
}
/**
 * Assembles the 24bit storage offset of a word from its two stored parts.
 * Natives contain pointers in an offseted array.
 *
 * @param high_offset The most significant 8 bits of the offset.
 * @param low_offset  The least significant 16 bits of the offset.
 * @returns high_offset placed above low_offset, as a 32-bit value.
 */
static inline uint32_t get_word_storage_offset(uint8_t high_offset, uint16_t low_offset) {
    uint32_t offset = high_offset;

    offset <<= 16;
    offset += low_offset;
    return offset;
}
/**
 * Header that is overlaid on the start of a word definition.
 *
 * A word definition starts right after this. After processing it, we overwrite parts of the
 * definition in-place in memory: the leading source characters are reused to hold the fields below.
 *
 * Examples:
 *
 * #: just_code
 * #dup
 * #inc
 * #swap
 * #
 *
 * : with_vars @a; @b; @c;
 * @a
 * inc
 * @a(.)
 * ;
 *
 * #builtin: to_prefix
 * #swap
 * #dup
 *
 * TODO: How to do this builtin to be properly changed inline? If it can be inline threaded-coded, then it is fine,
 * but it is a special case.
 * ^^The above always needs a starting '#builtin' at the definition and inside. That is exchanged to the real prefix...
 * This is used when the implementation implements built-ins with FORTH-like code instead of native code
 * (saves native interpreter space).
 *
 * #: structural (
 * #parse_num
 * #dup
 * #inc
 * #swap
 * #print
 * #print
 * ) [ #parse_num #print ] { #parse_num #print }
 * #
 *
 * NOTE(review): get_word_storage_offset() takes an 8-bit high part and a 16-bit low part, while here
 * high_offset is 16 bits wide and low_offset is 8 bits wide - confirm which split is intended.
 * NOTE(review): without packing, a compiler may insert padding after 'flags' to align the 16-bit member,
 * so the struct might not overlay the four source characters exactly - confirm the intended layout.
 */
struct wordstart {
/** The ':' char - after processing it stores the flags (see get_word_type and get_word_var_count) */
uint8_t flags;
/** Whitespace after ':' and first char of name - after processing contains the high-offset */
uint16_t high_offset;
/** Either remaining parts of the name - or the leading tab/space for starting variables (or newline if there's none) */
uint8_t low_offset;
/* The data below follows the header in memory but is not declared as members: */
/* char ..name[]; // The REMAINS of name of the word being defined. Can be empty! */
/* char vars[]; // The local (at least 1-letter) variables of the word. Can be empty - min 4x8bit per a var, like: " @a;" */
/* char newline; // there is always a newline at this point! XXX: "@a; ", "@b;\n" is how we store vars (*/
/* char data[]; // The "body" of the word - either text source or inline threaded code (at least 1 character) */
/* char ender[]; // The ender-string that ends the word - always at least 1 character! */
};
/** Shorthand so the struct can be referred to as plain 'wordstart'. */
typedef struct wordstart wordstart;
/** Operation selector passed to a symbol-table callback (see sym). */
enum SLC_SYM_OP {
    /** Store a key->word mapping */
    SLC_SYM_SET = 0,
    /** Look up the word stored for a key */
    SLC_SYM_GET = 1,
    /** Empty the whole symbol table */
    SLC_SYM_ERASE = 2
};
typedef enum SLC_SYM_OP SLC_SYM_OP;
/** Operation selector passed to a stack callback (see stack). */
enum SLC_STACK_OP {
    /** Push an element */
    SLC_STACK_PUSH = 0,
    /** Pop the top element */
    SLC_STACK_POP = 1,
    /** Read an element counted down from the top */
    SLC_STACK_AT = 2,
    /** Query the number of elements */
    SLC_STACK_COUNT = 3,
    /** Make the stack empty */
    SLC_STACK_ERASE = 4
};
typedef enum SLC_STACK_OP SLC_STACK_OP;
/**
 * Operation selector passed to a session-storage callback (see session).
 *
 * SLC_SESSION_SET is appended last so that the numeric values of the
 * previously existing operations stay unchanged.
 */
enum SLC_SESSION_OP {
    /** Allocate memory and return an accessor index */
    SLC_SESSION_ALLOC,
    /** Erase the session storage (all of it) */
    SLC_SESSION_ERASE,
    /** Get the byte at an accessor index */
    SLC_SESSION_GET,
    /** Set the byte at an accessor index (documented for session, but was missing from the enum) */
    SLC_SESSION_SET
};
typedef enum SLC_SESSION_OP SLC_SESSION_OP;
/** Operation selector passed to an io connector callback (see ioconn). */
enum SLC_IO_OP {
    /** Open a persistent file by name */
    SLC_IO_OPEN = 0,
    /** Open a temporary file by name */
    SLC_IO_OPEN_TMP = 1,
    /** Presumably removes temporary files - NOTE(review): ioconn documents an SLC_IO_CLOSE_TMP instead, confirm */
    SLC_IO_REMOVE_TMPS = 2,
    /** Close a persistent file by handle */
    SLC_IO_CLOSE = 3,
    /** Read a character from a file handle */
    SLC_IO_READ = 4,
    /** Write a character */
    SLC_IO_WRITE = 5,
    /** Lock a file handle for exclusive reads and writes */
    SLC_IO_LOCK = 6,
    /** Counterpart of SLC_IO_LOCK */
    SLC_IO_UNLOCK = 7,
    /** Run a command on the operating system */
    SLC_IO_CMD = 8,
};
typedef enum SLC_IO_OP SLC_IO_OP;
/**
 * Function-abstraction for a "symbol-table".
 *
 * The engine does not own a symbol table itself; the caller supplies a function of this type
 * and every table access goes through it.
 *
 * Operations:
 *
 * SLC_SYM_SET   Saves a mapping from key->word in the symbol table. word==NULL removes the mapping.
 *               Returns the word pointer back, or NULL on errors.
 * SLC_SYM_GET   Gets the symbol at key (the word parameter is unused). Returns NULL if there is no word for the key.
 * SLC_SYM_ERASE Erases the symbol table so it becomes empty again. Can never fail, returns NULL.
 *
 * @param op Defines which operation the caller wants.
 * @param key The key to store under (SET) or to look up (GET).
 * @param word When adding a found word to the symbol table, the key will point to this word.
 * @returns The word definition stored for the key, or NULL when it is not stored yet or op is SET and there was an error.
 */
typedef wordstart* (*sym)(SLC_SYM_OP op, char *key, wordstart *word);
/**
 * Function-abstraction for an integer "stack".
 *
 * Operations:
 *
 * SLC_STACK_PUSH  Pushes "param" onto the stack. Returns 1 if succeeded, otherwise 0.
 * SLC_STACK_POP   Pops the stack - does not return a meaningful value, beware of underflowing!
 * SLC_STACK_AT    Returns the "param"th element down from the top of the stack.
 * SLC_STACK_COUNT Returns the number of elements in the stack.
 * SLC_STACK_ERASE Makes the stack empty. Basically as if you would POP COUNT times.
 *
 * @param op Defines which operation the caller wants.
 * @param param On SLC_STACK_PUSH, this is the element to push onto the stack; in case of SLC_STACK_AT, it is the index.
 * @return The element at the given stack location in case of SLC_STACK_AT, the count in case of SLC_STACK_COUNT,
 *         or the success indicator (1/0) in case of SLC_STACK_PUSH.
 */
typedef uint32_t (*stack)(SLC_STACK_OP op, uint32_t param);
/**
 * Function-abstraction for a "session-storage" that is accessed one byte at a time through accessor indices.
 *
 * Operations:
 *
 * SLC_SESSION_ALLOC Allocates the requested amount of memory and returns an accessor index.
 *                   NOTE(review): the amount is presumably passed in "i" - confirm.
 * SLC_SESSION_ERASE Erases the session storage (all of it).
 * SLC_SESSION_GET   Gets the byte at the ith accessor index.
 * SLC_SESSION_SET   Sets the byte at the ith accessor index to the value of b.
 *
 * @param op Defines which operation the caller wants.
 * @param i The accessor index (used on SESSION_GET and SESSION_SET).
 * @param b Used on SESSION_SET and is the byte to write.
 * @returns The accessor index in case of ALLOC (0xFFFFFFFF == -1 means error); on GET it returns the stored
 *          byte as uint32_t.
 */
typedef uint32_t (*session)(SLC_SESSION_OP op, uint32_t i, uint8_t b);
/**
 * Function-abstraction for io connectors.
 *
 * Operations:
 *
 * SLC_IO_OPEN      Opens a PERSISTENT file with the given name. Returns the handle pointer (or NULL on error).
 * SLC_IO_CLOSE     Closes a PERSISTENT file with the given handle.
 * SLC_IO_OPEN_TMP  Opens a TEMPORARY file with the given name. Returns the handle pointer.
 * SLC_IO_CLOSE_TMP Removes the TEMPORARY file with the given handle.
 *                  NOTE(review): enum SLC_IO_OP declares SLC_IO_REMOVE_TMPS and no SLC_IO_CLOSE_TMP - confirm
 *                  which name and semantics are intended.
 * SLC_IO_READ      Reads a character from the given file handle. Returns pointer to the character that got read.
 * SLC_IO_WRITE     Writes a character to the given file handle. The 'param' points to the character to write (1 byte)
 * SLC_IO_LOCK      Locks the given file handle for exclusive reads and writes (others need to use lock/unlock too)
 * SLC_IO_UNLOCK    Unlocks the given file handle again (others need to use lock/unlock too)
 * SLC_IO_CMD       Runs the given command on the operating system. The 'param' is the command (+args) and the
 *                  returned value is its standard output.
 *
 * @param op Defines which operation the caller wants.
 * @param param The name or temporary name or command or the handle pointer parameter depending on op.
 * @returns A handle pointer, or a pointer to the character read / written, or the closed/unlocked handle
 *          (NULL on errors). For SLC_IO_CMD, the standard output of the command.
 */
typedef const char* (*ioconn)(SLC_IO_OP op, const char *param);
/**
 * Function-abstraction for reading the source code byte-by-byte.
 *
 * The callback takes no arguments; each call returns one byte of source code.
 * The parameter list is spelled (void) so the type is a real prototype: an
 * empty () would leave the arguments unchecked.
 *
 * @returns One byte of source code per call.
 */
typedef uint8_t (*coderead)(void);
/**
 * Entry point of the slc engine: runs the engine over a snippet of code.
 *
 * Interpretation starts from code_src. Buffered reads are possible, because
 * session_storage is used to keep the source code data in memory while it is processed...
 * This also helps with the "include" directives that go through io_connector.
 * TODO: Investigate "what if" we start with session_storage prefilled with the initial read code?
 *
 * The symbol_table stores not only "words", but direct access offsets for:
 *
 * - words themselves
 * - variables of the words
 * - "word ", "word(", "word[", "word{" keys show where "blocks" of that word is. TODO: what to do with multiple blocks? Use flagz?
 *
 * NOTE: the engine body is not implemented yet - at the moment the function ignores every argument and does nothing.
 *
 * @param code_src The input source code to interpret / run. Code ends either with \0 or EOF.
 * @param symbol_table The symbol table to use while processing.
 * @param code_stack The code stack (return addresses) to use.
 * @param data_stack The data stack (forth-like stack) to use.
 * @param insert_stack Used for temporarily expanding the input stream (one word level above current) with further words.
 * @param session_storage Can allocate and use arbitrary memory with this.
 * @param io_connector The engine uses this to open/close pipes/files and write/read them.
 * @param prefix The prefix added to the lookup of built-ins. Useful when you write a compiler with SLC. Defaults to "" (empty).
 * @param ender The character string that ends a word definition. Defaults to ";".
 * @param varprefix The character string that prefixes variable declarations. Defaults to "@".
 */
static inline void slc(
    coderead code_src,
    sym symbol_table,
    stack code_stack,
    stack data_stack,
    stack insert_stack,
    session session_storage,
    ioconn io_connector,
    const char *prefix,
    const char *ender,
    const char *varprefix
) {
    /* TODO: implement the engine. Until then every argument is deliberately left unused. */
    (void)code_src;
    (void)symbol_table;
    (void)code_stack;
    (void)data_stack;
    (void)insert_stack;
    (void)session_storage;
    (void)io_connector;
    (void)prefix;
    (void)ender;
    (void)varprefix;
}
#endif /* SLC_H */