2024-09-07 21:53:56 +02:00
# ifndef SLC_H
# define SLC_H
2024-09-19 11:36:04 +02:00
#include <ctype.h>  // isspace
#include <stddef.h> // NULL, ptrdiff_t
#include <stdint.h> // uint8_t, uint32_t..
#include <stdio.h>  // fprintf (only used when SLC_DEBUG is defined)
#include <string.h> // memcpy, strlen..
2024-09-19 11:36:04 +02:00
2024-09-26 11:52:28 +02:00
/* Define this if you want to debug the engine doing its thing */
/* #define SLC_DEBUG */
2024-09-26 11:04:39 +02:00
/* Session offset type - defaults to 64 bit because the union types often leave room for it anyway, but you can override it */
2024-09-25 23:18:30 +02:00
#if !defined(SLOFFS_T)
#define SLOFFS_T uint64_t
#endif /* SLOFFS_T */
2024-09-28 12:57:01 +02:00
/* Maximum length of word names - has a default but you can override it; buffers add +1 to this for the null terminator */
2024-09-26 11:04:39 +02:00
#if !defined(SL_MAX_WORD_NAME)
#define SL_MAX_WORD_NAME 255
#endif /* SL_MAX_WORD_NAME */
2024-09-25 23:18:30 +02:00
/** Where the body of a processed word lives: either a session offset or a raw pointer (both share the same storage). */
typedef union word_body {
    SLOFFS_T offset;
    void *ptr;
} word_body;
2024-09-16 20:12:23 +02:00
/** Possible word types */
enum SLC_WORDTYP {
    /** Still in plain text */
    SLC_WORDTYP_TEXT = 0,
    /** Native code, use get_word_storage_offset to get what to run (relative pointer or array of pointers) */
    SLC_WORDTYP_NATIVE = 1,
    /** "Threaded code" (utf16-like word offsets) and encoded parentheses IN-PLACE inlined where text was before */
    SLC_WORDTYP_THREADED_INLINE = 2,
    /** "Threaded code" that did not fit in-place and is thus stored in session storage, word offset tells where */
    SLC_WORDTYP_THREADED_SESSION = 3
};
typedef enum SLC_WORDTYP SLC_WORDTYP;
2024-09-26 13:25:58 +02:00
/**
 * Gets the padding bytes needed for a memory address to be padded to alignment.
 *
 * Only the numeric value of the pointer is used, it is never dereferenced.
 *
 * @param ptr The address that should be aligned.
 * @param alignment The wanted alignment in bytes.
 * @returns How many bytes must be added to ptr so that it becomes a multiple of alignment
 *          (0 .. alignment - 1). Returns 0 for a non-positive alignment instead of dividing by zero.
 */
static inline int get_padding(uint8_t *ptr, int alignment) {
    if (alignment <= 0) {
        return 0;
    }
    /* How far the address sits past the previous aligned boundary */
    uintptr_t misalign = (uintptr_t)ptr % (uintptr_t)alignment;
    return (misalign == 0) ? 0 : (int)((uintptr_t)alignment - misalign);
}
2024-09-29 20:16:20 +02:00
/**
 * Gets the padding bytes needed for a size to become divisible by alignment.
 *
 * @param size The size in bytes that should be padded.
 * @param alignment The wanted divisor / alignment in bytes.
 * @returns How many bytes must be added to size so that it is divisible by alignment
 *          (0 .. alignment - 1). Returns 0 for a non-positive alignment instead of dividing by zero.
 */
static inline int get_size_padding(uint32_t size, int alignment) {
    if (alignment <= 0) {
        return 0;
    }
    uint32_t remainder = size % (uint32_t)alignment;
    return (remainder == 0) ? 0 : (int)((uint32_t)alignment - remainder);
}
2024-09-26 13:25:58 +02:00
/**
 * Tells if c ends a line (that is either \n or \r).
 *
 * @param c The character to check.
 * @returns 1 when c is a line feed or a carriage return, otherwise 0.
 */
static inline char endsline(char c) {
    return (c == '\n') || (c == '\r');
}
2024-09-16 20:12:23 +02:00
/**
* A word definition starts right after this . After processing it , we inline overwrite random parts of it in memory . . .
*
* Examples :
*
* # : just_code
* # dup
* # inc
* # swap
* #
*
* : with_vars @ a ; @ b ; @ c ;
* @ a
* inc
* @ a ( . )
* ;
*
* # builtin : to_prefix
* # swap
* # dup
*
2024-09-25 23:18:30 +02:00
* ^ ^ This built - in generates threaded code / text while processing in a way so that the
* to_prefix becomes prefixed with the " right " prefix and ' # ' exchanged with the current !
*
2024-09-16 20:12:23 +02:00
* ^^ The above always needs the starting '#builtin' at the definition and inside it. That gets exchanged to the real prefix...
*    This is used for implementing built-ins with FORTH-like code instead of native code (saves native interpreter space)
*
2025-01-03 13:37:29 +01:00
* # : : structural (
2024-09-16 20:12:23 +02:00
* # parse_num
* # dup
* # inc
* # swap
* # print
* # print
* ) [ # parse_num # print ] { # parse_num # print }
* #
*/
2024-09-25 23:18:30 +02:00
typedef struct word {
    /** Defines how to understand this word */
    uint8_t flags;
    /** Padding to ensure alignment of vars (4byte) */
    uint8_t reserved;
    /** Number of local variables of the word - can be zero */
    uint16_t var_count;
    /* XXX: name is stored in symbol table only, not stored here */
    /**
     * First element of what is logically "uint32_t vars[]": the local variables memories for the word.
     * Like "@a @b @c". Can be empty!
     */
    uint32_t first_var;
    /*
     * What follows the variables in memory is not declared as a field.
     * It possibly overlaps with first_var (var_count == 0), possibly using that as padding...
     *
     * EITHER:
     *     word_body processed_body; // Where to find the body data (ptr or offset) if its not "inline_data[]"
     * OR:
     *     uint8_t inline_data[];    // The "body" - either text src (contains "ender") or inline threaded code (zero term)
     */
} word;
/**
 * Gets the wordtyp from a flags field - see word.
 *
 * @param flags The flags byte of a word.
 * @returns The value stored in the two most significant bits of flags, as an SLC_WORDTYP.
 */
static inline SLC_WORDTYP word_type(uint8_t flags) {
    return (SLC_WORDTYP)(flags >> 6);
}
/**
 * Gets the variable array of the given word.
 *
 * @param w The word header; must not be NULL.
 * @returns Pointer to the first variable slot (the address of w->first_var).
 */
static inline uint32_t *word_vars(word *w) {
    return &(w->first_var);
}
/**
 * Gets the (inline or processed - same addr) "data" of the word.
 *
 * @param w The word header; must not be NULL.
 * @returns The address right after the var_count variables, moved forward by get_padding(.., 8).
 */
static inline uint8_t *word_inline_data(word *w) {
    /* One past the last variable - this is first_var itself in case we had no vars */
    uint8_t *after_vars = (uint8_t *)(word_vars(w) + w->var_count);
    return after_vars + get_padding(after_vars, 8);
}
/**
 * Gets the processed body of the word.
 *
 * @param w The word header; must not be NULL.
 * @returns A copy of the word_body stored at the word_inline_data() address of the word.
 */
static inline word_body word_processed_body(word *w) {
    word_body body;
    /* Copy the bytes instead of dereferencing a casted byte pointer */
    memcpy(&body, word_inline_data(w), sizeof body);
    return body;
}
2024-09-07 21:53:56 +02:00
2024-09-29 17:03:42 +02:00
/** Operations of the symbol-table abstraction (see sym) */
typedef enum SLC_SYM_OP {
    SLC_SYM_SET = 0,
    SLC_SYM_GET = 1,
    SLC_SYM_ERASE = 2
} SLC_SYM_OP;
2024-09-13 18:02:54 +02:00
/** Operations of the stack abstraction (see stack) */
typedef enum SLC_STACK_OP {
    SLC_STACK_PUSH,
    SLC_STACK_POP,
    SLC_STACK_AT,
    SLC_STACK_COUNT,
    SLC_STACK_ERASE
} SLC_STACK_OP;
2024-09-16 22:30:29 +02:00
/** Operations of the session-storage abstraction (see session) */
typedef enum SLC_SESSION_OP {
    SLC_SESSION_ALLOC,
    SLC_SESSION_ERASE,
    SLC_SESSION_PUSH,
    SLC_SESSION_GET,
    SLC_SESSION_SET,
    SLC_SESSION_GET32,
    SLC_SESSION_SET32,
    SLC_SESSION_PROCESS
} SLC_SESSION_OP;
2024-09-13 18:02:54 +02:00
/** Operations of the io connector abstraction (see ioconn) */
typedef enum SLC_IO_OP {
    SLC_IO_OPEN,
    SLC_IO_OPEN_TMP,
    SLC_IO_REMOVE_TMPS,
    SLC_IO_CLOSE,
    SLC_IO_READ,
    SLC_IO_WRITE,
    SLC_IO_LOCK,
    SLC_IO_UNLOCK,
    SLC_IO_CMD,
} SLC_IO_OP;
2024-09-13 18:02:54 +02:00
2024-09-29 02:00:43 +02:00
/**
 * Wrapper around a char pointer that is handed to the symbol table as key.
 * As the name says: do not store the pointer itself, it can get invalidated easily - copy what it points to instead.
 */
typedef struct do_not_save_charptr {
    char *ptr;
} do_not_save_charptr;
2024-09-29 01:38:07 +02:00
2024-09-13 18:02:54 +02:00
/**
* Function - abstraction for a " symbol-table " .
*
* Operations :
*
* SLC_SYM_SET Saves a mapping from key - > word in symbol table . word = = NULL removes mapping . Returns ptr back or NULL on errors .
* SLC_SYM_GET Gets the symbol at key ( the word parameter is unused ) . Returns NULL if there is no word for the key .
* SLC_SYM_ERASE Erases the symbol table so it becomes empty again . Can never fail , returns NULL .
*
* @ param op Defines which operation the caller wants .
2024-09-29 02:00:43 +02:00
* @ param key The key ( both for SET and GET ) . This pointer can get easily invalidated so you might need a copy or you do Trie , etc .
2024-09-25 23:18:30 +02:00
* @ param ptr When adding a found word / variable to the symbol table , the key will point to this word * or uint32_t *
2024-09-16 22:30:29 +02:00
* @ returns The word / var definition stored for the key , or NULL when it is not stored yet or op is SET and there was an error .
2024-09-13 18:02:54 +02:00
*/
2024-09-29 16:09:05 +02:00
typedef void * ( * sym ) ( SLC_SYM_OP op , do_not_save_charptr key , void * ptr ) ;
2024-09-13 18:02:54 +02:00
/**
* Function - abstraction for an integer " stack " .
*
* Operations :
*
* SLC_STACK_PUSH pushes the " elem " to the stack . Returns 1 if succeeded , otherwise 0.
* SLC_STACK_POP pops the stack - does not return meaningful value , beware of underflowing !
* SLC_STACK_AT returns the " param " th element down from the top of the stack
* SLC_STACK_COUNT returns the number of elements in the stack
* SLC_STACK_ERASE Makes the stack empty . Basically as if you would POP the COUNT times .
*
* @ param op Defines which operation the caller wants .
* @ param param On SLC_STACK_PUSH , this is the element to push onto the stack , in case of SLC_STACK_AT , its the index .
* @ return The element at the given stack location in case of SLC_STACK_AT or the count in case of SLC_STACK_COUNT . Can show error !
*/
2024-09-14 13:00:43 +02:00
typedef uint32_t ( * stack ) ( SLC_STACK_OP op , uint32_t param ) ;
2024-09-13 18:02:54 +02:00
/**
2024-09-16 22:30:29 +02:00
* Function - abstraction for a " session-storage " .
2024-09-13 18:02:54 +02:00
*
* Operations :
* SLC_SESSION_ALLOC allocates parameter amount of memory and returns an accessor index .
2024-09-16 22:30:29 +02:00
* SLC_SESSION_ERASE erase the session storage ( all of it ) - all parameters are unused
* SLC_SESSION_PUSH adds the given byte ( value in i ) to the end of the session storage ( by growing it ) - j unused
* SLC_SESSION_GET gets byte at the ith accessor index - j unused
* SLC_SESSION_SET gets byte at the ith accessor index to be of ( byte ) j
2024-09-29 01:38:07 +02:00
* SLC_SESSION_GET32 gets uint32_t at the ith accessor index - j unused . XXX : Beware , architectures unaligned access crash !
* SLC_SESSION_SET32 gets uint32_t at the ith accessor index to be of j . XXX : Beware , architectures unaligned access crash !
2024-09-16 22:30:29 +02:00
* SLC_SESSION_PROCESS gets the last j bytes and moves them overriding bytes at index i , then " shrinks " the storage by j .
2024-09-13 18:02:54 +02:00
*
* @ param op Defines which operation the caller wants .
2024-09-16 22:30:29 +02:00
* @ param i Used on SESSION_GET and is the accessor index , in case of SESSIN_ALLOC it is the amount to allocate .
* @ param j Used on SESSION_SET as the byte value and on SESSION_PROCESS as the amount of bytes to " process " / shrink .
2024-09-13 18:02:54 +02:00
* @ returns The accessor index in case of ALLOC ( 0xFFFFFFFF = = - 1 means error ) , on get it returns the store BYTE as uint32_t
*/
2024-09-16 22:30:29 +02:00
typedef uint32_t ( * session ) ( SLC_SESSION_OP op , uint32_t i , uint32_t j ) ;
2024-09-07 21:53:56 +02:00
2024-09-18 14:49:30 +02:00
/** Result of an io connector call (see ioconn) - which member is meaningful depends on the operation */
typedef union iores {
    /** Either the terminal cmd result or the handle pointer. NULL means some kind of error */
    const char *ptr;
    /** The read character */
    char c;
} iores;
2024-09-13 18:02:54 +02:00
/**
* Function - abstraction for io connectors .
*
* Operations :
*
* SLC_IO_OPEN Opens a PERSISTENT file with the given name . Returns the handle pointer ( or NULL on error ) .
* SLC_IO_CLOSE Closes a PERSISTENT file with the given handle .
* SLC_IO_OPEN_TMP Opens a TEMPORARY file with the given name . Returns the handle pointer .
* SLC_IO_CLOSE_TMP Removes the TEMPORARY file with the given handle .
2024-09-26 12:32:34 +02:00
* SLC_IO_READ Reads a character from the given file handle . Returns ' \0 ' on EOF and being out of data !
* SLC_IO_WRITE Writes a character to the given file handle . The ' param ' points to the character to write ( 1 byte )
2024-09-13 18:02:54 +02:00
* SLC_IO_LOCK Locks the given file handle for exclusive reads and writes ( others need to use lock / unlock too )
* SLC_IO_UNLOCK Locks the given file handle for exclusive reads and writes ( others need to use lock / unlock too )
* SLC_IO_CMD Runs the given command on the operating system . The ' param ' is the command ( + args ) and returned is std output .
*
* @ param op Defines which operation the caller wants .
* @ param param The name or temporary name or command or the handle pointer parameter depending on op .
* @ returns A handle pointer or pointer to character to read / written or closed / unlocked handle ( NULL on errors ) . Also cmd stdout .
*/
2024-09-18 14:49:30 +02:00
typedef iores ( * ioconn ) ( SLC_IO_OP op , const char * param ) ;
2024-09-07 21:53:56 +02:00
2024-09-14 13:41:00 +02:00
/**
 * Function-abstraction for reading the source code char-by-char.
 *
 * Takes no parameters; slc() keeps calling it until it returns 0.
 *
 * @returns The next character of the source code.
 */
typedef char (*coderead)(void);
2024-09-14 13:41:00 +02:00
2024-09-26 13:05:38 +02:00
/**
 * States the main state-engine can pick up - use characters for debugging better.
 *
 * The one byte underlying type is only requested where the language supports it (C23 / C++);
 * older C standards get a plain enum with the same values.
 */
#if defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 202311L))
enum slc_state : uint8_t {
#else
enum slc_state {
#endif
    /** Before things */
    SLC_START = 's',
    /** In a comment */
    SLC_COMMENT = 'c',
    /** In multi-line comment */
    SLC_MULTILINE_COMMENT = 'm',
    /** Name part of word-definition (after ':') - whitespace ends it */
    SLC_DEF_NAME = 'd',
    /** Variable-listing part of word-definition - endline, '(', '[' or '{' ends it */
    SLC_DEF_VAR = 'D',
    /** Raw body part of the word definition - these can contain local variable accesses + words, depth counted by vars */
    SLC_DEF_BODY = 'b',
    /** Name part of a word "call" (non-definition). Ends by whitespace, '@' (in case of variable) or various parentheses */
    SLC_WORD_NAME = 'w',
    /** Variable call (MYWORD@MYVAR) - we get to be here from SLC_WORD_NAME or from START */
    SLC_WORD_VAR = 'W',
    /** Syntax error state - recovers by newlines */
    SLC_SYN_ERROR = 'e',
};
typedef enum slc_state slc_state;

/**
 * Advances an incremental "does the input currently end with a prefix of marker" match by one character.
 *
 * On a mismatch it does not blindly restart from zero: it falls back to the longest shorter prefix of the
 * marker that still matches the end of the input, so for example "**" followed by "/" still finds "*" "/".
 *
 * @param marker The zero terminated marker string we are looking for. An empty marker never matches.
 * @param matched How many leading characters of marker are matched right now. Must not exceed strlen(marker).
 * @param c The next input character.
 * @returns The new number of matched characters (strlen(marker) means the whole marker was just read).
 */
static inline int slc_marker_advance(const char *marker, int matched, char c) {
    if ((c == 0) || (matched < 0)) {
        return 0;
    }
    for (int keep = matched; keep >= 0; --keep) {
        /* marker[0..keep) has to equal the last keep matched characters and marker[keep] has to be c */
        if ((marker[keep] == c) && (memcmp(marker, marker + (matched - keep), (size_t)keep) == 0)) {
            return keep + 1;
        }
    }
    return 0;
}

/**
 * Handles state change into single-line and multi-line comments.
 *
 * The state changes on the character that completes the marker, so the first character after the marker is
 * already seen in the comment state. An empty ("") marker disables that kind of comment.
 * When both markers get completed by the same character, the single-line comment wins.
 *
 * @param current_state The state to return when no comment starts at c.
 * @param c The current character.
 * @param singleline_comment The marker that starts a single-line comment. Can be "" (no NULL).
 * @param multiline_comment_opener The marker that starts a multi-line comment. Can be "" (no NULL).
 * @param comment_i In-out: how many characters of singleline_comment are matched so far.
 * @param multiline_i In-out: how many characters of multiline_comment_opener are matched so far.
 * @returns SLC_COMMENT or SLC_MULTILINE_COMMENT when c completed a marker (both indices are reset to 0 then),
 *          otherwise current_state.
 */
static inline slc_state slc_comment_statechange_in(
        slc_state current_state,
        char c,
        const char *singleline_comment,
        const char *multiline_comment_opener,
        int *comment_i,
        int *multiline_i) {
    int single_next = slc_marker_advance(singleline_comment, *comment_i, c);
    int multi_next = slc_marker_advance(multiline_comment_opener, *multiline_i, c);

    /* Check if we have finished reading one of the markers */
    if ((single_next > 0) && (singleline_comment[single_next] == 0)) {
        *comment_i = 0;
        *multiline_i = 0;
        return SLC_COMMENT;
    }
    if ((multi_next > 0) && (multiline_comment_opener[multi_next] == 0)) {
        *comment_i = 0;
        *multiline_i = 0;
        return SLC_MULTILINE_COMMENT;
    }

    /* Still in progress (or restarted) */
    *comment_i = single_next;
    *multiline_i = multi_next;
    return current_state;
}

/**
 * Handles state change out of a multi-line comment.
 *
 * The state changes on the character that completes the closer, so the first character after the closer is
 * already seen in the start state. An empty ("") closer ends the comment at the very first character.
 *
 * @param current_state The state to return while the comment is not closed.
 * @param c The current character.
 * @param multiline_comment_closer The marker that ends a multi-line comment (no NULL).
 * @param multiline_i In-out: how many characters of multiline_comment_closer are matched so far.
 * @returns SLC_START when c completed the closer (the index is reset to 0 then), otherwise current_state.
 */
static inline slc_state slc_multiline_comment_statechange_out(
        slc_state current_state,
        char c,
        const char *multiline_comment_closer,
        int *multiline_i) {
    if (multiline_comment_closer[0] == 0) {
        *multiline_i = 0;
        return SLC_START;
    }
    int next = slc_marker_advance(multiline_comment_closer, *multiline_i, c);
    /* Check if we have finished reading the closer */
    if (multiline_comment_closer[next] == 0) {
        *multiline_i = 0;
        return SLC_START;
    }
    *multiline_i = next;
    return current_state;
}
2024-09-28 12:57:01 +02:00
/** Handles state change into word definitions */
2024-09-25 23:18:30 +02:00
static inline slc_state slc_def_name_statechange (
slc_state current_state ,
2024-09-28 12:57:01 +02:00
char prevc ,
2024-09-25 23:18:30 +02:00
char c ,
2024-09-26 10:42:36 +02:00
const char * prefix ,
int * prefix_i ) {
2024-09-28 12:57:01 +02:00
2025-01-03 13:37:29 +01:00
/* If not a whitespace currently, check the prefix and check the ending ':' */
2024-09-28 12:57:01 +02:00
if ( ! isspace ( c ) ) {
/* Early exit for not-a-definition sub-state */
if ( * prefix_i < 0 ) {
return current_state ;
}
/* Read prefix */
if ( prefix [ * prefix_i ] ! = 0 ) {
if ( prefix [ * prefix_i ] = = c ) {
+ + ( * prefix_i ) ;
} else {
* prefix_i = - 1 ;
}
2024-09-28 13:02:55 +02:00
} else {
if ( c = = ' : ' ) {
2025-01-03 13:37:29 +01:00
* prefix_i = 0 ; /* XXX: restarts scana for finnding it in the next cases */
return SLC_DEF_NAME ;
2024-09-28 23:21:49 +02:00
} else {
* prefix_i = - 1 ;
2024-09-28 13:02:55 +02:00
}
2024-09-28 12:57:01 +02:00
}
return current_state ;
} else {
2024-09-28 13:02:55 +02:00
/* Not Found: Probably a word occurence */
* prefix_i = 0 ; /* XXX: restarts scan */
return current_state ;
2024-09-28 12:57:01 +02:00
}
2024-09-25 23:18:30 +02:00
}
2024-09-28 12:57:01 +02:00
/** Handles state change into word occurences - shared wordname with def_name_statechange! */
2024-09-26 14:14:23 +02:00
static inline slc_state slc_word_statechange (
2024-09-19 11:36:04 +02:00
slc_state current_state ,
char c ,
2024-09-26 11:04:39 +02:00
int * wordname_i ,
const char * wordname ) {
2024-09-19 11:36:04 +02:00
// FIXME: Implement
return current_state ;
}
2024-09-28 23:31:00 +02:00
/**
 * Resets the scanner variables and puts the state-engine back into SLC_START.
 *
 * Expects these variables in the scope of the use: comment_i, multiline_i, prefix_i, wordname_i, wordname and state.
 * Expands to one braced block, so it can be used with or without a following semicolon.
 */
#define SET_SLC_START { \
    comment_i = 0; \
    multiline_i = 0; \
    prefix_i = 0; \
    wordname_i = 0; \
    wordname[0] = 0; \
    state = SLC_START; \
}
2024-09-13 18:02:54 +02:00
/**
* This function runs the main slc engine over a snippet of code .
*
2024-09-16 22:30:29 +02:00
* The code_src is the entry of what we start interpreting , but you can do ( un ) buffered reads in it
2024-09-16 21:22:44 +02:00
* because we will use session_storage to store the source code data into memory while processing . . .
2024-09-16 22:30:29 +02:00
* Rem . : This also helps with the " include " directives using io_connector and do some kind of recursion maybe .
* Rem . : This architecture also let us try to immediately " threaded_code " optimize the newly added word definition ,
* which is possible if it relies on no forward references - this is the most happy case in my opinion !
*
* The session_storage is where we process the words data ( possibly introducing threaded code as early as possible ) .
* This should also let the code " allocate " some random memory too and get an offset for it ( there will be no other way ) .
2024-09-16 21:22:44 +02:00
*
* The symbol_table not only store " words " , but direct access offsets for :
*
2024-09-16 22:30:29 +02:00
* - words themselves ( direct offset )
* - variables of the words ( direct offset )
2024-09-16 21:22:44 +02:00
*
2024-09-16 22:30:29 +02:00
* The code stack is what the interpreter uses for return addresses , the data stack however is FORTH - style usual stack .
* The " insert_stack " collects things that we will read instead of reading the code_src AFTER a return from current word .
* A \ 0 value should be there at the end of it until we RETURN from the word - from when we start processing .
*
* The io_connector is needed so that the engine have connection for temporary and real files and things simulating those .
*
* The prefix , ender and varprefix strings really just help when you use SLC to define a compiler and you need these .
*
2024-09-16 21:22:44 +02:00
* @ param code_src The input source code to interpret / run . Code ends either with \ 0 or EOF .
2024-09-16 22:30:29 +02:00
* @ param session_storage Can allocate and use arbitrary memory with this .
2024-09-16 21:22:44 +02:00
* @ param symbol_table The symbol table to use while processing .
* @ param code_stack The code stack ( return addresses ) to use .
2024-09-18 16:13:49 +02:00
* @ param nesting_stack The stack used for the state - machine of the nested words .
2024-09-16 21:22:44 +02:00
* @ param data_stack The data stack ( forth - like stack ) to use .
* @ param insert_stack Used for temporarily expanding the input stream ( one word level above current ) with further words .
2024-09-13 18:02:54 +02:00
* @ param io_connector The engine uses this to open / close pipes / files and write / read them .
2024-09-19 11:36:04 +02:00
* @ param singleline_comment Like " // " - the character string that makes the rest of the line being comment . Can be " " ( no NULL ) .
* @ param multiline_comment_opener The character string that starts a multiline comment . Like / and * for C . Can be " " ( no NULL ) .
* @ param multiline_comment_closer The character string that ends a multiline comment . Like * and / for C . Can be " " ( no NULL ) .
2024-09-16 21:22:44 +02:00
* @ param prefix The prefix added to the lookup of built - ins . Useful when you write a compiler with SLC . Defaults to " " ( empty ) .
* @ param ender The character string that ends a word definition . Defaults to " ; " .
* @ param varprefix The character string that prefixes variable declarations . Defaults to " @ " .
2024-09-13 18:02:54 +02:00
*/
2024-09-16 20:12:23 +02:00
static inline void slc (
2024-09-18 16:13:49 +02:00
coderead code_src ,
session session_storage ,
sym symbol_table ,
stack code_stack ,
stack nesting_stack ,
stack data_stack ,
stack insert_stack ,
ioconn io_connector ,
const char * singleline_comment ,
const char * multiline_comment_opener ,
const char * multiline_comment_closer ,
const char * prefix ,
const char * ender ,
const char * varprefix ) {
2024-09-25 23:37:57 +02:00
2024-09-26 13:05:38 +02:00
char last_is_endl = 0 ;
int line = 0 ;
2024-09-26 13:25:58 +02:00
int col = - 1 ;
char is_indenting = 1 ;
int indent = 0 ;
2024-09-19 11:36:04 +02:00
2024-09-28 23:21:49 +02:00
slc_state state ;
int comment_i ;
int multiline_i ;
int prefix_i ;
int wordname_i ;
2024-09-28 12:57:01 +02:00
char wordname [ SL_MAX_WORD_NAME + 1 ] ;
2024-09-25 23:46:27 +02:00
2024-09-28 23:31:00 +02:00
SET_SLC_START
2024-09-28 23:21:49 +02:00
2024-09-28 12:57:01 +02:00
char prevc = 0 ;
2024-09-26 11:04:39 +02:00
char c = 0 ;
2024-09-19 11:36:04 +02:00
while ( ( ( c = code_src ( ) ) ! = 0 ) ) {
2024-09-26 13:25:58 +02:00
/* Handle lines and columns, parts of indenting */
if ( endsline ( c ) ) {
2024-09-28 12:57:01 +02:00
/* Handles \n, \r, \r\n and \n\r this way and counts empty lines properly */
2024-09-28 23:21:49 +02:00
/* De-Morgan (!a || b) == (a => b) so (last_is_endl => (prevc == c)) */
if ( ! last_is_endl | | ( prevc = = c ) ) {
2024-09-26 13:05:38 +02:00
+ + line ;
2024-09-26 13:25:58 +02:00
col = 0 ;
/* Indent part */
is_indenting = 1 ;
indent = 0 ;
2024-09-26 13:05:38 +02:00
}
last_is_endl = 1 ;
2024-09-26 13:25:58 +02:00
} else { last_is_endl = 0 ; + + col ; }
/* Handle indenting */
if ( ( c = = ' ' ) | | ( c = = ' ' ) ) {
indent + = is_indenting ;
} else {
/* Defends against state-changer endline */
is_indenting = endsline ( c ) ;
}
2024-09-25 23:37:57 +02:00
process_char :
2024-09-26 12:32:34 +02:00
# ifdef SLC_DEBUG
2024-09-26 13:25:58 +02:00
fprintf ( stderr , " %c state:%c @ line:%d col:%d indent:%d \n " , c , state , line , col , indent ) ;
2024-09-26 12:32:34 +02:00
# endif
2024-09-19 11:36:04 +02:00
switch ( state ) {
case SLC_START :
/* state -> comment | multiline_comment */
state = slc_comment_statechange_in (
state ,
c ,
singleline_comment ,
multiline_comment_opener ,
& comment_i ,
& multiline_i ) ;
2024-09-26 10:42:36 +02:00
2024-09-19 11:36:04 +02:00
/* state -> def_name */
if ( state = = SLC_START ) {
2024-09-25 23:18:30 +02:00
state = slc_def_name_statechange (
2024-09-19 11:36:04 +02:00
state ,
2024-09-28 12:57:01 +02:00
prevc ,
2024-09-19 11:36:04 +02:00
c ,
2024-09-26 10:42:36 +02:00
prefix ,
& prefix_i ) ;
2024-09-26 14:14:23 +02:00
}
2024-09-26 10:42:36 +02:00
2024-09-26 14:14:23 +02:00
/* state -> word_name | word_var */
2024-09-26 11:04:39 +02:00
if ( state = = SLC_START ) {
2024-09-26 14:14:23 +02:00
state = slc_word_statechange (
2024-09-25 23:18:30 +02:00
state ,
c ,
2024-09-26 11:04:39 +02:00
& wordname_i ,
wordname ) ;
2024-09-26 14:14:23 +02:00
}
2024-09-25 23:37:57 +02:00
2024-09-19 11:36:04 +02:00
break ;
case SLC_COMMENT :
2024-09-28 23:31:00 +02:00
if ( endsline ( c ) ) { SET_SLC_START }
2024-09-19 11:36:04 +02:00
break ;
case SLC_MULTILINE_COMMENT :
2024-09-26 14:14:23 +02:00
state = slc_multiline_comment_statechange_out (
state ,
c ,
multiline_comment_closer ,
& multiline_i ) ;
2024-09-28 23:31:00 +02:00
if ( state = = SLC_START ) { SET_SLC_START }
2024-09-19 11:36:04 +02:00
break ;
case SLC_DEF_NAME :
2025-01-03 13:37:29 +01:00
// TODO: Legyen nekem is fordítási idejű szó és sima!!! Optimalizációban sokat segít
// : iff
// ()
// asm{cmp eax, 0}
// asm{jnz else@}
// {}
// asm(goto endif@)
// asm{else@:}
// []
// asm{endif@:} // @ az asm asm-nél jelentse: valami hash...
// end
//
//
// De ezt vajon lehet? Szerintem ez ne lehessen / nem kell...
//
// : iff_not ()[]{}
// ()
// asm{cmp eax, 0}
// asm{jnz else@}
// []
// asm{else@:} // FIXME: nincs endif?
// {}
// end
//
// Called like - v1:
//
// #include "iff.slc"
//
// iff(4 5 <) {
// 1 print
// } [
// 0 print
// ]
//
// Called like - v2:
//
// #include "iff2.slc"
//
// iff_not(4 5 <) {
// 1 print
// } [
// 0 print
// ]
//
// if(1) {
// A()
// } else {
// B()
// }
//
// Kontext-függő stack kell legyen? case-t csak switch-en belül írhatok (vagy mást jelent stb.)
//
// Switch-case pl.:
//
// // case (5) // itt nem lenne jó!!!!
// switch(valami) {
// case(0) { // csak itt
//
// } [fallthrough]
// case(42) {
// }
// }
//
// Emiatt a zárójelezős stack-re nem csak zárójelek kerüljenek!!!
//
// NIL
// switch(
// switch{ // Ezen a ponton kellhet ":@ switch{case" szót keresni
// switch{case( // TODO: Ez biztos nem kell!
// switch{case{
//
// : switch
// ()
//
// : switch@case // TODO: jobb syntax? Vagy kell-e oda a zárójel? Vagy csak "gyereke"? (stack helyett csak uccsó tárolva...???? Esetleg utolsó megelőző testvér is?
// ()
// iff(=) {
// {}
// } // TODO: Ha van fallthrough GOTO-val
//
// Branch table-s fordítása is lehetséges két pass-ban pl...
//
// : switch
// ()
// generate_ifs_for_cases@inherits // "megörökli" az én kontextusom... tehát az én (..) [..] és {..}
// // esetleg lehet neki is mondjuk {..} és csak az íródik felül.. ESETLEG!
//
//
// Ehhez képest factor-ban:
//
// [printtrue] [printfalse] 4 5 < if
//
// FORTH-ban:
//
// 12 = IF FILL-CARTON THEN
//
// Magyar forth-ba:
//
// 12 = VOLT_AKKOR KARTON_UJRATOLTES TORTENT
//
// :: VOLT_AKKOR IF
//
// :: IF
// asm{cmp eax, 0}
// asm{jnz else@}
// DECODE_WORD
// CALL_WORD
// asm{else@:}
// ;
//
// While és do-while is megy így....
//
// XXX: De a for-ciklus??? Problémás...
//
// for(int i = 0; i < 50; ++i) {
// ....
// }
//
// Szavak:
// - for(
// int
// i
// =
// 0;
// i
// <
// 50;
// ++i
// ) {
// ...
// }
//
// CSV(-szerű) cucc? Megoldás:
//
//
// :: for
// (
// ';'
// delim_parse // max 3 elemre parzol (vagy kevesebb)
// (
// parse_from_stack
// )
// [
// asm("ciklusfelt@:")
// parse_from_stack
// asm{cmp eax, 0}
// asm{jz ciklusend@}
// asm(jmp ciklusmag@)
// ]
// {
// asm("ciklusnovel@:")
// parse_from_stack
// asm("jmp ciklusfelt@")
// }
// // vissza is ad egy értéket a stackre, hogy "van-e még" adat? => tudok loop-olni (de max 1-2-3 féle kindra)
// )
// {
// asm("ciklusmag@:")
// parse_from_stack
// asm(goto ciklusnovel@)
// asm("ciklusend@:")
// }
//
2024-09-19 11:36:04 +02:00
break ;
case SLC_DEF_VAR :
break ;
case SLC_DEF_BODY :
break ;
case SLC_WORD_NAME :
2025-01-03 13:37:29 +01:00
// Words end with:
// - any parentheses
// - whitespaces:
// -- followed by parentheses
// -- followed by word
// - end of file
// return 42;
// if (..) [] {..};
// if(1 + a < 42)
2024-09-19 11:36:04 +02:00
break ;
case SLC_WORD_VAR :
break ;
2024-09-28 12:57:01 +02:00
case SLC_SYN_ERROR :
/* Recover from slc syntax errors at endlines for now */
2024-09-28 23:31:00 +02:00
if ( endsline ( c ) ) { SET_SLC_START }
2024-09-28 12:57:01 +02:00
break ;
2024-09-19 11:36:04 +02:00
}
2024-09-28 12:57:01 +02:00
prevc = c ;
2024-09-19 11:36:04 +02:00
}
2024-09-13 18:02:54 +02:00
}
2024-09-07 21:53:56 +02:00
2025-01-03 13:37:29 +01:00
/*
* FORTH mini example - threaded code example...
*
* : addmul ( a b c - - d )
* +
* *
* ;
*
* 4 5 6 addmul
* 1 2 3 addmul
*
* - >
* goto main
* a :
* ' c ' ' + '
* ' c ' ' * '
* ' r '
*
* main :
*
* ' p ' 4
* ' p ' 5
* ' p ' 6
* ' c ' ' a '
*
* ' p ' 1
* ' p ' 2
* ' p ' 3
* ' c ' ' a '
*
*/
2024-09-14 12:59:14 +02:00
# endif /* SLC_H */