diff --git a/.gitignore b/.gitignore index 7f58cd346..30c8e0e58 100644 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,9 @@ Mkfile.old dkms.conf cmake-build-debug/ -.idea/ \ No newline at end of file +.idea/ +/resources/grammar.tab.c +/resources/grammar.vcg +/resources/lex.yy.c +/resources/y.tab.c +/resources/y.tab.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f27bfc4ac..5a8eb576b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ add_executable(c3c src/compiler/llvm_codegen_type.c src/compiler/llvm_codegen_function.c src/build/builder.c - src/utils/toml.c src/build/project.c src/build/build_internal.h src/compiler/sema_name_resolution.c) + src/utils/toml.c src/build/project.c src/build/build_internal.h src/compiler/sema_name_resolution.c src/target_info/target_info.c) target_compile_options(c3c PRIVATE -Wimplicit-int -Werror -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-unused-parameter) diff --git a/resources/c3.l b/resources/c3.l index cf4ba89ac..0d3d0bdea 100644 --- a/resources/c3.l +++ b/resources/c3.l @@ -75,7 +75,6 @@ void comment(void); [_]*[A-Z]{UA}* { count(); return(CONST_IDENT); } [_]*[A-Z]{UA}*[a-z]{AN}* { count(); return(TYPE_IDENT); } [_]*[a-z]{AN}* { count(); return(IDENT); } -@{L}+[!]? { count(); return(AT_IDENT); } ${L}+ { count(); return(CT_IDENT); } #{L}+ { count(); return(HASH_IDENT); } 0[xX]{H}+{IS}? 
{ count(); return(CONSTANT); } @@ -119,6 +118,7 @@ L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); } ":" { count(); return(':'); } "=" { count(); return('='); } "(" { count(); return('('); } +"@" { count(); return(AT); } ")" { count(); return(')'); } ("[") { count(); return('['); } ("]") { count(); return(']'); } diff --git a/resources/examples/acornvm/avm_array.c3 b/resources/examples/acornvm/avm_array.c3 new file mode 100644 index 000000000..d23a62bca --- /dev/null +++ b/resources/examples/acornvm/avm_array.c3 @@ -0,0 +1,268 @@ +module acorn::arr; + +/** Implements arrays: variable-sized, ordered collections of Values (see avm_array.h) + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + +/* Return a new Array, allocating len slots for Values. */ +func Value new(Value th, Value *dest, Value type, AuintIdx len) +{ + // Create an array object + ArrInfo* val = @cast(mem::new(th, ArrEnc, sizeof(ArrInfo)), ArrInfo*); + val.flags1 = 0; // Initialize Flags1 flags + val.type = type; + val.avail = len; + val.size = 0; + val.arr = nil; + if (len > 0) mem::reallocvector(th, val.arr, 0, len, Value); + return *dest = @cast(val, Value); +} + +/* Return a new Array, allocating len slots for Values. 
*/ +func Value newClosure(Value *th, Value *dest, Value type, AuintIdx len) +{ + // Create an array object + ArrInfo* val = @cast(mem::new(th, ArrEnc, sizeof(ArrInfo)), ArrInfo*); + val.flags1 = TypeClo; // Initialize Flags1 flags + val.type = type; + val.avail = len; + val.size = 0; + val.arr = NULL; + if (len > 0) mem::reallocvector(th, val.arr, 0, len, Value); + return *dest = @cast(val, Value); +} + +/* Return 1 if the value is an Array, otherwise 0 */ +func int Value.isArr(Value* val) +{ + return val.isEnc(ArrEnc); +} + +/* Return 1 if the value is an Array, otherwise 0 */ +func int Value.isClosure(Value* val) +{ + return val.isEnc(ArrEnc) && arr_info(val)->flags1 & TypeClo; +} + +private func ArrInfo.fill(ArrInfo* a, AuintIdx start, AuintIdx end, Value value) @inline +{ + for (AuintIdx i = start; i < end; i++) a.arr[i] = value; +} + +/* Ensure array has room for len Values, allocating memory as needed. + * Allocated space will not shrink. Changes nothing about array's contents. */ +func void makeRoom(Value th, Value arr, AuintIdx len) +{ + ArrInfo* a = arr_info(arr); + if (len > a.avail) + { + mem::gccheck(th); // Incremental GC before memory allocation events + mem::reallocvector(th, a.arr, a.avail, len, Value); + a.avail = len; + } +} + +/** + * Set the number of elements in the array, growing it if needed. + * If less than current number array size, array is not shrunk. + */ +func void setSize(Value th, Value arr, AuintIdx len) +{ + ArrInfo* a = arr_info(arr); + AuintIdx size = arr_size(arr); + if (len > size) makeRoom(th, arr, len); + arr_size(arr) = len; +} + +/** + * Force allocated and used array to a specified size, truncating + * or expanding as needed. Growth space is initialized to aNull. 
+ * @require val.isArr() + */ +func void forceSize(Value th, Value val, AuintIdx len) +{ + ArrInfo *arr = arr_info(val); + + // Expand or contract allocation, as needed + if (len != arr->avail) + { + mem::gccheck(th); // Incremental GC before memory allocation events + mem::reallocvector(th, arr.arr, 0, len, Value); + arr.avail = len; + } + + // Fill growth area with nulls + arr.fill(arr.size, len, aNull); + arr.size = len; +} + +/** + * Retrieve the value in array at specified position. + * @require arr.isArr() + */ +func Value get(Value th, Value arr, AuintIdx pos) +{ + ArrInfo* a = arr_info(arr); + return pos >= a.size ? aNull : a.arr[pos]; +} + +/** + * Put val into the array starting at pos. + * This can expand the size of the array. + * @require arr.isArr() + */ +func void set(Value th, Value arr, AuintIdx pos, Value val) +{ + ArrInfo* a = arr_info(arr); + + // Grow, if needed + if (pos + 1 >= a.avail) makeRoom(th, arr, pos + 1); + // Fill with nulls if pos starts after end of array + if (pos >= a.size) a.fill(a.size, pos, aNull); + // Perform copy + a.arr[pos] = val; + mem::markChk(th, arr, val); + // If final fill is past array size, reset size higher + if (pos + 1 >= a.size) a.size = pos + 1; +} + +/** + * Append val to the end of the array (increasing array's size). + * @require arr.isArr() + */ +func void add(Value th, Value arr, Value val) +{ + ArrInfo *a = arr_info(arr); + AuintIdx sz = arr_size(arr); + + // Double size, if more space is needed + if (sz + 1 > a.avail) makeRoom(th, arr, sz + (sz > 0 ? sz : 1)); + + // Append value + a.arr[sz] = val; + mem::markChk(th, arr, val); + a.size++; +} + +/** + * Propagate n copies of val into the array starting at pos. + * This can expand the size of the array. 
+ * @require arr.isArr() + */ +func void repeat(Value th, Value arr, AuintIdx pos, AuintIdx n, Value val) +{ + ArrInfo* a = arr_info(arr); + + // Prevent unlikely overflow + if (pos +% n < n) return; + + // Grow, if needed + if (pos + n >= a.avail) makeRoom(th, arr, pos + n); + // Fill with nulls if pos starts after end of array + if (pos >= a.size) a.fill(a.size, pos, aNull); + // Perform repeat copy + a.fill(pos, pos + n, val); + mem::markChk(th, arr, val); // only need to check once + // If final fill is past array size, reset size higher + if (pos + n >= a.size) a.size = pos + n; +} + +/** + * Delete n values out of the array starting at pos. + * All values after these are preserved, essentially shrinking the array. + * @require arr.isArr() + */ +func void del(Value th, Value arr, AuintIdx pos, AuintIdx n) +{ + ArrInfo *a = arr_info(arr); + + // Nothing to delete (or overflow) + if (pos >= a.size || pos +% n < n) return; + + // Copy high end down over deleted portion + if (pos + n < a.size) + { + memmove(&a.arr[pos], &a.arr[pos + n], (a.size - pos - n) * sizeof(Value)); + } + else + { + n = a.size - pos; // Clip n to end of array, if too large + } + a.size -= n; // Adjust size accordingly +} + +/** + * Insert n copies of val into the array starting at pos, expanding the array's size. + * @require arr.isArr() + */ +func void ins(Value th, Value arr, AuintIdx pos, AuintIdx n, Value val) +{ + ArrInfo *a = arr_info(arr); + + // Prevent unlikely overflow + if (a.size +% n < n) return; + + // Ensure array is large enough + if (n + a.size >= a.avail) makeRoom(th, arr, n + a.size); + + // Move values up to make room for insertions + if (pos <= a.size) memmove(&a.arr[pos+n], &a.arr[pos], (a.size - pos) * sizeof(Value)); + a.size += n; + + // Do any needed null fill plus the repeat copy + repeat(th, arr, pos, n, val); +} + +/** + * Copy n2 values from arr2 starting at pos2 into array, replacing the n values in first array starting at pos. 
+ * This can increase or decrease the size of the array. arr and arr2 may be the same array. + * @require arr.isArr() + */ +func void sub(Value th, Value arr, AuintIdx pos, AuintIdx n, Value arr2, AuintIdx pos2, AuintIdx n2) +{ + ArrInfo *a = arr_info(arr); + + // Prevent unlikely overflow + if ((a.size - n) +% n2 < n2) return; + + // Ensure array is large enough + if (a.size - n + n2 > a.avail) makeRoom(th, arr, a.size - n + n2); + + // Adjust position of upper values to make precise space for copy + if (n != n2 && pos < a.size) memmove(&a.arr[pos + n2], &a.arr[pos + n], (a.size - pos - n) * sizeof(Value)); + + // Fill with nulls if pos starts after end of array + if (pos > a->size) a.fill(a.size, pos, aNull); + + // Perform copy + if (arr2 && arr2.isPtr()) memmove(&a.arr[pos], &arr_info(arr2).arr[pos2], n2 * sizeof(Value)); + for (AintIdx i = n2 - 1; i >= 0; i--) + { + mem::markChk(th, arr, a.arr[pos+i]); + } + + a.size += n2 - n; +} + +/* Serialize an array's contents to indented text */ +func void serialize(Value th, Value str, int indent, Value arr) +{ + // TODO + ArrInfo *a = arr_info(arr); + AuintIdx sz = arr_size(arr); + string typ = arr_info(arr).flags1 & TypeClo ? "+Closure" : "+List"; + + strAppend(th, str, typ, strlen(typ)); + for (AuintIdx i = 0; i < sz; i++) + { + strAppend(th, str, "\n", 1); + int ind = indent+1; + while (ind--) strAppend(th, str, "\t", 1); + serialize(th, str, indent+1, a.arr[i]); + } +} + diff --git a/resources/examples/acornvm/avm_memory.c3 b/resources/examples/acornvm/avm_memory.c3 new file mode 100644 index 000000000..9c84a145b --- /dev/null +++ b/resources/examples/acornvm/avm_memory.c3 @@ -0,0 +1,164 @@ +/** Memory allocation and garbage collection + * @file + * + * This source file is part of avm - Acorn Virtual Machine. 
+ * See Copyright Notice in avm.h +*/ + +module acorn::mem; + +/** Garbage-collection savvy memory malloc, free and realloc function + * - If nsize==0, it frees the memory block (if non-NULL) + * - If ptr==NULL, it allocates a new uninitialized memory block + * - Otherwise it changes the size of the memory block (and may move its location) + * It returns the location of the new block or NULL (if freed). */ +func void* gcrealloc(Value th, void *block, Auint osize, Auint nsize) +{ + Value newblock; + + // Check consistency of block and osize (both must be null or specified) + Auint realosize = block ? osize : 0; + assert((realosize == 0) == (block == nil)); + + // Allocate/free/resize the memory block + newblock = @cast(frealloc(block, nsize), Value); + + $if (defined(MEMORYLOG)) + { + if (nsize==0) + { + vmLog("Freeing %p size %d", block, osize); + } + else + { + vmLog("Allocating %p from %p for %d", newblock, block, nsize); + } + } + + // If alloc or resize failed, compact memory and try again + if (newblock == nil && nsize > 0) + { + // realloc cannot fail when shrinking a block + gcfull(th, 1); // try to free some memory... + newblock = @cast(frealloc(block, nsize), Value); // try again + if (newblock == nil) + { + logSevere("Out of memory trying allocate or grow a memory block."); + } + } + + // Make sure it worked, adjust GC debt and return address of new block + assert((nsize == 0) == (newblock == nil)); + vm(th).totalbytes += nsize - realosize; + return newblock; +} + +func void* gcreallocv(Value th, void* block, Auint osize, Auint nsize, Auint esize) +{ + // Ensure we are not asking for more memory than available in address space + // If we do not do this, calculating the needed memory will overflow + if (nsize+1 > ~((Auint)0) / esize) + { + logSevere("Out of memory trying to ask for more memory than address space has."); + } + return gcrealloc(th, block, osize*esize, nsize*esize); +} + +/** General-purpose memory malloc, free and realloc function. 
+ * - If size==0, it frees the memory block (if non-NULL) + * - If block==NULL, it allocates a new uninitialized memory block + * - Otherwise it changes the size of the memory block (and may move its location) + * It returns the location of the new block or NULL (if freed). + **/ +func void* frealloc(void* block, Auint size) +{ + if (size == 0) + { + free(block); + return NULL; + } + else + { + return realloc(block, size); + } +} + +macro type($type) @amalloc($type) +{ + return @cast(mem_frealloc(NULL, sizeof($type)), $type); +} + + +/* Create a new pointer object (with given encoding and size) and add to front of *list. */ +MemInfo* new(Value th, int enc, Auint sz) +{ + // Perform garbage collection before a memory allocation + $if (defined(AVM_GCHARDMEMTEST)) + { + // force a full GC to see if any unattached objects die + if (vm(th).gcrunning) gcfull(th, 1); + } + $else + { + gccheck(th); // Incremental GC before memory allocation events + } + vm(th).gcnbrnew++; + MemInfo* o = (MemInfo*) (char *) gcrealloc(th, nil, 0, sz); + o.marked = vm(th).currentwhite & WHITEBITS; + o.enctyp = enc; + + // Use the standard list for collectable objects + MemInfo **list = &vm(th).objlist; + o.next = *list; + *list = o; + return o; +} + +/** + * Create a new pointer object (with given encoding and size). 
+ * Caller must add itself to its own private list + */ +func MemInfo* newnolink(Value th, int enc, Auint sz) +{ + // Perform garbage collection before a memory allocation + $if (defined(AVM_GCHARDMEMTEST)) + { + // force a full GC to see if any unattached objects die + if (vm(th)->gcrunning) gcfull(th, 1); + } + $else + { + gccheck(th); // Incremental GC before memory allocation events + } + vm(th)->gcnbrnew++; + // Allocate and initialize + MemInfo *o = (MemInfo*) (char *) gcrealloc(th, NULL, 0, sz); + o.marked = vm(th)->currentwhite & WHITEBITS; + o.enctyp = enc; + return o; +} + +/* double size of vector array, up to limits */ +func void growaux_(Value th, void *block, AuintIdx *size, AuintIdx size_elems, AuintIdx limit) +{ + void* newblock; + AuintIdx newsize; + // cannot double it? + if (*size >= limit / 2) + { + // cannot grow even a little? + if (*size >= limit) logSevere("Out of memory trying to grow a vector array."); + newsize = limit; /* still have at least one free place */ + } + else + { + newsize = (*size) * 2; + // minimum size + if (newsize < MINSIZEARRAY) newsize = MINSIZEARRAY; + } + newblock = gcreallocv(th, block, *size, newsize, size_elems); + // update only when everything else is OK + *size = newsize; + return newblock; +} + diff --git a/resources/examples/acornvm/avm_stack.c3 b/resources/examples/acornvm/avm_stack.c3 new file mode 100644 index 000000000..1f3b0eac5 --- /dev/null +++ b/resources/examples/acornvm/avm_stack.c3 @@ -0,0 +1,520 @@ +module acorn::stack; +import acorn::sym; + +/** Implements the data stack that belongs to a thread. + * A thread has one data stack which is an allocated array of Values, initialized to 'null'. + * + * The stack implementation is optimized for lean performance first, as its functions + * are called several times for every method call. Therefore, stack indices are not checked for + * validity (except when running in debug mode, where invalid indices generate exceptions). 
+ * + * A current method's area of the data stack is bounded by pointers: + * - th(th)->curmethod->begin points to the bottom (at 0 index) + * - th(th)->stk_top points just above the last (top) value + * - th(th)->curmethod->end points just above last allocated value on stack for method + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + + +/* **************************************** + HELPER MACROS + ***************************************/ + +/** Size of the method's stack area: base to top */ +func AintIdx stkSz(Value th) @inline +{ + return th(th).stk_top - th(th).curmethod.begin; +} + +/** Is there room to increment stack top up by 1 and null it to ensure we do not mark it when making it available for a new value */ +#define stkCanIncTop(th) {assert((th(th)->stk_top+1 <= th(th)->curmethod->end) && "stack top overflow");*th(th)->stk_top=aNull;} + +/** Point to current method's stack value at position i. + * For a method: i=0 is self, i=1 is first parameter, etc. */ +func void Value.at(Value* th, AintIdx i) @inline +{ + @assert_exp(i >= 0 && i < stkSz(th), "invalid stack index"); + return &th(*th).curmethod.begin[i]; +} + +/* **************************************** + INDEX-ONLY STACK MANIPULATION + ***************************************/ + +/* Retrieve the stack value at the index. Be sure 0<= idx < top. + * Good for getting method's parameters: 0=self, 1=parm 1, etc. */ +func Value Value.getLocal(Value *th, AintIdx idx) +{ + return *th.at(idx); +} + +/* Put the value on the stack at the designated position. Be sure 0<= idx < top. 
*/ +func void Value.setLocal(Value th, AintIdx idx, Value val) +{ + *th.at(idx) = val; + mem::markChk(th, th, val); +} + +/* Copy the stack value at fromidx into toidx */ +func void Value.copyLocal(Value* th, AintIdx toidx, AintIdx fromidx) +{ + *th.at(toidx) = *th.at(fromidx); +} + +/** + * Remove the value at index (shifting down all values above it to top) + * @require stkSz(th) > 0 + */ +func void Value.deleteLocal(Value* th, AintIdx idx) +{ + Value* p = th.at(idx); + memmove(p, p + 1, sizeof(Value)*(stkSz(th) - idx - 1)); + th(*th).stk_top--; +} + +/** + * Insert the popped value into index (shifting up all values above it) + * @require stkSz(th) > 0 + */ +func void Value.insertLocal(Value *th, AintIdx idx) +{ + Value *p = th.at(idx); + Value val = *(th(*th).stk_top - 1); + memmove(p+1, p, sizeof(Value) * (stkSz(th) - idx - 1)); + *p = val; +} + + +/* **************************************** + TOP-BASED STACK MANIPULATION + ***************************************/ + +/* Push a value on the stack's top */ +func Value Value.pushValue(Value* th, Value val) +{ + stkCanIncTop(th); /* Check if there is room */ + *th(*th).stk_top++ = val; + mem::markChk(th, th, val); // Keep, if marked for deletion? + return val; +} + +/* Push and return the corresponding Symbol value for a 0-terminated c-string */ +func Value Value.pushSym(Value* th, string str) +{ + stkCanIncTop(th); /* Check if there is room */ + return sym::newSym(*th, th(*th).stk_top++, str); +} + +/* Push and return the corresponding Symbol value for a byte sequence of specified length */ +func Value Value.pushSyml(Value th, string str) +{ + stkCanIncTop(th); /* Check if there is room */ + return sym::newSym(*th, th(*th).stk_top++, str); +} + +/* Push and return a new String value */ +Value pushString(Value th, Value type, const char *str) +{ + stkCanIncTop(th); /* Check if there is room */ + return newStr(th, th(th)->stk_top++, (type==aNull)? 
vmlit(TypeTextm) : type, str, strlen(str)); +} + +/* Push and return a new String value of size with a copy of str bytes */ +Value pushStringl(Value th, Value type, const char *str, AuintIdx size) { + stkCanIncTop(th); /* Check if there is room */ + return newStr(th, th(th)->stk_top++, (type==aNull)? vmlit(TypeTextm) : type, str, size); +} + +/* Push and return a new typed CData value of size */ +Value pushCData(Value th, Value type, unsigned char cdatatyp, AuintIdx size, unsigned int extrahdr) { + stkCanIncTop(th); /* Check if there is room */ + return newCData(th, th(th)->stk_top++, type, cdatatyp, size, extrahdr); +} + +/* Push and return a new Array value */ +Value pushArray(Value th, Value type, AuintIdx size) { + stkCanIncTop(th); /* Check if there is room */ + return newArr(th, th(th)->stk_top++, (type==aNull)? vmlit(TypeListm) : type, size); +} + +/* Push and return a new Closure value. + Size is get and set methods plus closure variables, all pushed on stack */ +Value pushClosure(Value th, AintIdx size) { + Value closure; + assert(size>=2 && stkSz(th)>=size); // All closure variables should be on stack + stkCanIncTop(th); /* Check if there is room */ + closure = newClosure(th, th(th)->stk_top++, vmlit(TypeClom), size); + // Copy closure variables into closure + for (int i=0; istk_top-size-1+i)); + *(th(th)->stk_top-size-1) = closure; // move created closure down + th(th)->stk_top -= size; // pop off closure variables + return closure; +} + +/* Push a closure variable. */ +Value pushCloVar(Value th, AuintIdx idx) { + stkCanIncTop(th); /* Check if there is room */ + Value closure = *th(th)->curmethod->methodbase; + return *th(th)->stk_top++ = (isArr(closure) && idx0); // Must be at least one value to remove! 
+ Value closure = *th(th)->curmethod->methodbase; + if (isArr(closure) && idxstk_top); + else + --th(th)->stk_top; +} + +/* Push and return a new hashed table value */ +Value pushTbl(Value th, Value type, AuintIdx size) { + stkCanIncTop(th); /* Check if there is room */ + return newTbl(th, th(th)->stk_top++, (type==aNull)? vmlit(TypeIndexm) : type, size); +} + +/* Push and return a new Type value */ +Value pushType(Value th, Value type, AuintIdx size) { + stkCanIncTop(th); /* Check if there is room */ + return newType(th, th(th)->stk_top++, (type==aNull)? vmlit(TypeObject) : type, size); +} + +/* Push and return a new Mixin value */ +Value pushMixin(Value th, Value type, Value inheritype, AuintIdx size) { + stkCanIncTop(th); /* Check if there is room */ + return newMixin(th, th(th)->stk_top++, (type==aNull)? vmlit(TypeObject) : type, inheritype, size); +} + +/* Push and return the value for a method written in C */ +Value pushCMethod(Value th, AcMethodp meth) +{ + stkCanIncTop(th); /* Check if there is room */ + return newCMethod(th, th(th)->stk_top++, meth); +} + +/* Push and return the VM's value */ +Value pushVM(Value th) { + stkCanIncTop(th); /* Check if there is room */ + return *th(th)->stk_top++ = vm(th); +} + +/* Push and return a new CompInfo value, compiler state for an Acorn method */ +Value pushCompiler(Value th, Value src, Value url) { + stkCanIncTop(th); /* Check if there is room */ + return newCompiler(th, th(th)->stk_top++, src, url); +} + +/* Push a value's serialized Text */ +Value pushSerialized(Value th, Value val) { + Value serstr = pushStringl(th, aNull, NULL, 16); + serialize(th, serstr, 0, val); + return serstr; +} + +/* Push and return the value of the named member of the table found at the stack's specified index */ +Value pushTblGet(Value th, AintIdx tblidx, const char *mbrnm) { + stkCanIncTop(th); /* Check if there is room */ + Value tbl = *stkAt(th, tblidx); + assert(isTbl(tbl)); + newSym(th, th(th)->stk_top++, mbrnm, strlen(mbrnm)); + 
return *(th(th)->stk_top-1) = tblGet(th, tbl, *(th(th)->stk_top-1)); +} + +/* Put the local stack's top value into the named member of the table found at the stack's specified index */ +void popTblSet(Value th, AintIdx tblidx, const char *mbrnm) { + assert(stkSz(th)>0); // Must be at least one value to remove! + Value tbl = *stkAt(th, tblidx); + assert(isTbl(tbl)); + stkCanIncTop(th); /* Check if there is room */ + newSym(th, th(th)->stk_top++, mbrnm, strlen(mbrnm)); + tblSet(th, tbl, *(th(th)->stk_top-1), *(th(th)->stk_top-2)); + th(th)->stk_top -= 2; // Pop key & value after value is safely in table +} + +/* Push and return the value held by the uncalled property of the value found at the stack's specified index. */ +Value pushProperty(Value th, AintIdx validx, const char *propnm) { + stkCanIncTop(th); /* Check if there is room */ + Value val = *stkAt(th, validx); + newSym(th, th(th)->stk_top++, propnm, strlen(propnm)); + return *(th(th)->stk_top-1) = getProperty(th, val, *(th(th)->stk_top-1)); +} + +/* Store the local stack's top value into the uncalled property of the type found at the stack's specified index + * Note: Unlike pushProperty, popProperty is restricted to the type being changed. */ +void popProperty(Value th, AintIdx typeidx, const char *mbrnm) { + assert(stkSz(th)>0); // Must be at least one value to remove! + Value tbl = *stkAt(th, typeidx); + stkCanIncTop(th); /* Check if there is room */ + newSym(th, th(th)->stk_top++, mbrnm, strlen(mbrnm)); + if (isType(tbl)) + tblSet(th, tbl, *(th(th)->stk_top-1), *(th(th)->stk_top-2)); + th(th)->stk_top -= 2; // Pop key & value after value is stored +} + +/* Push and return the value held by the perhaps-called property of the value found at the stack's specified index. + * Note: This lives in between pushProperty (which never calls) and getCall (which always calls). + * This calls the property's value only if it is callable, otherwise it just pushes the property's value. 
*/ +Value pushGetActProp(Value th, AintIdx selfidx, const char *propnm) { + stkCanIncTop(th); /* Check if there is room */ + Value self = *stkAt(th, selfidx); + newSym(th, th(th)->stk_top++, propnm, strlen(propnm)); + Value ret = *(th(th)->stk_top-1) = getProperty(th, self, *(th(th)->stk_top-1)); + + // If it is callable (e.g., a method), call it to get property value + if (canCall(ret)) { + // Finish setting up stack for call + stkCanIncTop(th); /* Check if there is room for self */ + *(th(th)->stk_top++) = self; + // Do the call, expecting (and returning) just one return value + switch (canCallMorC(th(th)->stk_top-2)? callMorCPrep(th, th(th)->stk_top-2, 1, 0) + : callYielderPrep(th, th(th)->stk_top-2, 1, 0)) { + case MethodBC: + methodRunBC(th); + break; + } + ret = *(th(th)->stk_top-1); + } + return ret; +} + +/* Store the local stack's top value into the perhaps-called property of the value found at the stack's specified index + * Note: This lives in between popProperty (which never calls) and setCall (which always calls). + * This calls the property's value only if it is a closure with a set method. + * Otherwise, it sets the property's value directly if (and only if) self is a type. */ +void popSetActProp(Value th, AintIdx selfidx, const char *mbrnm) { + assert(stkSz(th)>0); // Must be at least one value to remove! + Value self = *stkAt(th, selfidx); + stkCanIncTop(th); /* Check if there is room for symbol */ + newSym(th, th(th)->stk_top++, mbrnm, strlen(mbrnm)); + Value propval = getProperty(th, self, *(th(th)->stk_top-1)); + + // If it is callable (e.g., a method), call it to set property value + if (canCall(propval)) { + // Set up stack for call + stkCanIncTop(th); /* Check if there is room for self */ + Value set = getFromTop(th, 1); // the value to set + *(th(th)->stk_top-2) = propval; + *(th(th)->stk_top-1) = self; + *(th(th)->stk_top++) = set; + // Do the set call, expecting (and returning) just one return value + switch (canCallMorC(propval)? 
callMorCPrep(th, th(th)->stk_top-3, 1, 0) + : callYielderPrep(th, th(th)->stk_top-3, 1, 0)) { + case MethodBC: + methodRunBC(th); + break; + } + } + else { + // Only if self is a type, store value in property + if (isType(self)) + tblSet(th, self, *(th(th)->stk_top-1), *(th(th)->stk_top-2)); + th(th)->stk_top -= 2; // Pop key & value + } +} + +/* Push a copy of a stack's value at index onto the stack's top */ +func Value Value.pushLocal(Value* th, AintIdx idx) +{ + stkCanIncTop(th); /* Check if there is room */ + return *th(*th).stk_top++ = th.getLocal(idx); +} + +/** + * Pop a value off the top of the stack + * @require stkSz(th) > 0 + */ +func Value Value.popValue() +{ + return *--th(*th).stk_top; +} + +/** + * Pops the top value and writes it at idx. Often used to set return value + * @require stkSz(th) > 0, idx >= 0, idx < stkSz(th) - 1 + */ +func void Value.popLocal(Value* th, AintIdx idx) +{ + th.setLocal(idx, *(th(*th).stk_top - 1)); + // Pop after value is safely in Global + --th(*th).stk_top; +} + +/** + * Retrieve the stack value at the index from top. Be sure 0<= idx < top. + * @require idx >= 0, idx < stkSz(th) + */ +func Value Value.getFromTop(Value* th, AintIdx idx) +{ + return *th.at(stkSz(th) - idx - 1); +} + +/** + * Return number of values on the current method's stack + */ +func AuintIdx Value.getTop(Value* th) +{ + return cast(stkSz(th), AuintIdx); +} + +/** + * When index is positive, this indicates how many Values are on the method's stack. + * This can shrink the stack or grow it (padding with 'null's). + * A negative index removes that number of values off the top. 
+ */ +func void Value.setTop(Value* th, AintIdx idx) +{ + // TODO + Value *base = th(*th).curmethod.begin; + + // If positive, idx is the index of top value on stack + if (idx >= 0) + { + assert((base + idx <= th(th)->stk_last) && "stack top overflow"); // Cannot grow past established limit + while (th(th)->stk_top < base + idx) + *th(th)->stk_top++ = aNull; // If growing, fill with nulls + th(th)->stk_top = base + idx; + } + // If negative, idx is which Value from old top is new top (-1 means no change, -2 pops one) + else { + assert((-(idx) <= th(th)->stk_top - base) && "invalid new top"); + th(th)->stk_top += idx; // Adjust top using negative index + } +} + +/* **************************************** + GLOBAL VARIABLE ACCESS + ***************************************/ + +/** + * Push and return the symbolically-named global variable's value + * @require vm(*th).global.isTbl() + **/ +func Value Value.pushGloVar(Value* th, string var) +{ + // Check if there is room + stkCanIncTop(th); + Value val = sym::newSym(th, th(th).stk_top++, var); + mem::markChk(th, th, val); /* Mark it if needed */ + return *(th(*th).stk_top - 1) = tbl::get(th, vm(th).global, val); +} + +/** + * Alter the symbolically-named global variable to have the value popped off the local stack + * @require stkSz(th) > 0, vm(th).global.isTbl() + **/ +func void Value.popGloVar(Value* th, string var) +{ + // Check if there is room + stkCanIncTop(th); + Value val = sym::newSym(th, th(th).stk_top++, var); + tbl::set(th, vm(th).global, *(th(th)->stk_top-1), *(th(th)->stk_top-2)); + th(*th).stk_top -= 2; // Pop key & value after value is safely in Global +} + +/* Push the value of the current process thread's global variable table. 
*/ +Value pushGlobal(Value th) +{ + stkCanIncTop(th); /* Check if there is room */ + return *th(th).stk_top++ = vm(th).global; +} + +/** + * Internal function to re-allocate stack's size + * @require newsize <= STACK_MAXSIZE || newsize == STACK_ERRORSIZE + **/ +func void realloc(Value th, int newsize) +{ + // Incremental GC before memory allocation events + mem::gccheck(th); + Value *oldstack = th(th).stack; + int osize = th(th).size; // size of old stack + + // Ensure we not asking for more than allowed, and that old stack's values are consistent + assert(osize == 0 || ((th(th).stk_last - th(th).stack) == th(th)->size - STACK_EXTRA)); + + // Allocate new stack (assume success) and fill any growth with nulls + mem::reallocvector(th, th(th)->stack, th(th)->size, newsize, Value); + for (; osize < newsize; osize++) + { + th(th).stack[osize] = aNull; + } + + // Correct stack values for new size + th(th)->size = newsize; + th(th)->stk_last = th(th)->stack + newsize - STACK_EXTRA; + + // Correct all data stack pointers, given that data stack may have moved in memory + if (oldstack) { + CallInfo *ci; + AintIdx shift = th(th)->stack - oldstack; + th(th)->stk_top = th(th)->stk_top + shift; + for (ci = th(th)->curmethod; ci != NULL; ci = ci->previous) { + ci->end += shift; + ci->methodbase += shift; + ci->retTo += shift; + ci->begin += shift; + } + } +} + +/** Internal function to grow current method's stack area by at least n past stk_top. + May double stack instead. May abort if beyond stack max. */ +void stkGrow(Value th, AuintIdx extra) { + + // Already past max? Abort! + if (th(th)->size > STACK_MAXSIZE) { + logSevere("Acorn VM wants to overflow max stack size. 
Runaway recursive method?"); + return; + } + + // Calculate the max between how much we need (based on requested growth) + // and doubling the stack size (capped at maximum) + AuintIdx needed = (AuintIdx)(th(th)->stk_top - th(th)->stack) + extra + STACK_EXTRA; + AuintIdx newsize = 2 * th(th)->size; + if (newsize > STACK_MAXSIZE) + newsize = STACK_MAXSIZE; + if (newsize < needed) newsize = needed; + + // re-allocate stack (preserves contents) + if (newsize > STACK_MAXSIZE) { + stkRealloc(th, STACK_ERRORSIZE); // How much we give if asking for too much + } + else + stkRealloc(th, newsize); +} + +/* Ensure method's stack has room for 'needed' values above top. Return 0 on failure. + * This may grow the stack, but never shrinks it. + */ +int needMoreLocal(Value th, AuintIdx needed) { + int success; + CallInfo *ci = th(th)->curmethod; + vm_lock(th); + + // Check if we already have enough allocated room on stack for more values + if ((AuintIdx)(th(th)->stk_last - th(th)->stk_top) > needed + STACK_EXTRA) + success = 1; // Success! Stack is already big enough + else { + // Will this overflow max stack size? + if ((AuintIdx)(th(th)->stk_top - th(th)->stack) > STACK_MAXSIZE - needed - STACK_EXTRA) + success = 0; // Fail! - don't grow + else { + stkGrow(th, needed); + success = 1; + } + } + + // adjust method's last allowed value upwards, as needed + if (success && ci->end < th(th)->stk_top + needed) + ci->end = th(th)->stk_top + needed; + + vm_unlock(th); + return success; +} + diff --git a/resources/examples/acornvm/gen.c3 b/resources/examples/acornvm/gen.c3 new file mode 100644 index 000000000..ce69b7c7f --- /dev/null +++ b/resources/examples/acornvm/gen.c3 @@ -0,0 +1,999 @@ +/** Bytecode generator for Acorn compiler + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + +#include "acorn.h" + +#ifdef __cplusplus +namespace avm { +extern "C" { +#endif + +/* Create a new bytecode method value. 
*/ +void newBMethod(Value th, Value *dest) { + BMethodInfo *meth = (BMethodInfo*) mem_new(th, MethEnc, sizeof(BMethodInfo)); + *dest = (Value) meth; + + methodFlags(meth) = 0; + methodNParms(meth) = 1; // 'self' + + meth->code = NULL; + meth->maxstacksize = 20; + meth->avail = 0; + meth->size = 0; + meth->lits = NULL; + meth->litsz = 0; + meth->nbrlits = 0; + meth->nbrexterns = 0; + meth->nbrlocals = 0; +} + +/* Put new instruction in code array */ +void genPutInstr(CompInfo *comp, AuintIdx loc, Instruction i) { + mem_growvector(comp->th, comp->method->code, loc, comp->method->avail, Instruction, INT_MAX); + comp->method->code[loc] = i; +} + +/* Append new instruction to code array */ +void genAddInstr(CompInfo *comp, Instruction i) { + mem_growvector(comp->th, comp->method->code, comp->method->size, comp->method->avail, Instruction, INT_MAX); + comp->method->code[comp->method->size++] = i; +} + +/* Add a literal and return its index */ +int genAddLit(CompInfo *comp, Value val) { + BMethodInfo* f = comp->method; + + // See if we already have it + int i = f->nbrlits; + while (i-- > 0) + if (f->lits[i] == val) + return i; + + // If not found, add it + mem_growvector(comp->th, f->lits, f->nbrlits, f->litsz, Value, INT_MAX); + if (isStr(val)) + str_info(val)->flags1 |= StrLiteral; // Make strings read only + f->lits[f->nbrlits] = val; + mem_markChk(comp->th, comp, val); + return f->nbrlits++; +} + +/* Indicate the method has a variable number of parameters */ +void genVarParms(CompInfo *comp) { + methodFlags(comp->method) = METHOD_FLG_VARPARM; +} + +/** Allocate block's local variables */ +Value genLocalVars(CompInfo *comp, Value blockvarseg,int nexpected) { + Value th = comp->th; + Value svLocalVars = comp->locvarseg; + if (blockvarseg!=aNull) { + int nbrvars = arr_size(blockvarseg)-2; + if (nbrvars>0) { + comp->locvarseg = blockvarseg; + arrSet(th, comp->locvarseg, 1, anInt(comp->nextreg)); + if (nbrvars-nexpected>0) + genAddInstr(comp, BCINS_ABC(OpLoadNulls, 
comp->nextreg+nexpected, nbrvars-nexpected, 0)); + comp->nextreg += nbrvars; + if (comp->method->maxstacksize < comp->nextreg+nbrvars) + comp->method->maxstacksize = comp->nextreg+nbrvars; + } + } + return svLocalVars; +} + +/* Raise method's max stack size if register is above it */ +void genMaxStack(CompInfo *comp, AuintIdx reg) { + if (comp->method->maxstacksize < reg) + comp->method->maxstacksize = reg+1; +} + +/** Get a node from an AST segment */ +#define astGet(th, astseg, idx) (arrGet(th, astseg, idx)) + +void genExp(CompInfo *comp, Value astseg); +void genStmts(CompInfo *comp, Value astseg); +void genDoProp(CompInfo *comp, Value astseg, char byteop, Value rval, int nexpected); + +/** Return next available register to load values into */ +unsigned int genNextReg(CompInfo *comp) { + // Keep track of high-water mark for later stack allocation purposes + if (comp->method->maxstacksize < comp->nextreg+1) + comp->method->maxstacksize = comp->nextreg+1; + return comp->nextreg++; +} + +/** Return register number for expression (if it already is one), otherwise return -1 */ +int genExpReg(CompInfo *comp, Value astseg) { + Value th = comp->th; + if (isSym(astseg)) { + if (vmlit(SymThis) == astseg) + return comp->thisreg; + else if (vmlit(SymSelf) == astseg) + return 0; + } else { + Value op = astGet(th, astseg, 0); + if (vmlit(SymLocal) == op) + return findLocalVar(comp, astGet(th, astseg, 1)); + else + return -1; + } + return -1; +} + +/** Get the destination where Jump is going */ +int genGetJump(CompInfo *comp, int ip) { + int offset = bc_j(comp->method->code[ip]); + if (offset == BCNO_JMP) /* point to itself represents end of list */ + return BCNO_JMP; /* end of list */ + else + return (ip+1)+offset; /* turn offset into absolute position */ +} + +/** Set the Jump instruction at ip to jump to dest instruction */ +void genSetJump(CompInfo *comp, int ip, int dest) { + if (ip==BCNO_JMP) + return; + Instruction *jmp = &comp->method->code[ip]; + int offset = 
dest-(ip+1); + assert(dest != BCNO_JMP); + if (((offset+BCBIAS_J) >> 16)!=0) + assert(0 && "control structure too long"); + *jmp = setbc_j(*jmp, offset); +} + +/* Set the jump instruction link chain starting at listip to jump to dest */ +void genSetJumpList(CompInfo *comp, int listip, int dest) { + while (listip != BCNO_JMP) { + int next = genGetJump(comp, listip); + genSetJump(comp, listip, dest); + listip = next; + } +} + +/** Generate a jump that goes forward, possibly as part of an jump chain */ +void genFwdJump(CompInfo *comp, int op, int reg, int *ipchain) { + // If part of a jmp chain, add this jump to the chain + if (*ipchain != BCNO_JMP) { + // Find last jump in chain + int jumpip; + int nextip = *ipchain; + do { + jumpip = nextip; + nextip = genGetJump(comp, jumpip); + } while (nextip != BCNO_JMP); + // Fix it to point to jump we are about to generate + genSetJump(comp, jumpip, comp->method->size); + } + else + *ipchain = comp->method->size; // New chain starts with this jump + genAddInstr(comp, BCINS_AJ(op, reg, BCNO_JMP)); +} + +/** Generate conditional tests & appropriate jump(s), handled recursively for boolean operators. + failjump is ip for first jump past the code to run on condition's success. + passjump is ip for first jump directly to condition's success. + notflag is true if under influence of 'not' operator: reversing jumps and and/or. + lastjump specifies how last jump should behave: true for fail jump, false for passjump. true reverses jump condition. */ +void genJumpExp(CompInfo *comp, Value astseg, int *failjump, int *passjump, bool notflag, bool lastjump) { + Value th = comp->th; + unsigned int svnextreg = comp->nextreg; + Value condop = isArr(astseg)? astGet(th, astseg, 0) : astseg; + bool revjump = notflag ^ lastjump; // Reverse jump based on not flag and lastjump + + // Comparison ops (e.g., == or <) based on rocket operator - generation code comes later. + int jumpop; + if (condop == vmlit(SymLt)) jumpop = revjump? 
OpJGeN : OpJLt; + else if (condop == vmlit(SymLe)) jumpop = revjump? OpJGtN : OpJLe; + else if (condop == vmlit(SymGt)) jumpop = revjump? OpJLeN : OpJGt; + else if (condop == vmlit(SymGe)) jumpop = revjump? OpJLtN : OpJGe; + else if (condop == vmlit(SymEq)) jumpop = revjump? OpJNeN : OpJEq; + else if (condop == vmlit(SymNe)) jumpop = revjump? OpJEqN : OpJNe; + + // '===' exact equivalence + else if (condop == vmlit(SymEquiv)) { + genExp(comp, astGet(th, astseg, 1)); + Value arg2 = astGet(th, astseg, 2); + if (isArr(arg2) && astGet(th, arg2, 0)==vmlit(SymLit) && astGet(th, arg2, 1)==aNull) { + genFwdJump(comp, revjump? OpJNNull : OpJNull, svnextreg, lastjump? failjump : passjump); + } + else { + genExp(comp, arg2); + genFwdJump(comp, revjump? OpJDiff : OpJSame, svnextreg, lastjump? failjump : passjump); + } + comp->nextreg = svnextreg; + return; + } + + // '~~' pattern match + else if (condop == vmlit(SymMatchOp)) { + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, vmlit(SymMatchOp)))); + genExp(comp, astGet(th, astseg, 2)); // '~~' uses right hand value for object call + genExp(comp, astGet(th, astseg, 1)); + genAddInstr(comp, BCINS_ABC(OpGetCall, svnextreg, comp->nextreg - svnextreg-1, 1)); + genFwdJump(comp, revjump? OpJFalse : OpJTrue, svnextreg, lastjump? failjump : passjump); + comp->nextreg = svnextreg; + return; + } + + else if (condop == vmlit(SymNot)) { + genJumpExp(comp, astGet(th, astseg, 1), failjump, passjump, !notflag, lastjump); + return; + } + + else if (condop == vmlit(SymOr) || condop == vmlit(SymAnd)) { + bool isAnd = (condop == vmlit(SymAnd)) ^ notflag; // Treat it as 'And' (or 'Or')? 
+ AuintIdx segi = 1; + if (isAnd) { + while (segi < getSize(astseg)-1) { + genJumpExp(comp, astGet(th, astseg, segi++), failjump, passjump, notflag, true); + } + genJumpExp(comp, astGet(th, astseg, segi), failjump, passjump, notflag, lastjump); + return; + } + else { + int newpassjump = BCNO_JMP; + while (segi < getSize(astseg)-1) { + int newfailjump = BCNO_JMP; + genJumpExp(comp, astGet(th, astseg, segi++), &newfailjump, &newpassjump, notflag, false); + genSetJump(comp, newfailjump, comp->method->size); + } + genJumpExp(comp, astGet(th, astseg, segi), failjump, &newpassjump, notflag, lastjump); + genSetJumpList(comp, newpassjump, comp->method->size); // Fix 'or' jumps to here + return; + } + } + + // Otherwise, an expression to be interpreted as false/null or true (anything else) + // (which includes explicit use of <==>) + else { + genExp(comp, astseg); + genFwdJump(comp, revjump? OpJFalse : OpJTrue, svnextreg, lastjump? failjump : passjump); + comp->nextreg = svnextreg; + return; + } + + // Generate code for rocket-based comparisons + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, vmlit(SymRocket)))); + genExp(comp, astGet(th, astseg, 1)); + genExp(comp, astGet(th, astseg, 2)); + genAddInstr(comp, BCINS_ABC(OpGetCall, svnextreg, comp->nextreg - svnextreg-1, 1)); + genFwdJump(comp, jumpop, svnextreg, lastjump? 
failjump : passjump); + comp->nextreg = svnextreg; +} + +/** Generate return or yield */ +void genReturn(CompInfo *comp, Value aststmt, int op, int expected) { + Value th = comp->th; + AuintIdx svnextreg = comp->nextreg; + Value retexp = astGet(th, aststmt, 1); + if (retexp==aNull) + genAddInstr(comp, BCINS_ABC(op, 0, 0, expected)); // return with no values + else { + int reg = genExpReg(comp, retexp); + // Return from a local variable registers + if (reg>=0) + genAddInstr(comp, BCINS_ABC(op, reg, 1, expected)); + // Do tail call if we are calling another method as the return value + else if (op==OpReturn && isArr(retexp) && astGet(th, retexp, 0)==vmlit(SymCallProp)) + genDoProp(comp, retexp, OpTailCall, aNull, 1); + // For solo splat, load parameter varargs and return them + else if (retexp == vmlit(SymSplat)) { + genAddInstr(comp, BCINS_ABC(OpLoadVararg, svnextreg, 0xFF, 0)); + genAddInstr(comp, BCINS_ABC(op, svnextreg, 0xFF, expected)); + } + // For comma-separated rvals, special handling in case ... splat appears (at end) + else if (isArr(retexp) && arrGet(th, retexp, 0)==vmlit(SymComma)) { + int nvals = arr_size(retexp)-1; + bool varrvals = false; + for (int i=1; i<=nvals; i++) { + Value rvali = astGet(th, retexp, i); + if (i==nvals && rvali==vmlit(SymSplat)) { + genAddInstr(comp, BCINS_ABC(OpLoadVararg, genNextReg(comp), 0xFF, 0)); + varrvals = true; + } + else if (i==nvals && isArr(rvali) && astGet(th, rvali, 0)==vmlit(SymYield)) { + genReturn(comp, rvali, OpYield, 0xFF); + varrvals = true; + } + else + genExp(comp, rvali); + } + genAddInstr(comp, BCINS_ABC(op, svnextreg, varrvals? 
0xFF : comp->nextreg - svnextreg, expected)); + } + // Return calculated values on stack + else { + genExp(comp, retexp); + genAddInstr(comp, BCINS_ABC(op, svnextreg, comp->nextreg - svnextreg, expected)); + } + } + comp->nextreg = svnextreg; +} + +/** Return nonzero opcode if ast operator is a property/method call */ +char genIsProp(Value th, Value op, int setflag) { + if (vmlit(SymActProp) == op) + return setflag? OpSetActProp : OpGetActProp; + else if (vmlit(SymRawProp) == op) + return setflag? OpSetProp : OpGetProp; + else if (vmlit(SymCallProp) == op) + return setflag? OpSetCall : OpGetCall; + return 0; +} + +/** Generate code for some kind of property/method call. + rval is aNull for 'get' mode and either a register integer or ast segment for 'set' mode. + nexpected specifies how many return values expected from called method */ +void genDoProp(CompInfo *comp, Value astseg, char byteop, Value rval, int nexpected) { + Value th = comp->th; + unsigned int svreg = comp->nextreg; // Save + + // <<<< optimize here by seeing if property is a std symbol and self is in register + + genExp(comp, astGet(th, astseg, 2)); // property + genExp(comp, astGet(th, astseg, 1)); // self + + // Handle value to be set (if provided) as first parameter + if (isInt(rval)) // already loaded into a register + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), toAint(rval), 0)); + else if (rval!=aNull) { + AuintIdx rvalreg = comp->nextreg; + genExp(comp, rval); // Load into next available register + comp->nextreg = rvalreg+1; + } + + // Load as many parameters as we have, then do property get + bool varparms = false; + for (AuintIdx i = 3; inextreg; + Value parm = astGet(th, astseg, i); + if (parm == vmlit(SymSplat)) { + genAddInstr(comp, BCINS_ABC(OpLoadVararg, rvalreg, 0xFF, 0)); + varparms = true; + break; + } + else if (i==getSize(astseg)-1 && isArr(parm) && arrGet(th, parm, 0)==vmlit(SymYield)) { + genReturn(comp, parm, OpYield, 0xFF); + varparms = true; + break; + } + else { 
+ genExp(comp, parm); + comp->nextreg = rvalreg+1; + } + } + genAddInstr(comp, BCINS_ABC(byteop, svreg, varparms? 0xFF : comp->nextreg - svreg-1, nexpected)); + comp->nextreg = svreg+1; +} + +/** Generate code for an assignment */ +void genAssign(CompInfo *comp, Value lval, Value rval) { + Value th = comp->th; + Value lvalop = isArr(lval)? astGet(th, lval, 0) : aNull; + + // Handle assignment to property or method + char opcode = genIsProp(th, lvalop, true); + if (opcode) + genDoProp(comp, lval, opcode, rval, 1); + else { + // Handle parallel, local, closure, global variable assignments where rval is loaded first + int nlvals = lvalop==vmlit(SymComma)? arr_size(lval)-1 : 1; + bool varrvals = false; + AuintIdx rvalreg; + if (isInt(rval)) + rvalreg = toAint(rval); // rval is already in a register, so use that reg + else { + // Special handling for right-hand values for parallel assignment + rvalreg = comp->nextreg; // Save where we put rvals + int opcode; + // For method call, specify expected number of return values + if (isArr(rval) && (opcode = genIsProp(th, astGet(th, rval, 0), false))) { + genDoProp(comp, rval, opcode, aNull, nlvals); + varrvals = true; + } + else if (isArr(rval) && arrGet(th, rval, 0)==vmlit(SymYield)) { + genReturn(comp, rval, OpYield, nlvals); + varrvals = true; + } + // For solo splat, load needed number from parameter varargs + else if (rval == vmlit(SymSplat)) { + genAddInstr(comp, BCINS_ABC(OpLoadVararg, genNextReg(comp), nlvals, 0)); + varrvals = true; + } + // For comma-separated rvals, special handling in case ... 
splat appears (at end) + else if (nlvals>1 && isArr(rval) && arrGet(th, rval, 0)==vmlit(SymComma)) { + int nvals = arr_size(rval)-1; + for (int i=1; i<=nvals; i++) { + Value rvali = astGet(th, rval, i); + if (i==nvals && i<=nlvals && rvali==vmlit(SymSplat)) { + genAddInstr(comp, BCINS_ABC(OpLoadVararg, genNextReg(comp), nlvals-i+1, 0)); + varrvals = true; + } + else + genExp(comp, rvali); + } + } + else + genExp(comp, rval); + } + // Handle parallel assignment for lvals + if (vmlit(SymComma) == lvalop) { + int nrneed = varrvals? 0 : nlvals - (comp->nextreg - rvalreg); + // Ensure we fill up right values with nulls to as high as left values + if (nrneed > 0) { + genAddInstr(comp, BCINS_ABC(OpLoadNulls, comp->nextreg, nrneed, 0)); + comp->nextreg += nrneed; + // Keep track of high-water mark for later stack allocation purposes + if (comp->method->maxstacksize < comp->nextreg+nrneed) + comp->method->maxstacksize = comp->nextreg+nrneed; + } + // Assign each lval, one at a time, from corresponding loaded rval in a register + for (int i = 0; inextreg +} + +/** Generate optimized code for assignment when it is just a statement and + its right-hand values do not have to be put on stack */ +void genOptAssign(CompInfo *comp, Value lval, Value rval) { + Value th = comp->th; + Value lvalop = astGet(th, lval, 0); + + // Handle assignments that require we load rval (and other stuff) first + unsigned int fromreg = genExpReg(comp, rval); + if (vmlit(SymLocal) == lvalop) { + Value symnm = astGet(th, lval, 1); + int localreg = findLocalVar(comp, symnm); + if (localreg != -1) { + // Optimize load straight into register, if possible (this, self, local var) + if (fromreg!=-1) + genAddInstr(comp, BCINS_ABC(OpLoadReg, localreg, fromreg, 0)); + else if (vmlit(SymBaseurl) == rval) + genAddInstr(comp, BCINS_ABx(OpLoadLit, localreg, genAddLit(comp, comp->lex->url))); + else { + Value rvalop = astGet(th, rval, 0); + if (vmlit(SymLit) == rvalop) { + Value litval = astGet(th, rval, 1); + if 
(litval==aNull) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, localreg, 0, 0)); + else if (litval==aFalse) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, localreg, 1, 0)); + else if (litval==aTrue) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, localreg, 2, 0)); + else + genAddInstr(comp, BCINS_ABx(OpLoadLit, localreg, genAddLit(comp, litval))); + } else if (vmlit(SymLocal) == rvalop) { + // We did local already - this must be a load from a closure variable + genAddInstr(comp, BCINS_ABC(OpGetClosure, localreg, findClosureVar(comp, astGet(th, rval, 1)), 0)); + } else if (vmlit(SymGlobal) == rvalop) { + genAddInstr(comp, BCINS_ABx(OpGetGlobal, localreg, genAddLit(comp, astGet(th, rval, 1)))); + } else { + fromreg = comp->nextreg; // Save where we put rvals + genExp(comp, rval); + genAddInstr(comp, BCINS_ABC(OpLoadReg, localreg, fromreg, 0)); + } + } + } + else if ((localreg = findClosureVar(comp, symnm))!=-1) { + fromreg = comp->nextreg; // Save where we put rvals + genExp(comp, rval); + genAddInstr(comp, BCINS_ABC(OpSetClosure, localreg, fromreg, 0)); + } + } else if (vmlit(SymGlobal) == lvalop) { + if (fromreg != -1) + genAddInstr(comp, BCINS_ABx(OpSetGlobal, fromreg, genAddLit(comp, astGet(th, lval, 1)))); + else { + fromreg = comp->nextreg; // Save where we put rvals + genExp(comp, rval); + genAddInstr(comp, BCINS_ABx(OpSetGlobal, fromreg, genAddLit(comp, astGet(th, lval, 1)))); + } + } else + genAssign(comp, lval, rval); +} + +/** Return true if the expression makes no use of any logical or comparative operators */ +bool hasNoBool(Value th, Value astseg) { + for (AuintIdx segi = 1; segi < getSize(astseg)-1; segi++) { + Value op = astGet(th, astseg, segi); + op = isArr(op)? 
astGet(th, op, 0) : op; + if (vmlit(SymAnd)==op || vmlit(SymOr)==op || vmlit(SymNot)==op + || vmlit(SymEquiv) == op || vmlit(SymMatchOp) == op + || vmlit(SymEq)==op || vmlit(SymNe)==op + || vmlit(SymGt)==op || vmlit(SymGe)==op || vmlit(SymLt)==op || vmlit(SymLe)==op) + return false; + } + return true; +} + +/** Generate the appropriate code for something that places one or more values on the stack + beginning at comp->nextreg (which should be saved before calling this). The last value is at comp->nextreg-1 */ +void genExp(CompInfo *comp, Value astseg) { + Value th = comp->th; + if (isSym(astseg)) { + if (vmlit(SymThis) == astseg) + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), comp->thisreg, 0)); + else if (vmlit(SymSelf) == astseg) + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), 0, 0)); + else if (vmlit(SymContext) == astseg) + genAddInstr(comp, BCINS_ABC(OpLoadContext, genNextReg(comp), 0, 0)); + else if (vmlit(SymSelfMeth) == astseg) + genAddInstr(comp, BCINS_ABC(OpLoadContext, genNextReg(comp), 1, 0)); + else if (vmlit(SymBaseurl) == astseg) + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, comp->lex->url))); + else if (vmlit(SymSplat) == astseg) + genAddInstr(comp, BCINS_ABC(OpLoadVararg, genNextReg(comp), 1, 0)); // By default, only get one value + } else if (isArr(astseg)) { + Value op = astGet(th, astseg, 0); + char opcode = genIsProp(th, op, false); + if (opcode) // Property or method use + genDoProp(comp, astseg, opcode, aNull, 1); + else if (vmlit(SymComma) == op) { + int nvals = arr_size(astseg)-1; + for (int i=1; i<=nvals; i++) + genExp(comp, astGet(th, astseg, i)); + } else if (vmlit(SymLit) == op) { + Value litval = astGet(th, astseg, 1); + if (litval==aNull) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, genNextReg(comp), 0, 0)); + else if (litval==aFalse) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, genNextReg(comp), 1, 0)); + else if (litval==aTrue) + genAddInstr(comp, BCINS_ABC(OpLoadPrim, genNextReg(comp), 
2, 0)); + else + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, litval))); + } else if (vmlit(SymExt) == op) { + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), toAint(astGet(th, astseg, 1)))); + } else if (vmlit(SymLocal) == op) { + Value symnm = astGet(th, astseg, 1); + Aint idx; + if ((idx = findLocalVar(comp, symnm))!=-1) + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), idx, 0)); + else if ((idx = findClosureVar(comp, symnm))!=-1) + genAddInstr(comp, BCINS_ABC(OpGetClosure, genNextReg(comp), idx, 0)); + } else if (vmlit(SymGlobal) == op) { + genAddInstr(comp, BCINS_ABx(OpGetGlobal, genNextReg(comp), genAddLit(comp, astGet(th, astseg, 1)))); + } else if (vmlit(SymAssgn) == op) { + genAssign(comp, astGet(th, astseg, 1), astGet(th, astseg, 2)); + } else if (vmlit(SymYield) == op) { + genReturn(comp, astseg, OpYield, 1); + } else if (vmlit(SymClosure) == op) { + Value newcloseg = astGet(th, astseg, 2); + // If no closure variables nor set method, don't generate closure, just the 'get' method + Value setmethseg = astGet(th, newcloseg, 4); + if (arr_size(newcloseg)==5 && isArr(setmethseg) && astGet(th, setmethseg, 1)==vmlit(SymNull)) + genExp(comp, astGet(th, newcloseg, 3)); + else + genExp(comp, newcloseg); + } else if (vmlit(SymOrAssgn) == op) { + // Assumes that lvar is a local variable + assert(astGet(th, astGet(th, astseg, 1), 0)==vmlit(SymLocal)); + int varreg = findLocalVar(comp, astGet(th, astGet(th, astseg, 1), 1)); + int jumpip = BCNO_JMP; + genFwdJump(comp, OpJNNull, varreg, &jumpip); + Value valseg = astGet(th, astseg, 2); + int reg = genExpReg(comp, astseg); + if (reg>=0) + genAddInstr(comp, BCINS_ABC(OpLoadReg, varreg, reg, 0)); + else if (isArr(valseg) && astGet(th, valseg, 0) == vmlit(SymLit)) + genAddInstr(comp, BCINS_ABx(OpLoadLit, varreg, genAddLit(comp, astGet(th, valseg, 1)))); + else { + AuintIdx rreg = comp->nextreg; // Save where we put rvals + genExp(comp, valseg); + genAddInstr(comp, 
BCINS_ABC(OpLoadReg, varreg, rreg, 0)); + } + genSetJumpList(comp, jumpip, comp->method->size); + } else if (vmlit(SymThisBlock) == op) { + unsigned int svthis = comp->thisreg; + unsigned int svthisopreg = comp->thisopreg; + comp->thisopreg = 0; + // Generate "using" operator, if specified + Value thisop = astGet(th, astseg, 3); + if (thisop != aNull) { + comp->thisopreg = comp->nextreg; + genExp(comp, thisop); + } + // Generate 'this' value + int thisreg = comp->nextreg; + genExp(comp, astGet(th, astseg, 1)); + comp->nextreg = thisreg+1; // Only use first value + comp->thisreg = thisreg; + // Optimize "using" operator to a method + if (thisop != aNull) + genAddInstr(comp, BCINS_ABC(OpGetMeth, comp->thisopreg, 0, 0)); + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, 2), 0); + genStmts(comp, astGet(th, astseg, 4)); + // Value of a this block is 'this'. Needed for returns or this blocks within this blocks. + if (thisop != aNull) { + // Move 'this' down, so its value is in the right place + genAddInstr(comp, BCINS_ABC(OpLoadReg, comp->thisopreg, comp->thisreg, 0)); + comp->nextreg = comp->thisreg; + } + else + comp->nextreg = comp->thisreg+1; + comp->locvarseg = svLocalVars; + comp->thisopreg = svthisopreg; + comp->thisreg = svthis; + } else if (vmlit(SymQuestion) == op) { // Ternary + int svnextreg = comp->nextreg; + int failjump = BCNO_JMP; + int passjump = BCNO_JMP; + genJumpExp(comp, astGet(th, astseg, 1), &failjump, NULL, false, true); + int nextreg = genNextReg(comp); + comp->nextreg = svnextreg; + genExp(comp, astGet(th, astseg, 2)); + genFwdJump(comp, OpJump, 0, &passjump); + genSetJumpList(comp, failjump, comp->method->size); + comp->nextreg = svnextreg; + genExp(comp, astGet(th, astseg, 3)); + genSetJumpList(comp, passjump, comp->method->size); + } else if ((vmlit(SymOr)==op || vmlit(SymAnd)==op) && hasNoBool(th, astseg)) { + // 'Pure' and/or conditional processing + int svnextreg = comp->nextreg; + int jumpip = BCNO_JMP; + AuintIdx segi; + for 
(segi = 1; segi < getSize(astseg)-1; segi++) { + genExp(comp, astGet(th, astseg, segi)); + comp->nextreg = svnextreg; + genFwdJump(comp, op==vmlit(SymOr)? OpJTrue : OpJFalse, svnextreg, &jumpip); + } + genExp(comp, astGet(th, astseg, segi)); + genSetJumpList(comp, jumpip, comp->method->size); + } else if (vmlit(SymAnd)==op || vmlit(SymOr)==op || vmlit(SymNot)==op + || vmlit(SymEquiv) == op || vmlit(SymMatchOp) == op + || vmlit(SymEq)==op || vmlit(SymNe)==op + || vmlit(SymGt)==op || vmlit(SymGe)==op || vmlit(SymLt)==op || vmlit(SymLe)==op) + { + // Conditional/boolean expression, resolved to 'true' or 'false' + int failjump = BCNO_JMP; + genJumpExp(comp, astseg, &failjump, NULL, false, true); + int nextreg = genNextReg(comp); + genAddInstr(comp, BCINS_ABC(OpLoadPrim, nextreg, 2, 0)); + genAddInstr(comp, BCINS_AJ(OpJump, 0, 1)); + genSetJumpList(comp, failjump, comp->method->size); + genAddInstr(comp, BCINS_ABC(OpLoadPrim, nextreg, 1, 0)); + } + } + return; +} + +/** Generate all if/elif/else blocks */ +void genIf(CompInfo *comp, Value astseg) { + Value th = comp->th; + + int jumpEndIp = BCNO_JMP; // Instruction pointer to first jump to end of if + + // Process all condition/blocks in astseg + AuintIdx ifindx = 1; // Index into astseg for each cond/block group + do { + unsigned int savereg = comp->nextreg; + // Generate conditional jump for bypassing block on condition failure + Value condast = astGet(th, astseg, ifindx); + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, ifindx+1), 0); + int jumpNextIp = BCNO_JMP; // Instruction pointer to jump to next elif/else block + if (condast != vmlit(SymElse)) { + unsigned int condreg = comp->nextreg; + genJumpExp(comp, condast, &jumpNextIp, NULL, false, true); + comp->nextreg = condreg; + } + genStmts(comp, astGet(th, astseg, ifindx+2)); // Generate block + // Generate/fix jumps after clause's block + if (condast != vmlit(SymElse)) { + if (ifindx+2 < getSize(astseg)) + genFwdJump(comp, OpJump, 0, &jumpEndIp); + 
genSetJumpList(comp, jumpNextIp, comp->method->size); // Fix jumps to next elif/else block + } + comp->locvarseg = svLocalVars; + comp->nextreg = savereg; + ifindx += 3; + } while (ifindx < getSize(astseg)); + genSetJumpList(comp, jumpEndIp, comp->method->size); // Fix jumps to end of 'if' +} + +/* Generate specific match call */ +void genMatchWith(CompInfo *comp, Value pattern, unsigned int matchreg, int nexpected) { + // pattern '~~' matchval + comp->nextreg = matchreg+2; + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), matchreg+1, 0)); + genExp(comp, pattern); + comp->nextreg = matchreg+4; // only want one value from genExp + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), matchreg, 0)); + genAddInstr(comp, BCINS_ABC(OpGetCall, matchreg+2, 2, nexpected==0? 1 : nexpected)); +} + +/** Generate match block */ +void genMatch(CompInfo *comp, Value astseg) { + Value th = comp->th; + + int jumpEndIp = BCNO_JMP; // Instruction pointer of first jump to end of match + unsigned int matchreg = comp->nextreg; + genExp(comp, astGet(th, astseg, 1)); + Value mtchmethexp = astGet(th, astseg, 2); + if (mtchmethexp==vmlit(SymMatchOp)) + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, mtchmethexp))); + else + genExp(comp, mtchmethexp); + + // Process all 'with' blocks in astseg + AuintIdx mtchindx = 3; // Index into astseg for each 'with' block + while (mtchindx < getSize(astseg)) { + comp->nextreg = matchreg+2; + Value condast = astGet(th, astseg, mtchindx); + int nexpected = toAint(astGet(th, astseg, mtchindx+2)); + // Perform match and then bypass block on failure + int jumpNextIp = BCNO_JMP; // Instruction pointer to jump past this block + if (isArr(condast) && arrGet(th, condast, 0)==vmlit(SymComma)) { + int jumpDoIp = BCNO_JMP; + for (AuintIdx i=1; imethod->size); // Fix jumps to block + } + else if (condast != vmlit(SymElse)) { + genMatchWith(comp, condast, matchreg, nexpected); + genFwdJump(comp, OpJFalse, matchreg+2, 
&jumpNextIp); + } + comp->nextreg = matchreg+2; + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, mtchindx+1), nexpected); + genStmts(comp, astGet(th, astseg, mtchindx+3)); // Generate block + // Generate/fix jumps after clause's block + if (condast != vmlit(SymElse)) { + if (mtchindx+2 < getSize(astseg)) + genFwdJump(comp, OpJump, 0, &jumpEndIp); + genSetJumpList(comp, jumpNextIp, comp->method->size); // Fix jumps to next with/else block + } + comp->locvarseg = svLocalVars; + mtchindx += 4; + } + genSetJumpList(comp, jumpEndIp, comp->method->size); // Fix jumps to end of 'match' + comp->nextreg = matchreg; +} + +/** Generate while block */ +void genWhile(CompInfo *comp, Value astseg) { + Value th = comp->th; + unsigned int savereg = comp->nextreg; + + // Allocate block's local variables + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, 1), 0); + + // Perform conditional expression and jump + int svJumpBegIp = comp->whileBegIp; + int svJumpEndIp = comp->whileEndIp; + comp->whileBegIp = comp->method->size; + comp->whileEndIp = BCNO_JMP; + genJumpExp(comp, astGet(th, astseg, 2), &comp->whileEndIp, NULL, false, true); + + // Generate block and jump to beginning. 
Fix conditional jump to after 'while' block + genStmts(comp, astGet(th, astseg, 3)); // Generate block + genAddInstr(comp, BCINS_AJ(OpJump, 0, comp->whileBegIp - comp->method->size-1)); + genSetJumpList(comp, comp->whileEndIp, comp->method->size); // Fix jump to end of 'while' block + + // Restore block's saved values + comp->nextreg = savereg; + comp->whileBegIp = svJumpBegIp; + comp->whileEndIp = svJumpEndIp; + comp->locvarseg = svLocalVars; +} + +/** Generate each block */ +void genEach(CompInfo *comp, Value astseg) { + Value th = comp->th; + unsigned int savereg = comp->nextreg; + + // Prepare iterator for 'each' block outside of main loop (loaded in savereg) + Value iter = astGet(th, astseg, 3); + if (iter == vmlit(SymSplat)) + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, anInt(0)))); + else { + int fromreg = genExpReg(comp, iter); + if (fromreg==-1) { + genExp(comp, iter); + genAddInstr(comp, BCINS_ABC(OpEachPrep, savereg, savereg, 0)); + } + else + genAddInstr(comp, BCINS_ABC(OpEachPrep, genNextReg(comp), fromreg, 0)); + } + + // Allocate block's local variables + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, 1), 0); + + // Perform conditional expression and jump + int svJumpBegIp = comp->whileBegIp; + int svJumpEndIp = comp->whileEndIp; + comp->whileBegIp = comp->method->size; + comp->whileEndIp = BCNO_JMP; + genAddInstr(comp, BCINS_ABC(iter == vmlit(SymSplat)? OpEachSplat : OpEachCall, savereg, 0, toAint(astGet(th, astseg,2)))); + genFwdJump(comp, OpJFalse, savereg+1, &comp->whileEndIp); + + // Generate block and jump to beginning. 
Fix conditional jump to after 'while' block + genStmts(comp, astGet(th, astseg, 4)); // Generate block + genAddInstr(comp, BCINS_AJ(OpJump, 0, comp->whileBegIp - comp->method->size-1)); + genSetJumpList(comp, comp->whileEndIp, comp->method->size); // Fix jump to end of 'while' block + + // Restore block's saved values + comp->nextreg = savereg; + comp->whileBegIp = svJumpBegIp; + comp->whileEndIp = svJumpEndIp; + comp->locvarseg = svLocalVars; +} + +/** Generate do block */ +void genDo(CompInfo *comp, Value astseg) { + Value th = comp->th; + unsigned int savereg = comp->nextreg; + unsigned int lowreg, highreg; + + Value svLocalVars = genLocalVars(comp, astGet(th, astseg, 1), 0); + Value exp = astGet(th, astseg, 2); + if (exp!=aNull) { + lowreg = comp->nextreg; + genExp(comp, exp); + highreg = comp->nextreg; + for (unsigned int reg=lowreg; regnextreg = highreg; + for (unsigned int reg=highreg-1; reg>=lowreg; reg--) { + genAddInstr(comp, BCINS_ABx(OpLoadLit, genNextReg(comp), genAddLit(comp, vmlit(SymEnd)))); + genAddInstr(comp, BCINS_ABC(OpLoadReg, genNextReg(comp), reg, 0)); + genAddInstr(comp, BCINS_ABC(OpGetCall, highreg, 1, 0)); + } + } + + // Restore block's saved values + comp->nextreg = savereg; + comp->locvarseg = svLocalVars; +} + +/** Generate a statement */ +void genStmt(CompInfo *comp, Value aststmt) { + Value th = comp->th; + AuintIdx svnextreg = comp->nextreg; + + // Set up a call for every statement + AuintIdx svthisopreg; + if (comp->thisopreg != 0) { + svthisopreg = comp->nextreg; + // We have to copy this+method, because the method's tail call may destroy them + genAddInstr(comp, BCINS_ABC(OpLoadRegs, genNextReg(comp), comp->thisopreg, 2)); + comp->nextreg++; + } + + // Handle various kinds of statements + Value op = isArr(aststmt)? 
astGet(th, aststmt, 0) : aststmt; + if (op==vmlit(SymIf)) genIf(comp, aststmt); + else if (op==vmlit(SymMatch)) genMatch(comp, aststmt); + else if (op==vmlit(SymWhile)) genWhile(comp, aststmt); + else if (op==vmlit(SymEach)) genEach(comp, aststmt); + else if (op==vmlit(SymDo)) genDo(comp, aststmt); + else if (op==vmlit(SymBreak) && comp->whileBegIp!=-1) + genFwdJump(comp, OpJump, 0, &comp->whileEndIp); + else if (op==vmlit(SymContinue) && comp->whileBegIp!=-1) + genAddInstr(comp, BCINS_AJ(OpJump, 0, comp->whileBegIp - comp->method->size-1)); + else if (op==vmlit(SymReturn)) + genReturn(comp, aststmt, OpReturn, 0); + else if (op==vmlit(SymYield)) + genReturn(comp, aststmt, OpYield, 0); + else if (op==vmlit(SymAssgn)) + genOptAssign(comp, astGet(th, aststmt,1), astGet(th, aststmt,2)); + else + genExp(comp, aststmt); + + // Finish append (or other this op) + if (comp->thisopreg != 0) + genAddInstr(comp, BCINS_ABC(OpGetCall, svthisopreg, comp->nextreg - svthisopreg-1, 0)); + + comp->nextreg = svnextreg; +} + +/** Generate one or a sequence of statements */ +void genStmts(CompInfo *comp, Value astseg) { + Value th = comp->th; + if (isArr(astseg) && astGet(comp->th, astseg, 0)==vmlit(SymSemicolon)) { + for (AuintIdx i=1; ith, astseg, i)); + } + } + else + genStmt(comp, astseg); +} + +#define astAddValue(th, astseg, val) (arrAdd(th, astseg, val)) +Value astAddSeg(Value th, Value oldseg, Value astop, AuintIdx size); +Value astAddSeg2(Value th, Value oldseg, Value astop, Value val); +Value astInsSeg(Value th, Value oldseg, Value astop, AuintIdx size); + +/** Recursively turn a method's implicit returns in the AST into explicit returns */ +void genFixReturns(CompInfo *comp, Value aststmts) { + Value th = comp->th; + if (!isArr(aststmts) || astGet(th, aststmts, 0)!=vmlit(SymSemicolon)) { + vmLog("A method's block is not properly formed (should use ';' AST)"); + return; + } + Value laststmt = astGet(th, aststmts, arr_size(aststmts)-1); + Value lastop = isArr(laststmt)? 
astGet(th, laststmt, 0) : laststmt; + // Implicit return for loops is to return 'null' afterwards + if (lastop==vmlit(SymWhile) || lastop==vmlit(SymEach) || lastop==vmlit(SymDo) + || lastop==vmlit(SymYield) || lastop==vmlit(SymBreak) || lastop==vmlit(SymContinue)) + astAddSeg2(th, aststmts, vmlit(SymReturn), aNull); + // Implicit return for 'if' + else if (lastop==vmlit(SymIf) || lastop==vmlit(SymMatch)) { + // Recursively handle implicit return for each clause's statement block + int step = lastop==vmlit(SymMatch)? 4 : 3; + Auint i = lastop==vmlit(SymMatch)? 6 : 3; + for (; ith; + // AST: ('method', localvars, closurevars, parminitstmts, statements) + // Initialize generation state for method + comp->method->nbrexterns = comp->method->nbrlits; + comp->nextreg = comp->method->maxstacksize = comp->method->nbrlocals; + comp->thisreg = 0; // Starts with 'self' + comp->thisopreg = 0; + comp->locvarseg = astGet(comp->th, comp->ast, 1); + arrSet(th, comp->locvarseg, 1, anInt(1)); + + // If 'self' is bound to this closure, override passed self with it + int idx; + if ((idx = findClosureVar(comp, vmlit(SymSelf)))!=-1) + genAddInstr(comp, BCINS_ABC(OpGetClosure, 0, idx, 0)); + + // Generate the method's code based on AST + int nbrnull = comp->method->nbrlocals - methodNParms(comp->method); + if (nbrnull>0) // Initialize non-parm locals to null + genAddInstr(comp, BCINS_ABC(OpLoadNulls, methodNParms(comp->method), nbrnull, 0)); + genStmts(comp, astGet(th, comp->ast, 2)); // Generate code for parameter defaults + Value aststmts = astGet(th, comp->ast, 3); + genFixReturns(comp, aststmts); // Turn implicit returns into explicit returns + genStmts(comp, aststmts); // Generate method's code block +} + +#ifdef __cplusplus +} // extern "C" +} // namespace avm +#endif \ No newline at end of file diff --git a/resources/examples/acornvm/lexer.c3 b/resources/examples/acornvm/lexer.c3 new file mode 100644 index 000000000..085c3e3b1 --- /dev/null +++ b/resources/examples/acornvm/lexer.c3 
@@ -0,0 +1,692 @@ +module acorn::lex; +/** Lexer for Acorn compiler + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + + +/** + * Crude algorithm for determining if character is a Unicode letter + */ +func bool isualpha(Auchar c) @inline +{ + return c > 0xA0 || isalpha(c); +} + + +/** + * Algorithm for determining if character is a digit 0-9 + */ +func bool isudigit(Auchar c) @inline +{ + return c >= '0' && c <= '9'; +} + +/** + * Return a new LexInfo value, lexer context for a source program + */ +func Value new(Value th, Value *dest, Value src, Value url) +{ + LexInfo *lex; + + // Create an lexer object + lex = mem::new(th, LexEnc, sizeof(LexInfo)); + + // Values + lex.token = aNull; + lex.th = th; + lex.source = src; + mem::markChk(th, lex, src); + lex.url = url; + mem::markChk(th, lex, url); + + // Position info (ignoring initial UTF8 byte-order mark) + // TODO + lex.bytepos = lex.linebeg = getSize(src) >= 3 && 0 == strncmp("\xEF\xBB\xBF", toStr(src), 3) ? 
3 : 0; + lex.linenbr = 1; + + // indent state + lex.curindent = lex.newindent = 0; + + lex.newline = false; + lex.newprogram = true; + lex.insertSemi = false; + lex.undentcont = false; + lex.optype = 0; + return *dest = cast(lex, Value);; +} + +/** Return the current unicode character whose UTF-8 bytes start at lex->bytepos */ +func Auchar LexInfo.thischar(LexInfo* lex) +{ + byte *src = &toStr(lex.source)[lex.bytepos]; + int nbytes; + Auchar chr; + + // Get info from first UTF-8 byte + if ((*src&0xF0) == 0xF0) { nbytes=4; chr = *src&0x07;} + else if ((*src&0xE0) == 0xE0) {nbytes=3; chr = *src&0x0F;} + else if ((*src&0xC0) == 0xC0) {nbytes=2; chr = *src&0x1F;} + else if ((*src&0x80) == 0x00) {nbytes=1; chr = *src&0x7F;} + else {nbytes=1; chr = 0;} // error + + // Obtain remaining bytes + while (--nbytes) + { + src++; + if (*src & 0xC0 ==0x80) chr = chr << 6 + *src & 0x3F; + } + return chr; +} + +/** Return the current unicode character whose UTF-8 bytes start at lex->bytepos */ +func Auchar LexInfo.nextchar(LexInfo* lex) +{ + const char *src = &toStr(lex->source)[lex->bytepos]; + int nbytes; + Auchar chr; + + // Skip past current character + if ((*src&0xF0) == 0xF0) {nbytes=4;} + else if ((*src&0xE0) == 0xE0) {nbytes=3;} + else if ((*src&0xC0) == 0xC0) {nbytes=2;} + else if ((*src&0x80) == 0x00) {nbytes=1;} + else {nbytes=1;} // error + src += nbytes; + + // Get info from first UTF-8 byte + if ((*src&0xF0) == 0xF0) {nbytes=4; chr = *src&0x07;} + else if ((*src&0xE0) == 0xE0) {nbytes=3; chr = *src&0x0F;} + else if ((*src&0xC0) == 0xC0) {nbytes=2; chr = *src&0x1F;} + else if ((*src&0x80) == 0x00) {nbytes=1; chr = *src&0x7F;} + else {nbytes=1; chr = 0;} // error + + // Obtain remaining bytes + while (--nbytes) { + src++; + if ((*src&0xC0)==0x80) + chr = (chr<<6) + (*src&0x3F); + } + return chr; +} + +/** Skip lex->bytepos past the unicode character whose UTF-8 bytes start at lex->bytepos */ +func void LexInfo.skipchar(LexInfo* lex) +{ + const char *src = 
&toStr(lex->source)[lex->bytepos]; + int nbytes; + + if (*src=='\0') + return; + + // Get character size from first byte + if ((*src&0xF0) == 0xF0) {nbytes=4;} + else if ((*src&0xE0) == 0xE0) {nbytes=3;} + else if ((*src&0xC0) == 0xC0) {nbytes=2;} + else if ((*src&0x80) == 0x00) {nbytes=1;} + else {nbytes=1;} // error + + lex->bytepos += nbytes; +} + +/** Return true if at end of source */ +#define lex_isEOF(lex) (lex_thischar(lex) == '\0') + +/** Scan past non-tokenized white space. + * Handle line indentation and continuation */ +func bool LexInfo.scanWhite(LexInfo *lex) +{ + Value th = lex.th; // for vmlit + + // Insert semicolon as a token, if requested by implied closing brace + if (lex.insertSemi) + { + lex.insertSemi = false; + lex.toktype=Res_Token; + lex.token=vmlit(SYM_SEMICOLON); + return true; + } + + // Ignore all forms of white space + Auchar chr; + bool lookForWhiteSpace = true; + while (lookForWhiteSpace) { + + switch (chr=lex_thischar(lex)) { + + // Skip past spaces and tabs + case ' ': + case '\t': + case '\r': + lex_skipchar(lex); + break; + + // Skip past new line + case '\n': + lex->linenbr++; + lex->linebeg = lex->bytepos; + lex->newline = true; + lex_skipchar(lex); + + // Count line-leading tabs + lex->newindent = 0; + while (lex_thischar(lex)=='\t') { + lex->newindent++; + lex_skipchar(lex); + } + + // Handle continuation. 
+ if (lex_thischar(lex)=='\\') { + // Undenting requires we spawn some semi-colons and right braces + if (lex->newindent < lex->curindent) + lex->undentcont = true; + else { + lex->newline = false; + // Pretend indent did not change for extra-indented continuation + if (lex->newindent > lex->curindent) + lex->newindent = lex->curindent; + } + lex_skipchar(lex); + } + break; + + // Skip comment starting with '#' until end of line + case '#': + { + const char *scanp = &toStr(lex->source)[lex->bytepos]; + if (strncmp("###", scanp, 3)) { + // Inline comment skips to end of line + while (!lex_isEOF(lex) && lex_thischar(lex)!='\n') + lex_skipchar(lex); + break; + } + // Multi-line comment goes until next '###' + scanp+=3; + while (*scanp && 0!=strncmp("###", scanp, 3)) { + if (*scanp=='\n') + lex->linenbr++; + scanp++; + } + if (*scanp) + scanp+=3; + lex->bytepos += scanp - &toStr(lex->source)[lex->bytepos]; + } + break; + + default: + lookForWhiteSpace = false; + break; + } + } + + // Mark start of a real token + lex->tokbeg = lex->bytepos; + lex->toklinepos = lex->tokbeg - lex->linebeg; + lex->tokline = lex->linenbr; + + // We now know the next character starts a real token + // But first, we must handle insertion of ; { and } characters + // depending on the indentation changes and newline flag + + // Handle increasing indentation + if (lex->newindent > lex->curindent) { + lex->toktype=Res_Token; + lex->token=vmlit(SymLBrace); + lex->curindent++; + lex->newline = false; + return true; + } + + // Do not generate leading ';' + if (lex->newprogram) + lex->newprogram = lex->newline = false; + + // End previous line's statement with a ';' + if (lex->newline) { + lex->toktype=Res_Token; + lex->token=vmlit(SymSemicolon); + lex->newline = false; + return true; + } + + // Ensure end-of-file flushes all indent levels to 0 + if (lex_isEOF(lex)) + lex->newindent = 0; + + // Handle decreasing indentation + if (lex->newindent < lex->curindent) { + lex->toktype=Res_Token; + 
lex->token=vmlit(SymRBrace); + lex->curindent--; + if (lex->undentcont && lex->newindent==lex->curindent) + lex->undentcont = false; // Continued line at right indent now. No semi-colon. + else + lex->insertSemi = true; // Insert semi-colon after implied closing brace + return true; + } + + return false; +} + +/** End of source program is a token */ +bool lexScanEof(LexInfo *lex) { + if (!lex_isEOF(lex)) + return false; + + lex->toktype = Eof_Token; + return true; +} + +/** Tokenize an integer or floating point number */ +bool lexScanNumber(LexInfo *lex) { + + // A number token's first character is always 0-9 + // We cannot handle negative sign here, as it might be a subtraction + if (!isudigit(lex_thischar(lex))) + return false; + + int base = 10; + bool exp = false; + int digval = 0; + long nbrval = 0; + + // A leading zero may indicate a non-base 10 number + if (lex_thischar(lex)=='0') { + lex_skipchar(lex); + if (toupper(lex_thischar(lex))=='X') {base = 16; lex_skipchar(lex);} + // else if (toupper(lex_thischar(lex))=='B') {base = 2; lex_skipchar(lex);} + else if (toupper(lex_thischar(lex))=='.') {base = -1; lex_skipchar(lex);} + // else base = 8; + } + + // Validate and process remaining numeric digits + while (1) { + // Handle characters in a suspected integer + if (base>0) { + // Decimal point means it is floating point after all + if (base==10 && lex_thischar(lex)=='.') { + // If next character is a symbol/range, treat '.' 
as method operator instead + Auchar nchr = lex_nextchar(lex); + if (isualpha(nchr) || nchr=='_' || nchr=='$' || nchr=='(' || nchr=='\'' || nchr=='.') + break; + lex_skipchar(lex); + base = -1; + continue; + } + // Extract a number digit value from the character + if (isudigit(lex_thischar(lex))) + digval = lex_thischar(lex)-'0'; + else if (isalpha(lex_thischar(lex))) + digval = toupper(lex_thischar(lex))-'A'+10; + else + break; + // Ensure digit is within base, then process + if (digval>=base) + break; + nbrval = nbrval*base + digval; + lex_skipchar(lex); + } + + // Validate characters in a floating point number + else { + // Only one exponent allowed + if (!exp && toupper(lex_thischar(lex))=='E') { + exp = true; + lex_skipchar(lex); + if (lex_thischar(lex)=='-') + lex_skipchar(lex); + continue; + } + if (!isudigit(lex_thischar(lex))) + break; + lex_skipchar(lex); + } + } + + // Set value and type + if (base>=0) { + lex->token = anInt(nbrval); + lex->toktype = Lit_Token; + } + else { + lex->token = aFloat((Afloat) atof(&toStr(lex->source)[lex->tokbeg])); + lex->toktype = Lit_Token; + } + return true; +} + +/** List of all reserved names (excluding literals) */ +static VmLiterals ReservedNames[] = { + SymAnd, + SymAsync, + SymBaseurl, + SymBreak, + SymContext, + SymContinue, + SymDo, + SymEach, + SymElse, + SymElif, + SymIf, + SymIn, + SymInto, + SymLocal, + SymMatch, + SymNot, + SymOr, + SymReturn, + SymSelf, + SymSelfMeth, + SymThis, + SymUsing, + SymWait, + SymWhile, + SymWith, + SymYield +}; + +/** Tokenize a name. The result could be Name_Token (e.g., for variables) + * Res_Token, a reserved keyword, or Lit_Token for null, false and true. 
*/ +bool lexScanName(LexInfo *lex) { + + // Name token's first character is always a-z, _ or $ + Auchar chr = lex_thischar(lex); + if (!(isualpha(chr) || chr=='_' || chr=='$')) + return false; + + // Walk through all valid characters in name + lex_skipchar(lex); + while ((chr=lex_thischar(lex))=='_' || chr=='$' || isudigit(chr) || isualpha(chr)) + lex_skipchar(lex); + + // Allow ? as trailing character + if (chr=='?') + lex_skipchar(lex); + + // Create name token as a symbol + newSym(lex->th, &lex->token, &toStr(lex->source)[lex->tokbeg], lex->bytepos - lex->tokbeg); + mem_markChk(lex->th, lex, lex->token); + + // If it is a reserved name for a literal, say so. + Value th = lex->th; + lex->toktype = Lit_Token; + if (lex->token == vmlit(SymNull)) {lex->token = aNull; return true;} + else if (lex->token == vmlit(SymFalse)) {lex->token = aFalse; return true;} + else if (lex->token == vmlit(SymTrue)) {lex->token = aTrue; return true;} + + // If it is a reserved name, set toktype to say so + VmLiterals *vmtblendp = &ReservedNames[sizeof(ReservedNames)/sizeof(VmLiterals)]; + for (VmLiterals *vmtblp = ReservedNames; vmtblptoken == vmlit(*vmtblp)) { + lex->toktype = Res_Token; + return true; + } + } + + lex->toktype = Name_Token; + return true; +} + +/** Tokenize a string (double quotes) or symbol (single quotes) + * Handle escape sequences. Ignore line-end and leading tabs for multi-line. */ +bool lexScanString(LexInfo *lex) { + + // String token's first character should be a quote mark + Auchar quotemark = lex_thischar(lex); + if (!(quotemark=='"' || quotemark=='\'' )) + return false; + lex_skipchar(lex); + + // Create a string value to place the contents into + const char *begp = &toStr(lex->source)[lex->bytepos]; + const char *scanp = strchr(begp, quotemark); // An estimate, as it may not be the end + Value buildstr = pushStringl(lex->th, aNull, NULL, scanp==NULL? 
strlen(begp) : scanp-begp); + + // Repetitively scan source looking for various delimiters + scanp = begp; + while (*scanp && *scanp!=quotemark) { + + // Process any escape sequences within the string + if (*scanp=='\\') { + // Copy over string segment up to the escape sequence + if (scanp-begp > 0) + strAppend(lex->th, buildstr, begp, scanp-begp); + // Process escape sequence + switch (*++scanp) { + case 'n': strAppend(lex->th, buildstr, "\n", 1); scanp++; break; + case 'r': strAppend(lex->th, buildstr, "\r", 1); scanp++; break; + case 't': strAppend(lex->th, buildstr, "\t", 1); scanp++; break; + case 'u': case 'U': + { + // Convert a hexadecimal string of cnt digits to a unicode character + Auchar unichar=0; + int cnt = *scanp=='u'? 4 :8; + if (*(scanp+1)=='+') + scanp++; + while (*++scanp && cnt--) { + if (isudigit(*scanp)) + unichar = unichar*16 + *scanp -'0'; + if (isalpha(*scanp) && toupper(*scanp)<='F') + unichar = unichar*16 + toupper(*scanp)-'A'+10; + } + + // Encode an unicode character into UTF-8 bytes + char utf8str[8]; + char *utfp=&utf8str[sizeof(utf8str)-1]; + *utfp-- = '\0'; // make it a sizeable string + if (unichar < 0x7f) { + *utfp = (char)unichar; + strAppend(lex->th, buildstr, utfp, 1); + } + else { + // multi-byte encoding, byte by byte backwards + int cnt=0; + while (unichar) { + cnt++; + char byt = unichar & 0x3f; + unichar = unichar >> 6; + // Put appropriate flags if it is the first byte + if (unichar==0) { + switch (cnt) { + case 2: *utfp = byt | 0xC0; break; + case 3: *utfp = byt | 0xE0; break; + case 4: *utfp = byt | 0xF0; break; + case 5: *utfp = byt | 0xF8; break; + case 6: *utfp = byt | 0xFC; break; + } + } + else + *utfp-- = byt | 0x80; + } + strAppend(lex->th, buildstr, utfp, cnt); + } + } + break; + + default: strAppend(lex->th, buildstr, scanp, 1); scanp++; break; + } + begp=scanp; + } + + // Ignore line end and line leading tabs + else if (*scanp=='\r' || *scanp=='\n') { + // Copy over string segment up to the escape sequence + 
if (scanp-begp > 0) + strAppend(lex->th, buildstr, begp, scanp-begp); + // Ignore line end and leading tabs + while (*scanp=='\r' || *scanp=='\n' || *scanp=='\t') { + if (*scanp=='\n') + lex->linenbr++; + scanp++; + } + begp=scanp; + } + + // Otherwise process rest of string + else + scanp++; + } + + // Copy over rest of string segment + if (scanp-begp > 0) + strAppend(lex->th, buildstr, begp, scanp-begp); + + // Update lex position + if (*scanp==quotemark) + *scanp++; + lex->bytepos += scanp - &toStr(lex->source)[lex->bytepos]; + + // Create string (or symbol) + lex->toktype = Lit_Token; + if (quotemark=='"') + lex->token = buildstr; + else + newSym(lex->th, &lex->token, toStr(buildstr), getSize(buildstr)); + mem_markChk(lex->th, lex, lex->token); + popValue(lex->th); // buildstr + return true; +} + +/** Tokenize a punctuation-oriented operator symbol. + * By this point we take at least one character, unless multi-char op is recognized. */ +bool lexScanResource(LexInfo *lex) { + if (lex_thischar(lex)!='@') + return false; + Value th = lex->th; + lex_skipchar(lex); + Auchar delim = lex_thischar(lex); + if (delim=='\'' || delim=='"' || delim=='(' || delim<=' ') { + lex->token = vmlit(SymAt); + lex->toktype = Res_Token; + return true; + } + + // Mark beginning and look for end of url + const char *begp = &toStr(lex->source)[lex->bytepos]; + const char *scanp = begp; + while ((unsigned char)(*++scanp)>' '); // end with space, tab, cr, lf, eof, etc. + lex->bytepos += scanp - begp; + + // Create +Resource from literal url, and return it as token + pushValue(th, vmlit(SymNew)); + pushValue(th, vmlit(TypeResc)); + pushStringl(th, aNull, begp, scanp-begp); + pushValue(th, lex->url); + getCall(th, 3, 1); + lex->token = getFromTop(th, 0); + mem_markChk(lex->th, lex, lex->token); + popValue(th); + lex->toktype = Url_Token; + return true; +} + +/** Tokenize a punctuation-oriented operator symbol. 
+ * By this point we take at least one character, unless multi-char op is recognized. */ +bool lexScanOp(LexInfo *lex) { + const char *begp = &toStr(lex->source)[lex->bytepos]; + Auchar ch1 = lex_thischar(lex); + lex_skipchar(lex); + Auchar ch2 = lex_thischar(lex); + + // Look for 2- and 3- character combos + if (ch1=='.' && ch2=='.') { + if ('.'==lex_nextchar(lex)) lex_skipchar(lex); + lex_skipchar(lex); + } else if (ch1=='=' && ch2=='=') { + if ('='==lex_nextchar(lex)) lex_skipchar(lex); + lex_skipchar(lex); + } else if (ch1=='<' && ch2=='=') { + if ('>'==lex_nextchar(lex)) lex_skipchar(lex); + lex_skipchar(lex); + } else if ((ch1=='>' && ch2=='=') + || (ch1=='!' && ch2=='=') + || (ch1=='~' && ch2=='~') + || (ch1=='<' && ch2=='<') + || (ch1=='>' && ch2=='>') + || (ch1=='+' && ch2=='=') + || (ch1=='-' && ch2=='=') + || (ch1=='*' && ch2=='=') + || (ch1=='/' && ch2=='=') + || (ch1=='.' && ch2==':') + || (ch1==':' && ch2==':') + || (ch1==':' && ch2=='=') + || (ch1=='&' && ch2=='&') + || (ch1=='|' && ch2=='|') + || (ch1=='*' && ch2=='*') + || (ch1=='.' 
&& ch2=='&') + || (ch1=='+' && ch2=='[') + || (ch1=='*' && ch2=='[') + ) lex_skipchar(lex); + + newSym(lex->th, &lex->token, begp, &toStr(lex->source)[lex->bytepos]-begp); + mem_markChk(lex->th, lex, lex->token); + lex->toktype = Res_Token; + return true; +} + +/* Get the next token */ +func void LexInfo.getNextToken(LexInfo *lex) +{ + + // Scan until we find a token + (!lex.scanWhite() + && !lex.scanEof() + && !lex.scanNumber() + && !lex.scanName() + && !lex.scanString() + && !lex.scanResource() + && !lex.scanOp()); + +#ifdef COMPILERLOG + switch (lex->toktype) { + case Lit_Token: { + pushSerialized(lex->th, lex->token); + vmLog("Literal token: %s", toStr(getFromTop(lex->th, 0))); + popValue(lex->th); + } break; + case Url_Token: { + pushSerialized(lex->th, lex->token); + vmLog("Literal url token: %s", toStr(getFromTop(lex->th, 0))); + popValue(lex->th); + } break; + case Name_Token: { + pushSerialized(lex->th, lex->token); + vmLog("Name token: %s", toStr(getFromTop(lex->th, 0))); + popValue(lex->th); + } break; + case Res_Token: { + pushSerialized(lex->th, lex->token); + vmLog("Reserved token: %s", toStr(getFromTop(lex->th, 0))); + popValue(lex->th); + } break; + } +#endif +} + +/* Match current token to a reserved symbol. */ +bool lexMatch(LexInfo *lex, const char *sym) { + return (lex->toktype==Res_Token && 0==strcmp(sym, toStr(lex->token))); +} + +/* Match current token to a reserved symbol. 
+ * If it matches, advance to the next token */ +bool lexMatchNext(LexInfo *lex, const char *sym) { + if (lex->toktype==Res_Token && 0==strcmp(sym, toStr(lex->token))) { + lexGetNextToken(lex); + return true; + } + return false; +} + +/* Log an compiler message */ +void lexLog(LexInfo *lex, const char *msg) { + vmLog("While compiling %s(%d:%d): %s", toStr(lex->url), lex->tokline, lex->toklinepos, msg); +} + +#ifdef __cplusplus +} // extern "C" +} // namespace avm +#endif \ No newline at end of file diff --git a/resources/examples/acornvm/main.c3 b/resources/examples/acornvm/main.c3 new file mode 100644 index 000000000..a5b14cca4 --- /dev/null +++ b/resources/examples/acornvm/main.c3 @@ -0,0 +1,126 @@ +module acornvm::compiler; + + +/* Return a new CompInfo value, compiler state for an Acorn method */ +func Value new_compiler(Value th, Value *dest, Value src, Value url) +{ + CompInfo *comp; + + // Create an compiler context (this block of code can be gc-locked as atomic) + comp = (CompInfo *)mem_new(th, CompEnc, sizeof(CompInfo)); + *dest = (Value) comp; + comp.th = th; + comp.lex = nil; + comp.ast = nil; + comp.method = nil; + comp.prevcomp = aNull; + + // pgmsrc is a Text collection of characters + if (src.isStr()) + { + // Create lexer using source characters + Value lexer = new_lexer(th, &comp->lex, src, url); + mem_markChk(th, comp, comp->lex); + + // Prime the pump by getting the first token + lexGetNextToken(comp->lex); + comp->clovarseg = aNull; + } + // pgmsrc is CompInfo. Make use of its info. 
+ else + { + comp->lex = (@cast(src, CompInfo*).lex; + mem_markChk(th, comp, comp->lex); + comp->prevcomp = src; + comp->clovarseg = ((CompInfo*)src)->clovarseg; + comp->newcloseg = ((CompInfo*)src)->newcloseg; + } + + // Setup AST and method to parse and generate into + newArr(th, &comp->ast, aNull, 2); + mem_markChk(th, comp, comp->ast); + newBMethod(th, (Value *)&comp->method); + mem_markChk(th, comp, comp->method); + + comp.nextreg = 0; + comp.whileBegIp = -1; + comp.forcelocal = false; + + return @cast(*dest, Value); +} + +/* Method to compile an Acorn method. Parameters: + - pgmsrc: CompInfo or Text string containing the program source + - baseurl: a symbol or null + It returns the compiled byte-code method. */ +func int acn_newmethod(Value th) +{ + // Retrieve pgmsrc and baseurl from parameters + Value pgmsrc, baseurl; + if (th.getTop() < 2 || !(Value.isStr(pgmsrc = th.getLocal(1)) || pgmsrc.isPtr() && pgmsrc.isEnc(COMP)))) + { + pushValue(th, aNull); + return 1; + } + if (th.getTop() < 3 || !Value.isSym(baseurl = th.getLocal(2))) + { + baseurl = aNull; + } + // Create compiler context, then parse source to AST + CompInfo* comp = (CompInfo*) pushCompiler(th, pgmsrc, baseurl); + parseProgram(comp); + $if (@defined(COMPILERLOG)) + { + Value aststr = pushSerialized(th, comp->ast); + vmLog("Resulting AST is: %s", toStr(aststr)); + th.pop(th); + } + // Generate method instructions from AST + genBMethod(comp); + if (@defined(COMPILERLOG)) + { + Value bmethod = pushSerialized(th, comp->method); + vmLog("Resulting bytecode is: %s", toStr(bmethod)); + popValue(th); + } + + // Return generated method + th.push(comp->method); + return 1; +} + +// Found in typ_resource.cpp +AuintIdx resource_resolve(Value th, Value meth, Value *resource); + +/* Try to resolve all static Resources (externs) in 'self's method and its extern methods. + Will start the loading of any static resources not already loading. 
+ null is returned if link is successful, otherwise it returns number of unresolved Resources */ +int acn_linker(Value th) +{ + BMethodInfo* meth = @cast(th.getLocal(0), BMethodInfo*); + + // Return null when there are no unresolved externs + if (meth.nbrexterns == 0) return 0; + + AuintIdx counter = 0; + Value *externp = meth.lits; + for (Auint i = 0; i < meth.nbrexterns; i++) + { + counter += th.resource_resolve(meth, externp); + externp++; + } + + // Return null if all externs resolved. + if (counter == 0) + { + meth.nbrexterns = 0; // Mark that no more static Resources externs are to be found + return 0; + } + else + { + th.pushValue(anInt(counter)); // Return count of unresolved static resources + return 1; + } + return 1; +} + diff --git a/resources/examples/acornvm/parser.c3 b/resources/examples/acornvm/parser.c3 new file mode 100644 index 000000000..b0c25447d --- /dev/null +++ b/resources/examples/acornvm/parser.c3 @@ -0,0 +1,1015 @@ +module acorn::parser; +import acorn::parser::ast; + +/** Parser for Acorn compiler. See Acorn documentation for syntax diagrams. + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. 
+ * See Copyright Notice in avm.h + */ + +/* Add a url literal and return its index */ +bool resource_equal(Value res1, Value res2); + +int genAddUrlLit(CompInfo *comp, Value val) { + BMethodInfo* f = comp->method; + + // See if we already have resource with same url + int i = f->nbrlits; + while (i-- > 0) + if (resource_equal(f->lits[i],val)) + return i; + + // If not found, add it + mem_growvector(comp->th, f->lits, f->nbrlits, f->litsz, Value, INT_MAX); + f->lits[f->nbrlits] = val; + mem_markChk(comp->th, comp, val); + return f->nbrlits++; +} + +/* Add a method literal and return its index */ +func int CompInfo.genAddMethodLit(CompInfo *comp, Value val) +{ + BMethodInfo* f = comp.method; + mem_growvector(comp->th, f->lits, f->nbrlits, f->litsz, Value, INT_MAX); + f.lits[f.nbrlits] = val; + mem_markChk(comp->th, comp, val); + return f.nbrlits++; +} + +/* Look for variable in locvars: return index if found, otherwise -1 */ +int findBlockVar(Value th, Value locvars, Value varnm) +{ + int nbrlocals = arr_size(locvars); + for (int idx = nbrlocals - 1; idx > 1; idx--) { + if (arrGet(th, locvars, idx) == varnm) + return idx-2+toAint(arrGet(th, locvars, 1)); // relative to base index + } + return -1; +} + +/* Look for local variable. Returns idx if found, -1 otherwise. */ +func int CompInfo.findLocalVar(CompInfo *comp, Value varnm) +{ + assert(varnm.isSym()); + + Value th = comp.th; + Value locvars = comp.locvarseg; + do + { + // Look to see if variable already defined as local + // Ignore first two values (link pointer and base index number) + int nbrlocals = arr_size(locvars); + for (int idx = nbrlocals - 1; idx > 1; idx--) + { + if (arrGet(th, locvars, idx) == varnm) + return idx-2+toAint(arrGet(th, locvars, 1)); // relative to base index + } + locvars = arrGet(th, locvars, 0); // link to prior local variables + } while (locvars != aNull); + return -1; +} + +/* Look for closure variable. Returns idx if found, -1 otherwise. 
*/ +func int CompInfo.findClosureVar(CompInfo *comp, Value varnm) +{ + assert(varnm.isSym()); + + if (comp.clovarseg.isArr()) + { + // Look to see if variable already defined as closure + int nbrclosures = arr_size(comp->clovarseg); + for (int idx = nbrclosures - 1; idx >= 0; idx--) + { + // Adjust for position in closure array + if (arrGet(comp->th, comp->clovarseg, idx) == varnm) return idx+2; + } + } + return -1; +} + +/** If variable not declared already, declare it */ +void declareLocal(CompInfo *comp, Value varnm) { + Value th = comp->th; + // If explicit 'local' declaration, declare if not found in block list + if (comp->forcelocal) { + if (findBlockVar(th, comp->locvarseg, varnm)) + arrAdd(th, comp->locvarseg, varnm); + } + // If implicit variable, declare as local or closure, if not found in this or any outer block + else if (findLocalVar(comp, varnm)==-1 && findClosureVar(comp, varnm)==-1) + // Declare as closure var if found as local in outer method. Otherwise, declare as local + if (comp->prevcomp!=aNull && findLocalVar((CompInfo*)comp->prevcomp, varnm)!=-1) { + arrAdd(th, comp->clovarseg, varnm); + // Add initialization logic + astAddSeg2(th, comp->newcloseg, vmlit(SymLocal), varnm); // Add its initialization to new closure segment + } + else + arrAdd(th, comp->locvarseg, varnm); +} + +/** Create and return new Closure AST segment + Modifies comp->clovarseg and -> newcloseg */ +func Value parseNewClo(CompInfo* comp, Value astseg) +{ + Value th = comp->th; + // ('Closure', clovars, ('callprop', Closure, New, getmethod, setmethod)) + Value closeg = ast::addSeg(th, astseg, vmlit(SymClosure), 3); + comp->clovarseg = pushArray(th, aNull, 4); + arr::add(th, closeg, comp->clovarseg); + th.popValue(); + Value newcloseg = ast::addSeg(th, closeg, vmlit(SymCallProp), 8); + ast::addSeg2(th, newcloseg, vmlit(SymGlobal), vmlit(SymClosure)); + ast::addSeg2(th, newcloseg, vmlit(SymLit), vmlit(SymNew)); + ast::addSeg2(th, newcloseg, vmlit(SymLit), vmlit(SymNull)); + 
ast::addSeg2(th, newcloseg, vmlit(SymLit), vmlit(SymNull)); + return newcloseg; +} + +/** Parse an atomic value: literal, variable or pseudo-variable */ +void parseValue(CompInfo* comp, Value astseg) +{ + Value th = comp->th; + // Literal token (number, symbol, string, true, false, null) + if (comp->lex->toktype == Lit_Token) + { + ast::addSeg2(th, astseg, vmlit(SymLit), comp.lex.token); + lexGetNextToken(comp->lex); + } + // Static unquoted @url + else if (comp->lex->toktype == Url_Token) { + astAddSeg2(th, astseg, vmlit(SymExt), anInt(genAddUrlLit(comp, comp->lex->token))); + lexGetNextToken(comp->lex); + } + // Local or global variable / name token + else if (comp->lex->toktype == Name_Token) { + Value symnm = pushValue(th, comp->lex->token); + lexGetNextToken(comp->lex); + const char first = (toStr(symnm))[0]; + // If followed by ":" or ":=", it is a literal symbol + if (lexMatch(comp->lex, ":") || lexMatch(comp->lex, ":=")) + astAddSeg2(th, astseg, vmlit(SymLit), symnm); + else if (first=='$' || (first>='A' && first<='Z')) + astAddSeg2(th, astseg, vmlit(SymGlobal), symnm); + else { + declareLocal(comp, symnm); // declare local if not already declared + // We do not resolve locals to index until gen because of control clauses (declaration after use) + astAddSeg2(th, astseg, vmlit(SymLocal), symnm); + } + popValue(th); + } + // 'baseurl' pseudo-variable + else if (lexMatchNext(comp->lex, "baseurl")) { + astAddValue(th, astseg, vmlit(SymBaseurl)); + } + // 'this' pseudo-variable + else if (lexMatchNext(comp->lex, "this")) { + astAddValue(th, astseg, vmlit(SymThis)); + } + // 'self' pseudo-variable + else if (lexMatchNext(comp->lex, "self")) { + astAddValue(th, astseg, vmlit(SymSelf)); + } + // 'selfmethod' pseudo-variable + else if (lexMatchNext(comp->lex, "selfmethod")) { + astAddValue(th, astseg, vmlit(SymSelfMeth)); + } + // 'context' pseudo-variable + else if (lexMatchNext(comp->lex, "context")) { + astAddValue(th, astseg, vmlit(SymContext)); + } + // '...' 
splat + else if (lexMatchNext(comp->lex, "...")) { + astAddValue(th, astseg, vmlit(SymSplat)); + } + // 'yield' expression + else if (lexMatchNext(comp->lex, "yield")) { + Value newseg = astAddSeg(th, astseg, vmlit(SymYield), 2); + parseThisExp(comp, newseg); + } + // parenthetically-enclosed expression + else if (lexMatchNext(comp->lex, "(")) { + parseExp(comp, astseg); + if (!lexMatchNext(comp->lex, ")")) + lexLog(comp->lex, "Expected ')'."); + } + // Method definition + else if (lexMatch(comp->lex, "[") || lexMatch(comp->lex, "*[")) { + Value svclovars = comp->clovarseg; + Value svnewcloseg = comp->newcloseg; + Value newcloseg = astseg; + // Create closure segment just in case, if we are not already inside one... + // ('Closure', clovars, ('callprop', Closure, New, getmethod, setmethod)) + if (!comp->explicitclo) + newcloseg = comp->newcloseg = parseNewClo(comp,astseg); + // Go compile method parms and code block using new compiler context but same lexer + pushValue(th, vmlit(SymNew)); + pushGloVar(th, "Method"); + pushValue(th, comp); + getCall(th, 2, 1); + // Stick returned compiled method reference in extern section of this method's literals + astAddSeg2(th, newcloseg, vmlit(SymExt), anInt(genAddMethodLit(comp, getFromTop(th, 0)))); + popValue(th); + // Move method to its rightful place in closure segment + if (!comp->explicitclo) { + AuintIdx last = arr_size(newcloseg)-1; + arrSet(th, newcloseg, 3, arrGet(th, newcloseg, last)); + arrSetSize(th, newcloseg, last); + } + comp->newcloseg = svnewcloseg; + comp->clovarseg = svclovars; + } + // Explicit closure definition + else if (lexMatchNext(comp->lex, "+[")) { + Value svclovars = comp->clovarseg; + Value svnewcloseg = comp->newcloseg; + bool svexplicitclo = comp->explicitclo; + + Value newcloseg = parseNewClo(comp,astseg); + + // Process explicit closure variable declarations + if (!lexMatch(comp->lex, "]")) { + do { + if (comp->lex->toktype == Name_Token || lexMatch(comp->lex, "self")) { + // Closure variable 
name + Value symnm = comp->lex->token; + const char first = (toStr(symnm))[0]; + if (first=='$' || (first>='A' && first<='Z')) + lexLog(comp->lex, "A global name may not be a closure variable"); + arrAdd(th, comp->clovarseg, symnm); + lexGetNextToken(comp->lex); + + // Handle specified initializer expression + if (lexMatchNext(comp->lex, "=")) { + parseAppendExp(comp, newcloseg); + } + // No initializer expression? Initialize it using same named 'local' variable + else if (symnm == vmlit(SymSelf)) + astAddValue(th, newcloseg, symnm); + else + astAddSeg2(th, newcloseg, vmlit(SymLocal), symnm); + } + } while (lexMatchNext(comp->lex, ",")); + } + if (!lexMatchNext(comp->lex, "]")) + lexLog(comp->lex, "Expected ']'."); + + comp->explicitclo = true; + comp->newcloseg = newcloseg; + // For get/set explicit closure, look for both + if (lexMatchNext(comp->lex, "{")) { + for (int i=0; i<2; i++) { + parseExp(comp, newcloseg); + AuintIdx last = arr_size(newcloseg)-1; + arrSet(th, newcloseg, 3+i, arrGet(th, newcloseg, last)); + arrSetSize(th, newcloseg, last); + lexMatchNext(comp->lex, ";"); + } + lexMatchNext(comp->lex, "}"); + } + // Not get/set? 
Get method, then move to its rightful place in closure segment + else { + parseValue(comp, newcloseg); + AuintIdx last = arr_size(newcloseg)-1; + arrSet(th, newcloseg, 3, arrGet(th, newcloseg, last)); + arrSetSize(th, newcloseg, last); + } + + // Restore saved values + comp->explicitclo = svexplicitclo; + comp->newcloseg = svnewcloseg; + comp->clovarseg = svclovars; + } + return; +} + +/** Add a list of parameters to a AST propseg */ +void parseParams(CompInfo* comp, Value propseg, const char *closeparen) { + bool saveforcelocal = comp->forcelocal; + comp->forcelocal = false; + + parseAppendExp(comp, propseg); + while (lexMatchNext(comp->lex, ",")) + parseAppendExp(comp, propseg); + + if (!lexMatchNext(comp->lex, closeparen)) + lexLog(comp->lex, "Expected ')' or ']' at end of parameter/index list."); + + comp->forcelocal = saveforcelocal; +} + +/** Determine if token is '.' '.:' or '::' */ +#define isdots(token) ((token)==vmlit(SymDot) || (token)==vmlit(SymColons) || (token)==vmlit(SymDotColon)) + +/** Parse a compound term, handling new and suffixes */ +void parseTerm(CompInfo* comp, Value astseg) { + Value th = comp->th; + // Capture whether term began with a "+" prefix + bool newflag = lexMatchNext(comp->lex, "+"); + // Obtain base value (dots as prefix implies 'this' as base value) + if (!newflag && isdots(comp->lex->token)) + astAddValue(th, astseg, vmlit(SymThis)); + else + parseValue(comp, astseg); + // Handle suffix chains + while (newflag || isdots(comp->lex->token) || lexMatch(comp->lex, "(") || lexMatch(comp->lex, "[")) { + bool getparms = true; + Value propseg = astInsSeg(th, astseg, vmlit(SymActProp), 4); // may adjust op later + // Treat '+' as calling .New + if (newflag) { + astAddSeg2(th, propseg, vmlit(SymLit), vmlit(SymNew)); + newflag = false; // only works once + } + // For pure method call, adjust to be: self.method + else if (lexMatch(comp->lex, "(")) { + arrSet(th, propseg, 2, arrGet(th, propseg, 1)); + arrSet(th, propseg, 1, vmlit(SymSelf)); 
+ } + // For indexing, adjust to: base.'[]' + else if (lexMatchNext(comp->lex, "[")) { + astSetValue(th, propseg, 0, vmlit(SymCallProp)); // adjust because of parms + astAddSeg2(th, propseg, vmlit(SymLit), vmlit(SymBrackets)); + parseParams(comp, propseg, "]"); + getparms = false; + } + // Handle '.', '.:', and '::' + else { + if (lexMatch(comp->lex, ".:")) { + astSetValue(th, propseg, 0, vmlit(SymRawProp)); + getparms = false; + } + else if (lexMatch(comp->lex, "::")) { + astSetValue(th, propseg, 0, vmlit(SymCallProp)); + astAddSeg2(th, propseg, vmlit(SymLit), vmlit(SymBrackets)); + getparms = false; + } + lexGetNextToken(comp->lex); // scan past dot(s) operator + + // Retrieve the property specified after the dot(s) operator + if (comp->lex->toktype == Name_Token || comp->lex->toktype == Lit_Token) { + astAddSeg2(th, propseg, vmlit(SymLit), comp->lex->token); + lexGetNextToken(comp->lex); + } + // Calculated property symbol/method value + else if (lexMatchNext(comp->lex, "(")) { + parseExp(comp, propseg); + if (!lexMatchNext(comp->lex, ")")) + lexLog(comp->lex, "Expected ')' at end of property expression."); + } + else { + astAddSeg2(th, propseg, vmlit(SymLit), aNull); + lexLog(comp->lex, "Expected property expression after '.', '.:', or '::'"); + } + } + + // Process parameter list, if appropriate for this term suffix + if (getparms) { + if (lexMatchNext(comp->lex, "(")) { + astSetValue(th, propseg, 0, vmlit(SymCallProp)); // adjust because of parms + parseParams(comp, propseg, ")"); + } + // Treat Text or Symbol literal as a single parameter to pass + else if (comp->lex->toktype == Lit_Token && (isStr(comp->lex->token) || isSym(comp->lex->token))) { + astSetValue(th, propseg, 0, vmlit(SymCallProp)); // adjust because of parm + astAddSeg2(th, propseg, vmlit(SymLit), comp->lex->token); + lexGetNextToken(comp->lex); + } + } + } +} + +/** Parse a prefix operator */ +void parsePrefixExp(CompInfo* comp, Value astseg) { + Value th = comp->th; + if 
(lexMatchNext(comp->lex, "-")) { + parsePrefixExp(comp, astseg); + + // Optimize taking the negative of a literal number + Value selfseg = astGetLast(th, astseg); + if (astGet(th, selfseg, 0)==vmlit(SymLit) && isFloat(astGet(th, selfseg, 1))) + astSetValue(th, selfseg, 1, aFloat(-toAfloat(astGet(th, selfseg, 1)))); + else if (astGet(th, selfseg, 0)==vmlit(SymLit) && isInt(astGet(th, selfseg, 1))) + astSetValue(th, selfseg, 1, anInt(-toAint(astGet(th, selfseg, 1)))); + else { // Not a literal number? Do the property call + astseg = astInsSeg(th, astseg, vmlit(SymCallProp), 3); + Value litseg = astAddSeg(th, astseg, vmlit(SymLit), 2); + astAddValue(th, litseg, vmlit(SymNeg)); + } + } + // '@' + symbol, text or '('exp')' + else if (lexMatchNext(comp->lex, "@")) { + // Symbol or text: treat as static, unquoted url + if (comp->lex->toktype == Lit_Token) { + assert(isStr(comp->lex->token) || isSym(comp->lex->token)); + // +Resource(token,baseurl) + pushValue(th, vmlit(SymNew)); + pushValue(th, vmlit(TypeResc)); + pushValue(th, comp->lex->token); + pushValue(th, comp->lex->url); + getCall(th, 3, 1); + // ('lit', resource) + astAddSeg2(th, astseg, vmlit(SymExt), anInt(genAddUrlLit(comp, getFromTop(th, 0)))); + popValue(th); + lexGetNextToken(comp->lex); + } + else { + // ('callprop', ('callprop', glo'Resource', lit'New', parsed-value, 'baseurl'), 'Load') + Value loadseg = astAddSeg(th, astseg, vmlit(SymCallProp), 3); + Value newseg = astAddSeg(th, loadseg, vmlit(SymCallProp), 5); + astAddSeg2(th, newseg, vmlit(SymGlobal), vmlit(SymResource)); + astAddSeg2(th, newseg, vmlit(SymLit), vmlit(SymNew)); + parsePrefixExp(comp, newseg); + astAddValue(th, newseg, vmlit(SymBaseurl)); + astAddSeg2(th, loadseg, vmlit(SymLit), vmlit(SymLoad)); + } + } + else + parseTerm(comp, astseg); +} + +/** Parse the '**' operator */ +func void CompInfo.parsePowerExp(CompInfo* comp, Value astseg) +{ + Value th = comp.th; + comp.parsePrefixExp(astseg); + Value op = comp.lex.token; + while 
(comp.matchNext("**"))
	{
		Value newseg = ast::insSeg(th, astseg, vmlit(SymCallProp), 4);
		ast::addSeg2(th, newseg, vmlit(SymLit), op);
		comp.parsePrefixExp(newseg);
	}
}

/** Parse the '*', '/' or '%' binary operator.
 * Each occurrence wraps the accumulated AST in a property call on the operator.
 * BUG FIX(review): original had a malformed parameter list ('comp inline'),
 * captured the operator token once outside the loop, and used SYM_CALL_PROP /
 * SYM_LIT where the rest of these sources use SymCallProp / SymLit. */
func void CompInfo.parseMultDivExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	comp.parsePowerExp(astseg);
	// Re-read the operator token on every pass: it changes after each parse
	while (true)
	{
		Value op = comp.lex.token;
		if (!comp.matchNext("*") && !comp.matchNext("/") && !comp.matchNext("%")) break;
		Value newseg = ast::insSeg(th, astseg, vmlit(SymCallProp), 4);
		ast::addSeg2(th, newseg, vmlit(SymLit), op);
		comp.parsePowerExp(newseg);
	}
}

/** Parse the '+' or '-' binary operator.
 * BUG FIX(review): original condition was garbled ('comp.matchNext"+"' and
 * 'comp.matchNextcomp->lex, "-")') and declared 'int isAdd' inside the
 * while condition; restructured with per-iteration matching. */
func void CompInfo.parseAddSubExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	comp.parseMultDivExp(astseg);
	while (true)
	{
		bool isAdd = comp.matchNext("+");
		if (!isAdd && !comp.matchNext("-")) break;
		Value newseg = ast::insSeg(th, astseg, vmlit(SymCallProp), 4);
		// NOTE(review): assumes SymPlus/SymMinus vm literals exist (original
		// read SYM_PLUS/SYM_MINUS) — confirm the declared literal names
		ast::addSeg2(th, newseg, vmlit(SymLit), isAdd ? vmlit(SymPlus) : vmlit(SymMinus));
		comp.parseMultDivExp(newseg);
	}
}

/** Parse the range .. 
constructor operator */
func void CompInfo.parseRangeExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	comp.parseAddSubExp(astseg);
	if (comp.matchNext(".."))
	{
		// ('CallProp', 'Range', 'New', from, to, step)
		// CONSISTENCY(review): SYM_CALL_PROP/SYM_GLOBAL normalized to the
		// SymCallProp/SymGlobal names used throughout these sources
		Value newseg = ast::insSeg(th, astseg, vmlit(SymCallProp), 4);
		// Lift the already-parsed 'from' operand out so Range/New precede it
		Value from = th.pushValue(arr::get(th, newseg, 1));
		arr::del(th, newseg, 1, 1);
		ast::addSeg2(th, newseg, vmlit(SymGlobal), vmlit(SymRange));
		ast::addSeg2(th, newseg, vmlit(SymLit), vmlit(SymNew));
		ast::addValue(th, newseg, from);
		th.popValue();
		comp.parseAddSubExp(newseg);	// 'to' operand
		// Optional step operand after a second '..'
		if (comp.matchNext("..")) comp.parseAddSubExp(newseg);
	}
}

/** Parse the comparison operators */
func void CompInfo.parseCompExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	comp.parseRangeExp(astseg);
	Value op = comp.lex.token;
	if (comp.matchNext("<=>"))
	{
		// '<=>' compiles to a property call carrying the operator as a literal
		Value newseg = ast::insSeg(th, astseg, vmlit(SymCallProp), 4);
		ast::addSeg2(th, newseg, vmlit(SymLit), op);
		comp.parseRangeExp(newseg);
	}
	else if (comp.matchNext("===") || comp.matchNext("~~")
		|| comp.matchNext("==") || comp.matchNext("!=")
		|| comp.matchNext("<=") || comp.matchNext(">=")
		|| comp.matchNext("<") || comp.matchNext(">"))
	{
		// Other comparisons keep the operator symbol itself as the AST op
		Value newseg = ast::insSeg(th, astseg, op, 3);
		comp.parseRangeExp(newseg);
	}
}

/* Parse 'not' conditional logic operator */
func void CompInfo.parseNotExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	// An odd number of '!'/'not' prefixes negates the comparison
	bool takenot = false;
	while (comp.matchNext("!") || comp.matchNext("not")) takenot = !takenot;
	if (takenot)
	{
		Value newseg = ast::addSeg(th, astseg, vmlit(SymNot), 2);
		comp.parseCompExp(newseg);
	}
	else
	{
		comp.parseCompExp(astseg);
	}
}

/** Convenience wrapper: match-and-consume the next token against s */
func bool CompInfo.matchNext(CompInfo *comp, string s) @inline
{
	return comp.lex.matchNext(s);
}

/* Parse 'and' conditional logic operator */
func void CompInfo.parseAndExp(CompInfo* comp, Value astseg)
{
	Value th = comp.th;
	comp.parseNotExp(astseg);
	// BUG FIX: original condition was missing its closing parenthesis
	if (comp.matchNext("&&") || comp.matchNext("and"))
	{
		Value newseg = ast::insSeg(th, astseg, vmlit(SymAnd), 3);
		do
		{
			comp.parseNotExp(newseg);
		}
		while (comp.matchNext("&&") || comp.matchNext("and"));
	}
}

/** Parse 'or' conditional logic operator */
void parseLogicExp(CompInfo* comp, Value astseg)
{
	Value th = comp->th;
	parseAndExp(comp, astseg);
	if (lexMatchNext(comp->lex, "||") || lexMatchNext(comp->lex, "or")) {
		// All operands of a chained 'or' land in one SymOr segment
		Value newseg = astInsSeg(th, astseg, vmlit(SymOr), 3);
		do {
			parseAndExp(comp, newseg);
		} while (lexMatchNext(comp->lex, "||") || lexMatchNext(comp->lex, "or"));
	}
}

/** Parse '?' 'else' ternary operator */
void parseTernaryExp(CompInfo* comp, Value astseg)
{
	Value th = comp->th;
	parseLogicExp(comp, astseg);
	if (lexMatchNext(comp->lex, "?")) {
		Value newseg = astInsSeg(th, astseg, vmlit(SymQuestion), 4);
		parseLogicExp(comp, newseg);
		if (lexMatchNext(comp->lex, "else"))
			parseLogicExp(comp, newseg);
		else {
			// Missing 'else': supply null so the AST stays well-formed, then complain
			astAddSeg2(th, newseg, vmlit(SymLit), aNull);
			lexLog(comp->lex, "Expected 'else' in ternary expression");
		}
	}
}

/** Parse append and prepend operators */
void parseAppendExp(CompInfo* comp, Value astseg) {
	Value th = comp->th;
	// If prefix, assume 'this'. 
Otherwise get left hand value
	if (lexMatch(comp->lex, "<<") || lexMatch(comp->lex, ">>"))
		astAddValue(th, astseg, vmlit(SymThis));
	else
		parseTernaryExp(comp, astseg);

	// Each '<<'/'>>' wraps the accumulated AST in a property call on the operator.
	// BUG FIX(review): original condition '(op=token) && match("<<") || match(">>")'
	// had an &&/|| precedence slip; re-read the operator token on every pass.
	while (true) {
		Value op = comp->lex->token;
		if (!lexMatchNext(comp->lex, "<<") && !lexMatchNext(comp->lex, ">>")) break;
		Value newseg = astInsSeg(th, astseg, vmlit(SymCallProp), 4);
		astAddSeg2(th, newseg, vmlit(SymLit), op);
		parseTernaryExp(comp, newseg);
	}
}

/** Parse comma separated expressions */
void parseCommaExp(CompInfo* comp, Value astseg) {
	Value th = comp->th;
	parseAppendExp(comp, astseg);
	if (lexMatch(comp->lex, ",")) {
		// Wrap the first expression, then append one expression per comma
		Value commaseg = astInsSeg(th, astseg, vmlit(SymComma), 4);
		while (lexMatchNext(comp->lex, ",")) {
			parseAppendExp(comp, commaseg);
		}
	}
}

/** Parse an assignment or property setting expression */
void parseAssgnExp(CompInfo* comp, Value astseg) {
	Value th = comp->th;
	bool isColonEq;

	// Get lvals (could be rvals if no assignment operator is found)
	// Presence of 'local' ensures unknown locals are declared as locals vs. closure vars
	bool saveforcelocal = comp->forcelocal;
	comp->forcelocal = lexMatchNext(comp->lex, "local");
	parseCommaExp(comp, astseg);
	comp->forcelocal = saveforcelocal;

	// Process rvals depending on type of assignment
	if (lexMatch(comp->lex, "=")) {
		Value assgnseg = astInsSeg(th, astseg, vmlit(SymAssgn), 3);
		// Warn about unalterable literals or pseudo-variables to the left of "="
		Value lvalseg = arrGet(th, assgnseg, 1);
		if (arrGet(th, lvalseg, 0) == vmlit(SymComma)) {
			// RECONSTRUCTION(review): source text was lost here by extraction;
			// assumed: validate each comma-separated lval — confirm upstream
			for (Auint i = 1; i < arr_size(lvalseg); i++) {
				Value lval = arrGet(th, lvalseg, i);
				if (!astIsLval(th, lval)) {
					lexLog(comp->lex, "Literals/pseudo-variables/expressions cannot be altered.");
					break;
				}
			}
		}
		else if (!astIsLval(th, lvalseg)) {
			lexLog(comp->lex, "Literals/pseudo-variables/expressions cannot be altered.");
		}
		lexGetNextToken(comp->lex); // Go past assignment operator
		parseAssgnExp(comp, assgnseg); // Get the values to the right
	}
	else if ((isColonEq = lexMatchNext(comp->lex, ":=")) || lexMatchNext(comp->lex, ":")) {
		// ('=', ('activeprop'/'callprop', 'this', ('[]',) property), value)
		Value assgnseg = astInsSeg(th, astseg, vmlit(SymAssgn), 3);
		Value indexseg = astPushNew(th, vmlit(isColonEq? SymCallProp : SymActProp), 4);
		astAddValue(th, indexseg, vmlit(SymThis));
		if (isColonEq)
			astAddSeg2(th, indexseg, vmlit(SymLit), vmlit(SymBrackets));
		astPopNew(th, assgnseg, indexseg);
		parseAssgnExp(comp, assgnseg);
	}
}

/** Parse an expression */
void parseExp(CompInfo* comp, Value astseg) {
	parseAssgnExp(comp, astseg);
}

/** Set up block variable list and add it to astseg */
Value parseNewBlockVars(CompInfo *comp, Value astseg) {
	Value th = comp->th;
	// Block var list layout: [link-to-outer-list, count, names...]
	Value blkvars = pushArray(th, vmlit(TypeListm), 8);
	arrSet(th, blkvars, 0, comp->locvarseg);
	arrSet(th, blkvars, 1, anInt(0));
	comp->locvarseg = blkvars;
	astAddValue(th, astseg, blkvars);
	popValue(th); // blkvars
	return blkvars;
}

/** Parse an expression statement / 'this' block */
void parseThisExp(CompInfo* comp, Value astseg) {
	Value th = comp->th;
	Value svlocalvars = comp->locvarseg;
	parseAssgnExp(comp, astseg);
	// 'using' rebinds 'this' for the following block
	if (lexMatchNext(comp->lex, "using")) {
		Value newseg = astInsSeg(th, astseg, vmlit(SymThisBlock), 5);
		parseNewBlockVars(comp, newseg);
		parseAssgnExp(comp, newseg);
		parseBlock(comp, newseg);
	}
	else if (lexMatch(comp->lex, "{")) {
		Value newseg = astInsSeg(th, astseg, vmlit(SymThisBlock), 5);
		parseNewBlockVars(comp, newseg);
		astAddValue(th, newseg, aNull);
		parseBlock(comp, newseg);
	}
	comp->locvarseg = svlocalvars;
}

/** Expect ';' at this point. Error if not found, then scan to find it. */
void parseSemi(CompInfo* comp, Value astseg) {
	// Allow right curly brace and end-of-file to stand in for a semi-colon
	if (!lexMatchNext(comp->lex, ";")&&!lexMatch(comp->lex, "}")&&comp->lex->toktype!=Eof_Token) {
		lexLog(comp->lex, "Unexpected token in statement. 
Ignoring all until block or ';'.");
		// Scan forward, still honoring nested blocks, until ';', '}' or EOF
		while (comp->lex->toktype != Eof_Token && !lexMatch(comp->lex, "}") && !lexMatchNext(comp->lex, ";"))
			if (lexMatch(comp->lex, "{"))
				parseBlock(comp, astseg);
			else
				lexGetNextToken(comp->lex);
	}
}

/** Parse the each clause: vars and iterator */
void parseEachClause(CompInfo *comp, Value newseg) {
	Value th = comp->th;

	// Set up block variable list
	Value blkvars = parseNewBlockVars(comp, newseg);

	// Parse list of 'each' variables (into new variable block)
	AuintIdx bvarsz = 2;
	do {
		if (comp->lex->toktype==Name_Token) {
			arrSet(th, blkvars, bvarsz++, comp->lex->token);
			lexGetNextToken(comp->lex);
		}
		else
			lexLog(comp->lex, "Expected variable name");
		// Assign null variable for "key", if none specified using ':'
		if (bvarsz==3 && !lexMatch(comp->lex, ":")) {
			arrSet(th, blkvars, bvarsz++, arrGet(th, blkvars, 2));
			arrSet(th, blkvars, 2, aNull);
		}
	} while (lexMatchNext(comp->lex, ",") || lexMatchNext(comp->lex, ":"));
	astAddValue(th, newseg, anInt(bvarsz-2)); // expected number of 'each' values

	// 'in' clause
	if (!lexMatchNext(comp->lex, "in"))
		lexLog(comp->lex, "Expected 'in'");
	parseLogicExp(comp, newseg);
}

/** Parse 'if', 'while' or 'each' statement clauses */
void parseClause(CompInfo* comp, Value astseg, AuintIdx stmtvarsz) {
	Value th = comp->th;
	Value svlocalvars = comp->locvarseg;	// Statement's local block
	Value deeplocalvars = aNull;	// First/deepest clause's local block
	Value inslocalvars = aNull;	// prior/deeper clause's local block
	// Handle multiple clauses so they execute in reverse order
	while (lexMatch(comp->lex, "if") || lexMatch(comp->lex, "while") || lexMatch(comp->lex, "each")) {
		Value ctlseg;
		Value ctltype = comp->lex->token;
		if (lexMatchNext(comp->lex, "if")) {
			// Wrap 'if' single statement in a block (so that fixing implicit returns works)
			astInsSeg(th, astseg, vmlit(SymSemicolon), 2);
			ctlseg = astPushNew(th, ctltype, 4);
			parseLogicExp(comp, ctlseg);
			parseNewBlockVars(comp, ctlseg);
		}
		else if (lexMatchNext(comp->lex, "while")) {
			ctlseg = astPushNew(th, ctltype, 4);
			parseNewBlockVars(comp, ctlseg);
			parseLogicExp(comp, ctlseg);
		}
		else if (lexMatchNext(comp->lex, "each")) {
			ctlseg = astPushNew(th, ctltype, 5);
			parseEachClause(comp, ctlseg);	// var and 'in' iterator
		}
		astPopNew(th, astseg, ctlseg);	// swap in place of block

		// Linkage of variable scoping for clauses is intricate
		if (inslocalvars != aNull)	// link prior clause to current
			arrSet(th, inslocalvars, 0, comp->locvarseg);
		if (deeplocalvars == aNull)
			deeplocalvars = comp->locvarseg;	// Remember first/deepest
		inslocalvars = comp->locvarseg;	// Remember most recent
		comp->locvarseg = svlocalvars;	// Restore to statement block
	}
	parseSemi(comp, astseg);

	// Move any new locals declared in statement to deepest clause's block scope
	// RECONSTRUCTION(review): the condition's source text was lost by extraction
	// ('stmtvarszlocvarseg'); assumed 'stmtvarsz < arr_size(svlocalvars)' — confirm upstream
	if (deeplocalvars != aNull && stmtvarsz < arr_size(svlocalvars)) {
		comp->locvarseg = deeplocalvars;
		for (AuintIdx vari = arr_size(svlocalvars)-1; vari >= stmtvarsz; vari--) {
			// Pop off statement's declared local, and if not found, add to deepest block
			// Find check is needed to see each's declared variables, for example
			Value varnm = pushValue(th, arrGet(th, svlocalvars, vari));
			arrSetSize(th, svlocalvars, vari);
			if (findLocalVar(comp, varnm)==-1)
				arrAdd(th, deeplocalvars, varnm);
			popValue(th);
		}
		arrSetSize(th, svlocalvars, stmtvarsz);	// Remove from outer block vars
		comp->locvarseg = svlocalvars;
	}
}

/** Parse a sequence of statements, each ending with ';' */
void parseStmts(CompInfo* comp, Value astseg) {
	Value th = comp->th;
	astseg = astAddSeg(th, astseg, vmlit(SymSemicolon), 16);
	Value newseg;
	while (comp->lex->toktype != Eof_Token && !lexMatch(comp->lex, "}")) {
		Value stmt = comp->lex->token;
		AuintIdx stmtvarsz = arr_size(comp->locvarseg);	// Remember for clauses
		// 'if' block
		if (lexMatchNext(comp->lex, "if")) {
			newseg = astAddSeg(th, 
astseg, vmlit(SymIf), 4); + Value svlocalvars = comp->locvarseg; + parseLogicExp(comp, newseg); + parseNewBlockVars(comp, newseg); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + while (lexMatchNext(comp->lex, "elif")) { + parseLogicExp(comp, newseg); + parseNewBlockVars(comp, newseg); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + if (lexMatchNext(comp->lex, "else")) { + astAddValue(th, newseg, vmlit(SymElse)); + parseNewBlockVars(comp, newseg); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + } + + // 'match' block + if (lexMatchNext(comp->lex, "match")) { + newseg = astAddSeg(th, astseg, vmlit(SymMatch), 4); + Value svlocalvars = comp->locvarseg; + parseExp(comp, newseg); + if (lexMatchNext(comp->lex, "using")) + parseAssgnExp(comp, newseg); + else + astAddValue(comp, newseg, vmlit(SymMatchOp)); + Value matchInto = aNull; + if (lexMatchNext(comp->lex, "into")) { + matchInto = pushArray(th, aNull, 4); + do { + if (comp->lex->toktype==Name_Token) { + arrAdd(th, matchInto, comp->lex->token); + lexGetNextToken(comp->lex); + } + else + lexLog(comp->lex, "Expected variable name"); + } while (lexMatchNext(comp->lex, ",")); + } + parseSemi(comp, astseg); + while (lexMatchNext(comp->lex, "with")) { + parseExp(comp, newseg); + parseNewBlockVars(comp, newseg); + AuintIdx nInto = 2; + if (lexMatchNext(comp->lex, "into")) { + do { + if (comp->lex->toktype==Name_Token) { + arrSet(th, comp->locvarseg, nInto++, comp->lex->token); + lexGetNextToken(comp->lex); + } + else + lexLog(comp->lex, "Expected variable name"); + } while (lexMatchNext(comp->lex, ",")); + } + else if (matchInto!=aNull) { + for (AuintIdx i=0; ilocvarseg, nInto, arrGet(th, matchInto, nInto-2)); + nInto++; + } + } + astAddValue(th, newseg, anInt(nInto-2)); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + if (lexMatchNext(comp->lex, 
"else")) { + astAddValue(th, newseg, vmlit(SymElse)); + parseNewBlockVars(comp, newseg); + astAddValue(th, newseg, anInt(0)); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + if (matchInto!=aNull) + popValue(th); + } + + // 'while' block + else if (lexMatchNext(comp->lex, "while")) { + newseg = astAddSeg(th, astseg, vmlit(SymWhile), 4); + Value svlocalvars = comp->locvarseg; + parseNewBlockVars(comp, newseg); + parseLogicExp(comp, newseg); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + + // 'each': ('each', localvars, nretvals, iterator, block) + else if (lexMatchNext(comp->lex, "each")) { + newseg = astAddSeg(th, astseg, vmlit(SymEach), 5); + Value svlocalvars = comp->locvarseg; + parseEachClause(comp, newseg); // vars and 'in' iterator + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + + // 'do': ('do', local, exp, block) + else if (lexMatchNext(comp->lex, "do")) { + newseg = astAddSeg(th, astseg, vmlit(SymDo), 4); + Value svlocalvars = comp->locvarseg; + parseNewBlockVars(comp, newseg); + if (!lexMatch(comp->lex, "{")) + parseExp(comp, newseg); + else + astAddValue(th, newseg, aNull); + parseBlock(comp, newseg); + comp->locvarseg = svlocalvars; + parseSemi(comp, astseg); + } + + // 'break' or 'continue' statement + else if (lexMatchNext(comp->lex, "break") || lexMatchNext(comp->lex, "continue")) { + astAddSeg(th, astseg, stmt, 1); + parseClause(comp, astseg, stmtvarsz); + } + + // 'return' statement + else if (lexMatchNext(comp->lex, "return") || lexMatchNext(comp->lex, "yield")) { + newseg = astAddSeg(th, astseg, stmt, 2); + if (!lexMatch(comp->lex, ";") && !lexMatch(comp->lex, "if") + && !lexMatch(comp->lex, "each") && !lexMatch(comp->lex, "while")) + parseThisExp(comp, newseg); + else + astAddValue(th, newseg, aNull); + parseClause(comp, astseg, stmtvarsz); + } + + // expression or 'this' block + else { + if (stmt != 
vmlit(SymSemicolon)) { + parseThisExp(comp, astseg); + parseClause(comp, astseg, stmtvarsz); + } + } + } + return; +} + +/** Parse a block of statements enclosed by '{' and '}' */ +void parseBlock(CompInfo* comp, Value astseg) { + if (!lexMatchNext(comp->lex, "{")) + return; + parseStmts(comp, astseg); + if (!lexMatchNext(comp->lex, "}")) + lexLog(comp->lex, "Expected '}'"); + return; +} + +/* Parse an Acorn program */ +void parseProgram(CompInfo* comp) { + Value th = comp->th; + Value methast = comp->ast; + astAddValue(th, methast, vmlit(SymMethod)); + + // local variable list - starts with pointer to outer method's local variable list + comp->locvarseg = astAddSeg(th, methast, aNull, 16); + astAddValue(th, comp->locvarseg, anInt(1)); + + // closure variable list already retrieved from outer method + comp->explicitclo = false; + + Value parminitast = astAddSeg(th, methast, vmlit(SymSemicolon), 4); + + // process parameters as local variables + bool isYielder = false; + if (lexMatchNext(comp->lex, "[") || (isYielder = lexMatchNext(comp->lex, "*["))) { + if (isYielder) + methodFlags(comp->method) |= METHOD_FLG_YIELDER; + + // Process parameter declaration + do { + if (lexMatchNext(comp->lex, "...")) { + methodFlags(comp->method) |= METHOD_FLG_VARPARM; + break; + } + else if (comp->lex->toktype == Name_Token) { + Value symnm = comp->lex->token; + const char first = (toStr(symnm))[0]; + if (first=='$' || (first>='A' && first<='Z')) + lexLog(comp->lex, "A global name may not be a method parameter"); + else { + if (findLocalVar(comp, symnm)==-1) { + arrAdd(th, comp->locvarseg, symnm); + methodNParms(comp->method)++; + } + else + lexLog(comp->lex, "Duplicate method parameter name"); + } + lexGetNextToken(comp->lex); + + // Handle any specified parameter default value + if (lexMatchNext(comp->lex, "=")) { + // Produce this ast: parm||=default-expression + Value oreqseg = astAddSeg(th, parminitast, vmlit(SymOrAssgn), 3); + astAddSeg2(th, oreqseg, vmlit(SymLocal), symnm); + 
parseAppendExp(comp, oreqseg); + } + } + } while (lexMatchNext(comp->lex, ",")); + lexMatchNext(comp->lex, "]"); + parseBlock(comp, methast); + } + else + parseStmts(comp, methast); + + comp->method->nbrlocals = arr_size(comp->locvarseg)-1; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace avm +#endif \ No newline at end of file diff --git a/resources/examples/acornvm/parser_ast.c3 b/resources/examples/acornvm/parser_ast.c3 new file mode 100644 index 000000000..25a334093 --- /dev/null +++ b/resources/examples/acornvm/parser_ast.c3 @@ -0,0 +1,131 @@ +module acorn::parser::ast; +import acorn::arr; + +/** Parser for Acorn compiler. See Acorn documentation for syntax diagrams. + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + + +/* ********************** + * Abstract Syntax Tree construction helpers for parser + * (isolates that we are working with arrays to encode s-expressions) + * **********************/ + +/** Append a value to AST segment - growing as needed */ +func void addValue(Value th, Value astseg, Value val) @inline +{ + arr::add(th, astseg, val); +} + +/** Get a value within the AST segment */ +func Value get(Value th, Value astseg, AuintIdx idx) @inline +{ + return arr::get(th, astseg, idx); +} + +/** Set a value within the AST segment */ +func void set(Value th, Value astseg, AuintIdx idx, Value val) +{ + arr::set(th, astseg, idx, val); +} + +/** Create and append a new AST segment (of designated size) to current segment. + * Append the AST op to the new segment, then return it */ +Value addSeg(Value th, Value oldseg, Value astop, AuintIdx size) +{ + Value newseg = pushArray(th, aNull, size); + arr::add(th, oldseg, newseg); + th.popValue(); + arr::add(th, newseg, astop); + return newseg; +} + +/** Create and append a new AST segment (with two slots) to current segment. 
+ * Append the AST op and val to the new segment, then return it */ +Value addSeg2(Value th, Value oldseg, Value astop, Value val) +{ + Value newseg = pushArray(th, aNull, 2); + arr::add(th, oldseg, newseg); + popValue(th); + arr::add(th, newseg, astop); + arr::add(th, newseg, val); + return newseg; +} + +/** Get last node from ast segment */ +Value getLast(Value th, Value astseg) @inline +{ + return get(th, astseg, arr_size(astseg) - 1); +} + +/** Create a new segment of designated size to replace last value of oldseg. + * Append the AST op and the value from the oldseg to the new segment, + * then return it. */ +Value insSeg(Value th, Value oldseg, Value astop, AuintIdx size) +{ + AuintIdx oldpos = arr_size(oldseg)-1; + Value saveval = arr::get(th, oldseg, oldpos); + Value newseg = pushArray(th, aNull, size); + arr::set(th, oldseg, oldpos, newseg); + popValue(th); + arr::add(th, newseg, astop); + arr::add(th, newseg, saveval); + return newseg; +} + +/** Create a new segment of designated size to replace last value of oldseg. + * Append the AST op, propval, and the value from the oldseg to the new segment, + * then return it. 
*/ +Value astInsSeg2(Value th, Value oldseg, Value astop, Value propval, AuintIdx size) +{ + AuintIdx oldpos = arr_size(oldseg) - 1; + Value saveval = arr::get(th, oldseg, oldpos); + Value newseg = pushArray(th, aNull, size); + arrSet(th, oldseg, oldpos, newseg); + th.popValue(); + arr::add(th, newseg, astop); + if (isSym(propval)) + { + if (propval == vmlit(SymThis)) + { + arr::add(th, newseg, propval); + } + else + { + Value propseg = addSeg(th, newseg, vmlit(SymLit), 2); // Assume propval is a literal symbol + arr::add(th, propseg, propval); + } + } + arr::add(th, newseg, saveval); + return newseg; +} + +/** Create a new untethered, sized AST segment that has astop as first element */ +Value pushNew(Value th, Value astop, AuintIdx size) +{ + Value newseg = pushArray(th, aNull, size); + arr::add(th, newseg, astop); + return newseg; +} + +/** Attach newseg into last slot of oldseg, whose old value is appended to newseg */ +void popNew(Value th, Value oldseg, Value newseg) +{ + AuintIdx oldpos = arr_size(oldseg) - 1; + Value saveval = arr::get(th, oldseg, oldpos); + arr::set(th, oldseg, oldpos, newseg); + arr::add(th, newseg, saveval); + th.popValue(); +} + +/** Return true if ast segment can be assigned a value: variable or settable property/method */ +func bool isLval(Value th, Value astseg) +{ + if (!astseg.isArr()) return false; + Value op = get(th, astseg, 0); + return op == vmlit(SYM_LOCAL) || op == vmlit(SYM_GLOGAL) || op==vmlit(SYM_ACT_PROP) || op==vmlit(SYM_RAW_PROP) || op==vmlit(SYM_CALL_PROP); +} diff --git a/resources/examples/acornvm/symbol.c3 b/resources/examples/acornvm/symbol.c3 new file mode 100644 index 000000000..f82879f8f --- /dev/null +++ b/resources/examples/acornvm/symbol.c3 @@ -0,0 +1,164 @@ +module acornvm::sym; + +/** modulo operation for hashing (size is always a power of 2) */ +macro @hash_binmod(s, size) +{ + assert_exp(size & (size-1) == 0); + return @cast(s & (size-1), AuintIdx); +} + +/** Resize the symbol table */ +func void 
resizeTable(Value th, Auint newsize) +{ + SymTable* sym_tbl = &vm(th)->sym_table; + Auint i; + + // If we need to grow, allocate more cleared space for array + if (newsize > sym_tbl.nbrAvail) + { + //mem_gccheck(th); // Incremental GC before memory allocation events + mem_reallocvector(th, sym_tbl->symArray, sym_tbl->nbrAvail, newsize, SymInfo *); + for (i = sym_tbl->nbrAvail; i < newsize; i++) sym_tbl->symArray[i] = NULL; + } + + // Move all symbols to re-hashed positions in array + for (i = 0; i < sym_tbl->nbrAvail; i++) + { + SymInfo *p = sym_tbl.symArray[i]; + sym_tbl.symArray[i] = NULL; + while (p) + { // for each node in the list + SymInfo *next = (SymInfo*) p->next; // save next + AuintIdx h = hash_binmod(p->hash, newsize); // new position + p->next = (MemInfo*) sym_tbl->symArray[h]; // chain it + sym_tbl->symArray[h] = (SymInfo*) p; + resetoldbit(p); // see MOVE OLD rule + p = next; + } + } + + // Shrink array + if (newsize < sym_tbl.nbrAvail) + { + // shrinking slice must be empty + assert(sym_tbl->symArray[newsize] == NULL && sym_tbl->symArray[sym_tbl->nbrAvail - 1] == NULL); + mem_reallocvector(th, sym_tbl->symArray, sym_tbl->nbrAvail, newsize, SymInfo *); + } + sym_tbl->nbrAvail = newsize; +} + +/** Initialize the symbol table that hash indexes all symbols */ +func void init(Value th) +{ + SymTable* sym_tbl = &vm(th).sym_table; + sym_tbl.nbrAvail = 0; + sym_tbl.nbrUsed = 0; + sym_tbl.symArray = nil; + resizeTable(th, AVM_SYMTBLMINSIZE); +} + +/** + * Free the symbol table + */ +void free(Value th) +{ + mem::freearray(th, vm(th).sym_table.symArray, vm(th).sym_table.nbrAvail); +} + +/* If symbol exists in symbol table, reuse it. Otherwise, add it. + Anchor (store) symbol value in dest and return it. */ +func Value newSym(Value th, Value* dest, string str, AuintIdx len) +{ + SymInfo* sym; + SymTable* sym_tbl = &vm(th)->sym_table; + unsigned int hash = tblCalcStrHash(str, len, th(th)->vm->hashseed); + + // Look for symbol in symbol table. 
Return it, if found. + for (sym = sym_tbl->symArray[hash_binmod(hash, sym_tbl->nbrAvail)]; sym != NULL; sym = (SymInfo*) sym->next) { + if (hash == sym->hash && + len == sym->size && + (memcmp(str, sym_cstr(sym), len) == 0)) { + mem_keepalive(th, (MemInfo*) sym); // Keep it alive, if it had been marked for deletion + return *dest = (Value) sym; + } + } + + // Not found. Double symbol table size if needed to hold another entry + if (sym_tbl->nbrUsed >= sym_tbl->nbrAvail) + sym_resize_tbl(th, sym_tbl->nbrAvail*2); + + // Create a symbol object, adding to symbol table at hash entry + sym = (SymInfo *) mem_newnolink(th, SymEnc, sym_memsize(len)); + MemInfo **linkp = (MemInfo**) &sym_tbl->symArray[hash_binmod(hash, sym_tbl->nbrAvail)]; + sym->next = *linkp; + *linkp = (MemInfo*)sym; + sym->size = len; + sym->hash = hash; + memcpy(sym_cstr(sym), str, len); + (sym_cstr(sym))[len] = '\0'; + sym_tbl->nbrUsed++; + return *dest = (Value) sym; +} + +/* Return 1 if the value is a Symbol, otherwise 0 */ +func int Value.isSym(Value *sym) @inline +{ + return sym.isEnc(SymEnc); +} + +/** + * Return 1 if symbol starts with a uppercase letter or $ + */ +int isGlobal(Value sym) +{ + assert(isSym(sym)); + wchar_t c = (sym_cstr(sym))[0]; + return iswupper(c) || c == '$'; +} + +/* Iterate to next symbol after key in symbol table (or first if key is NULL). Return Null if no more. + * This can be used to sequentially iterate through the symbol table. + * Results may be inaccurate if the symbol table is changed during iteration. 
+ */ +func Value next(Value th, Value key) +{ + SymTable *sym_tbl = &th(th)->vm->sym_table; + SymInfo *sym; + + // If table empty, return null + if (sym_tbl.nbrUsed == 0) return aNull; + + // If key is null, return first symbol in table + if (key == aNull) + { + SymInfo **symtblp = sym_tbl->symArray; + while ((sym=*symtblp++) == nil); + return cast(sym, Value); + } + + // If key is not a symbol, return null + if (!key.isSym()) return aNull; + + // Look for the symbol in table, then return next one + AuintIdx hash = ((SymInfo*)key)->hash; + Auint len = ((SymInfo*)key)->size; + Auint i = hash_binmod(hash, sym_tbl->nbrAvail); + for (sym = sym_tbl->symArray[i]; sym != NULL; sym = (SymInfo*) sym->next) { + if (hash == sym->hash && + len == sym->size && + (memcmp(sym_cstr(key), sym_cstr(sym), len) == 0)) { + // If the next one is populated, return it + if ((sym = (SymInfo*) sym->next)) + return (Value) sym; + // Look for next non-null entry in symbol array + for (i++; inbrAvail; i++) { + if ((sym=sym_tbl->symArray[i])) + return (Value) sym; + } + return aNull; // No next symbol, return null + } + } + return aNull; +} + + diff --git a/resources/examples/acornvm/typ_all.c3 b/resources/examples/acornvm/typ_all.c3 new file mode 100644 index 000000000..31024a5d8 --- /dev/null +++ b/resources/examples/acornvm/typ_all.c3 @@ -0,0 +1,140 @@ +/** All type methods and properties + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h + */ + +#include "avmlib.h" + +/** <=> */ +int Value.all_compare(Value* th) +{ + if (th.getTop() > 1 && th.getLocal(0) == th.getLocal(1)) + { + th.pushValue(anInt(0)); + return 1; + } + return 0; +} + +/** === Exact match of values */ +int Value.all_same(Value* th) +{ + th.pushValue(th.getTop()> 1 && th.getLocal(0) == th.getLocal(1)? 
aTrue : aFalse); + return 1; +} + +macro auto @all_rocket!($th) +{ + if (th.getTop(th) < 2) return 0; + th.pushValue(vmlit(SymRocket)); + th.pushValue(th.getLocal(0)); + th.pushValue(th.getLocal(1)); + th.getCall(2, 1); + return th.popValue(); +} + +/** ~~, == equal using <=> */ +int Value.all_equal(Value *th) +{ + th.pushValue(@all_rocket!(th) == anInt(0) ? aTrue : aFalse); + return 1; +} + +/** < */ +int Value.all_lesser(Value *th) +{ + th.pushValue(@all_rocket!(th) == anInt(-1)? aTrue : aFalse); + return 1; +} + +/** > */ +int Value.all_greater(Value *th) { + th.pushValue(@all_rocket!(th) == anInt(1)? aTrue : aFalse); + return 1; +} + +/** <= */ +int Value.all_lesseq(Value* th) +{ + th.pushValue(@all_rocket!(th) == anInt(-1) || ret == anInt(0)? aTrue : aFalse); + return 1; +} + +/** >= */ +int Value.all_greateq(Value* th) +{ + Value ret = all_rocket!(th); + th.pushValue(ret == anInt(1) || ret == anInt(0)? aTrue : aFalse); + return 1; +} + +/** executable? */ +int Value.all_isexec(Value* th) +{ + th.pushValue(canCall(th.getLocal(0)) ? 
aTrue : aFalse); + return 1; +} + +/** type */ +int Value.all_type(Value* th) +{ + th.pushValue(th.getType(th.getLocal(0))); + return 1; +} + +/** property */ +int Value.all_property(Value* th) +{ + if (th.getTop() > 1) + { + th.pushValue(th.getProperty(th.getLocal(0), th.getLocal(1))); + return 1; + } + return 0; +} + +/** .Mixin(mixin) */ +int Value.all_mixin(Value* th) +{ + if (th.getTop() > 1) th.addMixin(th.getLocal(0), th.getLocal(1)); + th.setTop(1); + return 1; +} + +/** Initialize the All type */ +void core_all_init(Value th) +{ + vmlit(TypeAll) = th.pushMixin(vmlit(TypeObject), aNull, 32); + th.pushSym("All"); + popProperty(th, 0, "_name"); + pushCMethod(th, all_compare); + popProperty(th, 0, "<=>"); + pushCMethod(th, all_equal); + popProperty(th, 0, "~~"); + pushCMethod(th, all_equal); + popProperty(th, 0, "=="); + pushCMethod(th, all_same); + popProperty(th, 0, "==="); + pushCMethod(th, all_lesser); + popProperty(th, 0, "<"); + pushCMethod(th, all_lesseq); + popProperty(th, 0, "<="); + pushCMethod(th, all_greater); + popProperty(th, 0, ">"); + pushCMethod(th, all_greateq); + popProperty(th, 0, ">="); + pushCMethod(th, all_isexec); + popProperty(th, 0, "callable?"); + pushCMethod(th, all_property); + popProperty(th, 0, "property"); + pushCMethod(th, all_type); + popProperty(th, 0, "type"); + pushCMethod(th, all_mixin); + popProperty(th, 0, "Mixin"); + th.popGloVar("All"); + return; +} + diff --git a/resources/examples/acornvm/types.c3 b/resources/examples/acornvm/types.c3 new file mode 100644 index 000000000..d3b922e5b --- /dev/null +++ b/resources/examples/acornvm/types.c3 @@ -0,0 +1,80 @@ +module acornvm::types; + +enum TokenType +{ + LITERAL, //!< Literal token: null, true, false, int, float, symbol, string + URL, //!< Literal url + NAME, //!< Named identifier (e.g., for a variable) + RESERVED, //!< Reserved word or operator + EOF //!< End of file +} + +typedef int as AintIdx; +typedef uint as AuintIdx; +typedef byte as AByte; + +struct MemCommonInfo 
+    MemInfo* next;      /**< Pointer to next memory block in chain */
+    AByte enctyp;       /**< Encoding type (see EncType) */
+    AByte marked;       /**< Garbage collection flags */
+    AByte flags1;       /**< Encoding-specific flags */
+    AByte flags2;       /**< Encoding-specific flags */
+    AuintIdx size;      /**< Encoding-specific sizing info */
@@ +module acornvm::value; + + +/** A convenience macro for assert(), establishing the conditions expected to be true, + * before returning expression e */ +macro @assert_exp($c, auto $e) +{ + assert($c); + return $e; +} + +/** + Define Value and C-types. + We want all our Value-based types sized the same, + according to the architecture (e.g., all 32-bit or all 64-bit). +*/ + +/** A signed integer, whose size matches Value */ +typedef isize Aint; +/** An unsigned integer, whose size matches Value */ +typedef usize Auint; + + +/** A float, whose size matches Value (see avm_env.h) */ +$assert(usize.size == 8 || usize.size == 4) +$if (usize.size == 8) +{ + typedef double as Afloat; +} +$else +{ + typedef float as Afloat; +} + +/** A unicode character */ +typedef ulong Auchar; + +/** A fixed-sized, self-typed encoded value which holds any kind of data. + * It can be passed to or returned from Acorn or C-methods. + * Never manipulate a Value directly; always use an AcornVM api function. + * + * Its size is that of a full address-space pointer (32- or 64-bits). + * It holds either an immediate value (null, true, false, integer, float, symbol) + * or a pointer to a compound/complex value's header. + */ +typedef void* as distinct Value + +/** Prototype for a C method callable by the VM. + It is passed the thread, through which it obtains parameters via the data stack. + When done, it returns how many return values it has placed on the stack. */ +typedef func int(Value) as AcMethodp; + +/** Quick, exact equivalence check between two values ('===') + * Great for null, false, true, integers and symbols. + * Less suitable for floats (no epsilon) and comparing contents of containers (e.g., strings). + * Is fast because it avoids using type-specific methods. */ +macro @isSame(a, b) { return (a == b); } + +/** What type of data is encoded within the value, as established by the last 2 bits. + * Because of 32/64-bit allocation alignment, pointers always have 0 in last 2 bits. 
+ * Thus, non-zero bits can be used to indicate a non-pointer Value. */ +enum ValBits +{ + POINTER = 0, /*! Value points to a compound value's header */ + INT = 1, /*! Value is a signed integer */ + FLOAT = 2, /*! Value is a floating-point number */ + CONS = 3 /*! Value is a constant (null, false, true) */ +} + +/** The mask used to isolate the value's ValBits info */ +const int VAL_MASK = 0x3; +/** How many bits to shift a Value to remove or make space for ValBits info */ +const int VAL_SHIFT = 2; + + +func bool Value.isEnc(Value *value, EncType type) @inline +{ + return value.isPtr() && @cast(value, MemInfo*).enctyp == type; +} + +/* Return true if the value is a c-String, otherwise 0 */ +bool Value.isStr(Value *str) +{ + return str.isEnc(StrEnc) && !(str_info(str)->flags1 & StrCData); +} + +macro @isType(v, ValBits e) +{ + return @cast(v, Auint) & VAL_MASK == e; +} + +// Integer value functions + + +/** Is v an Integer? */ +func bool Value.isInt(Value *v) +{ + return @isType(*v, INT); +} + +/** Cast c-integer n into an Integer value + * This loses top two-bits of integer precision. + * If integer is too large, this could result in an unexpected value and change of sign. */ +macro @anInt(n) +{ + return @cast(@cast(n, Aint) << VAL_SHIFT + ValInt, Value); +} + +/** Cast an Integer value into a c-integer + * Note: It assumes (and won't verify) that v is an Integer */ +macro @toAint(v) +{ + return @cast(v, Aint) >> VAL_SHIFT; +} + +// Float value functions + +/** Is v a Float? */ +func Value.isFloat(Value *v) +{ + return @isType(*v, FLOAT); +} + +/** Cast c-float n into a Float value + * This loses bottom two-bits of Float mantissa precision. */ +AVM_API Value aFloat(Afloat n); + +/** Cast an Float value into a c-float + * Note: It assumes (and won't verify) that v is an Float */ +AVM_API Afloat toAfloat(Value v); + +/* ******************************************************* + null, false and true values and functions. 
+ (they are encoded in the impossible space for a symbol pointer + **************************************************** */ + +/** The null value */ +macro @aNull() +{ + return @cast(0 << VAL_SHIFT, ValCons, Value); +} + +/** The false value */ +macro @aFalse() +{ + return @cast(1 << VAL_SHIFT + ValCons, Value); +} + +/** The true value */ +macro @aTrue() +{ + return @cast(2 << VAL_SHIFT + ValCons, Value); +} + + +/** + * Is value null? + * @require value != nil + */ +func bool Value.isNull(Value *value) @inline +{ + return *v == aNull; +} + +/** + * Is value false or null? + * @require value != nil + */ +func bool Value.isFalse(Value *value) @inline +{ + return *v == aFalse || *v == aNull; +} + +/** + * Is value true or false? + */ +func bool Value.isBool(Value *value) @inline +{ + return *v >= aFalse; +} + + +// Pointer functions. + +/** Is value a pointer? */ +func bool Value.isPtr(Value *value) @inline +{ + return @isType(*v, POINTER); +} + + +/** Append serialized val to end of str. */ +void serialize(Value th, Value str, int indent, Value val); + diff --git a/resources/examples/acornvm/vm.c3 b/resources/examples/acornvm/vm.c3 new file mode 100644 index 000000000..20e4fc15a --- /dev/null +++ b/resources/examples/acornvm/vm.c3 @@ -0,0 +1,596 @@ +module acornvm::vm; + + +void vm_litinit(Value th); // Initializer for literals +void vm_stdinit(Value th); // Initializer for standard symbols +void core_init(Value th); // Initialize all core types + + +/** Manage the Virtual Machine instance. + * + * This is the heart of the Acorn Virtual Machine. It manages: + * - All memory and garbage collection (avm_memory.h), working with the + * different encoding types. + * - The symbol table, which is shared across everywhere + * - The main thread, which is the recursive root for garbage collection. + * The thread manages the global namespace, including all registered + * core types (including the Acorn compiler and resource types). 
+ * + * See newVm() for more detailed information on VM initialization. + * + * @file + * + * This source file is part of avm - Acorn Virtual Machine. + * See Copyright Notice in avm.h +*/ + + + /** Virtual Machine instance information + * Is never garbage collected, but is the root for garbage collection. */ +struct VmInfo +{ + inline MemCommonInfoGray; //!< Common header for value-containing object + + ulong pcgrng_state; //!< PCG random-number generator state + ulong pcgrng_inc; //!< PCG random-number generator inc value + + Value global; //!< VM's "built in" Global hash table + + Value main_thread; //!< VM's main thread + ThreadInfo main_thr; //!< State space for main thread + + SymTable sym_table; //!< global symbol table + AuintIdx hashseed; //!< randomized seed for hashing strings + Value literals; //!< array of all built-in symbol and type literals + Value stdidx; //!< Table to convert std symbol to index + Value* stdsym; //!< c-array to convert index to std symbol + + // Garbage Collection state + MemInfo* objlist; //!< linked list of all collectable objects + MemInfo** sweepgc; //!< current position of sweep in list 'objlist' + MemInfoGray* gray; //!< list of gray objects + MemInfo* threads; //!< list of all threads + + Auint sweepsymgc; //!< position of sweep in symbol table + + // Metrics used to govern when GC runs + int gctrigger; //!< Memory alloc will trigger GC step when this >= 0 + int gcstepdelay; //!< How many alloc's to wait between steps + int gcnbrnew; //!< number of new objects created since last GC cycle + int gcnbrold; //!< number of old objects created since last gen GC cycle + int gcnewtrigger; //!< Start a new GC cycle after this exceeds gcnbrnew + int gcoldtrigger; //!< Make next GC cycle full if this exceeds gcnbrold + int gcstepunits; //!< How many work units left to consume in GC step + + // Statistics gathering for GC + int gcnbrmarks; //!< How many objects were marked this cycle + int gcnbrfrees; //!< How many objects were freed 
during cycle's sweep + int gcmicrodt; //!< The clock's micro-seconds measured at start of cycle + + Auint totalbytes; //!< number of bytes currently allocated + + char gcmode; //!< Collection mode: Normal, Emergency, Gen + char gcnextmode; //!< Collection mode for next cycle + char gcstate; //!< state of garbage collector + char gcrunning; //!< true if GC is running + char currentwhite; //!< Current white color for new objects + + char gcbarrieron; //!< Is the write protector on? Yes prevents black->white +} + +/** Mark all in-use thread values for garbage collection + * Increments how much allocated memory the thread uses. + */ +macro @vmMark(th, v) +{ + mem_markobj(th, v.main_thread); + mem_markobj(th, v.global); + mem_markobj(th, v.literals); + mem_markobj(th, v.stdidx); +} + +macro vmStdSym(th, idx) { return vm(th).stdsym[idx]; } + +const N_STD_SYM = 256; + + /** C index values for all VM literal values used throughout the code + for common symbols and core types. They are forever immune from garbage collection + by being anchored to the VM. */ +enum VmLiterals +{ + // Compiler symbols + SYM_NULL, //!< 'null' + SYM_FALSE, //!< 'false' + SYM_TRUE, //!< 'true' + SYM_AND, //!< 'and' + SymAsync, //!< 'async' + SymBaseurl, //!< 'baseurl' + SymBreak, //!< 'break' + SymContext, //!< 'context' + SymContinue, //!< 'continue' + SymDo, //!< 'do' + SymEach, //!< 'each' + SymElse, //!< 'else' + SymElif, //!< 'elif' + SymIf, //!< 'if' + SymIn, //!< 'in' + SymInto, //!< 'into' + SymMatch, //!< 'match' + SymNot, //!< 'not' + SymOr, //!< 'or' + SymReturn, //!< 'return' + SymSelf, //!< 'self' + SymSelfMeth, //!< 'selfmethod' + SymThis, //!< 'this' + SymUsing, //!< 'using' + SymVar, //!< 'var' + SymWait, //!< 'wait' + SymWhile, //!< 'while' + SymWith, //!< 'with' + SymYield, //!< 'yield' + SymLBrace, //!< '{' + SymRBrace, //!< '}' + SymSemicolon, //!< ';' + SymComma, //!< ',' + SymQuestion, //!< '?' + SymAt, //!< '@' + SymSplat, //!< '...' + SymDot, //!< '.' 
+ SymColons, //!< '::' + SymDotColon, //!< '.:' + + // Compiler symbols that are also methods + SymAppend, //!< '<<' + SymPrepend, //!< '>>' + SymPlus, //!< '+' + SymMinus, //!< '-' + SymMult, //!< '*' + SymDiv, //!< '/' + SymRocket, //!< '<=>' + SymEquiv, //!< '===' + SymMatchOp, //!< '~~' + SymLt, //!< '<' + SymLe, //!< '<=' + SymGt, //!< '>' + SymGe, //!< '>=' + SymEq, //!< '==' + SymNe, //!< '!=' + + // Methods that are not compiler symbols + // Byte-code (and parser) standard methods + SYM_NEW, //!< 'New' + SYM_INIT, //!< 'Init' + SYM_LOAD, //!< 'Load' + SymGet, //!< 'Get' + SymParas, //!< '()' + SymBrackets, //!< '[]' + SymNeg, //!< '-@' + SymValue, //!< 'value' + SymEachMeth, //!< 'Each' + SymBegin, //!< 'Begin' + SymEnd, //!< 'End' + + SymFinalizer, //!< '_finalizer' method for CData + SymName, //!< '_type' symbol + + // AST symbols + SymMethod, //!< 'method' + SymAssgn, //!< '=' + SymOrAssgn, //!< '||=' + SymColon, //!< ':' + SymThisBlock, //!< 'thisblock' + SymCallProp, //!< 'callprop' + SymActProp, //!< 'activeprop' + SymRawProp, //!< 'rawprop' + SymGlobal, //!< 'global' + SymLocal, //!< 'local' + SymLit, //!< 'lit' + SymExt, //!< 'ext' + SymRange, //!< 'Range' + SymClosure, //!< 'Closure' + SymYielder, //!< 'Yielder' + SymResource, //!< 'Resource' + + // Core type type + TypeObject, //!< Type + TypeMixinc, //!< Mixin class + TypeMixinm, //!< Mixin mixin + TypeNullc, //!< Null class + TypeNullm, //!< Null mixin + TypeBoolc, //!< Float class + TypeBoolm, //!< Float mixin + TypeIntc, //!< Integer class + TypeIntm, //!< Integer mixin + TypeFloc, //!< Float class + TypeFlom, //!< Float mixin + TypeMethc, //!< Method class + TypeMethm, //!< Method mixin + TypeYieldc, //!< Yielder class + TypeYieldm, //!< Yielder mixin + TypeVmc, //!< Vm class + TypeVmm, //!< Vm mixin + TypeSymc, //!< Symbol class + TypeSymm, //!< Symbol mixin + TypeRangec, //!< Range class + TypeRangem, //!< Range mixin + TypeTextc, //!< Text class + TypeTextm, //!< Text mixin + TypeListc, 
//!< List class + TypeListm, //!< List mixin + TypeCloc, //!< Closure class + TypeClom, //!< Closure mixin + TypeIndexc, //!< Index class + TypeIndexm, //!< Index mixin + TypeResc, //!< Index class + TypeResm, //!< Index mixin + TypeAll, //!< All + + //! Number of literals + nVmLits +} + +macro @vmlit!(lit) { return arr_inf(vm(th)->literals)->arr[list]; } + + +/** Used by vm_init to build random seed */ +macro @memcpy_Auint(i, val) +{ + Auint anint = @cast(val, Auint); + memcpy(seedstr + i * sizeof(Auint), &anint, sizeof(Auint)); +} + +/** Create and initialize new Virtual Machine + * When a VM is started: + * - Iit dynamically allocates the VmInfo + * which holds all universal information about the VM instance. + * - Memory management and garbage collection (avm_memory.h) is managed at this level. + * The GC root value (the main thread) determines what allocated values to keep + * and which to discard. + * - All value encodings are initialized next, including the single symbol table + * used across the VM. + * - The main thread is started up, initializing its global namespace. + * - All core types are progressively loaded, establishing the default types for + * each encoding. This includes the resource types and Acorn compiler. */ +func Value new(void) +{ + logInfo(AVM_RELEASE " started."); + + // Create VM info block and start up memory management + VmInfo* vm = @amalloc(VmInfo); + vm.enctyp = VmEnc; + mem_init(vm); /* Initialize memory & garbage collection */ + + // VM is GC Root: Never marked or collected. 
Black will trigger write barrier + vm.marked = bitmask(BLACKBIT); + + // Initialize main thread (allocated as part of VmInfo) + Value th = cast(vm->main_thread = &vm->main_thr, Value); + ThreadInfo* threadInfo = cast(th, threadInfo); + threadInfo.marked = vm.currentwhite; + threadInfo.enctyp = ThrEnc; + threadInfo.next = nil; + thrInit(&vm.main_thr, vm, aNull, STACK_NEWSIZE, 0); + vm.threads = nil; + + // Initialize PCG random number generator to starting values + vm.pcgrng_state = 0x853c49e6748fea9b; + vm.pcgrng_inc = 0xda3e39cb94b95bdb; + + // Compute a randomized seed, using address space layout to increaase randomness + // Seed is used to help calculate randomly distributed symbol hashes + char seedstr[4 * sizeof(Auint)]; + Time timehash = time(nil); + memcpy_Auint(0, vm) // heap pointe + memcpy_Auint(1, timehash) // current time in seconds + memcpy_Auint(2, &timehash) // local variable pointe + memcpy_Auint(3, &newVM) // public function + vm->hashseed = tblCalcStrHash(seedstr, sizeof(seedstr), (AuintIdx) timehash); + + // Initialize vm-wide symbol table, global table and literals + sym_init(th); // Initialize hash table for symbols + newTbl(th, &vm->global, aNull, GLOBAL_NEWSIZE); // Create global hash table + mem_markChk(th, vm, vm->global); + vm_litinit(th); // Load reserved and standard symbols into literal list + core_init(th); // Load up global table and literal list with core types + setType(th, vm->global, vmlit(TypeIndexm)); // Fix up type info for global table + + // Initialize byte-code standard methods and the Acorn compiler + vm_stdinit(th); + + // Start garbage collection + mem_gcstart(th); + + return th; +} + +/* Close down the virtual machine, freeing all allocated memory */ +void vmClose(Value th) { + th = vm(th)->main_thread; + VmInfo* vm = vm(th); + mem::freeAll(th); /* collect all objects */ + mem::reallocvector(th, vm->stdsym, nStdSym, 0, Value); + sym_free(th); + thrFreeStacks(th); + assert(vm(th)->totalbytes == sizeof(VmInfo)); + 
mem::frealloc(vm(th), 0); /* free main block */ + logInfo(AVM_RELEASE " ended."); +} + +/* Lock the Vm */ +void vm_lock(Value th) +{ +} + +/* Unlock the Vm */ +void vm_unlock(Value th) +{ +} + +/* Interval timer */ +$if ($platform == "win32" || $platform == "win64") +{ + +int64_t vmStartTimer() +{ + LARGE_INTEGER li; + QueryPerformanceCounter(&li); + return li.QuadPart; +} + +float vmEndTimer(int64_t starttime) +{ + LARGE_INTEGER now, freq; + QueryPerformanceCounter(&now); + QueryPerformanceFrequency(&freq); + return float(now.QuadPart-starttime)/float(freq.QuadPart); +} + +} +$else +{ +func int64_t vmStartTimer() +{ + TimeVal start; + start.gettimeofday(); + return start.tv_sec * 1000000 + start.tv_usec; +} + +float vmEndTimer(int64_t starttime) +{ + TimeVal now; + now.gettimeofday(); + int64_t end = now.tv_sec * 1000000 + end.tv_usec; + return @cast(end - starttime)/1000000.0, float); +} +} + +#include +/* Log a message to the logfile */ + +void vmLog(const char *msg, ...) +{ + // Start line with timestamp + time_t ltime; + char timestr[80]; + ltime=time(NULL); + strftime (timestr, sizeof(timestr), "%X %x ", localtime(<ime)); + fputs(timestr, stderr); + + // Do a formatted output, passing along all parms + va_list argptr; + va_start(argptr, msg); + vfprintf(stderr, msg, argptr); + va_end(argptr); + fputs("\n", stderr); + + // Ensure log file gets it + fflush(stderr); +} + +/** Mapping structure correlating a VM literal symbol's number with its name */ +struct vmLitSymEntry +{ + int litindex; //!< Literal symbol's number + string symnm; //!< Literal symbol's string +}; + +/** Constant array that identifies and maps all VM literal symbols */ +vmLitSymEntry[+] vmLitSymTable = { + // Compiler reserved names + {SymNull, "null"}, + {SymFalse, "false"}, + {SymTrue, "true"}, + {SymAnd, "and"}, + {SymAsync, "async"}, + {SymBaseurl, "baseurl"}, + {SymBreak, "break"}, + {SymContext, "context"}, + {SymContinue, "continue"}, + {SymDo, "do"}, + {SymEach, "each"}, + {SymElse, 
"else"}, + {SymElif, "elif"}, + {SymIf, "if"}, + {SymIn, "in"}, + {SymInto, "into"}, + {SymLocal, "local"}, + {SymMatch, "match"}, + {SymNot, "not"}, + {SymOr, "or"}, + {SymReturn, "return"}, + {SymSelf, "self"}, + {SymSelfMeth, "selfmethod"}, + {SymThis, "this"}, + {SymUsing, "using"}, + {SymWait, "wait"}, + {SymWhile, "while"}, + {SymWith, "with"}, + {SymYield, "yield"}, + {SymLBrace, "{"}, + {SymRBrace, "}"}, + {SymSemicolon, ";"}, + {SymComma, ","}, + {SymQuestion, "?"}, + {SymAt, "@"}, + {SymSplat, "..."}, + {SymDot, "."}, + {SymColons, "::"}, + {SymDotColon, ".:"}, + + // Compiler symbols that are also methods + {SymAppend, "<<"}, + {SymPrepend, ">>"}, + {SymPlus, "+"}, + {SymMinus, "-"}, + {SymMult, "*"}, + {SymDiv, "/"}, + {SymRocket, "<=>"}, + {SymEquiv, "==="}, + {SymMatchOp, "~~"}, + {SymLt, "<"}, + {SymLe, "<="}, + {SymGt, ">"}, + {SymGe, ">="}, + {SymEq, "=="}, + {SymNe, "!="}, + + // Methods that are not compiler symbols + {SymNew, "New"}, + {SymInit, "Init"}, + {SymLoad, "Load"}, + {SymGet, "Get"}, + {SymParas, "()"}, + {SymBrackets, "[]"}, + {SymNeg, "-@"}, + {SymValue, "value"}, + {SymEachMeth, "Each"}, + {SymBegin, "Begin"}, + {SymEnd, "End"}, + + {SymFinalizer, "_finalizer"}, + {SymName, "_name"}, + + // AST symbols + {SymMethod, "method"}, + {SymAssgn, "="}, + {SymOrAssgn, "||="}, + {SymColon, ":"}, + {SymThisBlock, "thisblock"}, + {SymCallProp, "callprop"}, + {SymActProp, "activeprop"}, + {SymRawProp, "rawprop"}, + {SymGlobal, "global"}, + {SymLit, "lit"}, + {SymExt, "ext"}, + {SymRange, "Range"}, + {SymClosure, "Closure"}, + {SymYielder, "Yielder"}, + {SymResource, "Resource"}, + + // End of literal table + {0, NULL} +}; + +/** Initialize vm's literals. 
*/ +void vm_litinit(Value th) { + // Allocate untyped array for literal storage + VmInfo* vm = vm(th); + newArr(th, &vm->literals, aNull, nVmLits); + mem_markChk(th, vm, vm->literals); + arrSet(th, vm->literals, nVmLits-1, aNull); // Ensure it is full with nulls + + Value *vmlits = arr_info(vm->literals)->arr; + vmlits[TypeObject] = aNull; + + // Load up literal symbols from table + const struct vmLitSymEntry *vmlittblp = &vmLitSymTable[0]; + while (vmlittblp->symnm) { + newSym(th, &vmlits[vmlittblp->litindex], vmlittblp->symnm, strlen(vmlittblp->symnm)); + vmlittblp++; + } +} + +/** Map byte-code's standard symbols to VM's literals (max. number at 256) */ +const int stdTblMap[] = { + // Commonly-called methods + SymNew, // 'new' + SymParas, // '()' + SymAppend, // '<<' + SymPlus, // '+' + SymMinus, // '-' + SymMult, // '*' + SymDiv, // '/' + SymNeg, // '-@' + -1 +}; + +/** Initialize vm's standard symbols */ +void vm_stdinit(Value th) { + // Allocate mapping tables + VmInfo* vm = vm(th); + Value stdidx = newTbl(th, &vm->stdidx, aNull, nStdSym); + mem_markChk(th, vm, vm->stdidx); + vm->stdsym = NULL; + mem_reallocvector(th, vm->stdsym, 0, nStdSym, Value); + + // Populate the mapping tables with the corresponding VM literals + const int *mapp = &stdTblMap[0]; + int idx = 0; + while (*mapp >= 0 && idxstdsym[idx] = vmlit(*mapp); + idx++; + mapp++; + } +} + +void core_null_init(Value th); +void core_bool_init(Value th); +void core_int_init(Value th); +void core_float_init(Value th); +void core_symbol_init(Value th); +void core_range_init(Value th); +void core_text_init(Value th); +void core_list_init(Value th); +void core_clo_init(Value th); +void core_index_init(Value th); +void core_object_init(Value th); +void core_mixin_init(Value th); + +void core_thread_init(Value th); +void core_vm_init(Value th); +void core_all_init(Value th); + +void core_resource_init(Value th); +void core_method_init(Value th); +void core_file_init(Value th); + +/** Initialize all core types 
*/ +void core_init(Value th) { + + core_object_init(th); // Type must be first, so other types can use this as their type + vmlit(TypeAll) = pushType(th, aNull, 0); + popGloVar(th, "All"); + core_mixin_init(th); + + core_null_init(th); + core_bool_init(th); + core_int_init(th); + core_float_init(th); + core_symbol_init(th); + core_range_init(th); + core_text_init(th); + core_list_init(th); + core_clo_init(th); + core_index_init(th); + + core_thread_init(th); + core_vm_init(th); + core_all_init(th); + + // Load resource before the types it uses + core_resource_init(th); + core_method_init(th); + core_file_init(th); +} + diff --git a/resources/examples/binarydigits.c3 b/resources/examples/binarydigits.c3 index 39b602471..42e3803bf 100644 --- a/resources/examples/binarydigits.c3 +++ b/resources/examples/binarydigits.c3 @@ -10,7 +10,7 @@ func int main() func string@ bin(int x) { - int bits = (x == 0) ? 1 : log10(cast(double, x)) / log10(2); + int bits = (x == 0) ? 1 : log10(cast(x, double)) / log10(2); string@ ret = string.make_repeat('0', bits); for (int i = 0; i < bits; i++) { diff --git a/resources/examples/functions.c3 b/resources/examples/functions.c3 new file mode 100644 index 000000000..fd5c4b95a --- /dev/null +++ b/resources/examples/functions.c3 @@ -0,0 +1,46 @@ +module functions; + +module vararray(Type) + +struct VarArray +{ + uint capacity; + uint size; + Type* type; +} + +VarArray* make(uint size = startingSize) +{ + VarArray *array = malloc(VarArray.size); + array.capacity = startingSize; + array.size = 0; + array.type = startingSize > 0 ? 
malloc(Type.size * startingSize) : null; + return array; +} + +generic Type[].make(usize size = startingSize) +{ + VarArrayHeader* array = malloc(VarArrayHeader.size + Type.size * startingSize); + array.capacity = startingSize; + array.size = 0; + return @cast(array[1], Type[]); +} + +macro Type Type[].@index(&Type[] array, usize index) +{ + VarArrayHeader* array = @cast(array, VarArrayHeader*)[-1]; + assert(index < array.size, "Out of bounds access"); + return @cast(array, Type *)[index]; +} + +foo :: proc($N: $I, $T: typeid) -> (res: [N]T) { + // `N` is the constant value passed + // `I` is the type of N + // `T` is the type passed + fmt.printf("Generating an array of type %v from the value %v of type %v\n", + typeid_of(type_of(res)), N, typeid_of(I)); + for i in 0..> ValueTypeOffset) & 0x3); + return cast((raw >> ValueTypeOffset) & 0x3, ValueType); } func uint addType(uint raw, ValueType t) @(inline) diff --git a/resources/examples/toml_tokenizer_c2.c3 b/resources/examples/toml_tokenizer_c2.c3 index 80b29abe1..d5425baeb 100644 --- a/resources/examples/toml_tokenizer_c2.c3 +++ b/resources/examples/toml_tokenizer_c2.c3 @@ -139,17 +139,17 @@ func void Tokenizer.lex(Tokenizer* t, Token* result) t.loc.column = 1; case '=': result.loc = t.loc; - result.kind = TokenKind.EQUALS; + result.kind = EQUALS; t.advance(1); return; case '.': result.loc = t.loc; - result.kind = TokenKind.DOT; + result.kind = DOT; t.advance(1); return; case ',': result.loc = t.loc; - result.kind = TokenKind.COMMA; + result.kind = COMMA; t.advance(1); return; case '[': @@ -157,12 +157,12 @@ func void Tokenizer.lex(Tokenizer* t, Token* result) if (t.current[1] == '[') { t.advance(2); - result.kind = TokenKind.LBRACE2; + result.kind = LBRACE2; } else { t.advance(1); - result.kind = TokenKind.LBRACE; + result.kind = LBRACE; } return; case ']': @@ -170,12 +170,12 @@ func void Tokenizer.lex(Tokenizer* t, Token* result) if (t.current[1] == ']') { t.advance(2); - result.kind = TokenKind.RBRACE2; + 
result.kind = RBRACE2; } else { t.advance(1); - result.kind = TokenKind.RBRACE; + result.kind = RBRACE; } return; case '"': @@ -199,13 +199,13 @@ func void Tokenizer.lex(Tokenizer* t, Token* result) if (t.current[0] == 'f' && strncmp("false", t.current, 5) == 0) { t.advance(5); result.number = 0; - result.kind = TokenKind.Kw_false; + result.kind = KW_FALSE; return; } if (t.current[0] == 't' && strncmp("true", t.current, 4) == 0) { t.advance(4); result.number = 1; - result.kind = TokenKind.Kw_true; + result.kind = KW_TRUE; return; } if (isalpha(t.current[0])) @@ -214,7 +214,7 @@ func void Tokenizer.lex(Tokenizer* t, Token* result) return; } sprintf(t.text, "unexpected char '%c' at %s", t.current[0], t.loc.str()); - result.kind = TokenKind.Error; + result.kind = ERROR; result.text = t.text; return; } @@ -267,7 +267,7 @@ func void Tokenizer.parseText(Tokenizer* t, Token* result) const char* start = t.current; while (t.current[0] && t.current[0] != '"') t.current++; - uint len = cast(uint, t.current - start); + uint len = cast(t.current - start, uint); // assert(len < MaxText); memcpy(t.text, start, len); t.text[len] = 0; @@ -325,7 +325,7 @@ func void Tokenizer.parseNumber(Tokenizer* t, Token* result) // handle hexadecimal/ocal/binary number // handle '_', like 1_000_000 - u32 number = cast(atoi(t.current)); + uint number = cast(atoi(t.current), uint); result.kind = TokenKind.Number; result.number = number; while (t.current[0] && isdigit(t.current[0])) @@ -347,9 +347,9 @@ func bool isKeyChar(u8 c) func void Tokenizer.parseKey(Tokenizer* t, Token* result) { char* start = t.current; - while (t.current[0] && isKeyChar(cast(t.current[0]))) t.current++; + while (t.current[0] && isKeyChar(cast(t.current[0], byte))) t.current++; - uint len = cast(uint, t.current - start); + uint len = cast(t.current - start, uint); // assert(len < MaxText); memcpy(t.text, start, len); t.text[len] = 0; diff --git a/resources/examples/vector.c3 b/resources/examples/vector.c3 index 
2552ec61b..44b6e9e71 100644 --- a/resources/examples/vector.c3 +++ b/resources/examples/vector.c3 @@ -45,7 +45,7 @@ public func bool Vector.empty(Vector *vector) return !vector.array.size; } -public macro Vector.@foreach(Vector *vector : @body(Type value)) +public macro Vector.foreach(Vector *vector, macro void(Type value) body) { for (usize i = 0, i < vector.array.size; i++) { @@ -59,11 +59,11 @@ test IntVector *vector = @calloc(IntVector); vector.add(1); vector.add(2); - foreach (int i : vector) + for (int i : vector) { printDigit(i); } - vector.@foreach(int i) + @vector.foreach(int i) { printDigit(i); } diff --git a/resources/grammar.y b/resources/grammar.y index 537f90896..c18d2e698 100644 --- a/resources/grammar.y +++ b/resources/grammar.y @@ -9,11 +9,12 @@ int yylex(void); void yyerror(char *s); %} -%token IDENT AT_IDENT CT_IDENT CONSTANT CONST_IDENT TYPE_IDENT STRING_LITERAL SIZEOF +%token IDENT CT_IDENT CONSTANT CONST_IDENT TYPE_IDENT STRING_LITERAL SIZEOF %token INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN %token XOR_ASSIGN OR_ASSIGN VAR NIL ELVIS HASH_IDENT NEXT +%token AT %token TYPEDEF MODULE IMPORT %token CHAR SHORT INT LONG FLOAT DOUBLE CONST VOLATILE VOID @@ -26,6 +27,7 @@ void yyerror(char *s); %token FN_BLOCK_START FN_BLOCK_END %token MULTW ADDW SUBW +%token AUTO %start translation_unit %% @@ -44,7 +46,6 @@ ident_expression : CONST_IDENT | IDENT | CT_IDENT - | AT_IDENT ; primary_expression @@ -91,6 +92,7 @@ unary_operator | SUBW | '~' | '!' 
+ | '@' ; @@ -199,6 +201,8 @@ identifier_list macro_argument : CT_IDENT | IDENT + | type_expression IDENT + | type_expression CT_IDENT ; macro_argument_list @@ -235,6 +239,7 @@ parameter_list base_type : VOID + | AUTO | BOOL | CHAR | BYTE @@ -426,11 +431,14 @@ jump_statement | RETURN expression ';' ; +path_ident + : IDENT + | path IDENT + ; + attribute - : AT_IDENT - | path AT_IDENT - | AT_IDENT '(' constant_expression ')' - | path AT_IDENT '(' constant_expression ')' + : AT path_ident + | AT path_ident '(' constant_expression ')' ; attribute_list @@ -480,7 +488,8 @@ func_definition ; macro_declaration - : MACRO AT_IDENT '(' macro_argument_list ')' compound_statement + : MACRO type_expression IDENT '(' macro_argument_list ')' compound_statement + : MACRO IDENT '(' macro_argument_list ')' compound_statement ; @@ -576,8 +585,8 @@ attribute_domains ; attribute_declaration - : ATTRIBUTE attribute_domains AT_IDENT ';' - | ATTRIBUTE attribute_domains AT_IDENT '(' parameter_type_list ')' ';' + : ATTRIBUTE attribute_domains IDENT ';' + | ATTRIBUTE attribute_domains IDENT '(' parameter_type_list ')' ';' ; global_declaration @@ -631,7 +640,7 @@ module_param : CT_IDENT | HASH_IDENT | TYPE_IDENT - | AT_IDENT + | IDENT ; module_params diff --git a/resources/testfragments/parsertest.c3 b/resources/testfragments/parsertest.c3 index fee00d926..481f69952 100644 --- a/resources/testfragments/parsertest.c3 +++ b/resources/testfragments/parsertest.c3 @@ -13,6 +13,16 @@ macro @goo(i, $e) } +struct Foo +{ + $if ($use_bar > 0) + { + int bar; + } + int baz; +} + + macro @soom!(i, $e) {} @@ -125,6 +135,12 @@ $else generic test(i) { } } +#if $b > 0 + +#else + +#endif + generic boofer2(i, g, eok) { case int, char[], type($eoo): @@ -133,6 +149,8 @@ generic boofer2(i, g, eok) return 1000; } + + func void hello() throws Errors { int i, j; diff --git a/resources/testfragments/super_simple.c3 b/resources/testfragments/super_simple.c3 index 238bf0b5f..7f72a8926 100644 --- 
a/resources/testfragments/super_simple.c3 +++ b/resources/testfragments/super_simple.c3 @@ -30,8 +30,56 @@ union Test3 int b; } +struct Teob +{ + int x; + double y; + int xy; + int oekfeo; +} + +error Error +{ + BLURB, + NO_SUCH_FILE, + +} +error OtherError +{ + FOO_BAR +} + +func int jumptest() +{ + if (1) goto LABELX; + return 1; + LABELX: + return 2; +} +func int borok() throws +{ + return 1; +} + +func int barok() throws Error, OtherError +{ + if (true) + { + throw Error.NO_SUCH_FILE; + } + return 100; +} + func int boba(int y, int j) { +// hello(); + //$e = type(Teob); + //Teob xbd = type(Teob); + //Teob xb = { 1, 1.0, 100, 1000 }; + //Test2 tee = { { 3 }, 4 }; + //Test3 xc = { eo = 1, t.a = 1 }; + // throw Error.NO_SUCH_FILE; + for (int i = 0; i < 10; i++) { @@ -106,13 +154,14 @@ typedef func void(int) as Foo; //typedef int as Foo; extern func void printf(char *hello); -macro @hello() +macro hello() { printf("Hello world!\n"); } func void bob() { + byte a = 2; short b = 3; int c = 4; diff --git a/src/build/build_options.c b/src/build/build_options.c index 201993b87..6b9cf9296 100644 --- a/src/build/build_options.c +++ b/src/build/build_options.c @@ -310,11 +310,6 @@ void parse_arguments(int argc, const char *argv[]) exit(EXIT_SUCCESS); } - build_options.cshort_size = sizeof(short); - build_options.cint_size = sizeof(int); - build_options.clong_size = sizeof(long); - build_options.clonglong_size = sizeof(long long); - build_options.pointer_size = sizeof(void *); build_options.path = "."; build_options.emit_llvm = false; build_options.emit_bitcode = true; diff --git a/src/build/build_options.h b/src/build/build_options.h index fbe14faf3..a28f1ff70 100644 --- a/src/build/build_options.h +++ b/src/build/build_options.h @@ -100,12 +100,6 @@ typedef struct uint32_t symtab_size; CompileOption compile_option; DiagnosticsSeverity severity[DIAG_END_SENTINEL]; - int pointer_size; - int cshort_size; - int cint_size; - int clong_size; - int clonglong_size; - int 
clongdouble_size; OptimizationLevel optimization_level; SizeOptimizationLevel size_optimization_level; bool debug_info; diff --git a/src/compiler/ast.c b/src/compiler/ast.c index c166686fe..db9b2fd27 100644 --- a/src/compiler/ast.c +++ b/src/compiler/ast.c @@ -16,6 +16,7 @@ Decl *decl_new(DeclKind decl_kind, Token name, Visibility visibility) return decl; } + Type poisoned_type = { .type_kind = TYPE_POISONED }; TypeInfo poisoned_type_info = { .kind = TYPE_INFO_POISON }; @@ -174,8 +175,18 @@ Type* type_get_unsigned(Type *type) */ +bool func_return_value_as_out(FunctionSignature *func_sig) +{ + Type *return_type = func_sig->rtype->type->canonical; + if (return_type->type_kind == TYPE_VOID) return false; + if (func_has_error_return(func_sig)) return true; + // TODO improve + return type_size(return_type) > 8 * 4; +} -BinaryOp binary_op[256] = { + + +BinaryOp binary_op[TOKEN_LAST + 1] = { [TOKEN_STAR] = BINARYOP_MULT, [TOKEN_MULT_MOD] = BINARYOP_MULT_MOD, [TOKEN_DIV] = BINARYOP_DIV, @@ -214,7 +225,7 @@ BinaryOp binary_op[256] = { }; -static BinaryOp assign_binop[256] = { +static BinaryOp assign_binop[BINARYOP_LAST + 1] = { [BINARYOP_MULT_ASSIGN] = BINARYOP_MULT, [BINARYOP_MULT_MOD_ASSIGN] = BINARYOP_MULT_MOD, [BINARYOP_ADD_ASSIGN] = BINARYOP_ADD, @@ -237,10 +248,7 @@ BinaryOp binaryop_assign_base_op(BinaryOp assign_binary_op) return assign_binop[(int)assign_binary_op]; } -AssignOp assign_op[256] = { -}; - -UnaryOp unary_op[256] = { +UnaryOp unary_op[TOKEN_LAST + 1] = { [TOKEN_STAR] = UNARYOP_DEREF, [TOKEN_AMP] = UNARYOP_ADDR, [TOKEN_BIT_NOT] = UNARYOP_BITNEG, @@ -250,25 +258,13 @@ UnaryOp unary_op[256] = { [TOKEN_MINUSMINUS] = UNARYOP_DEC, }; -PostUnaryOp post_unary_op[256] = { + +PostUnaryOp post_unary_op[TOKEN_LAST + 1] = { [TOKEN_PLUSPLUS] = POSTUNARYOP_INC, [TOKEN_MINUSMINUS] = POSTUNARYOP_DEC, }; -AssignOp assignop_from_token(TokenType type) -{ - return assign_op[type]; -} - -TokenType assignop_to_token(AssignOp type) -{ - for (unsigned i = 0; i < 256; i++) - { - 
if (assign_op[i] == type) return (TokenType)i; - } - return TOKEN_INVALID_TOKEN; -} BinaryOp binaryop_from_token(TokenType type) { @@ -277,7 +273,7 @@ BinaryOp binaryop_from_token(TokenType type) TokenType binaryop_to_token(BinaryOp type) { - for (unsigned i = 0; i < 256; i++) + for (unsigned i = 0; i <= TOKEN_LAST; i++) { if (binary_op[i] == type) return (TokenType)i; } @@ -291,7 +287,7 @@ UnaryOp unaryop_from_token(TokenType type) TokenType unaryop_to_token(UnaryOp type) { - for (unsigned i = 0; i < 256; i++) + for (unsigned i = 0; i <= TOKEN_LAST; i++) { if (unary_op[i] == type) return (TokenType)i; } @@ -305,7 +301,7 @@ PostUnaryOp post_unaryop_from_token(TokenType type) TokenType postunaryop_to_token(PostUnaryOp type) { - for (unsigned i = 0; i < 256; i++) + for (unsigned i = 0; i <= TOKEN_LAST; i++) { if (post_unary_op[i] == type) return (TokenType)i; } @@ -348,6 +344,10 @@ void fprint_type_recursive(FILE *file, Type *type, int indent) case TYPE_POISONED: fprintf_indented(file, indent, "(type poison)\n"); return; + case TYPE_META_TYPE: + fprintf_indented(file, indent, "(meta-type"); + fprint_type_recursive(file, type->child, indent + 1); + fprint_endparen(file, indent); case TYPE_FUNC: fprintf_indented(file, indent, "(type-func %s)\n", type->func.signature->mangled_signature); return; @@ -675,9 +675,16 @@ void fprint_func_signature(FILE *file, FunctionSignature *signature, int indent) fprintf_indented(file, indent, "(params\n"); fprint_decl_list(file, signature->params, indent + 1); fprint_endparen(file, indent); - fprintf_indented(file, indent, "(throws\n"); - fprint_decl_list(file, signature->throws, indent + 1); - fprint_endparen(file, indent); + if (signature->throw_any) + { + fprintf_indented(file, indent, "(throws any)\n"); + } + else + { + fprintf_indented(file, indent, "(throws\n"); + fprint_decl_list(file, signature->throws, indent + 1); + fprint_endparen(file, indent); + } } void fprint_decl_recursive(FILE *file, Decl *decl, int indent) { @@ -1039,7 
+1046,7 @@ static void fprint_ast_recursive(FILE *file, Ast *ast, int indent) break; case AST_THROW_STMT: fprintf(file, "(throw\n"); - fprint_expr_recursive(file, ast->throw_stmt, indent + 1); + fprint_expr_recursive(file, ast->throw_stmt.throw_value, indent + 1); break; case AST_TRY_STMT: TODO diff --git a/src/compiler/casts.c b/src/compiler/casts.c index 4bd1bac2b..175596cf3 100644 --- a/src/compiler/casts.c +++ b/src/compiler/casts.c @@ -38,7 +38,7 @@ static bool sema_type_mismatch(Expr *expr, Type *type, CastType cast_type) break; } - SEMA_ERROR(expr, "Cannot %s '%s' to '%s'", action, type_to_error_string(expr->type), type_to_error_string(type)); + SEMA_ERROR(expr, "Cannot %s '%s' to '%s'.", action, type_to_error_string(expr->type), type_to_error_string(type)); return false; } @@ -580,6 +580,7 @@ CastKind cast_to_bool_kind(Type *type) case TYPE_ARRAY: case TYPE_VARARRAY: case TYPE_SUBARRAY: + case TYPE_META_TYPE: // Improve consider vararray / subarray conversion to boolean. return CAST_ERROR; case TYPE_BOOL: @@ -614,6 +615,7 @@ bool cast(Expr *expr, Type *to_type, CastType cast_type) { case TYPE_POISONED: case TYPE_VOID: + case TYPE_META_TYPE: break; case TYPE_BOOL: if (type_is_integer(canonical)) return boxi(expr, from_type, canonical, to_type, cast_type); diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index 780b0553d..83764e0fc 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -36,7 +36,6 @@ static void compiler_lex(BuildTarget *target) void compiler_parse(BuildTarget *target) { - builtin_setup(); VECEACH(target->sources, i) { bool loaded = false; @@ -149,7 +148,6 @@ void compile_files(BuildTarget *target) } target_expand_source_names(target); target_setup(); - builtin_setup(); if (!vec_size(target->sources)) error_exit("No files to compile."); switch (build_options.compile_option) diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index f98acc4a6..58a75ef3d 100644 --- 
a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -9,6 +9,7 @@ #include "../build/build_options.h" #include "compiler.h" #include "enums.h" +#include "target.h" typedef uint32_t SourceLoc; #define INVALID_LOC UINT32_MAX @@ -18,6 +19,9 @@ typedef uint32_t SourceLoc; #define MAX_SCOPE_DEPTH 0xFF #define MAX_PATH 1024 #define MAX_DEFERS 0xFFFF +#define MAX_FUNCTION_SIGNATURE_SIZE 2048 +#define MAX_PARAMS 512 +#define MAX_ERRORS 0xFFFF typedef struct _Ast Ast; typedef struct _Decl Decl; @@ -102,8 +106,10 @@ typedef struct _Path typedef struct { - unsigned bitsize : 16; + unsigned char bitsize; unsigned char bytesize; + unsigned char min_alignment; + unsigned char pref_alignment; } TypeBuiltin; typedef struct @@ -128,7 +134,7 @@ struct _Type TypeKind type_kind : 8; struct _Type *canonical; const char *name; - struct _Type **ptr_cache; + struct _Type **type_cache; void *backend_type; void *backend_debug_type; union @@ -138,6 +144,7 @@ struct _Type TypeArray array; TypeFunc func; Type *pointer; + Type *child; }; }; @@ -183,6 +190,8 @@ typedef struct typedef struct { + uint32_t alignment; + uint64_t size; Decl **members; } StructDecl; @@ -230,6 +239,7 @@ typedef struct _FunctionSignature { CallConvention convention : 4; bool variadic : 1; + bool throw_any : 1; TypeInfo *rtype; Decl** params; Decl** throws; @@ -339,6 +349,7 @@ typedef struct _Decl CtIfDecl ct_elif_decl; Decl** ct_else_decl; Expr *incr_array_decl; + TypeInfo *throws; }; } Decl; @@ -474,6 +485,7 @@ struct _Expr ExprStructValue struct_value_expr; ExprTypeRef type_access; ExprTry try_expr; + Expr* macro_expr; ExprBinary binary_expr; ExprTernary ternary_expr; ExprUnary unary_expr; @@ -651,6 +663,12 @@ typedef struct DeferList defers; } AstNextStmt; +typedef struct +{ + Expr *throw_value; + DeferList defers; +} AstThrowStmt; + typedef struct _Ast { SourceRange span; @@ -663,9 +681,9 @@ typedef struct _Ast AstCompoundStmt function_block_stmt; Decl *declare_stmt; Expr *expr_stmt; - Expr 
*throw_stmt; - struct _Ast *volatile_stmt; - struct _Ast *try_stmt; + AstThrowStmt throw_stmt; + Ast *volatile_stmt; + Ast *try_stmt; AstLabelStmt label_stmt; AstReturnStmt return_stmt; AstWhileStmt while_stmt; @@ -764,6 +782,12 @@ typedef struct _Context Token *next_lead_comment; DynamicScope *current_scope; Decl *evaluating_macro; + // Error handling + struct + { + Decl **errors; + int try_nesting; + }; Type *rtype; int in_volatile_section; Decl *locals[MAX_LOCALS]; @@ -780,7 +804,7 @@ typedef struct _Context Token next_tok; struct { - bool has_stored; + bool in_lookahead; const char *current; const char *start; Token tok; @@ -788,7 +812,6 @@ typedef struct _Context Token *lead_comment; Token *trailing_comment; Token *next_lead_comment; - unsigned comments; } stored; } Context; @@ -859,7 +882,7 @@ static inline Ast *extend_ast_with_prev_token(Context *context, Ast *ast) } -void builtin_setup(); +void builtin_setup(Target *target); static inline bool builtin_may_negate(Type *canonical) { @@ -915,6 +938,8 @@ CastKind cast_to_bool_kind(Type *type); bool cast_to_runtime(Expr *expr); void llvm_codegen(Context *context); +void llvm_set_struct_size_alignment(Decl *decl); + bool sema_analyse_expr(Context *context, Type *to, Expr *expr); bool sema_analyse_decl(Context *context, Decl *decl); @@ -969,6 +994,12 @@ void fprint_decl(FILE *file, Decl *dec); void fprint_type_info_recursive(FILE *file, TypeInfo *type_info, int indent); void fprint_expr_recursive(FILE *file, Expr *expr, int indent); +bool func_return_value_as_out(FunctionSignature *func_sig); +static inline bool func_has_error_return(FunctionSignature *func_sig) +{ + return func_sig->throws || func_sig->throw_any; +} + Token lexer_scan_token(Lexer *lexer); Token lexer_scan_ident_test(Lexer *lexer, const char *scan); @@ -1035,6 +1066,7 @@ void target_setup(); int target_alloca_addr_space(); void *target_data_layout(); void *target_machine(); +void *target_target(); #define TOKEN_MAX_LENGTH 0xFFFF #define 
TOK2VARSTR(_token) _token.span.length, _token.start @@ -1047,9 +1079,10 @@ static inline Token wrap(const char *string) } Type *type_get_ptr(Type *ptr_type); +Type *type_get_meta(Type *meta_type); Type *type_get_array(Type *arr_type, uint64_t len); -Type *type_signed_int_by_size(int bytesize); -Type *type_unsigned_int_by_size(int bytesize); +Type *type_signed_int_by_bitsize(unsigned bytesize); +Type *type_unsigned_int_by_bitsize(unsigned bytesize); bool type_is_subtype(Type *type, Type *possible_subtype); Type *type_find_common_ancestor(Type *left, Type *right); const char *type_to_error_string(Type *type); @@ -1141,7 +1174,6 @@ static inline bool type_is_number(Type *type) __type->name_loc = _name; __type->unresolved.module = _module; __type; }) #define TYPE_UNRESOLVED(_name) ({ TypeInfo *__type = type_new(TYPE_USER_DEFINED); __type->name_loc = _name; __type; }) -AssignOp assignop_from_token(TokenType type); UnaryOp unaryop_from_token(TokenType type); TokenType unaryop_to_token(UnaryOp type); PostUnaryOp post_unaryop_from_token(TokenType type); @@ -1159,8 +1191,7 @@ static inline const char* struct_union_name_from_token(TokenType type) return type == TOKEN_STRUCT ? 
"struct" : "union"; } -#define BACKEND_TYPE(type) gencontext_get_llvm_type(context, type) -#define BACKEND_TYPE_GLOBAL(type) gencontext_get_llvm_type(NULL, type) +#define llvm_type(type) gencontext_get_llvm_type(context, type) #define DEBUG_TYPE(type) gencontext_get_debug_type(context, type) void advance(Context *context); diff --git a/src/compiler/enums.h b/src/compiler/enums.h index 34bce6dfe..497c692e9 100644 --- a/src/compiler/enums.h +++ b/src/compiler/enums.h @@ -63,6 +63,7 @@ typedef enum BINARYOP_BIT_XOR_ASSIGN, BINARYOP_SHR_ASSIGN, BINARYOP_SHL_ASSIGN, + BINARYOP_LAST = BINARYOP_SHL_ASSIGN } BinaryOp; typedef enum @@ -218,6 +219,7 @@ typedef enum EXIT_BREAK, EXIT_GOTO, EXIT_CONTINUE, + EXIT_THROW, EXIT_RETURN, } ExitType; @@ -243,6 +245,7 @@ typedef enum EXPR_EXPRESSION_LIST, EXPR_CAST, EXPR_SCOPED_EXPR, + EXPR_MACRO_EXPR, } ExprKind; @@ -259,6 +262,7 @@ typedef enum { PREC_NONE, PREC_ASSIGNMENT, // =, *=, /=, %=, ... + PREC_TRY, // try PREC_TERNARY, // ?: PREC_LOGICAL, // && || PREC_RELATIONAL, // < > <= >= == != @@ -266,7 +270,7 @@ typedef enum PREC_BIT, // ^ | & PREC_SHIFT, // << >> >>> PREC_MULTIPLICATIVE, // * / % - PREC_UNARY, // ! - + ~ * & prefix ++/-- + PREC_UNARY, // @ ! - + ~ * & prefix ++/-- PREC_CALL, // . () [] postfix ++/-- } Precedence; @@ -401,9 +405,8 @@ typedef enum TOKEN_CONST_IDENT, // Any purely upper case ident, TOKEN_TYPE_IDENT, // Any ident on the format FooBar or __FooBar - // We want to parse @foo / #foo / $foo separately. - // Otherwise we allow things like "@ foo" which would be pretty bad. - TOKEN_AT_IDENT, // @foobar + // We want to parse #foo / $foo separately. + // Otherwise we allow things like "# foo" which would be pretty bad. TOKEN_HASH_IDENT, // #foobar TOKEN_CT_IDENT, // $foobar @@ -486,6 +489,7 @@ typedef enum TOKEN_EOF, // \n - SHOULD ALWAYS BE THE LAST TOKEN. 
+ TOKEN_LAST = TOKEN_EOF, } TokenType; @@ -523,9 +527,11 @@ typedef enum TYPE_ARRAY, TYPE_VARARRAY, TYPE_SUBARRAY, + TYPE_META_TYPE, + TYPE_LAST = TYPE_META_TYPE } TypeKind; -#define TYPE_KINDS (TYPE_SUBARRAY + 1) +#define TYPE_KINDS (TYPE_LAST + 1) typedef enum { @@ -537,6 +543,7 @@ typedef enum UNARYOP_NOT, UNARYOP_INC, UNARYOP_DEC, + UNARYOP_LAST = UNARYOP_DEC } UnaryOp; typedef enum diff --git a/src/compiler/expr_analysis.c b/src/compiler/expr_analysis.c index e1b76a24b..9348f29b7 100644 --- a/src/compiler/expr_analysis.c +++ b/src/compiler/expr_analysis.c @@ -120,7 +120,11 @@ static inline bool sema_expr_analyse_identifier(Context *context, Type *to, Expr SEMA_ERROR(expr, "Functions from other modules, must be prefixed with the module name"); return false; } - + if (decl->decl_kind == DECL_MACRO) + { + SEMA_ERROR(expr, "Macro expansions must be prefixed with '@', try using '@%s(...)' instead.", decl->name); + return false; + } assert(decl->type); expr->identifier_expr.decl = decl; expr->type = decl->type; @@ -133,29 +137,17 @@ static inline bool sema_expr_analyse_binary_sub_expr(Context *context, Expr *lef } static inline bool sema_expr_analyse_var_call(Context *context, Type *to, Expr *expr) { TODO } -static inline bool sema_expr_analyse_macro_call(Context *context, Type *to, Expr *expr, Decl *macro) -{ - Ast *macro_parent; - // TODO handle loops - Decl *stored_macro = context->evaluating_macro; - Type *stored_rtype = context->rtype; - context->evaluating_macro = macro; - context->rtype = macro->macro_decl.rtype->type; - // Handle escaping macro - bool success = sema_analyse_statement(context, macro->macro_decl.body); - context->evaluating_macro = stored_macro; - context->rtype = stored_rtype; - if (!success) return false; - - TODO - return success; -}; static inline bool sema_expr_analyse_generic_call(Context *context, Type *to, Expr *expr) { TODO }; static inline bool sema_expr_analyse_func_call(Context *context, Type *to, Expr *expr, Decl *decl) { Expr 
**args =expr->call_expr.arguments; Decl **func_params = decl->func.function_signature.params; + unsigned error_params = decl->func.function_signature.throw_any || decl->func.function_signature.throws; + if (error_params) + { + TODO + } unsigned num_args = vec_size(args); // unsigned num_params = vec_size(func_params); // TODO handle named parameters, handle default parameters, varargs etc @@ -192,7 +184,8 @@ static inline bool sema_expr_analyse_call(Context *context, Type *to, Expr *expr case DECL_FUNC: return sema_expr_analyse_func_call(context, to, expr, decl); case DECL_MACRO: - return sema_expr_analyse_macro_call(context, to, expr, decl); + SEMA_ERROR(expr, "Macro calls must be preceeded by '@'."); + return false; case DECL_GENERIC: return sema_expr_analyse_generic_call(context, to, expr); case DECL_POISONED: @@ -402,57 +395,143 @@ static inline bool sema_expr_analyse_type_access(Context *context, Type *to, Exp return false; } -static inline Decl *decl_find_by_name(Decl** decls, const char *name) +static Decl *sema_analyse_init_path(Context *context, Decl *strukt, Expr *expr); + +static Decl *sema_analyse_init_identifier_string(Context *context, Decl *strukt, const char *string) { - VECEACH(decls, i) + assert(decl_is_struct_type(strukt)); + Decl **members = strukt->strukt.members; + VECEACH(members, i) { - if (decls[i]->name == name) return decls[i]; + Decl *member = members[i]; + if (member->name == string) return member; + if (!member->name) + { + Decl *anonymous_member = sema_analyse_init_identifier_string(context, member->type->decl, string); + if (anonymous_member) return anonymous_member; + } } return NULL; } -static inline bool expr_may_be_struct_field_decl(Expr *maybe_binary) + +static Decl *sema_analyse_init_identifier(Context *context, Decl *strukt, Expr *expr) { - if (maybe_binary->expr_kind != EXPR_BINARY) return false; - if (maybe_binary->binary_expr.operator != BINARYOP_EQ) return false; - Expr *expr = maybe_binary->binary_expr.left; - while (1) + 
assert(expr->resolve_status == RESOLVE_NOT_DONE); + expr->resolve_status = RESOLVE_RUNNING; + expr->identifier_expr.decl = sema_analyse_init_identifier_string(context, strukt, expr->identifier_expr.identifier); + expr->resolve_status = RESOLVE_DONE; + return expr->identifier_expr.decl; +} +static Decl *sema_analyse_init_access(Context *context, Decl *strukt, Expr *access_expr) +{ + assert(access_expr->resolve_status == RESOLVE_NOT_DONE); + access_expr->resolve_status = RESOLVE_RUNNING; + Decl *decl = sema_analyse_init_path(context, strukt, access_expr->access_expr.parent); + if (!decl || !decl_is_struct_type(decl->type->decl)) { - if (expr->expr_kind == EXPR_IDENTIFIER) return true; - if (expr->expr_kind != EXPR_ACCESS) return false; - expr = expr->access_expr.parent; + access_expr->resolve_status = RESOLVE_DONE; + return NULL; } + decl = access_expr->access_expr.ref = sema_analyse_init_identifier_string(context, decl->type->decl, access_expr->access_expr.sub_element.string); + access_expr->resolve_status = RESOLVE_DONE; + return decl; +} + +static Decl *sema_analyse_init_subscript(Context *context, Decl *array, Expr *subscript) +{ + TODO + if (array->type->type_kind != TYPE_ARRAY) + { + + } +} + +static Decl *sema_analyse_init_path(Context *context, Decl *strukt, Expr *expr) +{ + switch (expr->expr_kind) + { + case EXPR_ACCESS: + return sema_analyse_init_access(context, strukt, expr); + case EXPR_IDENTIFIER: + return sema_analyse_init_identifier(context, strukt, expr); + case EXPR_SUBSCRIPT: + return sema_analyse_init_subscript(context, strukt, expr); + default: + return NULL; + } +} + + +typedef enum +{ + INIT_SEMA_ERROR, + INIT_SEMA_NOT_FOUND, + INIT_SEMA_OK +} InitSemaResult; + +static InitSemaResult sema_expr_analyse_struct_named_initializer_list(Context *context, Decl *assigned, Expr *expr_list) +{ + VECEACH(expr_list->initializer_expr, i) + { + Expr *expr = expr_list->initializer_expr[i]; + if (expr->expr_kind != EXPR_BINARY && expr->binary_expr.operator != 
BINARYOP_ASSIGN) + { + if (i != 0) + { + SEMA_ERROR(expr, "Named and non-named initializers are not allowed together, please choose one or the other."); + return INIT_SEMA_ERROR; + } + // If there is an unexpected expression and no previous element then this is a normal initializer list. + return INIT_SEMA_NOT_FOUND; + } + Expr *path = expr->binary_expr.left; + Expr *value = expr->binary_expr.right; + Decl *result = sema_analyse_init_path(context, assigned, path); + if (!result) + { + if (i != 0) + { + SEMA_ERROR(path, "Unexpected element when initializing '%s', did you get the name right?", assigned->name); + return INIT_SEMA_ERROR; + } + return INIT_SEMA_NOT_FOUND; + } + if (!sema_analyse_expr(context, result->type, value)) return INIT_SEMA_ERROR; + } + return INIT_SEMA_OK; } static inline bool sema_expr_analyse_struct_initializer_list(Context *context, Type *assigned, Expr *expr) { Decl **members = assigned->decl->strukt.members; unsigned size = vec_size(members); + // Zero size init will initialize to empty. + if (size == 0) return true; + + InitSemaResult result = sema_expr_analyse_struct_named_initializer_list(context, assigned->decl, expr); + if (result == INIT_SEMA_ERROR) return false; + if (result == INIT_SEMA_OK) + { + TODO + } + if (assigned->type_kind == TYPE_UNION) + { + SEMA_ERROR(expr->initializer_expr[0], "Initializer list for unions must use named initializers, e.g. 
{ a = 4 }"); + return false; + } + if (size < vec_size(expr->initializer_expr)) + { + SEMA_ERROR(expr->initializer_expr[size], "Too many elements in initializer, expected only %d.", size); + return false; + } VECEACH(expr->initializer_expr, i) { - Expr *field = expr->initializer_expr[i]; - Decl *decl; - if (expr_may_be_struct_field_decl(field)) - { - if (field->expr_kind == EXPR_IDENTIFIER) - { - decl = decl_find_by_name(members, field->identifier_expr.identifier); - } - TODO - } - else - { - if (i >= size) - { - SEMA_ERROR(field, "Too many elements in initializer"); - return false; - } - decl = members[i]; - } - if (!cast(field, decl->type, CAST_TYPE_IMPLICIT_ASSIGN)) return false; + if (!sema_analyse_expr(context, members[i]->type, expr->initializer_expr[i])) return false; } expr->type = assigned; return true; } + static inline bool sema_expr_analyse_initializer_list(Context *context, Type *to, Expr *expr) { assert(to); @@ -1303,6 +1382,7 @@ static bool sema_expr_analyse_not(Context *context, Type *to, Expr *expr, Expr * case TYPE_STRING: case TYPE_ENUM: case TYPE_ERROR: + case TYPE_META_TYPE: SEMA_ERROR(expr, "Cannot use 'not' on %s", type_to_error_string(inner->type)); return false; } @@ -1428,8 +1508,8 @@ static inline bool sema_expr_analyse_unary(Context *context, Type *to, Expr *exp case UNARYOP_DEC: case UNARYOP_INC: return sema_expr_analyse_incdec(context, to, expr, inner); - default: - UNREACHABLE + case UNARYOP_ERROR: + return false; } } @@ -1457,9 +1537,398 @@ static inline bool sema_expr_analyse_try(Context *context, Type *to, Expr *expr) return true; } +static Ast *ast_shallow_copy(Ast *source) +{ + Ast *copy = malloc_arena(sizeof(Ast)); + memcpy(copy, source, sizeof(Ast)); + return copy; +} + +static Expr *expr_shallow_copy(Expr *source) +{ + Expr *copy = malloc_arena(sizeof(Expr)); + memcpy(copy, source, sizeof(Expr)); + return copy; +} + +static Expr **expr_copy_expr_list_from_macro(Context *context, Expr *macro, Expr **expr_list); +static Expr 
*expr_copy_from_macro(Context *context, Expr *macro, Expr *source_expr); +static Ast *ast_copy_from_macro(Context *context, Expr *macro, Ast *source); +static void ast_copy_list_from_macro(Context *context, Expr *macro, Ast ***to_convert); + +static TypeInfo *type_info_copy_from_macro(Context *context, Expr *macro, TypeInfo *source) +{ + if (!source) return NULL; + TypeInfo *copy = malloc_arena(sizeof(TypeInfo)); + memcpy(copy, source, sizeof(TypeInfo)); + switch (source->kind) + { + case TYPE_INFO_POISON: + return copy; + case TYPE_INFO_IDENTIFIER: + assert(source->resolve_status == RESOLVE_NOT_DONE); + TODO + break; + case TYPE_INFO_EXPRESSION: + assert(source->resolve_status == RESOLVE_NOT_DONE); + copy->unresolved_type_expr = expr_copy_from_macro(context, macro, source->unresolved_type_expr); + return copy; + case TYPE_INFO_ARRAY: + assert(source->resolve_status == RESOLVE_NOT_DONE); + copy->array.len = expr_copy_from_macro(context, macro, source->array.len); + copy->array.base = type_info_copy_from_macro(context, macro, source->array.base); + return copy; + case TYPE_INFO_INC_ARRAY: + assert(source->resolve_status == RESOLVE_NOT_DONE); + copy->array.base = type_info_copy_from_macro(context, macro, source->array.base); + return copy; + case TYPE_INFO_POINTER: + assert(source->resolve_status == RESOLVE_NOT_DONE); + copy->pointer = type_info_copy_from_macro(context, macro, source->pointer); + return copy; + } +} + + + +static Expr *expr_copy_from_macro(Context *context, Expr *macro, Expr *source_expr) +{ +#define EXPR_COPY(x) x = expr_copy_from_macro(context, macro, x) + if (!source_expr) return NULL; + Expr *expr = expr_shallow_copy(source_expr); + switch (source_expr->expr_kind) + { + case EXPR_POISONED: + return source_expr; + case EXPR_TRY: + EXPR_COPY(expr->try_expr.expr); + EXPR_COPY(expr->try_expr.else_expr); + return expr; + case EXPR_CONST: + return expr; + case EXPR_BINARY: + EXPR_COPY(expr->binary_expr.left); + EXPR_COPY(expr->binary_expr.right); + 
return expr; + case EXPR_TERNARY: + EXPR_COPY(expr->ternary_expr.cond); + EXPR_COPY(expr->ternary_expr.then_expr); + EXPR_COPY(expr->ternary_expr.else_expr); + return expr; + case EXPR_UNARY: + EXPR_COPY(expr->unary_expr.expr); + return expr; + case EXPR_POST_UNARY: + EXPR_COPY(expr->post_expr.expr); + return expr; + case EXPR_TYPE: + expr->type_expr.type = type_info_copy_from_macro(context, macro, expr->type_expr.type); + return expr; + case EXPR_IDENTIFIER: + TODO + break; + case EXPR_TYPE_ACCESS: + expr->type_access.type = type_info_copy_from_macro(context, macro, expr->type_expr.type); + return expr; + case EXPR_CALL: + EXPR_COPY(expr->call_expr.function); + expr->call_expr.arguments = expr_copy_expr_list_from_macro(context, macro, expr->call_expr.arguments); + return expr; + case EXPR_SIZEOF: + TODO + break; + case EXPR_SUBSCRIPT: + EXPR_COPY(expr->subscript_expr.expr); + EXPR_COPY(expr->subscript_expr.index); + return expr; + case EXPR_ACCESS: + EXPR_COPY(expr->access_expr.parent); + return expr; + case EXPR_STRUCT_VALUE: + expr->struct_value_expr.type = type_info_copy_from_macro(context, macro, expr->struct_value_expr.type); + EXPR_COPY(expr->struct_value_expr.init_expr); + return expr; + case EXPR_STRUCT_INIT_VALUES: + TODO + return expr; + case EXPR_INITIALIZER_LIST: + expr->initializer_expr = expr_copy_expr_list_from_macro(context, macro, expr->initializer_expr); + return expr; + case EXPR_EXPRESSION_LIST: + expr->expression_list = expr_copy_expr_list_from_macro(context, macro, expr->expression_list); + return expr; + case EXPR_CAST: + EXPR_COPY(expr->cast_expr.expr); + expr->cast_expr.type_info = expr->cast_expr.type_info = type_info_copy_from_macro(context, macro, expr->cast_expr.type_info); + return expr; + case EXPR_SCOPED_EXPR: + EXPR_COPY(expr->expr_scope.expr); + return expr; + case EXPR_MACRO_EXPR: + EXPR_COPY(expr->macro_expr); + return expr; + } +#undef EXPR_COPY +} + +static Expr **expr_copy_expr_list_from_macro(Context *context, Expr *macro, 
Expr **expr_list) +{ + Expr **result = NULL; + VECEACH(expr_list, i) + { + vec_add(result, expr_copy_from_macro(context, macro, expr_list[i])); + } + return result; +} + +static void ast_copy_list_from_macro(Context *context, Expr *macro, Ast ***to_convert) +{ + Ast **result = NULL; + Ast **list = *to_convert; + VECEACH(list, i) + { + vec_add(result, ast_copy_from_macro(context, macro, list[i])); + } + *to_convert = result; +} + +static void type_info_copy_list_from_macro(Context *context, Expr *macro, TypeInfo ***to_convert) +{ + TypeInfo **result = NULL; + TypeInfo **list = *to_convert; + VECEACH(list, i) + { + vec_add(result, type_info_copy_from_macro(context, macro, list[i])); + } + *to_convert = result; +} + +static Ast *ast_copy_from_macro(Context *context, Expr *macro, Ast *source) +{ +#define EXPR_COPY(x) x = expr_copy_from_macro(context, macro, x) +#define AST_COPY(x) x = ast_copy_from_macro(context, macro, x) + Ast *ast = ast_shallow_copy(source); + switch (source->ast_kind) + { + case AST_POISONED: + return ast; + case AST_ASM_STMT: + TODO + case AST_ATTRIBUTE: + UNREACHABLE + case AST_BREAK_STMT: + return ast; + case AST_CASE_STMT: + AST_COPY(ast->case_stmt.body); + EXPR_COPY(ast->case_stmt.expr); + return ast; + break; + case AST_CATCH_STMT: + AST_COPY(ast->catch_stmt.body); + return ast; + case AST_COMPOUND_STMT: + ast_copy_list_from_macro(context, macro, &ast->compound_stmt.stmts); + return ast; + case AST_CONTINUE_STMT: + return ast; + case AST_CT_IF_STMT: + EXPR_COPY(ast->ct_if_stmt.expr); + AST_COPY(ast->ct_if_stmt.elif); + AST_COPY(ast->ct_if_stmt.then); + return ast; + case AST_CT_ELIF_STMT: + EXPR_COPY(ast->ct_elif_stmt.expr); + AST_COPY(ast->ct_elif_stmt.then); + AST_COPY(ast->ct_elif_stmt.elif); + return ast; + case AST_CT_ELSE_STMT: + AST_COPY(ast->ct_else_stmt); + return ast; + case AST_CT_FOR_STMT: + AST_COPY(ast->ct_for_stmt.body); + EXPR_COPY(ast->ct_for_stmt.expr); + return ast; + case AST_CT_SWITCH_STMT: + 
EXPR_COPY(ast->ct_switch_stmt.cond); + ast_copy_list_from_macro(context, macro, &ast->ct_switch_stmt.body); + return ast; + case AST_CT_DEFAULT_STMT: + AST_COPY(ast->ct_default_stmt); + return ast; + case AST_CT_CASE_STMT: + AST_COPY(ast->ct_case_stmt.body); + type_info_copy_list_from_macro(context, macro, &ast->ct_case_stmt.types); + return ast; + case AST_DECLARE_STMT: + TODO + return ast; + case AST_DEFAULT_STMT: + AST_COPY(ast->case_stmt.body); + return ast; + case AST_DEFER_STMT: + assert(!ast->defer_stmt.prev_defer); + AST_COPY(ast->defer_stmt.body); + return ast; + case AST_DO_STMT: + AST_COPY(ast->do_stmt.body); + EXPR_COPY(ast->do_stmt.expr); + return ast; + case AST_EXPR_STMT: + EXPR_COPY(ast->expr_stmt); + return ast; + case AST_FOR_STMT: + EXPR_COPY(ast->for_stmt.cond); + EXPR_COPY(ast->for_stmt.incr); + AST_COPY(ast->for_stmt.body); + AST_COPY(ast->for_stmt.init); + return ast; + case AST_FUNCTION_BLOCK_STMT: + ast_copy_list_from_macro(context, macro, &ast->function_block_stmt.stmts); + return ast; + case AST_GENERIC_CASE_STMT: + AST_COPY(ast->generic_case_stmt.body); + // ast->generic_case_stmt.types = ... + TODO + return ast; + case AST_GENERIC_DEFAULT_STMT: + AST_COPY(ast->generic_default_stmt); + return ast; + case AST_GOTO_STMT: + AST_COPY(ast->goto_stmt.label); + // TODO fixup name, which needs to be macro local. + TODO + return ast; + case AST_IF_STMT: + AST_COPY(ast->if_stmt.cond); + AST_COPY(ast->if_stmt.decl); + AST_COPY(ast->if_stmt.else_body); + AST_COPY(ast->if_stmt.then_body); + return ast; + case AST_LABEL: + assert(!ast->label_stmt.defer); + assert(!ast->label_stmt.in_defer); + // TODO fixup name which needs to be macro local. + TODO + return ast; + case AST_NOP_STMT: + return ast; + case AST_RETURN_STMT: + EXPR_COPY(ast->return_stmt.expr); + // TODO handle conversions? 
+ TODO + return ast; + case AST_DECL_EXPR_LIST: + ast_copy_list_from_macro(context, macro, &ast->decl_expr_stmt); + return ast; + case AST_SWITCH_STMT: + AST_COPY(ast->switch_stmt.decl); + AST_COPY(ast->switch_stmt.cond); + ast_copy_list_from_macro(context, macro, &ast->switch_stmt.cases); + return ast; + case AST_THROW_STMT: + EXPR_COPY(ast->throw_stmt.throw_value); + return ast; + case AST_TRY_STMT: + AST_COPY(ast->try_stmt); + return ast; + case AST_NEXT_STMT: + TODO + return ast; + case AST_VOLATILE_STMT: + TODO + return ast; + case AST_WHILE_STMT: + AST_COPY(ast->while_stmt.cond); + AST_COPY(ast->while_stmt.decl); + AST_COPY(ast->while_stmt.body); + return ast; + case AST_SCOPED_STMT: + AST_COPY(ast->scoped_stmt.stmt); + return ast; + } + +#undef EXPR_COPY +#undef AST_COPY +} +static inline bool sema_expr_analyse_macro_call(Context *context, Type *to, Expr *macro, Expr *inner) +{ + Expr *func_expr = inner->call_expr.function; + + if (!sema_analyse_expr(context, NULL, func_expr)) return false; + + Decl *decl; + switch (func_expr->expr_kind) + { + case EXPR_TYPE_ACCESS: + TODO + case EXPR_IDENTIFIER: + decl = func_expr->identifier_expr.decl; + break; + default: + TODO + } + if (decl->decl_kind != DECL_MACRO) + { + SEMA_ERROR(macro, "A macro was expected here."); + return false; + } + Expr **args =func_expr->call_expr.arguments; + Decl **func_params = decl->macro_decl.parameters; + // TODO handle bare macros. 
+ // TODO handle $ args and # args + unsigned num_args = vec_size(args); + // unsigned num_params = vec_size(func_params); + for (unsigned i = 0; i < num_args; i++) + { + Expr *arg = args[i]; + Decl *param = func_params[i]; + if (!sema_analyse_expr(context, param->type, arg)) return false; + } + Ast *body = ast_copy_from_macro(context, inner, decl->macro_decl.body); + TODO +} + +static inline bool sema_expr_analyse_macro_call2(Context *context, Type *to, Expr *expr, Decl *macro) +{ + Ast *macro_parent; + // TODO handle loops + Decl *stored_macro = context->evaluating_macro; + Type *stored_rtype = context->rtype; + context->evaluating_macro = macro; + context->rtype = macro->macro_decl.rtype->type; + // Handle escaping macro + bool success = sema_analyse_statement(context, macro->macro_decl.body); + context->evaluating_macro = stored_macro; + context->rtype = stored_rtype; + if (!success) return false; + + TODO + return success; +}; + +static inline bool sema_expr_analyse_macro_expr(Context *context, Type *to, Expr *expr) +{ + Expr *inner = expr->macro_expr; + switch (inner->expr_kind) + { + case EXPR_CALL: + return sema_expr_analyse_macro_call(context, to, expr, inner); + case EXPR_ACCESS: + case EXPR_IDENTIFIER: + // Allow @f unrolling? 
+ default: + SEMA_ERROR(expr, "Expected a macro name after '@'"); + return false; + } +} + static inline bool sema_expr_analyse_type(Context *context, Type *to, Expr *expr) { - TODO + if (!sema_resolve_type_info(context, expr->type_expr.type)) + { + return expr_poison(expr); + } + expr->type = type_get_meta(expr->type_expr.type->type); return true; } @@ -1476,6 +1945,8 @@ static inline bool sema_analyse_expr_dispatch(Context *context, Type *to, Expr * return false; case EXPR_SCOPED_EXPR: UNREACHABLE + case EXPR_MACRO_EXPR: + return sema_expr_analyse_macro_expr(context, to, expr); case EXPR_TRY: return sema_expr_analyse_try(context, to, expr); case EXPR_CONST: diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index d87ee81b4..d88f815ba 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -504,7 +504,7 @@ Token lexer_scan_token(Lexer *lexer) switch (c) { case '@': - return scan_prefixed_ident(lexer, TOKEN_AT_IDENT, TOKEN_AT, true, "@"); + return make_token(lexer, TOKEN_AT, "@"); case '\'': return scan_char(lexer); case '"': diff --git a/src/compiler/llvm_codegen.c b/src/compiler/llvm_codegen.c index 688ede481..f638cfc17 100644 --- a/src/compiler/llvm_codegen.c +++ b/src/compiler/llvm_codegen.c @@ -170,12 +170,6 @@ void llvm_codegen(Context *context) gencontext_emit_function_decl(&gen_context, context->functions[i]); } - VECEACH(gen_context.generated_types, i) - { - Type *type = gen_context.generated_types[i]; - type->backend_debug_type = NULL; - type->backend_type = NULL; - } gencontext_print_llvm_ir(&gen_context); diff --git a/src/compiler/llvm_codegen_debug_info.c b/src/compiler/llvm_codegen_debug_info.c index e030d3261..a7d538cdd 100644 --- a/src/compiler/llvm_codegen_debug_info.c +++ b/src/compiler/llvm_codegen_debug_info.c @@ -109,6 +109,7 @@ LLVMMetadataRef gencontext_get_debug_type(GenContext *context, Type *type) case TYPE_IXX: case TYPE_UXX: case TYPE_FXX: + case TYPE_META_TYPE: UNREACHABLE case TYPE_BOOL: return 
gencontext_simple_debug_type(context, type, DW_ATE_boolean); diff --git a/src/compiler/llvm_codegen_expr.c b/src/compiler/llvm_codegen_expr.c index dfb189154..e9cedbb3c 100644 --- a/src/compiler/llvm_codegen_expr.c +++ b/src/compiler/llvm_codegen_expr.c @@ -41,7 +41,7 @@ static inline LLVMValueRef gencontext_emit_subscript_addr(GenContext *context, E TODO case TYPE_POINTER: return LLVMBuildGEP2(context->builder, - BACKEND_TYPE(type->pointer), + llvm_type(type->pointer), gencontext_emit_expr(context, expr->subscript_expr.expr), &index, 1, "[]"); case TYPE_VARARRAY: @@ -56,7 +56,7 @@ static inline LLVMValueRef gencontext_emit_subscript_addr(GenContext *context, E static inline LLVMValueRef gencontext_emit_access_addr(GenContext *context, Expr *expr) { LLVMValueRef value = gencontext_emit_address(context, expr->access_expr.parent); - return LLVMBuildStructGEP2(context->builder, BACKEND_TYPE(expr->access_expr.parent->type), value, (unsigned)expr->access_expr.index, ""); + return LLVMBuildStructGEP2(context->builder, llvm_type(expr->access_expr.parent->type), value, (unsigned)expr->access_expr.index, ""); } LLVMValueRef gencontext_emit_scoped_expr(GenContext *context, Expr *expr) @@ -103,6 +103,7 @@ LLVMValueRef gencontext_emit_address(GenContext *context, Expr *expr) case EXPR_INITIALIZER_LIST: case EXPR_EXPRESSION_LIST: case EXPR_CAST: + case EXPR_MACRO_EXPR: UNREACHABLE } UNREACHABLE @@ -115,9 +116,9 @@ LLVMValueRef gencontext_emit_cast(GenContext *context, CastKind cast_kind, LLVMV case CAST_ERROR: UNREACHABLE case CAST_PTRPTR: - return LLVMBuildPointerCast(context->builder, value, BACKEND_TYPE(type), "ptrptr"); + return LLVMBuildPointerCast(context->builder, value, llvm_type(type), "ptrptr"); case CAST_PTRXI: - return LLVMBuildPtrToInt(context->builder, value, BACKEND_TYPE(type), "ptrxi"); + return LLVMBuildPtrToInt(context->builder, value, llvm_type(type), "ptrxi"); case CAST_VARRPTR: TODO case CAST_ARRPTR: @@ -125,45 +126,45 @@ LLVMValueRef 
gencontext_emit_cast(GenContext *context, CastKind cast_kind, LLVMV case CAST_STRPTR: TODO case CAST_PTRBOOL: - return LLVMBuildICmp(context->builder, LLVMIntNE, value, LLVMConstPointerNull(BACKEND_TYPE(type->canonical->pointer)), "ptrbool"); + return LLVMBuildICmp(context->builder, LLVMIntNE, value, LLVMConstPointerNull(llvm_type(type->canonical->pointer)), "ptrbool"); case CAST_BOOLINT: - return LLVMBuildTrunc(context->builder, value, BACKEND_TYPE(type), "boolsi"); + return LLVMBuildTrunc(context->builder, value, llvm_type(type), "boolsi"); case CAST_FPBOOL: return LLVMBuildFCmp(context->builder, LLVMRealUNE, value, LLVMConstNull(LLVMTypeOf(value)), "fpbool"); case CAST_BOOLFP: - return LLVMBuildSIToFP(context->builder, value, BACKEND_TYPE(type), "boolfp"); + return LLVMBuildSIToFP(context->builder, value, llvm_type(type), "boolfp"); case CAST_INTBOOL: return LLVMBuildICmp(context->builder, LLVMIntNE, value, LLVMConstNull(LLVMTypeOf(value)), "intbool"); case CAST_FPFP: return type_convert_will_trunc(type, target_type) - ? LLVMBuildFPTrunc(context->builder, value, BACKEND_TYPE(type), "fpfptrunc") - : LLVMBuildFPExt(context->builder, value, BACKEND_TYPE(type), "fpfpext"); + ? LLVMBuildFPTrunc(context->builder, value, llvm_type(type), "fpfptrunc") + : LLVMBuildFPExt(context->builder, value, llvm_type(type), "fpfpext"); case CAST_FPSI: - return LLVMBuildFPToSI(context->builder, value, BACKEND_TYPE(type), "fpsi"); + return LLVMBuildFPToSI(context->builder, value, llvm_type(type), "fpsi"); case CAST_FPUI: - return LLVMBuildFPToUI(context->builder, value, BACKEND_TYPE(type), "fpui"); + return LLVMBuildFPToUI(context->builder, value, llvm_type(type), "fpui"); case CAST_SISI: return type_convert_will_trunc(type, target_type) - ? LLVMBuildTrunc(context->builder, value, BACKEND_TYPE(type), "sisitrunc") - : LLVMBuildSExt(context->builder, value, BACKEND_TYPE(type), "sisiext"); + ? 
LLVMBuildTrunc(context->builder, value, llvm_type(type), "sisitrunc") + : LLVMBuildSExt(context->builder, value, llvm_type(type), "sisiext"); case CAST_SIUI: return type_convert_will_trunc(type, target_type) - ? LLVMBuildTrunc(context->builder, value, BACKEND_TYPE(type), "siuitrunc") - : LLVMBuildZExt(context->builder, value, BACKEND_TYPE(type), "siuiext"); + ? LLVMBuildTrunc(context->builder, value, llvm_type(type), "siuitrunc") + : LLVMBuildZExt(context->builder, value, llvm_type(type), "siuiext"); case CAST_SIFP: - return LLVMBuildSIToFP(context->builder, value, BACKEND_TYPE(type), "sifp"); + return LLVMBuildSIToFP(context->builder, value, llvm_type(type), "sifp"); case CAST_XIPTR: - return LLVMBuildIntToPtr(context->builder, value, BACKEND_TYPE(type), "xiptr"); + return LLVMBuildIntToPtr(context->builder, value, llvm_type(type), "xiptr"); case CAST_UISI: return type_convert_will_trunc(type, target_type) - ? LLVMBuildTrunc(context->builder, value, BACKEND_TYPE(type), "uisitrunc") - : LLVMBuildZExt(context->builder, value, BACKEND_TYPE(type), "uisiext"); + ? LLVMBuildTrunc(context->builder, value, llvm_type(type), "uisitrunc") + : LLVMBuildZExt(context->builder, value, llvm_type(type), "uisiext"); case CAST_UIUI: return type_convert_will_trunc(type, target_type) - ? LLVMBuildTrunc(context->builder, value, BACKEND_TYPE(type), "uiuitrunc") - : LLVMBuildZExt(context->builder, value, BACKEND_TYPE(type), "uiuiext"); + ? 
LLVMBuildTrunc(context->builder, value, llvm_type(type), "uiuitrunc") + : LLVMBuildZExt(context->builder, value, llvm_type(type), "uiuiext"); case CAST_UIFP: - return LLVMBuildUIToFP(context->builder, value, BACKEND_TYPE(type), "uifp"); + return LLVMBuildUIToFP(context->builder, value, llvm_type(type), "uifp"); case CAST_ENUMSI: TODO } @@ -177,11 +178,11 @@ static inline LLVMValueRef gencontext_emit_cast_expr(GenContext *context, Expr * static inline LLVMValueRef gencontext_emit_inc_dec_change(GenContext *context, bool use_mod, LLVMValueRef current_value, Expr *expr, int diff) { Type *type = expr->type->canonical; - LLVMTypeRef llvm_type = BACKEND_TYPE(type); + LLVMTypeRef llvm_type = llvm_type(type); if (type->type_kind == TYPE_POINTER) { - LLVMValueRef add = LLVMConstInt(diff < 0 ? BACKEND_TYPE(type_isize) : BACKEND_TYPE(type_usize), diff, diff < 0); + LLVMValueRef add = LLVMConstInt(diff < 0 ? llvm_type(type_isize) : llvm_type(type_usize), diff, diff < 0); return LLVMBuildGEP2(context->builder, llvm_type, current_value, &add, 1, "ptrincdec"); } @@ -200,7 +201,7 @@ static inline LLVMValueRef gencontext_emit_inc_dec_change(GenContext *context, b static inline LLVMValueRef gencontext_emit_pre_inc_dec(GenContext *context, Expr *expr, int diff, bool use_mod) { LLVMValueRef addr = gencontext_emit_address(context, expr); - LLVMValueRef value = LLVMBuildLoad2(context->builder, BACKEND_TYPE(expr->type), addr, ""); + LLVMValueRef value = LLVMBuildLoad2(context->builder, llvm_type(expr->type), addr, ""); LLVMValueRef result = gencontext_emit_inc_dec_change(context, use_mod, value, expr, diff); LLVMBuildStore(context->builder, result, addr); return result; @@ -209,7 +210,7 @@ static inline LLVMValueRef gencontext_emit_pre_inc_dec(GenContext *context, Expr static inline LLVMValueRef gencontext_emit_post_inc_dec(GenContext *context, Expr *expr, int diff, bool use_mod) { LLVMValueRef addr = gencontext_emit_address(context, expr); - LLVMValueRef value = 
LLVMBuildLoad2(context->builder, BACKEND_TYPE(expr->type), addr, ""); + LLVMValueRef value = LLVMBuildLoad2(context->builder, llvm_type(expr->type), addr, ""); LLVMValueRef result = gencontext_emit_inc_dec_change(context, use_mod, value, expr, diff); LLVMBuildStore(context->builder, result, addr); return value; @@ -234,7 +235,7 @@ LLVMValueRef gencontext_emit_unary_expr(GenContext *context, Expr *expr) case UNARYOP_ADDR: return gencontext_emit_address(context, expr->unary_expr.expr); case UNARYOP_DEREF: - return LLVMBuildLoad2(context->builder, BACKEND_TYPE(expr->unary_expr.expr->type), gencontext_emit_expr(context, expr->unary_expr.expr), "deref"); + return LLVMBuildLoad2(context->builder, llvm_type(expr->unary_expr.expr->type), gencontext_emit_expr(context, expr->unary_expr.expr), "deref"); case UNARYOP_INC: return gencontext_emit_pre_inc_dec(context, expr->unary_expr.expr, 1, false); case UNARYOP_DEC: @@ -277,9 +278,9 @@ static LLVMValueRef gencontext_emit_logical_and_or(GenContext *context, Expr *ex // Simplify for LLVM by entering the constants we already know of. LLVMValueRef result_on_skip = LLVMConstInt(LLVMInt1TypeInContext(context->context), op == BINARYOP_AND ? 
0 : 1, false); - LLVMValueRef logicValues[2] = { result_on_skip, rhs }; + LLVMValueRef logic_values[2] = { result_on_skip, rhs }; LLVMBasicBlockRef blocks[2] = { start_block, rhs_block }; - LLVMAddIncoming(phi, logicValues, blocks, 2); + LLVMAddIncoming(phi, logic_values, blocks, 2); return phi; } @@ -293,7 +294,7 @@ static inline LLVMValueRef gencontext_emit_initialization_from_expr(GenContext * static inline LLVMValueRef gencontext_emit_struct_value_expr(GenContext *context, Expr *expr) { - LLVMValueRef temp_alloc = gencontext_emit_alloca(context, BACKEND_TYPE(expr->type), "temp"); + LLVMValueRef temp_alloc = gencontext_emit_alloca(context, llvm_type(expr->type), "temp"); return gencontext_emit_initialization_from_expr(context, temp_alloc, expr->struct_value_expr.init_expr); } @@ -314,7 +315,7 @@ static LLVMValueRef gencontext_emit_binary(GenContext *context, Expr *expr, LLVM LLVMValueRef rhs_value; if (lhs_addr) { - lhs_value = LLVMBuildLoad2(context->builder, BACKEND_TYPE(lhs->type), lhs_addr, ""); + lhs_value = LLVMBuildLoad2(context->builder, llvm_type(lhs->type), lhs_addr, ""); } else { @@ -348,7 +349,7 @@ static LLVMValueRef gencontext_emit_binary(GenContext *context, Expr *expr, LLVM { if (lhs->type->canonical == rhs->type->canonical) return LLVMBuildPtrDiff(context->builder, lhs_value, rhs_value, "ptrdiff"); rhs_value = LLVMBuildNeg(context->builder, rhs_value, ""); - return LLVMBuildGEP2(context->builder, BACKEND_TYPE(lhs->type), lhs_value, &rhs_value, 1, "ptrsub"); + return LLVMBuildGEP2(context->builder, llvm_type(lhs->type), lhs_value, &rhs_value, 1, "ptrsub"); } if (is_float) return LLVMBuildFSub(context->builder, lhs_value, rhs_value, "fsub"); return gencontext_emit_sub_int(context, lhs->type->canonical, binary_op == BINARYOP_SUB_MOD, lhs_value, rhs_value); @@ -357,7 +358,7 @@ static LLVMValueRef gencontext_emit_binary(GenContext *context, Expr *expr, LLVM if (lhs->type->canonical->type_kind == TYPE_POINTER) { 
assert(type_is_integer(rhs->type->canonical)); - return LLVMBuildGEP2(context->builder, BACKEND_TYPE(lhs->type), lhs_value, &rhs_value, 1, "ptradd"); + return LLVMBuildGEP2(context->builder, llvm_type(lhs->type), lhs_value, &rhs_value, 1, "ptradd"); } if (is_float) return LLVMBuildFAdd(context->builder, lhs_value, rhs_value, "fadd"); return gencontext_emit_add_int(context, lhs->type->canonical, binary_op == BINARYOP_ADD_MOD, lhs_value, rhs_value); @@ -491,9 +492,9 @@ LLVMValueRef gencontext_emit_elvis_expr(GenContext *context, Expr *expr) gencontext_emit_block(context, phi_block); LLVMValueRef phi = LLVMBuildPhi(context->builder, expr->type->backend_type, "val"); - LLVMValueRef logicValues[2] = { lhs, rhs }; + LLVMValueRef logic_values[2] = { lhs, rhs }; LLVMBasicBlockRef blocks[2] = { current_block, rhs_block }; - LLVMAddIncoming(phi, logicValues, blocks, 2); + LLVMAddIncoming(phi, logic_values, blocks, 2); return phi; } @@ -539,7 +540,7 @@ static LLVMValueRef gencontext_emit_identifier_expr(GenContext *context, Expr *e LLVMValueRef gencontext_emit_const_expr(GenContext *context, Expr *expr) { - LLVMTypeRef type = BACKEND_TYPE(expr->type); + LLVMTypeRef type = llvm_type(expr->type); switch (expr->const_expr.type) { case CONST_INT: @@ -578,7 +579,8 @@ LLVMValueRef gencontext_emit_call_expr(GenContext *context, Expr *expr) Decl *function = expr->call_expr.function->identifier_expr.decl; LLVMValueRef func = function->func.backend_value; - LLVMTypeRef func_type = BACKEND_TYPE(function->type); + LLVMTypeRef func_type = llvm_type(function->type); + // TODO fix throws and return optimization LLVMValueRef call = LLVMBuildCall2(context->builder, func_type, func, values, args, "call"); /* if (function->func.function_signature.convention) @@ -595,7 +597,7 @@ static inline LLVMValueRef gencontext_emit_access_expr(GenContext *context, Expr { // Improve, add string description to the access? 
LLVMValueRef value = gencontext_emit_address(context, expr->access_expr.parent); - LLVMValueRef val = LLVMBuildStructGEP2(context->builder, BACKEND_TYPE(expr->access_expr.parent->type), value, (unsigned)expr->access_expr.index, ""); + LLVMValueRef val = LLVMBuildStructGEP2(context->builder, llvm_type(expr->access_expr.parent->type), value, (unsigned)expr->access_expr.index, ""); return LLVMBuildLoad2(context->builder, gencontext_get_llvm_type(context, expr->type), val, ""); } @@ -611,19 +613,22 @@ static inline LLVMValueRef gencontext_emit_expression_list_expr(GenContext *cont static inline LLVMValueRef gencontext_emit_initializer_list_expr(GenContext *context, Expr *expr) { - LLVMValueRef value = LLVMGetUndef(LLVMTYPE(expr->type)); + LLVMTypeRef type = llvm_type(expr->type); + LLVMValueRef value = LLVMGetUndef(type); - /* - for (expr->initializer_expr) - expr->type. - else if (littype->tag == StructTag) { - LLVMValueRef strval = LLVMGetUndef(genlType(gen, littype)); - unsigned int pos = 0; - for (nodesFor(lit->args, cnt, nodesp)) - strval = LLVMBuildInsertValue(gen->builder, strval, genlExpr(gen, *nodesp), pos++, "literal"); - return strval; + if (!vec_size(expr->initializer_expr)) + { + LLVMValueRef ref = gencontext_emit_alloca(context, type, "temp"); + value = LLVMBuildMemSet(context->builder, ref, LLVMConstInt(llvm_type(type_byte), 0, false), + LLVMConstInt(llvm_type(type_ulong), expr->type->decl->strukt.size, false), expr->type->decl->strukt.alignment); + return ref; + } + + VECEACH(expr->initializer_expr, i) + { + LLVMValueRef init_value = gencontext_emit_expr(context, expr->initializer_expr[i]); + value = LLVMBuildInsertValue(context->builder, value, init_value, i, "literal"); } - TODO*/ return value; } @@ -654,6 +659,7 @@ LLVMValueRef gencontext_emit_expr(GenContext *context, Expr *expr) case EXPR_SIZEOF: case EXPR_TYPE_ACCESS: case EXPR_TRY: + case EXPR_MACRO_EXPR: // These are folded in the semantic analysis step. 
UNREACHABLE case EXPR_IDENTIFIER: diff --git a/src/compiler/llvm_codegen_function.c b/src/compiler/llvm_codegen_function.c index 6653aa973..7568c73d6 100644 --- a/src/compiler/llvm_codegen_function.c +++ b/src/compiler/llvm_codegen_function.c @@ -78,10 +78,22 @@ static inline void gencontext_emit_parameter(GenContext *context, Decl *decl, un assert(decl->decl_kind == DECL_VAR && decl->var.kind == VARDECL_PARAM); // Allocate room on stack and copy. - decl->var.backend_ref = gencontext_emit_alloca(context, BACKEND_TYPE(decl->type), decl->name); + decl->var.backend_ref = gencontext_emit_alloca(context, llvm_type(decl->type), decl->name); LLVMBuildStore(context->builder, LLVMGetParam(context->function, index), decl->var.backend_ref); } +void gencontext_emit_implicit_return(GenContext *context) +{ + if (func_has_error_return(&context->cur_func_decl->func.function_signature)) + { + LLVMBuildRet(context->builder, LLVMConstInt(llvm_type(type_ulong), 0, false)); + } + else + { + LLVMBuildRetVoid(context->builder); + } +} + void gencontext_emit_function_body(GenContext *context, Decl *decl) { assert(decl->func.backend_value); @@ -90,6 +102,7 @@ void gencontext_emit_function_body(GenContext *context, Decl *decl) LLVMBuilderRef prev_builder = context->builder; context->function = decl->func.backend_value; + context->cur_func_decl = decl; LLVMBasicBlockRef entry = LLVMAppendBasicBlockInContext(context->context, context->function, "entry"); context->current_block = entry; @@ -100,10 +113,22 @@ void gencontext_emit_function_body(GenContext *context, Decl *decl) LLVMValueRef alloca_point = LLVMBuildAlloca(context->builder, LLVMInt32TypeInContext(context->context), "alloca_point"); context->alloca_point = alloca_point; + unsigned return_parameter = func_return_value_as_out(&decl->func.function_signature) ? 
1 : 0; + + if (return_parameter) + { + context->return_out = gencontext_emit_alloca(context, llvm_type(decl->func.function_signature.rtype->type), "retval"); + LLVMBuildStore(context->builder, LLVMGetParam(context->function, 0), context->return_out); + } + else + { + context->return_out = NULL; + } + // Generate LLVMValueRef's for all parameters, so we can use them as local vars in code VECEACH(decl->func.function_signature.params, i) { - gencontext_emit_parameter(context, decl->func.function_signature.params[i], i); + gencontext_emit_parameter(context, decl->func.function_signature.params[i], i + return_parameter); } VECEACH(decl->func.labels, i) @@ -130,7 +155,7 @@ void gencontext_emit_function_body(GenContext *context, Decl *decl) assert(decl->func.function_signature.rtype->type->type_kind == TYPE_VOID); assert(decl->func.body->compound_stmt.defer_list.end == NULL); gencontext_emit_defer(context, decl->func.body->compound_stmt.defer_list.start, NULL); - LLVMBuildRetVoid(context->builder); + gencontext_emit_implicit_return(context); } // erase alloca point @@ -151,7 +176,7 @@ void gencontext_emit_function_decl(GenContext *context, Decl *decl) assert(decl->decl_kind == DECL_FUNC); // Resolve function backend type for function. 
decl->func.backend_value = LLVMAddFunction(context->module, decl->external_name, - BACKEND_TYPE(decl->type)); + llvm_type(decl->type)); // Specify appropriate storage class, visibility and call convention // extern functions (linkedited in separately): @@ -208,11 +233,11 @@ void gencontext_emit_extern_decl(GenContext *context, Decl *decl) UNREACHABLE; case DECL_FUNC: decl->func.backend_value = LLVMAddFunction(context->module, decl->external_name, - BACKEND_TYPE(decl->type)); + llvm_type(decl->type)); LLVMSetVisibility(decl->func.backend_value, LLVMDefaultVisibility); break; case DECL_VAR: - decl->var.backend_ref = LLVMAddGlobal(context->module, BACKEND_TYPE(decl->type), decl->external_name); + decl->var.backend_ref = LLVMAddGlobal(context->module, llvm_type(decl->type), decl->external_name); LLVMSetVisibility(decl->var.backend_ref, LLVMDefaultVisibility); break; case DECL_TYPEDEF: @@ -221,7 +246,7 @@ void gencontext_emit_extern_decl(GenContext *context, Decl *decl) TODO case DECL_STRUCT: case DECL_UNION: - BACKEND_TYPE(decl->type); + llvm_type(decl->type); break; case DECL_ENUM: TODO diff --git a/src/compiler/llvm_codegen_internal.h b/src/compiler/llvm_codegen_internal.h index 9c2332855..2e572007a 100644 --- a/src/compiler/llvm_codegen_internal.h +++ b/src/compiler/llvm_codegen_internal.h @@ -64,8 +64,7 @@ typedef struct Context *ast_context; BreakContinue break_continue_stack[BREAK_STACK_MAX]; size_t break_continue_stack_index; - LLVMTypeRef error_type; - Type **generated_types; + LLVMValueRef return_out; } GenContext; @@ -89,6 +88,7 @@ static inline LLVMBasicBlockRef gencontext_create_free_block(GenContext *context return LLVMCreateBasicBlockInContext(context->context, name); } +void gencontext_emit_implicit_return(GenContext *context); void gencontext_emit_function_decl(GenContext *context, Decl *decl); void gencontext_emit_extern_decl(GenContext *context, Decl *decl); LLVMValueRef gencontext_emit_address(GenContext *context, Expr *expr); diff --git 
a/src/compiler/llvm_codegen_module.c b/src/compiler/llvm_codegen_module.c index 0b583e35f..1237a2769 100644 --- a/src/compiler/llvm_codegen_module.c +++ b/src/compiler/llvm_codegen_module.c @@ -35,11 +35,6 @@ static inline LLVMTypeRef gencontext_create_basic_llvm_type(GenContext *context, } } -static inline void gencontext_init_basic_llvm_type(GenContext *context, Type *type) -{ - vec_add(context->generated_types, type); - type->backend_type = gencontext_create_basic_llvm_type(context, type); -} void gencontext_begin_module(GenContext *context) { assert(!context->module && "Expected no module"); @@ -72,18 +67,6 @@ void gencontext_begin_module(GenContext *context) // Setup all types. Not thread-safe, but at this point in time we can assume a single context. // We need to remove the context from the cache after this. // This would seem to indicate that we should change Type / actual type. - gencontext_init_basic_llvm_type(context, type_char); - gencontext_init_basic_llvm_type(context, type_byte); - gencontext_init_basic_llvm_type(context, type_short); - gencontext_init_basic_llvm_type(context, type_ushort); - gencontext_init_basic_llvm_type(context, type_int); - gencontext_init_basic_llvm_type(context, type_uint); - gencontext_init_basic_llvm_type(context, type_long); - gencontext_init_basic_llvm_type(context, type_ulong); - gencontext_init_basic_llvm_type(context, type_float); - gencontext_init_basic_llvm_type(context, type_double); - gencontext_init_basic_llvm_type(context, type_void); - gencontext_init_basic_llvm_type(context, type_bool); context->pointer_alignment = LLVMPointerSizeForAS(target_data_layout(), 0); diff --git a/src/compiler/llvm_codegen_stmt.c b/src/compiler/llvm_codegen_stmt.c index d5ba51296..5a938d979 100644 --- a/src/compiler/llvm_codegen_stmt.c +++ b/src/compiler/llvm_codegen_stmt.c @@ -23,7 +23,7 @@ static LLVMValueRef gencontext_emit_decl(GenContext *context, Ast *ast) { Decl *decl = ast->declare_stmt; - decl->var.backend_ref = 
gencontext_emit_alloca(context, BACKEND_TYPE(decl->type), decl->name); + decl->var.backend_ref = gencontext_emit_alloca(context, llvm_type(decl->type), decl->name); // TODO NRVO // TODO debug info /* @@ -42,6 +42,16 @@ static LLVMValueRef gencontext_emit_decl(GenContext *context, Ast *ast) */ if (decl->var.init_expr) { + Expr *expr = decl->var.init_expr; + // Quick path for empty initializer list + if (expr->expr_kind == EXPR_INITIALIZER_LIST && vec_size(expr->initializer_expr) == 0) + { + LLVMBuildMemSet(context->builder, decl->var.backend_ref, LLVMConstInt(llvm_type(type_byte), 0, false), + LLVMConstInt(llvm_type(type_ulong), expr->type->decl->strukt.size, false), + expr->type->decl->strukt.alignment); + return decl->var.backend_ref; + } + LLVMValueRef value = gencontext_emit_expr(context, decl->var.init_expr); LLVMBuildStore(context->builder, value, decl->var.backend_ref); return decl->var.backend_ref; @@ -100,14 +110,37 @@ static inline void gencontext_emit_return(GenContext *context, Ast *ast) { // Ensure we are on a branch that is non empty. if (!gencontext_check_block_branch_emit(context)) return; + LLVMValueRef ret_value = ast->return_stmt.expr ? gencontext_emit_expr(context, ast->return_stmt.expr) : NULL; gencontext_emit_defer(context, ast->return_stmt.defer, NULL); if (!ret_value) { - LLVMBuildRetVoid(context->builder); + gencontext_emit_implicit_return(context); return; } - LLVMBuildRet(context->builder, ret_value); + if (context->return_out) + { + LLVMBuildStore(context->builder, ret_value, context->return_out); + gencontext_emit_implicit_return(context); + } + else + { + LLVMBuildRet(context->builder, ret_value); + } + context->current_block = NULL; + LLVMBasicBlockRef post_ret_block = gencontext_create_free_block(context, "ret"); + gencontext_emit_block(context, post_ret_block); +} + +static inline void gencontext_emit_throw(GenContext *context, Ast *ast) +{ + // Ensure we are on a branch that is non empty. 
+ if (!gencontext_check_block_branch_emit(context)) return; + + gencontext_emit_defer(context, ast->throw_stmt.defers.start, ast->throw_stmt.defers.end); + // TODO handle throw if simply a jump + LLVMBuildRet(context->builder, LLVMConstInt(llvm_type(type_ulong), 10 + ast->throw_stmt.throw_value->identifier_expr.decl->error_constant.value, false)); + context->current_block = NULL; LLVMBasicBlockRef post_ret_block = gencontext_create_free_block(context, "ret"); gencontext_emit_block(context, post_ret_block); @@ -154,10 +187,7 @@ void gencontext_emit_if(GenContext *context, Ast *ast) } -static void gencontext_push_next(GenContext *context, LLVMBasicBlockRef nextBlock) -{ - // TODO -} + static void gencontext_push_break_continue(GenContext *context, LLVMBasicBlockRef break_block, LLVMBasicBlockRef continue_block, LLVMBasicBlockRef next_block) @@ -474,7 +504,7 @@ LLVMValueRef gencontext_get_defer_bool(GenContext *context, Ast *defer) assert(defer->ast_kind == AST_DEFER_STMT && defer->defer_stmt.emit_boolean); if (!defer->defer_stmt.bool_var) { - defer->defer_stmt.bool_var = gencontext_emit_alloca(context, BACKEND_TYPE(type_bool), "defer"); + defer->defer_stmt.bool_var = gencontext_emit_alloca(context, llvm_type(type_bool), "defer"); } return defer->defer_stmt.bool_var; } @@ -492,7 +522,7 @@ void gencontext_emit_defer(GenContext *context, Ast *defer_start, Ast *defer_end LLVMBasicBlockRef exit_block = LLVMCreateBasicBlockInContext(context->context, "skip.defer"); LLVMBasicBlockRef defer_block = LLVMCreateBasicBlockInContext(context->context, "do.defer"); - LLVMValueRef value = LLVMBuildLoad2(context->builder, BACKEND_TYPE(type_bool), gencontext_get_defer_bool(context, defer), "will.defer"); + LLVMValueRef value = LLVMBuildLoad2(context->builder, llvm_type(type_bool), gencontext_get_defer_bool(context, defer), "will.defer"); gencontext_emit_cond_br(context, value, defer_block, exit_block); @@ -520,7 +550,7 @@ void gencontext_emit_goto(GenContext *context, Ast *ast) Ast 
*defer = ast->goto_stmt.label->label_stmt.defer; while (defer != ast->goto_stmt.defer.end) { - LLVMBuildStore(context->builder, LLVMConstInt(BACKEND_TYPE(type_bool), 0, false), + LLVMBuildStore(context->builder, LLVMConstInt(llvm_type(type_bool), 0, false), gencontext_get_defer_bool(context, defer)); defer = defer->defer_stmt.prev_defer; } @@ -604,7 +634,7 @@ void gencontext_emit_stmt(GenContext *context, Ast *ast) case AST_DEFER_STMT: if (ast->defer_stmt.emit_boolean) { - LLVMBuildStore(context->builder, LLVMConstInt(BACKEND_TYPE(type_bool), 1, false), + LLVMBuildStore(context->builder, LLVMConstInt(llvm_type(type_bool), 1, false), gencontext_get_defer_bool(context, ast)); } break; @@ -612,9 +642,11 @@ void gencontext_emit_stmt(GenContext *context, Ast *ast) break; case AST_CATCH_STMT: case AST_TRY_STMT: - case AST_THROW_STMT: // Should have been lowered. UNREACHABLE + case AST_THROW_STMT: + gencontext_emit_throw(context, ast); + break; case AST_ASM_STMT: TODO case AST_ATTRIBUTE: diff --git a/src/compiler/llvm_codegen_type.c b/src/compiler/llvm_codegen_type.c index 17aab9c5e..7f6deff30 100644 --- a/src/compiler/llvm_codegen_type.c +++ b/src/compiler/llvm_codegen_type.c @@ -4,11 +4,11 @@ #include "llvm_codegen_internal.h" -#define LLVMCONTEXT(gen_context) (gen_context ? 
gen_context->context : LLVMGetGlobalContext()) +LLVMTypeRef llvm_get_type(LLVMContextRef context, Type *type); -static inline LLVMTypeRef gencontext_create_llvm_type_from_decl(GenContext *context, Decl *decl) +static inline LLVMTypeRef llvm_type_from_decl(LLVMContextRef context, Decl *decl) { - static LLVMTypeRef params[512]; + static LLVMTypeRef params[MAX_PARAMS]; switch (decl->decl_kind) { case DECL_ATTRIBUTE: @@ -28,26 +28,25 @@ static inline LLVMTypeRef gencontext_create_llvm_type_from_decl(GenContext *cont { VECEACH(decl->func.function_signature.params, i) { - params[i] = BACKEND_TYPE(decl->func.function_signature.params[i]->type); + params[i] = llvm_get_type(context, decl->func.function_signature.params[i]->type); } unsigned param_size = vec_size(decl->func.function_signature.params); - return LLVMFunctionType(BACKEND_TYPE(decl->func.function_signature.rtype->type), + return LLVMFunctionType(llvm_get_type(context, decl->func.function_signature.rtype->type), params, param_size, decl->func.function_signature.variadic); } case DECL_TYPEDEF: - return BACKEND_TYPE(decl->typedef_decl.type); + return llvm_get_type(context, decl->typedef_decl.type); case DECL_STRUCT: { LLVMTypeRef *types = NULL; VECEACH(decl->strukt.members, i) { - VECADD(types, BACKEND_TYPE(decl->strukt.members[i]->type)); + vec_add(types, llvm_get_type(context, decl->strukt.members[i]->type)); } - // TODO fix name. 
- LLVMTypeRef type = LLVMStructCreateNamed(LLVMCONTEXT(context), decl->external_name); + LLVMTypeRef type = LLVMStructCreateNamed(context, decl->external_name); LLVMStructSetBody(type, types, vec_size(types), decl->is_packed); return type; } @@ -57,7 +56,7 @@ static inline LLVMTypeRef gencontext_create_llvm_type_from_decl(GenContext *cont unsigned long long max_size = 0; VECEACH(decl->strukt.members, i) { - LLVMTypeRef type = BACKEND_TYPE(decl->strukt.members[i]->type); + LLVMTypeRef type = llvm_get_type(context, decl->strukt.members[i]->type); unsigned long long size = LLVMStoreSizeOfType(target_data_layout(), type); if (size > max_size || !max_type) { @@ -65,13 +64,15 @@ static inline LLVMTypeRef gencontext_create_llvm_type_from_decl(GenContext *cont max_type = type; } } - LLVMTypeRef type = LLVMStructCreateNamed(LLVMCONTEXT(context), decl->external_name); + LLVMTypeRef type = LLVMStructCreateNamed(context, decl->external_name); LLVMStructSetBody(type, &max_type, 1, false); return type; } case DECL_ENUM: - return BACKEND_TYPE(decl->type); + return llvm_get_type(context, decl->type); case DECL_ERROR: + TODO + /* if (!context->error_type) { LLVMTypeRef domain_type = LLVMInt64TypeInContext(LLVMCONTEXT(context)); @@ -81,117 +82,144 @@ static inline LLVMTypeRef gencontext_create_llvm_type_from_decl(GenContext *cont LLVMStructSetBody(error_type, types, 2, false); context->error_type = error_type; } - return context->error_type; + return context->error_type;*/ case DECL_THROWS: UNREACHABLE } UNREACHABLE } -static inline LLVMTypeRef gencontext_create_llvm_type_from_ptr(GenContext *context, Type *type) +static inline LLVMTypeRef llvm_type_from_ptr(LLVMContextRef context, Type *type) { - LLVMTypeRef base_llvm_type = BACKEND_TYPE(type->pointer); - vec_add(context->generated_types, type); + LLVMTypeRef base_llvm_type = llvm_get_type(context, type->pointer); if (type->canonical != type) { - return type->backend_type = BACKEND_TYPE(type->canonical); + return type->backend_type 
= llvm_get_type(context, type->canonical); } return type->backend_type = LLVMPointerType(base_llvm_type, /** TODO **/0); } -static inline LLVMTypeRef gencontext_create_llvm_type_from_array(GenContext *context, Type *type) +static inline LLVMTypeRef llvm_type_from_array(LLVMContextRef context, Type *type) { - LLVMTypeRef base_llvm_type = BACKEND_TYPE(type->array.base); - - vec_add(context->generated_types, type); + LLVMTypeRef base_llvm_type = llvm_get_type(context, type->array.base); if (type->canonical != type) { - return type->backend_type = BACKEND_TYPE(type->canonical); + return type->backend_type = llvm_get_type(context, type->canonical); } return type->backend_type = LLVMPointerType(base_llvm_type, /** TODO **/0); } -LLVMTypeRef gencontext_create_llvm_func_type(GenContext *context, Type *type) +LLVMTypeRef llvm_func_type(LLVMContextRef context, Type *type) { LLVMTypeRef *params = NULL; FunctionSignature *signature = type->func.signature; - // TODO throws - if (vec_size(signature->params)) + bool return_parameter = func_return_value_as_out(signature); + bool return_error = func_has_error_return(signature); + unsigned parameters = vec_size(signature->params) + return_parameter; + if (parameters) { - params = malloc_arena(sizeof(LLVMTypeRef) * vec_size(signature->params)); + params = malloc_arena(sizeof(LLVMTypeRef) * parameters); + if (return_parameter) + { + params[0] = llvm_get_type(context, signature->rtype->type); + } VECEACH(signature->params, i) { - params[i] = BACKEND_TYPE(signature->params[i]->type->canonical); + params[i + return_parameter] = llvm_get_type(context, signature->params[i]->type->canonical); } } - return LLVMFunctionType( - BACKEND_TYPE(type->func.signature->rtype->type), - params, vec_size(signature->params), signature->variadic); + LLVMTypeRef ret_type; + if (return_error) + { + ret_type = llvm_get_type(context, type_ulong); + } + else + { + ret_type = return_parameter ? 
llvm_get_type(context, type_void) : llvm_get_type(context, type->func.signature->rtype->type); + } + return LLVMFunctionType( ret_type, params, parameters, signature->variadic); } -LLVMTypeRef gencontext_get_llvm_type(GenContext *context, Type *type) +LLVMTypeRef llvm_get_type(LLVMContextRef context, Type *type) { - if (type->backend_type) + if (type->backend_type && LLVMGetTypeContext(type->backend_type) == context) { - assert(LLVMGetTypeContext(type->backend_type) == context->context); return type->backend_type; } - vec_add(context->generated_types, type); - DEBUG_LOG("Generating type %s", type->name); switch (type->type_kind) { + case TYPE_POISONED: + case TYPE_IXX: + case TYPE_UXX: + case TYPE_FXX: + case TYPE_META_TYPE: + UNREACHABLE; case TYPE_TYPEDEF: - return type->backend_type = BACKEND_TYPE(type->canonical); + return type->backend_type = llvm_get_type(context, type->canonical); case TYPE_STRUCT: case TYPE_UNION: case TYPE_ENUM: case TYPE_ERROR: case TYPE_ERROR_UNION: - return type->backend_type = gencontext_create_llvm_type_from_decl(context, type->decl); + return type->backend_type = llvm_type_from_decl(context, type->decl); case TYPE_FUNC: - return type->backend_type = gencontext_create_llvm_func_type(context, type); + return type->backend_type = llvm_func_type(context, type); case TYPE_VOID: + return type->backend_type = LLVMVoidTypeInContext(context); case TYPE_F64: + return type->backend_type = LLVMDoubleTypeInContext(context); case TYPE_F32: + return type->backend_type = LLVMFloatTypeInContext(context); case TYPE_U64: - case TYPE_POISONED: - case TYPE_BOOL: - case TYPE_I8: - case TYPE_I16: - case TYPE_I32: case TYPE_I64: - case TYPE_IXX: - case TYPE_U8: - case TYPE_U16: + return type->backend_type = LLVMIntTypeInContext(context, 64U); case TYPE_U32: - case TYPE_UXX: - case TYPE_FXX: - UNREACHABLE; + case TYPE_I32: + return type->backend_type = LLVMIntTypeInContext(context, 32U); + case TYPE_U16: + case TYPE_I16: + return type->backend_type = 
LLVMIntTypeInContext(context, 16U); + case TYPE_U8: + case TYPE_I8: + return type->backend_type = LLVMIntTypeInContext(context, 8U); + case TYPE_BOOL: + return type->backend_type = LLVMIntTypeInContext(context, 1U); case TYPE_POINTER: - return type->backend_type = gencontext_create_llvm_type_from_ptr(context, type); + return type->backend_type = llvm_type_from_ptr(context, type); case TYPE_STRING: // TODO return type->backend_type = LLVMPointerType(LLVMTYPE(type_char), 0); case TYPE_ARRAY: - return type->backend_type = gencontext_create_llvm_type_from_array(context, type); + return type->backend_type = llvm_type_from_array(context, type); case TYPE_SUBARRAY: { - LLVMTypeRef base_type = BACKEND_TYPE(type->array.base); - LLVMTypeRef size_type = BACKEND_TYPE(type_usize); + LLVMTypeRef base_type = llvm_get_type(context, type->array.base); + LLVMTypeRef size_type = llvm_get_type(context, type_usize); assert(type->array.base->canonical->type_kind == TYPE_POINTER); - LLVMTypeRef array_type = LLVMStructCreateNamed(LLVMCONTEXT(context), type->name); + LLVMTypeRef array_type = LLVMStructCreateNamed(context, type->name); LLVMTypeRef types[2] = { base_type, size_type }; LLVMStructSetBody(array_type, types, 2, false); return type->backend_type = array_type; } case TYPE_VARARRAY: - return type->backend_type = LLVMPointerType(BACKEND_TYPE(type->array.base), 0); + return type->backend_type = LLVMPointerType(llvm_get_type(context, type->array.base), 0); } UNREACHABLE; } +LLVMTypeRef gencontext_get_llvm_type(GenContext *context, Type *type) +{ + return llvm_get_type(context->context, type); +} + +void llvm_set_struct_size_alignment(Decl *decl) +{ + LLVMTypeRef type = llvm_get_type(LLVMGetGlobalContext(), decl->type); + decl->strukt.size = LLVMStoreSizeOfType(target_data_layout(), type); + decl->strukt.alignment = LLVMPreferredAlignmentOfType(target_data_layout(), type); +} diff --git a/src/compiler/parser.c b/src/compiler/parser.c index 53946691b..26b9e50b2 100644 --- 
a/src/compiler/parser.c +++ b/src/compiler/parser.c @@ -4,8 +4,6 @@ #include "compiler_internal.h" -const int MAX_DOCS_ROWS = 1024; - Token module = { .type = TOKEN_INVALID_TOKEN }; static Ast *parse_stmt(Context *context); static Expr *parse_expr(Context *context); @@ -29,8 +27,8 @@ extern ParseRule rules[TOKEN_EOF + 1]; void context_store_lexer_state(Context *context) { - assert(!context->stored.has_stored && "Nested lexer store is forbidden"); - context->stored.has_stored = true; + assert(!context->stored.in_lookahead && "Nested lexer store is forbidden"); + context->stored.in_lookahead = true; context->stored.current = context->lexer.current; context->stored.start = context->lexer.lexing_start; context->stored.tok = context->tok; @@ -38,13 +36,12 @@ void context_store_lexer_state(Context *context) context->stored.lead_comment = context->lead_comment; context->stored.trailing_comment = context->trailing_comment; context->stored.next_lead_comment = context->next_lead_comment; - context->stored.comments = vec_size(context->comments); } void context_restore_lexer_state(Context *context) { - assert(context->stored.has_stored && "Tried to restore missing stored state."); - context->stored.has_stored = false; + assert(context->stored.in_lookahead && "Tried to restore missing stored state."); + context->stored.in_lookahead = false; context->lexer.current = context->stored.current; context->lexer.lexing_start = context->stored.start; context->tok = context->stored.tok; @@ -53,7 +50,6 @@ void context_restore_lexer_state(Context *context) context->next_lead_comment = context->stored.next_lead_comment; context->trailing_comment = context->stored.trailing_comment; context->prev_tok_end = context->tok.span.end_loc; - vec_resize(context->comments, context->stored.comments); } inline void advance(Context *context) @@ -70,6 +66,12 @@ inline void advance(Context *context) if (context->next_tok.type == TOKEN_INVALID_TOKEN) continue; + if (context->stored.in_lookahead && 
(context->next_tok.type == TOKEN_COMMENT + || context->next_tok.type == TOKEN_DOC_COMMENT)) + { + continue; + } + // Walk through any regular comments if (context->next_tok.type == TOKEN_COMMENT) { @@ -340,8 +342,6 @@ static Ast* parse_function_block(Context *context) return ast; } - - static Path *parse_path_prefix(Context *context) { if (context->tok.type != TOKEN_IDENT || context->next_tok.type != TOKEN_SCOPE) return NULL; @@ -402,7 +402,7 @@ static Path *parse_path_prefix(Context *context) * ; * * Assume prev_token is the type. - * @return Type (poisoned if fails) + * @return TypeInfo (poisoned if fails) */ static inline TypeInfo *parse_base_type(Context *context) { @@ -657,15 +657,6 @@ static Ast *parse_declaration_stmt(Context *context) } -typedef enum -{ - NEXT_WAS_ERROR, - NEXT_WAS_EXPR, - NEXT_WAS_LABEL, - NEXT_WAS_DECL -} ExprCheck; - - /** * expr_stmt ::= expression EOS * @return Ast* poisoned if expression fails to parse. @@ -784,6 +775,14 @@ static inline Ast* parse_if_stmt(Context *context) return if_ast; } +/** + * while_stmt + * : WHILE '(' control_expression ')' statement + * ; + * + * @param context + * @return the while AST + */ static inline Ast* parse_while_stmt(Context *context) { Ast *while_ast = AST_NEW_TOKEN(AST_WHILE_STMT, context->tok); @@ -801,12 +800,13 @@ static inline Ast* parse_while_stmt(Context *context) * : DEFER statement * | DEFER catch statement * ; - * @return + * @return the defer AST */ static inline Ast* parse_defer_stmt(Context *context) { Ast *defer_stmt = AST_NEW_TOKEN(AST_DEFER_STMT, context->tok); advance_and_verify(context, TOKEN_DEFER); + // TODO catch defer_stmt->defer_stmt.body = TRY_AST(parse_stmt(context)); return defer_stmt; } @@ -840,7 +840,8 @@ static inline Ast* parse_catch_stmt(Context *context) return catch_stmt; } -static inline Ast* parse_asm_stmt(Context *context) + +static inline Ast* parse_asm_stmt(Context *context __unused) { TODO } @@ -994,7 +995,7 @@ static inline Ast* parse_ct_switch_stmt(Context 
*context) stmt->ct_case_stmt.body = TRY_AST_OR(parse_stmt(context), &poisoned_ast); vec_add(switch_statements, stmt); break; - case TOKEN_DEFAULT: + case TOKEN_CT_DEFAULT: stmt = AST_NEW_TOKEN(AST_CT_CASE_STMT, context->tok); advance(context); CONSUME_OR(TOKEN_COLON, &poisoned_ast); @@ -1131,7 +1132,7 @@ static Ast *parse_throw_stmt(Context *context) { Ast *ast = AST_NEW_TOKEN(AST_THROW_STMT, context->tok); advance_and_verify(context, TOKEN_THROW); - ast->throw_stmt = TRY_EXPR_OR(parse_expr(context), &poisoned_ast); + ast->throw_stmt.throw_value = TRY_EXPR_OR(parse_expr(context), &poisoned_ast); RETURN_AFTER_EOS(ast); } @@ -1174,7 +1175,7 @@ static inline bool is_expr_after_type_ident(Context *context) return context->next_tok.type == TOKEN_DOT || context->next_tok.type == TOKEN_LPAREN; } -static bool parse_type_or_expr(Context *context, Expr **exprPtr, TypeInfo **typePtr) +static bool parse_type_or_expr(Context *context, Expr **expr_ptr, TypeInfo **type_ptr) { switch (context->tok.type) { @@ -1203,8 +1204,8 @@ static bool parse_type_or_expr(Context *context, Expr **exprPtr, TypeInfo **type case TOKEN_C_ULONGLONG: case TOKEN_TYPE_IDENT: if (context->next_tok.type == TOKEN_DOT || context->next_tok.type == TOKEN_LPAREN) break; - *typePtr = parse_type_expression(context); - return type_info_ok(*typePtr); + *type_ptr = parse_type_expression(context); + return type_info_ok(*type_ptr); case TOKEN_IDENT: if (context->next_tok.type == TOKEN_SCOPE) { @@ -1217,8 +1218,8 @@ static bool parse_type_or_expr(Context *context, Expr **exprPtr, TypeInfo **type if (context->tok.type == TOKEN_TYPE_IDENT && !is_expr_after_type_ident(context)) { context_restore_lexer_state(context); - *typePtr = parse_type_expression(context); - return type_info_ok(*typePtr); + *type_ptr = parse_type_expression(context); + return type_info_ok(*type_ptr); } context_restore_lexer_state(context); } @@ -1234,20 +1235,20 @@ static bool parse_type_or_expr(Context *context, Expr **exprPtr, TypeInfo **type 
CONSUME_OR(TOKEN_RPAREN, false); if (inner_expr) { - *typePtr = type_info_new(TYPE_INFO_EXPRESSION); - (**typePtr).unresolved_type_expr = inner_expr; + *type_ptr = type_info_new(TYPE_INFO_EXPRESSION); + (**type_ptr).unresolved_type_expr = inner_expr; return true; } Expr *type_expr = expr_new(EXPR_TYPE, start); type_expr->type_expr.type = inner_type; - *exprPtr = parse_precedence_with_left_side(context, type_expr, PREC_ASSIGNMENT); - return expr_ok(*exprPtr); + *expr_ptr = parse_precedence_with_left_side(context, type_expr, PREC_ASSIGNMENT); + return expr_ok(*expr_ptr); } default: break; } - *exprPtr = parse_expr(context); - return expr_ok(*exprPtr); + *expr_ptr = parse_expr(context); + return expr_ok(*expr_ptr); } @@ -1361,6 +1362,7 @@ static inline Ast* parse_switch_stmt(Context *context) return switch_ast; } + static Ast *parse_stmt(Context *context) { switch (context->tok.type) @@ -1413,6 +1415,8 @@ static Ast *parse_stmt(Context *context) return parse_label_stmt(context); } return parse_expr_stmt(context); + case TOKEN_AT: + return parse_expr_stmt(context); case TOKEN_IDENT: if (context->next_tok.type == TOKEN_SCOPE) { @@ -1487,7 +1491,6 @@ static Ast *parse_stmt(Context *context) case TOKEN_PLUS: case TOKEN_MINUSMINUS: case TOKEN_PLUSPLUS: - case TOKEN_AT_IDENT: case TOKEN_HASH_IDENT: case TOKEN_CT_IDENT: case TOKEN_STRING: @@ -1500,7 +1503,6 @@ static Ast *parse_stmt(Context *context) case TOKEN_INVALID_TOKEN: advance(context); return &poisoned_ast; - case TOKEN_AT: case TOKEN_COLON: case TOKEN_COMMA: case TOKEN_EQ: @@ -1638,7 +1640,6 @@ static inline bool parse_optional_module_params(Context *context, Token **tokens case TOKEN_COMMA: sema_error_range(context->next_tok.span, "Unexpected ','"); return false; - case TOKEN_AT_IDENT: case TOKEN_CT_IDENT: case TOKEN_HASH_IDENT: case TOKEN_TYPE_IDENT: @@ -1830,24 +1831,14 @@ static Expr *parse_precedence(Context *context, Precedence precedence) return parse_precedence_with_left_side(context, left_side, precedence); 
} +static inline Expr* parse_non_assign_expr(Context *context) +{ + return parse_precedence(context, PREC_ASSIGNMENT + 1); +} static inline Expr* parse_expr(Context *context) { - - SourceRange start = context->tok.span; - bool found_try = try_consume(context, TOKEN_TRY); - Expr *expr = TRY_EXPR_OR(parse_precedence(context, PREC_ASSIGNMENT), &poisoned_expr); - if (found_try) - { - Expr *try_expr = expr_new(EXPR_TRY, start); - try_expr->try_expr.expr = expr; - if (try_consume(context, TOKEN_ELSE)) - { - try_expr->try_expr.else_expr = TRY_EXPR_OR(parse_precedence(context, PREC_ASSIGNMENT), &poisoned_expr); - } - return try_expr; - } - return expr; + return parse_precedence(context, PREC_ASSIGNMENT); } static inline Expr *parse_paren_expr(Context *context) @@ -1942,10 +1933,10 @@ static inline Decl *parse_global_declaration(Context *context, Visibility visibi * ; * * attribute - * : AT_IDENT - * | path AT_IDENT - * | AT_IDENT '(' constant_expression ')' - * | path AT_IDENT '(' constant_expression ')' + * : AT IDENT + * | AT path IDENT + * | AT IDENT '(' constant_expression ')' + * | AT path IDENT '(' constant_expression ')' * ; * * @return true if parsing succeeded, false if recovery is needed @@ -1954,7 +1945,7 @@ static inline bool parse_attributes(Context *context, Decl *parent_decl) { parent_decl->attributes = NULL; - while (context->tok.type == TOKEN_AT_IDENT || (context->tok.type == TOKEN_IDENT && context->next_tok.type == TOKEN_SCOPE)) + while (try_consume(context, TOKEN_AT)) { Path *path = parse_path_prefix(context); @@ -1963,7 +1954,7 @@ static inline bool parse_attributes(Context *context, Decl *parent_decl) attr->name = context->tok; attr->path = path; - TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected an attribute", false); + TRY_CONSUME_OR(TOKEN_IDENT, "Expected an attribute", false); if (context->tok.type == TOKEN_LPAREN) { @@ -2259,19 +2250,21 @@ static inline bool parse_opt_throw_declaration(Context *context, Visibility visi } if (!try_consume(context, 
TOKEN_THROWS)) return true; - if (context->tok.type != TOKEN_TYPE_IDENT) + if (context->tok.type != TOKEN_TYPE_IDENT && context->tok.type != TOKEN_IDENT) { - VECADD(signature->throws, &all_error); + signature->throw_any = true; return true; } Decl **throws = NULL; - while (context->tok.type == TOKEN_TYPE_IDENT) - { - Decl *error = decl_new(DECL_ERROR, context->tok, visibility); - advance(context); - VECADD(throws, error); - if (!try_consume(context, TOKEN_COMMA)) break; - } + while (1) + { + TypeInfo *type_info = parse_base_type(context); + if (!type_info_ok(type_info)) return false; + Decl *throw = decl_new(DECL_THROWS, context->tok, visibility); + throw->throws = type_info; + VECADD(throws, throw); + if (!try_consume(context, TOKEN_COMMA)) break; + } switch (context->tok.type) { case TOKEN_TYPE_IDENT: @@ -2346,8 +2339,8 @@ static AttributeDomains TOKEN_TO_ATTR[TOKEN_EOF + 1] = { /** * attribute_declaration - * : ATTRIBUTE attribute_domains AT_IDENT ';' - * | ATTRIBUTE attribute_domains AT_IDENT '(' parameter_type_list ')' ';' + * : ATTRIBUTE attribute_domains IDENT ';' + * | ATTRIBUTE attribute_domains IDENT '(' parameter_type_list ')' ';' * ; * * attribute_domains @@ -2387,8 +2380,8 @@ static inline Decl *parse_attribute_declaration(Context *context, Visibility vis if (!try_consume(context, TOKEN_COMMA)) break; last_domain = TOKEN_TO_ATTR[context->tok.type]; } - TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected an attribute name.", &poisoned_decl); Decl *decl = decl_new(DECL_ATTRIBUTE, context->tok, visibility); + TRY_CONSUME_OR(TOKEN_IDENT, "Expected an attribute name.", &poisoned_decl); if (last_domain == 0) { SEMA_TOKEN_ERROR(context->tok, "Expected at least one domain for attribute '%s'.", decl->name); @@ -2449,14 +2442,14 @@ static inline Decl *parse_macro_declaration(Context *context, Visibility visibil advance_and_verify(context, TOKEN_MACRO); TypeInfo *rtype = NULL; - if (context->tok.type != TOKEN_AT_IDENT) + if (context->tok.type != TOKEN_IDENT) { rtype = 
TRY_TYPE_OR(parse_type_expression(context), &poisoned_decl); } Decl *decl = decl_new(DECL_MACRO, context->tok, visibility); decl->macro_decl.rtype = rtype; - TRY_CONSUME_OR(TOKEN_AT_IDENT, "Expected a macro name starting with '@'", &poisoned_decl); + TRY_CONSUME_OR(TOKEN_IDENT, "Expected a macro name here", &poisoned_decl); CONSUME_OR(TOKEN_LPAREN, &poisoned_decl); Decl **params = NULL; @@ -2467,7 +2460,6 @@ static inline Decl *parse_macro_declaration(Context *context, Visibility visibil switch (context->tok.type) { case TOKEN_IDENT: - case TOKEN_AT_IDENT: case TOKEN_CT_IDENT: case TOKEN_HASH_IDENT: break; @@ -3456,7 +3448,6 @@ static Expr *parse_maybe_scope(Context *context, Expr *left) { case TOKEN_IDENT: case TOKEN_CT_IDENT: - case TOKEN_AT_IDENT: case TOKEN_CONST_IDENT: return parse_identifier_with_path(context, path); case TOKEN_TYPE_IDENT: @@ -3480,6 +3471,28 @@ static Expr *parse_type_expr(Context *context, Expr *left) return expr; } +static Expr *parse_try_expr(Context *context, Expr *left) +{ + assert(!left && "Unexpected left hand side"); + Expr *try_expr = EXPR_NEW_TOKEN(EXPR_TRY, context->tok); + advance_and_verify(context, TOKEN_TRY); + try_expr->try_expr.expr = TRY_EXPR_OR(parse_precedence(context, PREC_TRY + 1), &poisoned_expr); + if (try_consume(context, TOKEN_ELSE)) + { + try_expr->try_expr.else_expr = TRY_EXPR_OR(parse_precedence(context, PREC_ASSIGNMENT), &poisoned_expr); + } + return try_expr; +} + +static Expr *parse_macro_expr(Context *context, Expr *left) +{ + assert(!left && "Unexpected left hand side"); + Expr *macro_expr = EXPR_NEW_TOKEN(EXPR_MACRO_EXPR, context->tok); + advance_and_verify(context, TOKEN_AT); + macro_expr->macro_expr = TRY_EXPR_OR(parse_precedence(context, PREC_UNARY + 1), &poisoned_expr); + return macro_expr; +} + static Expr *parse_cast_expr(Context *context, Expr *left) { assert(!left && "Unexpected left hand side"); @@ -3501,6 +3514,7 @@ ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_LPAREN] = { parse_grouping_expr, 
parse_call_expr, PREC_CALL }, [TOKEN_TYPE] = { parse_type_expr, NULL, PREC_NONE }, [TOKEN_CAST] = { parse_cast_expr, NULL, PREC_NONE }, + [TOKEN_TRY] = { parse_try_expr, NULL, PREC_TRY }, //[TOKEN_SIZEOF] = { parse_sizeof, NULL, PREC_NONE }, [TOKEN_LBRACKET] = { NULL, parse_subscript_expr, PREC_CALL }, [TOKEN_MINUS] = { parse_unary_expr, parse_binary, PREC_ADDITIVE }, @@ -3532,7 +3546,7 @@ ParseRule rules[TOKEN_EOF + 1] = { [TOKEN_IDENT] = { parse_maybe_scope, NULL, PREC_NONE }, [TOKEN_TYPE_IDENT] = { parse_type_identifier, NULL, PREC_NONE }, [TOKEN_CT_IDENT] = { parse_identifier, NULL, PREC_NONE }, - [TOKEN_AT_IDENT] = { parse_identifier, NULL, PREC_NONE }, + [TOKEN_AT] = { parse_macro_expr, NULL, PREC_UNARY }, [TOKEN_CONST_IDENT] = { parse_identifier, NULL, PREC_NONE }, [TOKEN_STRING] = { parse_string_literal, NULL, PREC_NONE }, [TOKEN_FLOAT] = { parse_double, NULL, PREC_NONE }, diff --git a/src/compiler/semantic_analyser.c b/src/compiler/semantic_analyser.c index ab4e446dc..795011897 100644 --- a/src/compiler/semantic_analyser.c +++ b/src/compiler/semantic_analyser.c @@ -60,6 +60,14 @@ static inline void context_pop_defers_to(Context *context, DeferList *list) context_pop_defers(context); } +static inline void context_add_exit(Context *context, ExitType exit) +{ + if (context->current_scope->exit < exit) + { + context->current_scope->exit = exit; + } +} + static inline void context_pop_scope(Context *context) { assert(context->current_scope != &context->scopes[0]); @@ -201,7 +209,6 @@ static inline bool sema_analyse_struct_union(Context *context, Decl *decl) } } DEBUG_LOG("Analysis complete."); - // Todo, resolve alignment, size etc. 
return decl_ok(decl); } @@ -235,7 +242,7 @@ static inline bool sema_analyse_function_param(Context *context, Decl *param, bo static inline Type *sema_analyse_function_signature(Context *context, FunctionSignature *signature, bool is_function) { - char buffer[2048]; + char buffer[MAX_FUNCTION_SIGNATURE_SIZE + 200]; size_t buffer_write_offset = 0; bool all_ok = true; all_ok = sema_resolve_type_info(context, signature->rtype) && all_ok; @@ -244,8 +251,14 @@ static inline Type *sema_analyse_function_signature(Context *context, FunctionSi type_append_signature_name(signature->rtype->type, buffer, &buffer_write_offset); buffer[buffer_write_offset++] = '('; } + if (vec_size(signature->params) > MAX_PARAMS) + { + SEMA_ERROR(signature->params[MAX_PARAMS], "Number of params exceeds %d which is unsupported.", MAX_PARAMS); + return false; + } STable *names = &context->scratch_table; stable_clear(names); + VECEACH(signature->params, i) { Decl *param = signature->params[i]; @@ -284,20 +297,28 @@ static inline Type *sema_analyse_function_signature(Context *context, FunctionSi buffer[buffer_write_offset++] = '.'; } buffer[buffer_write_offset++] = ')'; + if (signature->throw_any) + { + assert(!signature->throws); + buffer[buffer_write_offset++] = '!'; + } if (vec_size(signature->throws)) { buffer[buffer_write_offset++] = '!'; VECEACH(signature->throws, i) { - TODO + Decl *err_decl = signature->throws[i]; + if (!sema_analyse_decl(context, err_decl)) + { + continue; + } if (i > 0 && all_ok) { - buffer[buffer_write_offset++] = ','; + buffer[buffer_write_offset++] = '|'; } -// type_append_signature_name(signature->tparam->var.type, buffer, &buffer_write_offset); + type_append_signature_name(err_decl->type, buffer, &buffer_write_offset); } } - if (!all_ok) return NULL; TokenType type = TOKEN_INVALID_TOKEN; signature->mangled_signature = symtab_add(buffer, buffer_write_offset, fnv1a(buffer, buffer_write_offset), &type); @@ -587,6 +608,7 @@ static inline bool 
sema_analyse_goto_stmt(Context *context, Ast *statement) } } vec_add(context->gotos, statement); + context_add_exit(context, EXIT_GOTO); return true; } @@ -864,14 +886,47 @@ static bool sema_analyse_switch_stmt(Context *context, Ast *statement) return success; } -static bool sema_analyse_try_stmt(Context *context __unused, Ast *statement __unused) +static bool sema_analyse_try_stmt(Context *context, Ast *statement) { - TODO + context->try_nesting++; + unsigned errors = vec_size(context->errors); + if (!sema_analyse_statement(context, statement->try_stmt)) + { + context->try_nesting--; + return false; + } + unsigned new_errors = vec_size(context->errors); + if (new_errors == errors) + { + SEMA_ERROR(statement, "No error to 'try' in the statement that follows, please remove the 'try'."); + return false; + } + for (unsigned i = errors; i < new_errors; i++) + { + // At least one uncaught error found! + if (context->errors[i]) return true; + } + SEMA_ERROR(statement, "All errors in the following statement was caught, please remove the 'try'."); + return false; } -static bool sema_analyse_throw_stmt(Context *context __unused, Ast *statement __unused) +static bool sema_analyse_throw_stmt(Context *context, Ast *statement) { - TODO + Expr *throw_value = statement->throw_stmt.throw_value; + if (!sema_analyse_expr(context, NULL, throw_value)) return false; + Type *type = throw_value->type->canonical; + if (type->type_kind != TYPE_ERROR) + { + SEMA_ERROR(throw_value, "Only 'error' types can be thrown, this is a '%s'.", type->name); + return false; + } + if (!context->try_nesting && !func_has_error_return(&context->active_function_for_analysis->func.function_signature)) + { + SEMA_ERROR(statement, "This 'throw' is not handled, please add a 'throws %s' clause to the function signature or use try-catch.", type->name); + return false; + } + VECADD(context->errors, type->decl); + return true; } @@ -998,6 +1053,12 @@ static inline bool sema_analyse_function_body(Context *context, 
Decl *func) context->current_scope = &context->scopes[0]; // Clean out the current scope. memset(context->current_scope, 0, sizeof(*context->current_scope)); + + // Clear try handling + vec_resize(context->errors, 0); + context->try_nesting = 0; + + context->labels = NULL; context->gotos = NULL; context->last_local = &context->locals[0]; @@ -1222,10 +1283,42 @@ static inline bool sema_analyse_enum(Context *context, Decl *decl) return success; } +static inline bool sema_analyse_throws(Context *context, Decl *decl) +{ + if (!sema_resolve_type_info(context, decl->throws)) return false; + decl->type = decl->throws->type; + return true; +} + static inline bool sema_analyse_error(Context *context, Decl *decl) { - // TODO assign numbers to constants - return true; + Decl **constants = decl->error.error_constants; + unsigned size = vec_size(constants); + if (size > MAX_ERRORS) + { + SEMA_ERROR(decl, "More than %d errors declared in a single error type.", MAX_ERRORS); + return false; + } + bool success = true; + for (unsigned i = 0; i < size; i++) + { + Decl *constant = constants[i]; + for (unsigned j = 0; j < i; j++) + { + if (constant->name == constants[j]->name) + { + SEMA_ERROR(constant, "Duplicate error names, please remove one of them."); + SEMA_PREV(constants[j], "The previous declaration was here."); + decl_poison(constant); + decl_poison(constants[j]); + success = false; + break; + } + } + constant->error_constant.value = i; + constant->resolve_status = RESOLVE_DONE; + } + return success; } bool sema_analyse_decl(Context *context, Decl *decl) @@ -1245,10 +1338,12 @@ bool sema_analyse_decl(Context *context, Decl *decl) switch (decl->decl_kind) { case DECL_THROWS: - TODO + if (!sema_analyse_throws(context, decl)) return decl_poison(decl); + break; case DECL_STRUCT: case DECL_UNION: if (!sema_analyse_struct_union(context, decl)) return decl_poison(decl); + llvm_set_struct_size_alignment(decl); decl_set_external_name(decl); break; case DECL_FUNC: @@ -1408,17 +1503,18 
@@ static bool sema_resolve_type_identifier(Context *context, TypeInfo *type_info) type_info->unresolved.path, &ambiguous_decl); + if (!decl) + { + SEMA_TOKEN_ERROR(type_info->unresolved.name_loc, "Unknown type '%s'.", type_info->unresolved.name_loc.string); + return type_info_poison(type_info); + } + // Already handled if (!decl_ok(decl)) { return type_info_poison(type_info); } - if (!decl) - { - SEMA_TOKEN_ERROR(type_info->unresolved.name_loc, "Unknown type '%s'.", type_info->unresolved.name_loc.string); - return type_info_poison(type_info); - } if (ambiguous_decl) { diff --git a/src/compiler/target.c b/src/compiler/target.c index ae0ad82fe..3b9a8039b 100644 --- a/src/compiler/target.c +++ b/src/compiler/target.c @@ -1,25 +1,26 @@ #include #include +#include +#include #include "compiler_internal.h" -typedef struct -{ - LLVMTargetRef target; - LLVMTargetMachineRef machine; - LLVMTargetDataRef data_layout; - int alloca_address_space; -} Target; +static unsigned arch_pointer_bit_width(ArchType arch); +static ArchType arch_from_llvm_string(const char *string); +static unsigned os_target_c_type_bits(OsType os, ArchType arch, CType type); +static OsType os_from_llvm_string(const char *string); +static VendorType vendor_from_llvm_string(const char *string); -static Target target = {}; +Target build_target = {}; int target_alloca_addr_space() { - return target.alloca_address_space; + return build_target.alloca_address_space; } + void target_setup() { - assert(!target.target); + assert(!build_target.target); LLVMInitializeAllTargetInfos(); LLVMInitializeAllTargetMCs(); @@ -27,20 +28,20 @@ void target_setup() LLVMInitializeAllAsmPrinters(); LLVMInitializeAllAsmParsers(); - target.target = NULL; + build_target.target = NULL; if (!build_options.target) { build_options.target = LLVMGetDefaultTargetTriple(); } char *err = NULL; - if (LLVMGetTargetFromTriple(build_options.target, &target.target, &err) != 0) + if (LLVMGetTargetFromTriple(build_options.target, ((LLVMTargetRef 
*)&build_target.target), &err) != 0) { error_exit("Could not create target: %s", err); // Usually we would dispose of err, but no need to do it due to exit. } - target.alloca_address_space = 0; + build_target.alloca_address_space = 0; DEBUG_LOG("Target set to %s.", build_options.target); // Create a specific target machine @@ -76,29 +77,395 @@ void target_setup() { opt->features = ""; }*/ - if (!(target.machine = LLVMCreateTargetMachine(target.target, build_options.target, build_options.cpu, "", level, reloc_mode, - LLVMCodeModelDefault))) { + if (!(build_target.machine = LLVMCreateTargetMachine(build_target.target, build_options.target, "", "", level, reloc_mode, + LLVMCodeModelDefault))) { error_exit("Failed to create target machine."); } - // The below is broken for the AMDGPU target. - target.alloca_address_space = 0; - target.data_layout = LLVMCreateTargetDataLayout(target.machine); - build_options.pointer_size = (int)LLVMPointerSize(target.data_layout); - DEBUG_LOG("Deduced pointer size to be %d bits", build_options.pointer_size * 8); + + build_target.llvm_data_layout = LLVMCreateTargetDataLayout(build_target.machine); + + char *target_triple = LLVMGetTargetMachineTriple(build_target.machine); + + build_target.arch_name = strdup(strtok(target_triple, "-")); + build_target.vendor_name = strdup(strtok(NULL, "-")); + build_target.os_name = strdup(strtok(NULL, "0123456789")); + + LLVMDisposeMessage(target_triple); + + build_target.arch = arch_from_llvm_string(build_target.arch_name); + build_target.os = os_from_llvm_string(build_target.os_name); + build_target.vendor = vendor_from_llvm_string(build_target.vendor_name); + + build_target.width_pointer = arch_pointer_bit_width(build_target.arch); + assert(build_target.width_pointer == LLVMPointerSize(build_target.llvm_data_layout) * 8); + build_target.alloca_address_space = 0; + + + LLVMTypeRef byte_type = LLVMIntType(8); + LLVMTypeRef short_type = LLVMIntType(16); + LLVMTypeRef int_type = LLVMIntType(32); + 
LLVMTypeRef long_type = LLVMIntType(64); + LLVMTypeRef float_type = LLVMFloatType(); + LLVMTypeRef double_type = LLVMDoubleType(); + LLVMTypeRef quad_type = LLVMFP128Type(); + build_target.align_byte = LLVMABIAlignmentOfType(build_target.llvm_data_layout, byte_type); + build_target.align_short = LLVMABIAlignmentOfType(build_target.llvm_data_layout, short_type); + build_target.align_int = LLVMABIAlignmentOfType(build_target.llvm_data_layout, int_type); + build_target.align_long = LLVMABIAlignmentOfType(build_target.llvm_data_layout, long_type); + build_target.align_f128 = LLVMABIAlignmentOfType(build_target.llvm_data_layout, quad_type); + build_target.align_double = LLVMABIAlignmentOfType(build_target.llvm_data_layout, double_type); + build_target.align_float = LLVMABIAlignmentOfType(build_target.llvm_data_layout, float_type); + build_target.little_endian = LLVMByteOrder(build_target.llvm_data_layout) == LLVMLittleEndian; + build_target.width_c_short = os_target_c_type_bits(build_target.os, build_target.arch, CTYPE_SHORT); + build_target.width_c_int = os_target_c_type_bits(build_target.os, build_target.arch, CTYPE_INT); + build_target.width_c_long = os_target_c_type_bits(build_target.os, build_target.arch, CTYPE_LONG); + build_target.width_c_long_long = os_target_c_type_bits(build_target.os, build_target.arch, CTYPE_LONG_LONG); + + builtin_setup(&build_target); + } void target_destroy() { - assert(target.machine); - LLVMDisposeTargetMachine(target.machine); + assert(build_target.machine); + LLVMDisposeTargetMachine(build_target.machine); +} + +void *target_target() +{ + return build_target.target; } void *target_machine() { - return target.machine; + return build_target.machine; } void *target_data_layout() { - return target.data_layout; -} \ No newline at end of file + return build_target.llvm_data_layout; +} + +static ArchType arch_from_llvm_string(const char *string) +{ +#define STRCASE(_str, _arch) if (strcmp(string, _str) == 0) return _arch; + STRCASE("i386", 
ARCH_TYPE_X86) + STRCASE("i486", ARCH_TYPE_X86) + STRCASE("i586", ARCH_TYPE_X86) + STRCASE("i686", ARCH_TYPE_X86) + STRCASE("i786", ARCH_TYPE_X86) + STRCASE("i886", ARCH_TYPE_X86) + STRCASE("i986", ARCH_TYPE_X86) + STRCASE("aarch64", ARCH_TYPE_AARCH64) + STRCASE("arm64", ARCH_TYPE_AARCH64) + STRCASE("aarch64_be", ARCH_TYPE_AARCH64_BE) + STRCASE("aarch64_32", ARCH_TYPE_AARCH64_32) + STRCASE("arm64_32", ARCH_TYPE_AARCH64_32) + STRCASE("arm", ARCH_TYPE_ARM) + STRCASE("xscale", ARCH_TYPE_ARM) + STRCASE("armeb", ARCH_TYPE_ARMB) + STRCASE("xscaleeb", ARCH_TYPE_ARMB) + STRCASE("arc", ARCH_TYPE_ARC) + STRCASE("avr", ARCH_TYPE_AVR) + STRCASE("bpfeb", ARCH_TYPE_BPFEB) + STRCASE("bpfel", ARCH_TYPE_BPFEL) + STRCASE("hexagon", ARCH_TYPE_HEXAGON) + STRCASE("mips", ARCH_TYPE_MIPS) + STRCASE("mipseb", ARCH_TYPE_MIPS) + STRCASE("mipsallegrex", ARCH_TYPE_MIPS) + STRCASE("mipsisa32r6", ARCH_TYPE_MIPS) + STRCASE("mipsr6", ARCH_TYPE_MIPS) + STRCASE("mipsel", ARCH_TYPE_MIPSEL) + STRCASE("mipsallegrexel", ARCH_TYPE_MIPSEL) + STRCASE("mipsisa32r6el", ARCH_TYPE_MIPSEL) + STRCASE("mipsr6el", ARCH_TYPE_MIPSEL) + STRCASE("mips64", ARCH_TYPE_MIPS64) + STRCASE("mips64eb", ARCH_TYPE_MIPS64) + STRCASE("mipsn32", ARCH_TYPE_MIPS64) + STRCASE("mipsisa64r6", ARCH_TYPE_MIPS64) + STRCASE("mips64r6", ARCH_TYPE_MIPS64) + STRCASE("mipsn32r6", ARCH_TYPE_MIPS64) + STRCASE("mips64el", ARCH_TYPE_MIPS64EL) + STRCASE("mipsn32el", ARCH_TYPE_MIPS64EL) + STRCASE("mipsisa64r6el", ARCH_TYPE_MIPS64EL) + STRCASE("mips64r6el", ARCH_TYPE_MIPS64EL) + STRCASE("mipsn32r6el", ARCH_TYPE_MIPS64EL) + STRCASE("msp430", ARCH_TYPE_MSP430) + STRCASE("powerpc64", ARCH_TYPE_PPC64) + STRCASE("ppu", ARCH_TYPE_PPC64) + STRCASE("ppc64", ARCH_TYPE_PPC64) + STRCASE("powerpc64le", ARCH_TYPE_PPC64LE) + STRCASE("ppc64le", ARCH_TYPE_PPC64LE) + STRCASE("powerpc", ARCH_TYPE_PPC) + STRCASE("ppc", ARCH_TYPE_PPC) + STRCASE("ppc32", ARCH_TYPE_PPC) + STRCASE("r600", ARCH_TYPE_R600) + STRCASE("amdgcn", ARCH_TYPE_AMDGCN) + STRCASE("riscv32", 
ARCH_TYPE_RISCV32) + STRCASE("riscv64", ARCH_TYPE_RISCV64) + STRCASE("sparc", ARCH_TYPE_SPARC) + STRCASE("sparcel", ARCH_TYPE_SPARCEL) + STRCASE("sparcv9", ARCH_TYPE_SPARCV9) + STRCASE("sparc64", ARCH_TYPE_SPARCV9) + STRCASE("systemz", ARCH_TYPE_SYSTEMZ) + STRCASE("s390x", ARCH_TYPE_SYSTEMZ) + STRCASE("tce", ARCH_TYPE_TCE) + STRCASE("tcele", ARCH_TYPE_TCELE) + STRCASE("thumb", ARCH_TYPE_THUMB) + STRCASE("thumbeb", ARCH_TYPE_THUMBEB) + STRCASE("x86_64", ARCH_TYPE_X86_64) + STRCASE("amd64", ARCH_TYPE_X86_64) + STRCASE("x86_64h", ARCH_TYPE_X86_64) + STRCASE("xcore", ARCH_TYPE_XCORE) + STRCASE("nvptx", ARCH_TYPE_NVPTX) + STRCASE("nvptx64", ARCH_TYPE_NVPTX64) + STRCASE("le32", ARCH_TYPE_LE32) + STRCASE("le64", ARCH_TYPE_LE64) + STRCASE("amdil", ARCH_TYPE_AMDIL) + STRCASE("amdil64", ARCH_TYPE_AMDIL64) + STRCASE("hsail", ARCH_TYPE_HSAIL) + STRCASE("hsail64", ARCH_TYPE_HSAIL64) + STRCASE("spir", ARCH_TYPE_SPIR) + STRCASE("spir64", ARCH_TYPE_SPIR64) + STRCASE("kalimba", ARCH_TYPE_KALIMBA) + STRCASE("lanai", ARCH_TYPE_LANAI) + STRCASE("shave", ARCH_TYPE_SHAVE) + STRCASE("wasm32", ARCH_TYPE_WASM32) + STRCASE("wasm64", ARCH_TYPE_WASM64) + STRCASE("renderscript32", ARCH_TYPE_RSCRIPT32) + STRCASE("renderscript64", ARCH_TYPE_RSCRIPT64) + return ARCH_TYPE_UNKNOWN; +#undef STRCASE + // TODO parse arm & bpf names +} + + +static OsType os_from_llvm_string(const char *string) +{ +#define STRCASE(_str, _os) if (strcmp(string, _str) == 0) return _os; + STRCASE("ananas", OS_TYPE_ANANAS) + STRCASE("cloudabi", OS_TYPE_CLOUD_ABI) + STRCASE("darwin", OS_TYPE_DARWIN) + STRCASE("dragonfly", OS_TYPE_DRAGON_FLY) + STRCASE("freebsd", OS_TYPE_FREE_BSD) + STRCASE("fuchsia", OS_TYPE_FUCHSIA) + STRCASE("ios", OS_TYPE_IOS) + STRCASE("kfreebsd", OS_TYPE_KFREEBSD) + STRCASE("linux", OS_TYPE_LINUX) + STRCASE("lv2", OS_TYPE_PS3) + STRCASE("macosx", OS_TYPE_MACOSX) + STRCASE("netbsd", OS_TYPE_NETBSD) + STRCASE("openbsd", OS_TYPE_OPENBSD) + STRCASE("solaris", OS_TYPE_SOLARIS) + STRCASE("windows", 
OS_TYPE_WIN32) + STRCASE("haiku", OS_TYPE_HAIKU) + STRCASE("minix", OS_TYPE_MINIX) + STRCASE("rtems", OS_TYPE_RTEMS) + STRCASE("nacl", OS_TYPE_NACL) + STRCASE("cnk", OS_TYPE_CNK) + STRCASE("aix", OS_TYPE_AIX) + STRCASE("cuda", OS_TYPE_CUDA) + STRCASE("nvcl", OS_TYPE_NVOPENCL) + STRCASE("amdhsa", OS_TYPE_AMDHSA) + STRCASE("ps4", OS_TYPE_PS4) + STRCASE("elfiamcu", OS_TYPE_ELFIAMCU) + STRCASE("tvos", OS_TYPE_TVOS) + STRCASE("watchos", OS_TYPE_WATCHOS) + STRCASE("mesa3d", OS_TYPE_MESA3D) + STRCASE("contiki", OS_TYPE_CONTIKI) + STRCASE("amdpal", OS_TYPE_AMDPAL) + STRCASE("hermit", OS_TYPE_HERMITCORE) + STRCASE("hurd", OS_TYPE_HURD) + STRCASE("wasi", OS_TYPE_WASI) + STRCASE("emscripten", OS_TYPE_EMSCRIPTEN) + return OS_TYPE_UNKNOWN; +#undef STRCASE +} + +static VendorType vendor_from_llvm_string(const char *string) +{ +#define STRCASE(_str, _vendor) if (strcmp(string, _str) == 0) return _vendor; + STRCASE("apple", VENDOR_APPLE) + STRCASE("pc", VENDOR_PC) + STRCASE("scei", VENDOR_SCEI) + STRCASE("bgp", VENDOR_BGP) + STRCASE("bgq", VENDOR_BGQ) + STRCASE("fsl", VENDOR_FREESCALE) + STRCASE("ibm", VENDOR_IBM) + STRCASE("img", VENDOR_IMAGINATION_TECHNOLOGIES) + STRCASE("mti", VENDOR_MIPS_TECHNOLOGIES) + STRCASE("nvidia", VENDOR_NVIDIA) + STRCASE("csr", VENDOR_CSR) + STRCASE("myriad", VENDOR_MYRIAD) + STRCASE("amd", VENDOR_AMD) + STRCASE("mesa", VENDOR_MESA) + STRCASE("suse", VENDOR_SUSE) + STRCASE("oe", VENDOR_OPEN_EMBEDDED) + return VENDOR_UNKNOWN; +#undef STRCASE +} + +static unsigned arch_pointer_bit_width(ArchType arch) +{ + switch (arch) + { + case ARCH_TYPE_UNKNOWN: + return 0; + case ARCH_TYPE_MSP430: + case ARCH_TYPE_AVR: + return 16; + case ARCH_TYPE_ARM: + case ARCH_TYPE_ARMB: + case ARCH_TYPE_AARCH64_32: + case ARCH_TYPE_ARC: + case ARCH_TYPE_HEXAGON: + case ARCH_TYPE_MIPS: + case ARCH_TYPE_MIPSEL: + case ARCH_TYPE_PPC: + case ARCH_TYPE_R600: + case ARCH_TYPE_RISCV32: + case ARCH_TYPE_SPARC: + case ARCH_TYPE_SPARCEL: + case ARCH_TYPE_TCE: + case ARCH_TYPE_TCELE: + 
case ARCH_TYPE_THUMB: + case ARCH_TYPE_THUMBEB: + case ARCH_TYPE_X86: + case ARCH_TYPE_XCORE: + case ARCH_TYPE_NVPTX: + case ARCH_TYPE_LE32: + case ARCH_TYPE_AMDIL: + case ARCH_TYPE_HSAIL: + case ARCH_TYPE_SPIR: + case ARCH_TYPE_KALIMBA: + case ARCH_TYPE_SHAVE: + case ARCH_TYPE_LANAI: + case ARCH_TYPE_WASM32: + case ARCH_TYPE_RSCRIPT32: + return 32; + case ARCH_TYPE_SPIR64: + case ARCH_TYPE_RSCRIPT64: + case ARCH_TYPE_WASM64: + case ARCH_TYPE_LE64: + case ARCH_TYPE_BPFEL: + case ARCH_TYPE_BPFEB: + case ARCH_TYPE_AARCH64: + case ARCH_TYPE_AARCH64_BE: + case ARCH_TYPE_X86_64: + case ARCH_TYPE_SYSTEMZ: + case ARCH_TYPE_PPC64: + case ARCH_TYPE_SPARCV9: + case ARCH_TYPE_MIPS64: + case ARCH_TYPE_NVPTX64: + case ARCH_TYPE_AMDIL64: + case ARCH_TYPE_HSAIL64: + case ARCH_TYPE_RISCV64: + case ARCH_TYPE_AMDGCN: + case ARCH_TYPE_MIPS64EL: + case ARCH_TYPE_PPC64LE: + return 64; + default: + UNREACHABLE + } +} + +unsigned os_target_c_type_bits(OsType os, ArchType arch, CType type) +{ + switch (os) + { + case OS_TYPE_UNKNOWN: + if (arch == ARCH_TYPE_MSP430) + { + switch (type) + { + case CTYPE_SHORT: + case CTYPE_INT: + return 16; + case CTYPE_LONG: + return 32; + case CTYPE_LONG_LONG: + return 64; + default: + UNREACHABLE + } + } + // Use default + break; + case OS_TYPE_LINUX: + case OS_TYPE_DARWIN: + case OS_TYPE_MACOSX: + case OS_TYPE_FREE_BSD: + case OS_TYPE_NETBSD: + case OS_TYPE_DRAGON_FLY: + case OS_TYPE_OPENBSD: + case OS_TYPE_WASI: + case OS_TYPE_EMSCRIPTEN: + // Use default + break; + case OS_TYPE_WIN32: + switch (type) + { + case CTYPE_SHORT: + return 16; + case CTYPE_INT: + case CTYPE_LONG: + return 32; + case CTYPE_LONG_LONG: + return 64; + default: + UNREACHABLE + } + case OS_TYPE_IOS: + switch (type) + { + case CTYPE_SHORT: + return 16; + case CTYPE_INT: + return 32; + case CTYPE_LONG: + case CTYPE_LONG_LONG: + return 64; + default: + UNREACHABLE + } + case OS_TYPE_ANANAS: + case OS_TYPE_CLOUD_ABI: + case OS_TYPE_FUCHSIA: + case OS_TYPE_KFREEBSD: + case OS_TYPE_PS3: 
+ case OS_TYPE_SOLARIS: + case OS_TYPE_HAIKU: + case OS_TYPE_MINIX: + case OS_TYPE_RTEMS: + case OS_TYPE_NACL: + case OS_TYPE_CNK: + case OS_TYPE_AIX: + case OS_TYPE_CUDA: + case OS_TYPE_NVOPENCL: + case OS_TYPE_AMDHSA: + case OS_TYPE_PS4: + case OS_TYPE_ELFIAMCU: + case OS_TYPE_TVOS: + case OS_TYPE_WATCHOS: + case OS_TYPE_MESA3D: + case OS_TYPE_CONTIKI: + case OS_TYPE_AMDPAL: + case OS_TYPE_HERMITCORE: + case OS_TYPE_HURD: + TODO + } + switch (type) + { + case CTYPE_SHORT: + return 16; + case CTYPE_INT: + return 32; + case CTYPE_LONG: + return arch_pointer_bit_width(arch); + case CTYPE_LONG_LONG: + return 64; + default: + UNREACHABLE + } + +} diff --git a/src/compiler/target.h b/src/compiler/target.h new file mode 100644 index 000000000..d7c8058a7 --- /dev/null +++ b/src/compiler/target.h @@ -0,0 +1,197 @@ +#pragma once + +// Copyright (c) 2020 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a LGPLv3.0 +// a copy of which can be found in the LICENSE file. 
+ + +// Note: This list is based on Clang's +typedef enum +{ + ARCH_TYPE_UNKNOWN, + ARCH_TYPE_ARM, // ARM (little endian): arm, armv.*, xscale + ARCH_TYPE_ARMB, // ARM (big endian): armeb + ARCH_TYPE_AARCH64, // AArch64 (little endian): aarch64 + ARCH_TYPE_AARCH64_BE, // AArch64 (big endian): aarch64_be + ARCH_TYPE_AARCH64_32, // AArch64 (little endian) ILP32: aarch64_32 + ARCH_TYPE_ARC, // ARC: Synopsys ARC + ARCH_TYPE_AVR, // AVR: Atmel AVR microcontroller + ARCH_TYPE_BPFEL, // eBPF or extended BPF or 64-bit BPF (little endian) + ARCH_TYPE_BPFEB, // eBPF or extended BPF or 64-bit BPF (big endian) + ARCH_TYPE_HEXAGON, // Hexagon: hexagon + ARCH_TYPE_MIPS, // MIPS: mips, mipsallegrex, mipsr6 + ARCH_TYPE_MIPSEL, // MIPSEL: mipsel, mipsallegrexe, mipsr6el + ARCH_TYPE_MIPS64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6 + ARCH_TYPE_MIPS64EL, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el + ARCH_TYPE_MSP430, // MSP430: msp430 + ARCH_TYPE_PPC, // PPC: powerpc + ARCH_TYPE_PPC64, // PPC64: powerpc64, ppu + ARCH_TYPE_PPC64LE, // PPC64LE: powerpc64le + ARCH_TYPE_R600, // R600: AMD GPUs HD2XXX - HD6XXX + ARCH_TYPE_AMDGCN, // AMDGCN: AMD GCN GPUs + ARCH_TYPE_RISCV32, // RISC-V (32-bit): riscv32 + ARCH_TYPE_RISCV64, // RISC-V (64-bit): riscv64 + ARCH_TYPE_SPARC, // Sparc: sparc + ARCH_TYPE_SPARCV9, // Sparcv9: Sparcv9 + ARCH_TYPE_SPARCEL, // Sparc: (endianness = little). 
NB: 'Sparcle' is a CPU variant + ARCH_TYPE_SYSTEMZ, // SystemZ: s390x + ARCH_TYPE_TCE, // TCE (http://tce.cs.tut.fi/): tce + ARCH_TYPE_TCELE, // TCE little endian (http://tce.cs.tut.fi/): tcele + ARCH_TYPE_THUMB, // Thumb (little endian): thumb, thumbv.* + ARCH_TYPE_THUMBEB, // Thumb (big endian): thumbeb + ARCH_TYPE_X86, // X86: i[3-9]86 + ARCH_TYPE_X86_64, // X86-64: amd64, x86_64 + ARCH_TYPE_XCORE, // XCore: xcore + ARCH_TYPE_NVPTX, // NVPTX: 32-bit + ARCH_TYPE_NVPTX64, // NVPTX: 64-bit + ARCH_TYPE_LE32, // le32: generic little-endian 32-bit CPU (PNaCl) + ARCH_TYPE_LE64, // le64: generic little-endian 64-bit CPU (PNaCl) + ARCH_TYPE_AMDIL, // AMDIL + ARCH_TYPE_AMDIL64, // AMDIL with 64-bit pointers + ARCH_TYPE_HSAIL, // AMD HSAIL + ARCH_TYPE_HSAIL64, // AMD HSAIL with 64-bit pointers + ARCH_TYPE_SPIR, // SPIR: standard portable IR for OpenCL 32-bit version + ARCH_TYPE_SPIR64, // SPIR: standard portable IR for OpenCL 64-bit version + ARCH_TYPE_KALIMBA, // Kalimba: generic kalimba + ARCH_TYPE_SHAVE, // SHAVE: Movidius vector VLIW processors + ARCH_TYPE_LANAI, // Lanai: Lanai 32-bit + ARCH_TYPE_WASM32, // WebAssembly with 32-bit pointers + ARCH_TYPE_WASM64, // WebAssembly with 64-bit pointers + ARCH_TYPE_RSCRIPT32, // 32-bit RenderScript + ARCH_TYPE_RSCRIPT64, // 64-bit RenderScript + ARCH_TYPE_LAST = ARCH_TYPE_RSCRIPT64 + +} ArchType; + +typedef enum +{ + CTYPE_SHORT, + CTYPE_INT, + CTYPE_LONG, + CTYPE_LONG_LONG +} CType; +typedef enum +{ + OS_TYPE_UNKNOWN, + OS_TYPE_ANANAS, + OS_TYPE_CLOUD_ABI, + OS_TYPE_DARWIN, + OS_TYPE_DRAGON_FLY, + OS_TYPE_FREE_BSD, + OS_TYPE_FUCHSIA, + OS_TYPE_IOS, + OS_TYPE_KFREEBSD, + OS_TYPE_LINUX, + OS_TYPE_PS3, + OS_TYPE_MACOSX, + OS_TYPE_NETBSD, + OS_TYPE_OPENBSD, + OS_TYPE_SOLARIS, + OS_TYPE_WIN32, + OS_TYPE_HAIKU, + OS_TYPE_MINIX, + OS_TYPE_RTEMS, + OS_TYPE_NACL, // Native Client + OS_TYPE_CNK, // BG/P Compute-Node Kernel + OS_TYPE_AIX, + OS_TYPE_CUDA, + OS_TYPE_NVOPENCL, + OS_TYPE_AMDHSA, + OS_TYPE_PS4, + OS_TYPE_ELFIAMCU, + 
OS_TYPE_TVOS, + OS_TYPE_WATCHOS, + OS_TYPE_MESA3D, + OS_TYPE_CONTIKI, + OS_TYPE_AMDPAL, + OS_TYPE_HERMITCORE, + OS_TYPE_HURD, + OS_TYPE_WASI, + OS_TYPE_EMSCRIPTEN, + OS_TYPE_LAST = OS_TYPE_EMSCRIPTEN +} OsType; + +typedef enum +{ + VENDOR_UNKNOWN, + VENDOR_APPLE, + VENDOR_PC, + VENDOR_SCEI, + VENDOR_BGP, + VENDOR_BGQ, + VENDOR_FREESCALE, + VENDOR_IBM, + VENDOR_IMAGINATION_TECHNOLOGIES, + VENDOR_MIPS_TECHNOLOGIES, + VENDOR_NVIDIA, + VENDOR_CSR, + VENDOR_MYRIAD, + VENDOR_AMD, + VENDOR_MESA, + VENDOR_SUSE, + VENDOR_OPEN_EMBEDDED, + VENDOR_LAST = VENDOR_OPEN_EMBEDDED +} VendorType; + + +typedef struct +{ + void *target; + void *machine; + void *llvm_data_layout; + ArchType arch; + const char *arch_name; + OsType os; + const char *os_name; + VendorType vendor; + const char *vendor_name; + int alloca_address_space; + bool little_endian; + bool tls_supported; + bool asm_supported; + bool float_128; + bool float_16; + unsigned align_min_pointer; + unsigned align_min_byte; + unsigned align_min_short; + unsigned align_min_int; + unsigned align_min_long; + unsigned align_min_half; + unsigned align_min_float; + unsigned align_min_double; + unsigned align_min_f128; + unsigned align_pointer; + unsigned align_byte; + unsigned align_short; + unsigned align_int; + unsigned align_long; + unsigned align_half; + unsigned align_float; + unsigned align_double; + unsigned align_f128; + unsigned align_c_long_double; + unsigned align_c_int; + unsigned align_c_long; + unsigned align_c_long_long; + unsigned align_simd_default; + unsigned align_max_vector; + unsigned align_global_min; + unsigned align_new; + unsigned align_large_array; + unsigned width_pointer; + unsigned width_c_short; + unsigned width_c_int; + unsigned width_c_long; + unsigned width_c_long_long; + unsigned width_c_long_double; + unsigned width_c_wchar; + unsigned width_c_wint; + unsigned width_large_array_min; + unsigned reg_param_max; + unsigned sse_reg_param_max; + unsigned builtin_ms_valist; + unsigned aarch64sve_types; 
+ char *platform_name; +} Target; + +extern Target build_target; \ No newline at end of file diff --git a/src/compiler/tokens.c b/src/compiler/tokens.c index 29dcacb61..13d3ab5bd 100644 --- a/src/compiler/tokens.c +++ b/src/compiler/tokens.c @@ -140,8 +140,6 @@ const char *token_type_to_string(TokenType type) // Identifiers case TOKEN_IDENT: return "IDENT"; - case TOKEN_AT_IDENT: - return "AT_IDENT"; case TOKEN_HASH_IDENT: return "HASH_IDENT"; case TOKEN_CT_IDENT: diff --git a/src/compiler/types.c b/src/compiler/types.c index cf00014f5..d56ec3196 100644 --- a/src/compiler/types.c +++ b/src/compiler/types.c @@ -21,26 +21,31 @@ Type t_cus, t_cui, t_cul, t_cull; Type t_cs, t_ci, t_cl, t_cll; Type t_voidstar; -Type *type_signed_int_by_size(int bytesize) +#define META_OFFSET 0 +#define PTR_OFFSET 1 +#define VAR_ARRAY_OFFSET 2 +#define ARRAY_OFFSET 3 + +Type *type_signed_int_by_bitsize(unsigned bytesize) { switch (bytesize) { - case 1: return type_char; - case 2: return type_short; - case 4: return type_int; - case 8: return type_long; - default: FATAL_ERROR("Illegal bytesize %d", bytesize); + case 8: return type_char; + case 16: return type_short; + case 32: return type_int; + case 64: return type_long; + default: FATAL_ERROR("Illegal bitsize %d", bytesize); } } -Type *type_unsigned_int_by_size(int bytesize) +Type *type_unsigned_int_by_bitsize(unsigned bytesize) { switch (bytesize) { - case 1: return type_byte; - case 2: return type_ushort; - case 4: return type_uint; - case 8: return type_ulong; - default: FATAL_ERROR("Illegal bytesize %d", bytesize); + case 8: return type_byte; + case 16: return type_ushort; + case 32: return type_uint; + case 64: return type_ulong; + default: FATAL_ERROR("Illegal bitsize %d", bytesize); } } @@ -73,6 +78,9 @@ const char *type_to_error_string(Type *type) case TYPE_UNION: case TYPE_ERROR: return type->name; + case TYPE_META_TYPE: + asprintf(&buffer, "type %s", type_to_error_string(type->child)); + return buffer; case TYPE_POINTER: 
asprintf(&buffer, "%s*", type_to_error_string(type->pointer)); return buffer; @@ -135,9 +143,25 @@ static void type_append_signature_name_user_defined(Decl *decl, char *dst, size_ } void type_append_signature_name(Type *type, char *dst, size_t *offset) { - assert(*offset < 2000); - memcpy(dst + *offset, type->name, strlen(type->name)); - *offset += strlen(type->name); + assert(*offset < MAX_FUNCTION_SIGNATURE_SIZE); + const char *name; + switch (type->type_kind) + { + case TYPE_POISONED: + case TYPE_TYPEDEF: + UNREACHABLE; + case TYPE_ERROR: + case TYPE_ENUM: + case TYPE_STRUCT: + case TYPE_UNION: + name = type->decl->external_name; + break; + default: + name = type->name; + break; + } + memcpy(dst + *offset, name, strlen(name)); + *offset += strlen(name); } @@ -149,6 +173,8 @@ size_t type_size(Type *canonical) { case TYPE_POISONED: UNREACHABLE; + case TYPE_META_TYPE: + return 0; case TYPE_ENUM: case TYPE_TYPEDEF: case TYPE_STRUCT: @@ -188,27 +214,29 @@ size_t type_size(Type *canonical) TODO } -static inline void create_ptr_cache(Type *canonical_type) +static inline void create_type_cache(Type *canonical_type) { assert(canonical_type->canonical == canonical_type); - canonical_type->ptr_cache = VECADD(canonical_type->ptr_cache, NULL); - canonical_type->ptr_cache = VECADD(canonical_type->ptr_cache, NULL); + for (int i = 0; i < ARRAY_OFFSET; i++) + { + vec_add(canonical_type->type_cache, NULL); + } } static Type *type_generate_ptr(Type *ptr_type, bool canonical) { if (canonical) ptr_type = ptr_type->canonical; - if (!ptr_type->ptr_cache) + if (!ptr_type->type_cache) { - create_ptr_cache(ptr_type); + create_type_cache(ptr_type); } - Type *ptr = ptr_type->ptr_cache[0]; + Type *ptr = ptr_type->type_cache[PTR_OFFSET]; if (ptr == NULL) { ptr = type_new(TYPE_POINTER, strformat("%s*", ptr_type->name)); ptr->pointer = ptr_type; - ptr_type->ptr_cache[0] = ptr; + ptr_type->type_cache[PTR_OFFSET] = ptr; if (ptr_type == ptr_type->canonical) { ptr->canonical = ptr; @@ -221,24 
+249,56 @@ static Type *type_generate_ptr(Type *ptr_type, bool canonical) return ptr; } +static Type *type_generate_meta(Type *type, bool canonical) +{ + if (canonical) type = type->canonical; + if (!type->type_cache) + { + create_type_cache(type); + } + + Type *meta = type->type_cache[META_OFFSET]; + if (meta == NULL) + { + meta = type_new(TYPE_META_TYPE, strformat("type %s", type->name)); + meta->child = type; + type->type_cache[META_OFFSET] = meta; + if (type == type->canonical) + { + meta->canonical = meta; + } + else + { + meta->canonical = type_generate_meta(type->canonical, true); + } + } + return meta; +} + + Type *type_get_ptr(Type *ptr_type) { return type_generate_ptr(ptr_type, false); } +Type *type_get_meta(Type *meta_type) +{ + return type_generate_meta(meta_type, false); +} + Type *type_create_array(Type *arr_type, uint64_t len, bool canonical) { if (canonical) arr_type = arr_type->canonical; - if (!arr_type->ptr_cache) + if (!arr_type->type_cache) { - create_ptr_cache(arr_type); + create_type_cache(arr_type); } // Dynamic array if (len == 0) { - Type *array = arr_type->ptr_cache[1]; + Type *array = arr_type->type_cache[VAR_ARRAY_OFFSET]; if (array == NULL) { array = type_new(TYPE_VARARRAY, strformat("%s[]", arr_type->name)); @@ -252,15 +312,15 @@ Type *type_create_array(Type *arr_type, uint64_t len, bool canonical) { array->canonical = type_create_array(arr_type, len, true); } - arr_type->ptr_cache[1] = array; + arr_type->type_cache[VAR_ARRAY_OFFSET] = array; } return array; } - int entries = (int)vec_size(arr_type->ptr_cache); - for (int i = 1; i < entries; i++) + int entries = (int)vec_size(arr_type->type_cache); + for (int i = ARRAY_OFFSET; i < entries; i++) { - Type *ptr = arr_type->ptr_cache[i]; + Type *ptr = arr_type->type_cache[i]; if (ptr->array.len == arr_type->array.len) { return ptr; @@ -277,7 +337,7 @@ Type *type_create_array(Type *arr_type, uint64_t len, bool canonical) { array->canonical = type_create_array(arr_type, len, true); } - 
VECADD(arr_type->ptr_cache, array); + VECADD(arr_type->type_cache, array); return array; } @@ -286,14 +346,17 @@ Type *type_get_array(Type *arr_type, uint64_t len) return type_create_array(arr_type, len, false); } -static void type_create(const char *name, Type *location, Type **ptr, TypeKind kind, unsigned bytesize, unsigned bitsize) +static void type_create(const char *name, Type *location, Type **ptr, TypeKind kind, unsigned bitsize, + unsigned align, unsigned pref_align) { *location = (Type) { .type_kind = kind, - .builtin.bytesize = bytesize, + .builtin.bytesize = (bitsize + 7) / 8, .builtin.bitsize = bitsize, + .builtin.min_alignment = align, + .builtin.pref_alignment = pref_align, .name = name, - .canonical = location + .canonical = location, }; location->name = name; location->canonical = location; @@ -311,56 +374,58 @@ static void type_create_alias(const char *name, Type *location, Type **ptr, Type } -void builtin_setup() +void builtin_setup(Target *target) { - type_create("void", &t_u0, &type_void, TYPE_VOID, 1, 8); - type_create("string", &t_str, &type_string, TYPE_STRING, build_options.pointer_size, build_options.pointer_size * 8); - create_ptr_cache(type_void); - type_void->ptr_cache[0] = &t_voidstar; - type_create("void*", &t_voidstar, &type_voidptr, TYPE_POINTER, 0, 0); - t_voidstar.pointer = type_void; /*TODO * decl_string = (Decl) { .decl_kind = DECL_BUILTIN, .name.string = "string" }; create_type(&decl_string, &type_string); type_string.type_kind = TYPE_STRING; */ -#define DEF_TYPE(_name, _shortname, _type, _bits) \ -type_create(#_name, &_shortname, &type_ ## _name, _type, (_bits + 7) / 8, _bits); +#define DEF_TYPE(_name, _shortname, _type, _bits, _align) \ +type_create(#_name, &_shortname, &type_ ## _name, _type, _bits, target->align_min_ ## _align, target->align_ ## _align) - DEF_TYPE(compint, t_ixx, TYPE_IXX, 64); - DEF_TYPE(compuint, t_uxx, TYPE_UXX, 64); - DEF_TYPE(compfloat, t_fxx, TYPE_FXX, 64); - DEF_TYPE(bool, t_u1, TYPE_BOOL, 1); + 
DEF_TYPE(bool, t_u1, TYPE_BOOL, 1, byte); + DEF_TYPE(float, t_f32, TYPE_F32, 32, float); + DEF_TYPE(double, t_f64, TYPE_F64, 64, double); - DEF_TYPE(float, t_f32, TYPE_F32, 32); - DEF_TYPE(double, t_f64, TYPE_F64, 64); + DEF_TYPE(char, t_i8, TYPE_I8, 8, byte); + DEF_TYPE(short, t_i16, TYPE_I16, 16, short); + DEF_TYPE(int, t_i32, TYPE_I32, 32, int); + DEF_TYPE(long, t_i64, TYPE_I64, 64, long); - DEF_TYPE(char, t_i8, TYPE_I8, 8); - DEF_TYPE(short, t_i16, TYPE_I16, 16); - DEF_TYPE(int, t_i32, TYPE_I32, 32); - DEF_TYPE(long, t_i64, TYPE_I64, 64); + DEF_TYPE(byte, t_u8, TYPE_U8, 8, byte); + DEF_TYPE(ushort, t_u16, TYPE_U16, 16, short); + DEF_TYPE(uint, t_u32, TYPE_U32, 32, int); + DEF_TYPE(ulong, t_u64, TYPE_U64, 64, long); - DEF_TYPE(byte, t_u8, TYPE_U8, 8); - DEF_TYPE(ushort, t_u16, TYPE_U16, 16); - DEF_TYPE(uint, t_u32, TYPE_U32, 32); - DEF_TYPE(ulong, t_u64, TYPE_U64, 64); - - type_create_alias("usize", &t_usz, &type_usize, type_unsigned_int_by_size(build_options.pointer_size)); - type_create_alias("isize", &t_isz, &type_isize, type_signed_int_by_size(build_options.pointer_size)); - - type_create_alias("c_ushort", &t_cus, &type_c_ushort, type_unsigned_int_by_size(build_options.cshort_size)); - type_create_alias("c_uint", &t_cui, &type_c_uint, type_unsigned_int_by_size(build_options.cint_size)); - type_create_alias("c_ulong", &t_cul, &type_c_ulong, type_unsigned_int_by_size(build_options.clong_size)); - type_create_alias("c_ulonglong", &t_cull, &type_c_ulonglong, type_unsigned_int_by_size(build_options.clonglong_size)); - - type_create_alias("c_short", &t_cs, &type_c_short, type_signed_int_by_size(build_options.cshort_size)); - type_create_alias("c_int", &t_ci, &type_c_int, type_signed_int_by_size(build_options.cint_size)); - type_create_alias("c_long", &t_cl, &type_c_long, type_signed_int_by_size(build_options.clong_size)); - type_create_alias("c_longlong", &t_cll, &type_c_longlong, type_signed_int_by_size(build_options.clonglong_size)); + DEF_TYPE(void, t_u0, 
TYPE_VOID, 8, byte); + DEF_TYPE(string, t_str, TYPE_STRING, target->width_pointer, pointer); #undef DEF_TYPE + type_create("void*", &t_voidstar, &type_voidptr, TYPE_POINTER, target->width_pointer, target->align_min_pointer, target->align_pointer); + create_type_cache(type_void); + type_void->type_cache[0] = &t_voidstar; + t_voidstar.pointer = type_void; + type_create("compint", &t_ixx, &type_compint, TYPE_IXX, 64, 0, 0); + type_create("compuint", &t_uxx, &type_compuint, TYPE_UXX, 64, 0, 0); + type_create("compfloat", &t_fxx, &type_compfloat, TYPE_FXX, 64, 0, 0); + + type_create_alias("usize", &t_usz, &type_usize, type_unsigned_int_by_bitsize(target->width_pointer)); + type_create_alias("isize", &t_isz, &type_isize, type_signed_int_by_bitsize(target->width_pointer)); + + type_create_alias("c_ushort", &t_cus, &type_c_ushort, type_unsigned_int_by_bitsize(target->width_c_short)); + type_create_alias("c_uint", &t_cui, &type_c_uint, type_unsigned_int_by_bitsize(target->width_c_int)); + type_create_alias("c_ulong", &t_cul, &type_c_ulong, type_unsigned_int_by_bitsize(target->width_c_long)); + type_create_alias("c_ulonglong", &t_cull, &type_c_ulonglong, type_unsigned_int_by_bitsize(target->width_c_long_long)); + + type_create_alias("c_short", &t_cs, &type_c_short, type_signed_int_by_bitsize(target->width_c_short)); + type_create_alias("c_int", &t_ci, &type_c_int, type_signed_int_by_bitsize(target->width_c_int)); + type_create_alias("c_long", &t_cl, &type_c_long, type_signed_int_by_bitsize(target->width_c_long)); + type_create_alias("c_longlong", &t_cll, &type_c_longlong, type_signed_int_by_bitsize(target->width_c_long_long)); + + } /** @@ -395,6 +460,7 @@ bool type_may_have_method_functions(Type *type) case TYPE_UNION: case TYPE_STRUCT: case TYPE_ENUM: + case TYPE_ERROR: return true; default: return false; @@ -441,9 +507,9 @@ Type *type_find_max_num_type(Type *num_type, Type *other_num) assert (other_num->type_kind != TYPE_FXX); return other_num; case LS: - return 
type_signed_int_by_size(num_type->builtin.bytesize); + return type_signed_int_by_bitsize(num_type->builtin.bytesize * 8); case RS: - return type_signed_int_by_size(other_num->builtin.bytesize); + return type_signed_int_by_bitsize(other_num->builtin.bytesize * 8); case FL: return type_double; default: @@ -579,6 +645,7 @@ Type *type_find_max_type(Type *type, Type *other) case TYPE_FUNC: case TYPE_UNION: case TYPE_ERROR_UNION: + case TYPE_META_TYPE: return NULL; case TYPE_STRUCT: TODO diff --git a/src/target_info/target_info.c b/src/target_info/target_info.c new file mode 100644 index 000000000..6b0a93802 --- /dev/null +++ b/src/target_info/target_info.c @@ -0,0 +1,72 @@ +// Copyright (c) 2020 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a LGPLv3.0 +// a copy of which can be found in the LICENSE file. + +#include "target_info_internal.h" + +typedef enum +{ + AVXABI_LEVEL_NONE, + AVXABI_AVX, + AVXABI_512 +} X86AVXABILevel; + +static unsigned x86avxabi_vector_size(X86AVXABILevel level) +{ + switch (level) + { + case AVXABI_512: + return 512; + case AVXABI_AVX: + return 256; + case AVXABI_LEVEL_NONE: + return 128; + } + UNREACHABLE +} + +TargetInfo target_info_new() +{ + // From the glibc documentation, on GNU systems, malloc guarantees 16-byte + // alignment on 64-bit systems and 8-byte alignment on 32-bit systems. See + // https://www.gnu.org/software/libc/manual/html_node/Malloc-Examples.html. + // This alignment guarantee also applies to Windows and Android. + /* + if (T.isGNUEnvironment() || T.isWindowsMSVCEnvironment() || T.isAndroid()) + NewAlign = Triple.isArch64Bit() ? 128 : Triple.isArch32Bit() ? 64 : 0; + else + NewAlign = 0; // Infer from basic type alignment. 
+ */ + TargetInfo target_info = { + .little_endian = true, + .asm_supported = false, + .float_128 = false, + .float_16 = false, + .align_byte = 8, + .align_c_int = 32, + .align_c_long = 32, + .align_c_long_long = 64, + .align_c_long_double = 64, + .align_f128 = 128, + .align_large_array = 0, + .align_global_min = 0, + .align_new = 0, + .align_max_vector = 0, + .align_simd_default = 0, + .width_pointer = 32, + .width_c_int = 32, + .width_c_long = 32, + .width_c_long_long = 64, + .width_c_long_double = 64, + .width_large_array_min = 0, + .width_c_wchar = 32, + .width_c_wint = 32, + .reg_param_max = 0, + .sse_reg_param_max = 0, + .builtin_ms_valist = false, + .aarch64sve_types = false, + .platform_name = "unknown" + }; + return target_info; +} + diff --git a/src/target_info/target_info.h b/src/target_info/target_info.h new file mode 100644 index 000000000..7d9bb174a --- /dev/null +++ b/src/target_info/target_info.h @@ -0,0 +1,121 @@ +#pragma once + +// Copyright (c) 2020 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a LGPLv3.0 +// a copy of which can be found in the LICENSE file. 
+ +#include "utils/common.h" + + + + +typedef enum +{ + SUB_ARCH_NONE, + SUB_ARCH_ARM_V8_5A, + SUB_ARCH_ARM_V8_4A, + SUB_ARCH_ARM_V8_3A, + SUB_ARCH_ARM_V8_2A, + SUB_ARCH_ARM_V8_1A, + SUB_ARCH_ARM_V8, + SUB_ARCH_ARM_V8R, + SUB_ARCH_ARM_V8M_BASELINE, + SUB_ARCH_ARM_V8M_MAINLINE, + SUB_ARCH_ARM_V8_1M_MAINLINE, + SUB_ARCH_ARM_V7, + SUB_ARCH_ARM_V7EM, + SUB_ARCH_ARM_V7M, + SUB_ARCH_ARM_V7S, + SUB_ARCH_ARM_V7K, + SUB_ARCH_ARM_V7VE, + SUB_ARCH_ARM_V6, + SUB_ARCH_ARM_V6M, + SUB_ARCH_ARM_V6K, + SUB_ARCH_ARM_V6T2, + SUB_ARCH_ARM_V5, + SUB_ARCH_ARM_V5TE, + SUB_ARCH_ARM_V4, + SUB_ARCH_KALIMBA_V3, + SUB_ARCH_KALIMBA_V4, + SUB_ARCH_KALIMBA_V5, + SUB_ARCH_MIPS_V6, +} SubArchType; + + +typedef enum +{ + ENV_TYPE_UNKNOWN, + ENV_TYPE_GNU, + ENV_TYPE_GNUABIN32, + ENV_TYPE_SNUABI64, + ENV_TYPE_GNUEABI, + ENV_TYPE_GNUEABIHF, + ENV_TYPE_GNUX32, + ENV_TYPE_CODE16, + ENV_TYPE_EABI, + ENV_TYPE_EABIHF, + ENV_TYPE_ELFV1, + ENV_TYPE_ELFV2, + ENV_TYPE_ANDROID, + ENV_TYPE_MUSL, + ENV_TYPE_MUSLEABI, + ENV_TYPE_MUSLEABIHF, + ENV_TYPE_MSVC, + ENV_TYPE_ITANIUM, + ENV_TYPE_CYGNUS, + ENV_TYPE_CORECLR, + ENV_TYPE_SIMULATOR, + ENV_TYPE_MACABI, + ENV_TYPE_LAST = ENV_TYPE_MACABI +} EnvironmentType; + +typedef enum +{ + OBJ_FORMAT_COFF, + OBJ_FORMAT_ELF, + OBJ_FORMAT_MACHO, + OBJ_FORMAT_WASM, + OBJ_FORMAT_XCOFF +} ObjectFormatType; + + +typedef struct +{ + bool little_endian; + bool tls_supported; + bool asm_supported; + bool float_128; + bool float_16; + unsigned align_pointer; + unsigned align_byte; + unsigned align_short; + unsigned align_int; + unsigned align_long; + unsigned align_half; + unsigned align_float; + unsigned align_double; + unsigned align_f128; + unsigned align_c_long_double; + unsigned align_c_int; + unsigned align_c_long; + unsigned align_c_long_long; + unsigned align_simd_default; + unsigned align_max_vector; + unsigned align_global_min; + unsigned align_new; + unsigned align_large_array; + unsigned width_size; + unsigned width_pointer; + unsigned width_c_int; + unsigned width_c_long; 
+ unsigned width_c_long_long; + unsigned width_c_long_double; + unsigned width_c_wchar; + unsigned width_c_wint; + unsigned width_large_array_min; + unsigned reg_param_max; + unsigned sse_reg_param_max; + unsigned builtin_ms_valist; + unsigned aarch64sve_types; + char *platform_name; +} TargetInfo; diff --git a/src/target_info/target_info_internal.h b/src/target_info/target_info_internal.h new file mode 100644 index 000000000..17457796e --- /dev/null +++ b/src/target_info/target_info_internal.h @@ -0,0 +1,7 @@ +#pragma once + +// Copyright (c) 2020 Christoffer Lerno. All rights reserved. +// Use of this source code is governed by a LGPLv3.0 +// a copy of which can be found in the LICENSE file. + +#include "target_info.h"