diff --git a/.gitignore b/.gitignore index 02fbb5f0..0471c9b2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ wrend wren-cpp libwren.a libwrend.a +libwren.so +libwrend.so +libwren.dylib +libwrend.dylib libwren-cpp.a # XCode user-specific stuff. @@ -23,4 +27,4 @@ benchmark/baseline.txt # built docs get copied here, which is presumed to be a separate checkout of # the repo so they can be pushed to GitHub Pages. -gh-pages/ \ No newline at end of file +gh-pages/ diff --git a/Makefile b/Makefile index 32d86a10..95cf6a65 100644 --- a/Makefile +++ b/Makefile @@ -19,12 +19,22 @@ else CPPFLAGS += -fPIC endif +# Clang on Mac OS X has different flags and a different extension to build a +# shared library. +ifneq (,$(findstring darwin,$(TARGET_OS))) + SHARED_LIB_FLAGS = + SHARED_EXT = dylib +else + SHARED_LIB_FLAGS = -Wl,-soname,$@.so + SHARED_EXT = so +endif + # Files. SOURCES := $(wildcard src/*.c) HEADERS := $(wildcard src/*.h) OBJECTS := $(SOURCES:.c=.o) -# Don't include main.c in the shared library. +# Don't include main.c in the libraries. DEBUG_OBJECTS := $(addprefix build/debug/, $(notdir $(OBJECTS))) RELEASE_OBJECTS := $(addprefix build/release/, $(notdir $(OBJECTS))) RELEASE_CPP_OBJECTS := $(addprefix build/release-cpp/, $(notdir $(OBJECTS))) @@ -38,17 +48,18 @@ RELEASE_CPP_LIB_OBJECTS := $(subst build/release-cpp/main.o,,$(RELEASE_CPP_OBJEC all: release clean: - @rm -rf build wren wrend libwren.a libwrend.a + @rm -rf build wren wrend libwren.a libwrend.a libwren.$(SHARED_EXT) libwrend.$(SHARED_EXT) prep: @mkdir -p build/debug build/release build/release-cpp # Debug build. -debug: prep wrend libwrend.a +debug: prep wrend libwrend -# Debug shared library. -libwrend.a: $(DEBUG_LIB_OBJECTS) - $(AR) $@ $^ +# Debug static and shared libraries. +libwrend: $(DEBUG_LIB_OBJECTS) + $(AR) $@.a $^ + $(CC) $(DEBUG_CFLAGS) -shared $(SHARED_LIB_FLAGS) -o $@.$(SHARED_EXT) $^ # Debug command-line interpreter. 
wrend: $(DEBUG_OBJECTS) @@ -59,11 +70,12 @@ build/debug/%.o: src/%.c include/wren.h $(HEADERS) $(CC) -c $(CFLAGS) $(DEBUG_CFLAGS) -Iinclude -o $@ $< # Release build. -release: prep wren libwren.a +release: prep wren libwren -# Release shared library. -libwren.a: $(RELEASE_LIB_OBJECTS) - $(AR) $@ $^ +# Release static and shared libraries. +libwren: $(RELEASE_LIB_OBJECTS) + $(AR) $@.a $^ + $(CC) $(RELEASE_CFLAGS) -shared $(SHARED_LIB_FLAGS) -o $@.$(SHARED_EXT) $^ # Release command-line interpreter. wren: $(RELEASE_OBJECTS) @@ -74,11 +86,11 @@ build/release/%.o: src/%.c include/wren.h $(HEADERS) $(CC) -c $(CFLAGS) $(RELEASE_CFLAGS) -Iinclude -o $@ $< # Release C++ build. -release-cpp: prep wren-cpp libwren-cpp.a +release-cpp: prep wren-cpp libwren-cpp -# Release C++ shared lib -libwren-cpp.a: $(RELEASE_CPP_LIB_OBJECTS) - $(AR) $@ $^ +# Release C++ static library. +libwren-cpp: $(RELEASE_CPP_LIB_OBJECTS) + $(AR) $@.a $^ # Release C++ command-line interpreter. wren-cpp: $(RELEASE_CPP_OBJECTS) diff --git a/src/wren_compiler.c b/src/wren_compiler.c index e64a8fd1..82a25d72 100644 --- a/src/wren_compiler.c +++ b/src/wren_compiler.c @@ -702,6 +702,7 @@ static void readString(Parser* parser) { case '"': addStringChar(parser, '"'); break; case '\\': addStringChar(parser, '\\'); break; + case '0': addStringChar(parser, '\0'); break; case 'a': addStringChar(parser, '\a'); break; case 'b': addStringChar(parser, '\b'); break; case 'f': addStringChar(parser, '\f'); break; @@ -1269,7 +1270,8 @@ static int copyName(Compiler* compiler, char* name) length = MAX_METHOD_NAME; } - strncpy(name, token->start, length); + memcpy(name, token->start, length); + name[length] = '\0'; return length; } @@ -2010,14 +2012,14 @@ static void super_(Compiler* compiler, bool allowAssignment) int length; if (enclosingClass != NULL) { length = enclosingClass->methodLength; - strncpy(name, enclosingClass->methodName, length); + memcpy(name, enclosingClass->methodName, length); } else { // We get here if 
super is used outside of a method. In that case, we // have already reported the error, so just stub this out so we can keep // going to try to find later errors. length = 0; - strncpy(name, "", length); } + name[length] = '\0'; // Call the superclass method with the same name. methodCall(compiler, CODE_SUPER_0, name, length); @@ -2103,8 +2105,7 @@ static void new_(Compiler* compiler, bool allowAssignment) methodSymbol(compiler, " instantiate", 12)); // Invoke the constructor on the new instance. - char name[MAX_METHOD_SIGNATURE]; - strcpy(name, "new"); + char name[MAX_METHOD_SIGNATURE] = "new"; methodCall(compiler, CODE_CALL_0, name, 3); } diff --git a/src/wren_core.c b/src/wren_core.c index eb78759b..07853db0 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -175,7 +175,8 @@ static bool validateFn(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_FN(args[index]) || IS_CLOSURE(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a function.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a function.", -1)); return false; } @@ -185,7 +186,8 @@ static bool validateNum(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_NUM(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a number.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a number.", -1)); return false; } @@ -196,7 +198,8 @@ static bool validateIntValue(WrenVM* vm, Value* args, double value, { if (trunc(value) == value) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be an integer.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be an integer.", -1)); return false; } @@ -226,7 +229,7 @@ static int validateIndexValue(WrenVM* vm, Value* args, int count, double value, // Check bounds. 
if (index >= 0 && index < count) return index; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " out of bounds.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " out of bounds.", -1)); return -1; } @@ -263,7 +266,8 @@ static bool validateString(WrenVM* vm, Value* args, int index, { if (IS_STRING(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a string.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a string.", -1)); return false; } @@ -1056,8 +1060,9 @@ DEF_NATIVE(object_toString) else if (IS_INSTANCE(args[0])) { ObjInstance* instance = AS_INSTANCE(args[0]); - RETURN_OBJ(wrenStringConcat(vm, "instance of ", - instance->obj.classObj->name->value)); + ObjString* name = instance->obj.classObj->name; + RETURN_OBJ(wrenStringConcat(vm, "instance of ", -1, + name->value, name->length)); } RETURN_VAL(wrenNewString(vm, "<object>", 8)); @@ -1156,10 +1161,7 @@ DEF_NATIVE(string_contains) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - // Corner case, the empty string contains the empty string. - if (string->length == 0 && search->length == 0) RETURN_TRUE; - - RETURN_BOOL(strstr(string->value, search->value) != NULL); + RETURN_BOOL(wrenStringFind(vm, string, search) != UINT32_MAX); } DEF_NATIVE(string_count) @@ -1179,7 +1181,7 @@ DEF_NATIVE(string_endsWith) if (search->length > string->length) RETURN_FALSE; int result = memcmp(string->value + string->length - search->length, - search->value, search->length); + search->value, search->length); RETURN_BOOL(result == 0); } @@ -1191,9 +1193,9 @@ DEF_NATIVE(string_indexOf) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - char* firstOccurrence = strstr(string->value, search->value); + uint32_t index = wrenStringFind(vm, string, search); - RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1); + RETURN_NUM(index == UINT32_MAX ? 
-1 : (int)index); } DEF_NATIVE(string_iterate) @@ -1216,7 +1218,7 @@ DEF_NATIVE(string_iterate) do { index++; - if (index >= string->length) RETURN_FALSE; + if ((uint32_t)index >= string->length) RETURN_FALSE; } while ((string->value[index] & 0xc0) == 0x80); RETURN_NUM(index); @@ -1253,7 +1255,10 @@ DEF_NATIVE(string_toString) DEF_NATIVE(string_plus) { if (!validateString(vm, args, 1, "Right operand")) return PRIM_ERROR; - RETURN_OBJ(wrenStringConcat(vm, AS_CSTRING(args[0]), AS_CSTRING(args[1]))); + ObjString* left = AS_STRING(args[0]); + ObjString* right = AS_STRING(args[1]); + RETURN_OBJ(wrenStringConcat(vm, left->value, left->length, + right->value, right->length)); } DEF_NATIVE(string_subscript) diff --git a/src/wren_debug.c b/src/wren_debug.c index 35d9d2ae..0bd92f3a 100644 --- a/src/wren_debug.c +++ b/src/wren_debug.c @@ -92,7 +92,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int slot = READ_SHORT(); printf("%-16s %5d '%s'\n", "LOAD_MODULE_VAR", slot, - fn->module->variableNames.data[slot]); + fn->module->variableNames.data[slot].buffer); break; } @@ -100,7 +100,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int slot = READ_SHORT(); printf("%-16s %5d '%s'\n", "STORE_MODULE_VAR", slot, - fn->module->variableNames.data[slot]); + fn->module->variableNames.data[slot].buffer); break; } @@ -133,7 +133,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) int numArgs = bytecode[i - 1] - CODE_CALL_0; int symbol = READ_SHORT(); printf("CALL_%-11d %5d '%s'\n", numArgs, symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -158,7 +158,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) int numArgs = bytecode[i - 1] - CODE_SUPER_0; int symbol = READ_SHORT(); printf("SUPER_%-10d %5d '%s'\n", numArgs, symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -230,7 
+230,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int symbol = READ_SHORT(); printf("%-16s %5d '%s'\n", "METHOD_INSTANCE", symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -238,7 +238,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int symbol = READ_SHORT(); printf("%-16s %5d '%s'\n", "METHOD_STATIC", symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } diff --git a/src/wren_utils.c b/src/wren_utils.c index 87e033aa..e38fac2b 100644 --- a/src/wren_utils.c +++ b/src/wren_utils.c @@ -5,7 +5,7 @@ DEFINE_BUFFER(Byte, uint8_t); DEFINE_BUFFER(Int, int); -DEFINE_BUFFER(String, char*); +DEFINE_BUFFER(String, String); void wrenSymbolTableInit(WrenVM* vm, SymbolTable* symbols) { @@ -16,26 +16,28 @@ void wrenSymbolTableClear(WrenVM* vm, SymbolTable* symbols) { for (int i = 0; i < symbols->count; i++) { - wrenReallocate(vm, symbols->data[i], 0, 0); + wrenReallocate(vm, symbols->data[i].buffer, 0, 0); } wrenStringBufferClear(vm, symbols); } -int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, const char* name, - size_t length) +int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, + const char* name, size_t length) { - char* heapString = (char*)wrenReallocate(vm, NULL, 0, - sizeof(char) * (length + 1)); - strncpy(heapString, name, length); - heapString[length] = '\0'; + String symbol; + symbol.buffer = (char*)wrenReallocate(vm, NULL, 0, + sizeof(char) * (length + 1)); + memcpy(symbol.buffer, name, length); + symbol.buffer[length] = '\0'; + symbol.length = (int)length; - wrenStringBufferWrite(vm, symbols, heapString); + wrenStringBufferWrite(vm, symbols, symbol); return symbols->count - 1; } int wrenSymbolTableEnsure(WrenVM* vm, SymbolTable* symbols, - const char* name, size_t length) + const char* name, size_t length) { // See if the symbol is already defined. 
int existing = wrenSymbolTableFind(symbols, name, length); @@ -51,9 +53,8 @@ int wrenSymbolTableFind(SymbolTable* symbols, const char* name, size_t length) // TODO: O(n). Do something better. for (int i = 0; i < symbols->count; i++) { - // TODO: strlen() here is gross. Symbol table should store lengths. - if (strlen(symbols->data[i]) == length && - strncmp(symbols->data[i], name, length) == 0) return i; + if ((size_t)symbols->data[i].length == length && + memcmp(symbols->data[i].buffer, name, length) == 0) return i; } return -1; diff --git a/src/wren_utils.h b/src/wren_utils.h index e5ba483d..dc25e678 100644 --- a/src/wren_utils.h +++ b/src/wren_utils.h @@ -6,6 +6,13 @@ // Reusable data structures and other utility functions. +// A simple structure to keep track of a string's length as well as its data +// (including the null terminator). +typedef struct { + char* buffer; + int length; +} String; + // We need buffers of a few different types. To avoid lots of casting between // void* and back, we'll use the preprocessor as a poor man's generics and let // it generate a few type-specific ones. @@ -50,7 +57,7 @@ DECLARE_BUFFER(Byte, uint8_t); DECLARE_BUFFER(Int, int); -DECLARE_BUFFER(String, char*); +DECLARE_BUFFER(String, String); // TODO: Change this to use a map. 
typedef StringBuffer SymbolTable; diff --git a/src/wren_value.c b/src/wren_value.c index 34e83c95..ba2f266b 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -32,8 +32,11 @@ DEFINE_BUFFER(Method, Method); #define ALLOCATE(vm, type) \ ((type*)wrenReallocate(vm, NULL, 0, sizeof(type))) -#define ALLOCATE_FLEX(vm, type, extra) \ - ((type*)wrenReallocate(vm, NULL, 0, sizeof(type) + extra)) + +#define ALLOCATE_FLEX(vm, mainType, arrayType, count) \ + ((mainType*)wrenReallocate(vm, NULL, 0, \ + sizeof(mainType) + sizeof(arrayType) * (count))) + #define ALLOCATE_ARRAY(vm, type, count) \ ((type*)wrenReallocate(vm, NULL, 0, sizeof(type) * count)) @@ -83,7 +86,8 @@ ObjClass* wrenNewClass(WrenVM* vm, ObjClass* superclass, int numFields, wrenPushRoot(vm, (Obj*)name); // Create the metaclass. - ObjString* metaclassName = wrenStringConcat(vm, name->value, " metaclass"); + ObjString* metaclassName = wrenStringConcat(vm, name->value, name->length, + " metaclass", -1); wrenPushRoot(vm, (Obj*)metaclassName); ObjClass* metaclass = wrenNewSingleClass(vm, 0, metaclassName); @@ -131,7 +135,7 @@ void wrenBindMethod(WrenVM* vm, ObjClass* classObj, int symbol, Method method) ObjClosure* wrenNewClosure(WrenVM* vm, ObjFn* fn) { ObjClosure* closure = ALLOCATE_FLEX(vm, ObjClosure, - sizeof(Upvalue*) * fn->numUpvalues); + Upvalue*, fn->numUpvalues); initObj(vm, &closure->obj, OBJ_CLOSURE, vm->fnClass); closure->fn = fn; @@ -197,7 +201,7 @@ ObjFn* wrenNewFunction(WrenVM* vm, ObjModule* module, // Copy the function's name. 
debug->name = ALLOCATE_ARRAY(vm, char, debugNameLength + 1); - strncpy(debug->name, debugName, debugNameLength); + memcpy(debug->name, debugName, debugNameLength); debug->name[debugNameLength] = '\0'; debug->sourceLines = sourceLines; @@ -224,7 +228,7 @@ ObjFn* wrenNewFunction(WrenVM* vm, ObjModule* module, Value wrenNewInstance(WrenVM* vm, ObjClass* classObj) { ObjInstance* instance = ALLOCATE_FLEX(vm, ObjInstance, - classObj->numFields * sizeof(Value)); + Value, classObj->numFields); initObj(vm, &instance->obj, OBJ_INSTANCE, classObj); // Initialize fields to null. @@ -364,7 +368,7 @@ static uint32_t hashObject(Obj* object) ObjString* string = (ObjString*)object; // FNV-1a hash. See: http://www.isthe.com/chongo/tech/comp/fnv/ - uint32_t hash = 2166136261; + uint32_t hash = 2166136261u; // We want the contents of the string to affect the hash, but we also // want to ensure it runs in constant time. We also don't want to bias @@ -403,6 +407,9 @@ static uint32_t hashValue(Value value) case TAG_NAN: return HASH_NAN; case TAG_NULL: return HASH_NULL; case TAG_TRUE: return HASH_TRUE; + default: + UNREACHABLE(); + return 0; } #else switch (value.type) @@ -414,10 +421,9 @@ static uint32_t hashValue(Value value) case VAL_OBJ: return hashObject(AS_OBJ(value)); default: UNREACHABLE(); + return 0; } #endif - UNREACHABLE(); - return 0; } // Inserts [key] and [value] in the array of [entries] with the given @@ -629,7 +635,7 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length) ObjString* string = AS_STRING(wrenNewUninitializedString(vm, length)); // Copy the string (if given one). 
- if (length > 0) strncpy(string->value, text, length); + if (length > 0) memcpy(string->value, text, length); string->value[length] = '\0'; @@ -638,22 +644,23 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length) Value wrenNewUninitializedString(WrenVM* vm, size_t length) { - ObjString* string = ALLOCATE_FLEX(vm, ObjString, length + 1); + ObjString* string = ALLOCATE_FLEX(vm, ObjString, char, length + 1); initObj(vm, &string->obj, OBJ_STRING, vm->stringClass); string->length = (int)length; return OBJ_VAL(string); } -ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right) +ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, + const char* right, int rightLength) { - size_t leftLength = strlen(left); - size_t rightLength = strlen(right); + if (leftLength == -1) leftLength = (int)strlen(left); + if (rightLength == -1) rightLength = (int)strlen(right); Value value = wrenNewUninitializedString(vm, leftLength + rightLength); ObjString* string = AS_STRING(value); - strcpy(string->value, left); - strcpy(string->value + leftLength, right); + memcpy(string->value, left, leftLength); + memcpy(string->value + leftLength, right, rightLength); string->value[leftLength + rightLength] = '\0'; return string; @@ -683,6 +690,65 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index) return value; } +// Uses the Boyer-Moore-Horspool string matching algorithm. +uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle) +{ + // Corner case, an empty needle is always found. + if (needle->length == 0) return 0; + + // If the needle is longer than the haystack it won't be found. + if (needle->length > haystack->length) return UINT32_MAX; + + // Pre-calculate the shift table. 
For each character (8-bit value), we + // determine how far the search window can be advanced if that character is + // the last character in the haystack where we are searching for the needle + // and the needle doesn't match there. + uint32_t shift[UINT8_MAX + 1]; + uint32_t needleEnd = needle->length - 1; + + // By default, we assume the character is not in the needle at all. In that + // case, if a match fails on that character, we can advance one whole needle + // width. The table needs one slot for every byte value, 0 through 255. + for (uint32_t index = 0; index <= UINT8_MAX; index++) + { + shift[index] = needle->length; + } + + // Then, for every character in the needle, determine how far it is from the + // end. If a match fails on that character, we can advance the window such + // that the last character in it lines up with the last place we could + // find it in the needle. + for (uint32_t index = 0; index < needleEnd; index++) + { + char c = needle->value[index]; + shift[(uint8_t)c] = needleEnd - index; + } + + // Slide the needle across the haystack, looking for the first match or + // stopping if the needle goes off the end. + char lastChar = needle->value[needleEnd]; + uint32_t range = haystack->length - needle->length; + + for (uint32_t index = 0; index <= range; ) + { + // Compare the last character in the haystack's window to the last character + // in the needle. If it matches, see if the whole needle matches. + char c = haystack->value[index + needleEnd]; + if (lastChar == c && + memcmp(haystack->value + index, needle->value, needleEnd) == 0) + { + // Found a match. + return index; + } + + // Otherwise, slide the needle forward. + index += shift[(uint8_t)c]; + } + + // Not found. 
+ return UINT32_MAX; +} + Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value) { Upvalue* upvalue = ALLOCATE(vm, Upvalue); diff --git a/src/wren_value.h b/src/wren_value.h index ae78f7a0..34a6bd2e 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -97,8 +97,10 @@ typedef enum typedef struct { ValueType type; - double num; - Obj* obj; + union { + double num; + Obj* obj; + } as; } Value; #endif @@ -109,7 +111,7 @@ typedef struct { Obj obj; // Does not include the null terminator. - int length; + uint32_t length; char value[FLEXIBLE_ARRAY]; } ObjString; @@ -574,7 +576,7 @@ typedef struct #define AS_BOOL(value) ((value).type == VAL_TRUE) // Value -> Obj*. -#define AS_OBJ(v) ((v).obj) +#define AS_OBJ(v) ((v).as.obj) // Determines if [value] is a garbage-collected object or not. #define IS_OBJ(value) ((value).type == VAL_OBJ) @@ -585,10 +587,10 @@ typedef struct #define IS_UNDEFINED(value) ((value).type == VAL_UNDEFINED) // Singleton values. -#define FALSE_VAL ((Value){ VAL_FALSE, 0.0, NULL }) -#define NULL_VAL ((Value){ VAL_NULL, 0.0, NULL }) -#define TRUE_VAL ((Value){ VAL_TRUE, 0.0, NULL }) -#define UNDEFINED_VAL ((Value){ VAL_UNDEFINED, 0.0, NULL }) +#define FALSE_VAL ((Value){ VAL_FALSE }) +#define NULL_VAL ((Value){ VAL_NULL }) +#define TRUE_VAL ((Value){ VAL_TRUE }) +#define UNDEFINED_VAL ((Value){ VAL_UNDEFINED }) #endif @@ -685,14 +687,22 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length); // The caller is expected to fully initialize the buffer after calling. Value wrenNewUninitializedString(WrenVM* vm, size_t length); -// Creates a new string that is the concatenation of [left] and [right]. -ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right); +// Creates a new string that is the concatenation of [left] and [right] (with +// length [leftLength] and [rightLength], respectively). If -1 is passed +// the string length is automatically calculated. 
+ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, + const char* right, int rightLength); // Creates a new string containing the code point in [string] starting at byte // [index]. If [index] points into the middle of a UTF-8 sequence, returns an // empty string. Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index); +// Search for the first occurence of [needle] within [haystack] and returns its +// zero-based offset. Returns `UINT32_MAX` if [haystack] does not contain +// [needle]. +uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle); + // Creates a new open upvalue pointing to [value] on the stack. Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value); @@ -725,8 +735,8 @@ static inline bool wrenValuesSame(Value a, Value b) return a == b; #else if (a.type != b.type) return false; - if (a.type == VAL_NUM) return a.num == b.num; - return a.obj == b.obj; + if (a.type == VAL_NUM) return a.as.num == b.as.num; + return a.as.obj == b.as.obj; #endif } @@ -770,7 +780,7 @@ static inline Value wrenObjectToValue(Obj* obj) #else Value value; value.type = VAL_OBJ; - value.obj = obj; + value.as.obj = obj; return value; #endif } @@ -783,7 +793,7 @@ static inline double wrenValueToNum(Value value) data.bits64 = value; return data.num; #else - return value.num; + return value.as.num; #endif } @@ -795,7 +805,10 @@ static inline Value wrenNumToValue(double num) data.num = num; return data.bits64; #else - return (Value){ VAL_NUM, num, NULL }; + Value value; + value.type = VAL_NUM; + value.as.num = num; + return value; #endif } diff --git a/src/wren_vm.c b/src/wren_vm.c index 2ca0a74e..fbc3b7b1 100644 --- a/src/wren_vm.c +++ b/src/wren_vm.c @@ -330,7 +330,7 @@ static ObjString* methodNotFound(WrenVM* vm, ObjClass* classObj, int symbol) { // Count the number of spaces to determine the number of parameters the // method expects. 
- const char* methodName = vm->methodNames.data[symbol]; + const char* methodName = vm->methodNames.data[symbol].buffer; int methodLength = (int)strlen(methodName); int numParams = 0; @@ -1167,7 +1167,7 @@ static void defineMethod(WrenVM* vm, const char* className, // Create a name for the method, including its arity. char name[MAX_METHOD_SIGNATURE]; - strncpy(name, methodName, length); + memcpy(name, methodName, length); for (int i = 0; i < numParams; i++) { name[length++] = ' '; diff --git a/test/string/concatenation.wren b/test/string/concatenation.wren index a3417b46..cbc696ac 100644 --- a/test/string/concatenation.wren +++ b/test/string/concatenation.wren @@ -1 +1,4 @@ IO.print("a" + "b") // expect: ab + +// 8-bit clean. +IO.print(("a\0b" + "\0c") == "a\0b\0c") // expect: true diff --git a/test/string/contains.wren b/test/string/contains.wren index f86bcf58..3151c5ef 100644 --- a/test/string/contains.wren +++ b/test/string/contains.wren @@ -8,3 +8,9 @@ IO.print("something".contains("math")) // expect: false // Non-ASCII. IO.print("søméthîng".contains("méth")) // expect: true IO.print("søméthîng".contains("meth")) // expect: false + +// 8-bit clean. +IO.print("a\0b\0c".contains("\0")) // expect: true +IO.print("a\0b\0c".contains("b")) // expect: true +IO.print("a\0b\0c".contains("b\0c")) // expect: true +IO.print("a\0b\0c".contains("bc")) // expect: false diff --git a/test/string/count.wren b/test/string/count.wren index d7af9f3b..5fe3981a 100644 --- a/test/string/count.wren +++ b/test/string/count.wren @@ -1,2 +1,8 @@ IO.print("".count) // expect: 0 IO.print("a string".count) // expect: 8 + +// 8-bit clean. 
+IO.print("\0".count) // expect: 1 +IO.print("a\0b".count) // expect: 3 +IO.print("\0c".count) // expect: 2 +IO.print(("a\0b" + "\0c").count) // expect: 5 diff --git a/test/string/ends_with.wren b/test/string/ends_with.wren index a55e6350..6fd81c29 100644 --- a/test/string/ends_with.wren +++ b/test/string/ends_with.wren @@ -7,3 +7,9 @@ IO.print("abcd".endsWith("")) // expect: true // Non-ASCII. IO.print("søméthîng".endsWith("thîng")) // expect: true IO.print("søméthîng".endsWith("thing")) // expect: false + +// 8-bit clean. +IO.print("a\0b\0c".endsWith("\0")) // expect: false +IO.print("a\0b\0c".endsWith("c")) // expect: true +IO.print("a\0b\0c".endsWith("\0c")) // expect: true +IO.print("a\0b\0c".endsWith("\0b")) // expect: false diff --git a/test/string/equality.wren b/test/string/equality.wren index 92a3aef7..56ae4800 100644 --- a/test/string/equality.wren +++ b/test/string/equality.wren @@ -21,3 +21,8 @@ IO.print("true" != true) // expect: true // Non-ASCII. IO.print("vålue" == "value") // expect: false IO.print("vålue" == "vålue") // expect: true + +// 8-bit clean. +IO.print("a\0b\0c" == "a") // expect: false +IO.print("a\0b\0c" == "abc") // expect: false +IO.print("a\0b\0c" == "a\0b\0c") // expect: true diff --git a/test/string/index_of.wren b/test/string/index_of.wren index 8ee67978..18544278 100644 --- a/test/string/index_of.wren +++ b/test/string/index_of.wren @@ -1,9 +1,24 @@ +IO.print("abcd".indexOf("")) // expect: 0 IO.print("abcd".indexOf("cd")) // expect: 2 IO.print("abcd".indexOf("a")) // expect: 0 +IO.print("abcd".indexOf("abcd")) // expect: 0 IO.print("abcd".indexOf("abcde")) // expect: -1 IO.print("abab".indexOf("ab")) // expect: 0 +// More complex cases. +IO.print("abcdefabcdefg".indexOf("defg")) // expect: 9 +IO.print("abcdabcdabcd".indexOf("dab")) // expect: 3 +IO.print("abcdabcdabcdabcd".indexOf("dabcdabc")) // expect: 3 +IO.print("abcdefg".indexOf("abcdef!")) // expect: -1 + // Non-ASCII. Note that it returns byte indices, not code points. 
IO.print("søméஃthîng".indexOf("e")) // expect: -1 IO.print("søméஃthîng".indexOf("m")) // expect: 3 IO.print("søméஃthîng".indexOf("thî")) // expect: 9 + +// 8-bit clean. +IO.print("a\0b\0c".indexOf("\0")) // expect: 1 +IO.print("a\0b\0c".indexOf("a")) // expect: 0 +IO.print("a\0b\0c".indexOf("b\0c")) // expect: 2 +IO.print("a\0b\0c".indexOf("a\0b\0c\0d")) // expect: -1 +IO.print("a\0b\0a\0b".indexOf("a\0b")) // expect: 0 diff --git a/test/string/iterate.wren b/test/string/iterate.wren index e79861b6..e8a5f1d2 100644 --- a/test/string/iterate.wren +++ b/test/string/iterate.wren @@ -14,3 +14,11 @@ IO.print(s.iterate(-1)) // expect: false // Nothing to iterate in an empty string. IO.print("".iterate(null)) // expect: false + +// 8-bit clean. +IO.print("a\0b\0c".iterate(null)) // expect: 0 +IO.print("a\0b\0c".iterate(0)) // expect: 1 +IO.print("a\0b\0c".iterate(1)) // expect: 2 +IO.print("a\0b\0c".iterate(2)) // expect: 3 +IO.print("a\0b\0c".iterate(3)) // expect: 4 +IO.print("a\0b\0c".iterate(4)) // expect: false diff --git a/test/string/iterator_value.wren b/test/string/iterator_value.wren index 343cee4d..6dd1d401 100644 --- a/test/string/iterator_value.wren +++ b/test/string/iterator_value.wren @@ -5,3 +5,11 @@ IO.print(s.iteratorValue(2)) // expect: ç // Iterator value in middle of UTF sequence is an empty string. IO.print(s.iteratorValue(3) == "") // expect: true IO.print(s.iteratorValue(4)) // expect: d + +// 8-bit clean. 
+var t = "a\0b\0c" +IO.print(t.iteratorValue(0) == "a") // expect: true +IO.print(t.iteratorValue(1) == "\0") // expect: true +IO.print(t.iteratorValue(2) == "b") // expect: true +IO.print(t.iteratorValue(3) == "\0") // expect: true +IO.print(t.iteratorValue(4) == "c") // expect: true diff --git a/test/string/join.wren b/test/string/join.wren index 6c1183c1..19a8f215 100644 --- a/test/string/join.wren +++ b/test/string/join.wren @@ -3,3 +3,8 @@ var str = "string" IO.print(str.join("") == str) // expect: true IO.print(str.join(", ")) // expect: s, t, r, i, n, g + +// 8-bit clean. +var ing = "a\0b\0c" +IO.print(ing.join("") == ing) // expect: true +IO.print(ing.join(", ") == "a, \0, b, \0, c") // expect: true diff --git a/test/string/starts_with.wren b/test/string/starts_with.wren index 856307e5..1b22e669 100644 --- a/test/string/starts_with.wren +++ b/test/string/starts_with.wren @@ -7,3 +7,8 @@ IO.print("abcd".startsWith("")) // expect: true // Non-ASCII. IO.print("søméthîng".startsWith("sømé")) // expect: true IO.print("søméthîng".startsWith("some")) // expect: false + +// 8-bit clean. +IO.print("a\0b\0c".startsWith("a")) // expect: true +IO.print("a\0b\0c".startsWith("a\0")) // expect: true +IO.print("a\0b\0c".startsWith("b\0")) // expect: false diff --git a/test/string/subscript.wren b/test/string/subscript.wren index 8212e32b..70c740aa 100644 --- a/test/string/subscript.wren +++ b/test/string/subscript.wren @@ -33,3 +33,10 @@ IO.print("søméஃthîng"[7] == "") // expect: true IO.print("søméஃthîng"[8] == "") // expect: true IO.print("søméஃ"[-1] == "") // expect: true IO.print("søméஃ"[-2] == "") // expect: true + +// 8-bit clean. 
+IO.print("a\0b\0c"[0] == "a") // expect: true +IO.print("a\0b\0c"[1] == "\0") // expect: true +IO.print("a\0b\0c"[2] == "b") // expect: true +IO.print("a\0b\0c"[3] == "\0") // expect: true +IO.print("a\0b\0c"[4] == "c") // expect: true diff --git a/test/string/to_string.wren b/test/string/to_string.wren index c7f11fd0..16d60d49 100644 --- a/test/string/to_string.wren +++ b/test/string/to_string.wren @@ -1,2 +1,7 @@ IO.print("".toString == "") // expect: true IO.print("blah".toString == "blah") // expect: true + +// 8-bit clean. +IO.print("a\0b\0c".toString == "a\0b\0c") // expect: true +IO.print("a\0b\0c".toString == "a") // expect: false +IO.print("a\0b\0c".toString) // expect: a