From 2f97ef57d330df4047cf14f5d3f854df1b75e8e0 Mon Sep 17 00:00:00 2001 From: Peter Reijnders Date: Tue, 27 Jan 2015 15:05:53 +0100 Subject: [PATCH 01/21] Change Makefile to differentiate between static and shared library Previously, just the static libraries (libwren*.a) were created. In the Makefile documentation these were mentioned in the comments as shared. Now both variants will be created. --- .gitignore | 4 +++- Makefile | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 02fbb5f0..8830ad32 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ wrend wren-cpp libwren.a libwrend.a +libwren.so +libwrend.so libwren-cpp.a # XCode user-specific stuff. @@ -23,4 +25,4 @@ benchmark/baseline.txt # built docs get copied here, which is presumed to be a separate checkout of # the repo so they can be pushed to GitHub Pages. -gh-pages/ \ No newline at end of file +gh-pages/ diff --git a/Makefile b/Makefile index 32d86a10..153ef4cc 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ SOURCES := $(wildcard src/*.c) HEADERS := $(wildcard src/*.h) OBJECTS := $(SOURCES:.c=.o) -# Don't include main.c in the shared library. +# Don't include main.c in the libraries. DEBUG_OBJECTS := $(addprefix build/debug/, $(notdir $(OBJECTS))) RELEASE_OBJECTS := $(addprefix build/release/, $(notdir $(OBJECTS))) RELEASE_CPP_OBJECTS := $(addprefix build/release-cpp/, $(notdir $(OBJECTS))) @@ -38,18 +38,22 @@ RELEASE_CPP_LIB_OBJECTS := $(subst build/release-cpp/main.o,,$(RELEASE_CPP_OBJEC all: release clean: - @rm -rf build wren wrend libwren.a libwrend.a + @rm -rf build wren wrend libwren.a libwrend.a libwren.so libwrend.so prep: @mkdir -p build/debug build/release build/release-cpp # Debug build. -debug: prep wrend libwrend.a +debug: prep wrend libwrend.a libwrend.so -# Debug shared library. +# Debug static library. libwrend.a: $(DEBUG_LIB_OBJECTS) $(AR) $@ $^ +# Debug shared library. +libwrend.so: $(DEBUG_LIB_OBJECTS) + $(CC) $(DEBUG_CFLAGS) -shared -Wl,-soname,$@ -o $@ $^ + # Debug command-line interpreter. wrend: $(DEBUG_OBJECTS) $(CC) $(CFLAGS) $(DEBUG_CFLAGS) -Iinclude -o $@ $^ -lm @@ -59,12 +63,16 @@ build/debug/%.o: src/%.c include/wren.h $(HEADERS) $(CC) -c $(CFLAGS) $(DEBUG_CFLAGS) -Iinclude -o $@ $< # Release build. -release: prep wren libwren.a +release: prep wren libwren.a libwren.so -# Release shared library. +# Release static library. libwren.a: $(RELEASE_LIB_OBJECTS) $(AR) $@ $^ +# Release shared library. +libwren.so: $(RELEASE_LIB_OBJECTS) + $(CC) $(RELEASE_CFLAGS) -shared -Wl,-soname,$@ -o $@ $^ + # Release command-line interpreter. wren: $(RELEASE_OBJECTS) $(CC) $(CFLAGS) $(RELEASE_CFLAGS) -Iinclude -o $@ $^ -lm @@ -76,7 +84,7 @@ build/release/%.o: src/%.c include/wren.h $(HEADERS) # Release C++ build. release-cpp: prep wren-cpp libwren-cpp.a -# Release C++ shared lib +# Release C++ static lib libwren-cpp.a: $(RELEASE_CPP_LIB_OBJECTS) $(AR) $@ $^ From 3bed04e0b95f1a26c6dac530afb0c6822d13534b Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Wed, 28 Jan 2015 19:50:53 -0800 Subject: [PATCH 02/21] Support clang/OS X shared libraries in the Makefile. --- .gitignore | 2 ++ Makefile | 46 +++++++++++++++++++++++++--------------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 8830ad32..0471c9b2 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ libwren.a libwrend.a libwren.so libwrend.so +libwren.dylib +libwrend.dylib libwren-cpp.a # XCode user-specific stuff. diff --git a/Makefile b/Makefile index 153ef4cc..95cf6a65 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,16 @@ else CPPFLAGS += -fPIC endif +# Clang on Mac OS X has different flags and a different extension to build a +# shared library. +ifneq (,$(findstring darwin,$(TARGET_OS))) + SHARED_LIB_FLAGS = + SHARED_EXT = dylib +else + SHARED_LIB_FLAGS = -Wl,-soname,$@.so + SHARED_EXT = so +endif + # Files. SOURCES := $(wildcard src/*.c) HEADERS := $(wildcard src/*.h) @@ -38,21 +48,18 @@ RELEASE_CPP_LIB_OBJECTS := $(subst build/release-cpp/main.o,,$(RELEASE_CPP_OBJEC all: release clean: - @rm -rf build wren wrend libwren.a libwrend.a libwren.so libwrend.so + @rm -rf build wren wrend libwren libwrend prep: @mkdir -p build/debug build/release build/release-cpp # Debug build. -debug: prep wrend libwrend.a libwrend.so +debug: prep wrend libwrend -# Debug static library. -libwrend.a: $(DEBUG_LIB_OBJECTS) - $(AR) $@ $^ - -# Debug shared library. -libwrend.so: $(DEBUG_LIB_OBJECTS) - $(CC) $(DEBUG_CFLAGS) -shared -Wl,-soname,$@ -o $@ $^ +# Debug static and shared libraries. +libwrend: $(DEBUG_LIB_OBJECTS) + $(AR) $@.a $^ + $(CC) $(DEBUG_CFLAGS) -shared $(SHARED_LIB_FLAGS) -o $@.$(SHARED_EXT) $^ # Debug command-line interpreter. wrend: $(DEBUG_OBJECTS) @@ -63,15 +70,12 @@ build/debug/%.o: src/%.c include/wren.h $(HEADERS) $(CC) -c $(CFLAGS) $(DEBUG_CFLAGS) -Iinclude -o $@ $< # Release build. -release: prep wren libwren.a libwren.so +release: prep wren libwren -# Release static library. -libwren.a: $(RELEASE_LIB_OBJECTS) - $(AR) $@ $^ - -# Release shared library. -libwren.so: $(RELEASE_LIB_OBJECTS) - $(CC) $(RELEASE_CFLAGS) -shared -Wl,-soname,$@ -o $@ $^ +# Release static and shared libraries. +libwren: $(RELEASE_LIB_OBJECTS) + $(AR) $@.a $^ + $(CC) $(RELEASE_CFLAGS) -shared $(SHARED_LIB_FLAGS) -o $@.$(SHARED_EXT) $^ # Release command-line interpreter. wren: $(RELEASE_OBJECTS) @@ -82,11 +86,11 @@ build/release/%.o: src/%.c include/wren.h $(HEADERS) $(CC) -c $(CFLAGS) $(RELEASE_CFLAGS) -Iinclude -o $@ $< # Release C++ build. -release-cpp: prep wren-cpp libwren-cpp.a +release-cpp: prep wren-cpp libwren-cpp -# Release C++ static lib -libwren-cpp.a: $(RELEASE_CPP_LIB_OBJECTS) - $(AR) $@ $^ +# Release C++ static library. +libwren-cpp: $(RELEASE_CPP_LIB_OBJECTS) + $(AR) $@.a $^ # Release C++ command-line interpreter. wren-cpp: $(RELEASE_CPP_OBJECTS) From 5143b0707d3926c2d04a4074e2864f242762030f Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Thu, 29 Jan 2015 16:54:21 +0100 Subject: [PATCH 03/21] Keeping trace of symbols length in the symbol table. --- src/wren_utils.c | 27 ++++++++++++++------------- src/wren_utils.h | 9 ++++++++- src/wren_vm.c | 2 +- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/wren_utils.c b/src/wren_utils.c index 87e033aa..4c7ddadf 100644 --- a/src/wren_utils.c +++ b/src/wren_utils.c @@ -5,7 +5,7 @@ DEFINE_BUFFER(Byte, uint8_t); DEFINE_BUFFER(Int, int); -DEFINE_BUFFER(String, char*); +DEFINE_BUFFER(String, String); void wrenSymbolTableInit(WrenVM* vm, SymbolTable* symbols) { @@ -16,26 +16,28 @@ void wrenSymbolTableClear(WrenVM* vm, SymbolTable* symbols) { for (int i = 0; i < symbols->count; i++) { - wrenReallocate(vm, symbols->data[i], 0, 0); + wrenReallocate(vm, symbols->data[i].buffer, 0, 0); } wrenStringBufferClear(vm, symbols); } -int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, const char* name, - size_t length) +int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, + const char* name, size_t length) { - char* heapString = (char*)wrenReallocate(vm, NULL, 0, - sizeof(char) * (length + 1)); - strncpy(heapString, name, length); - heapString[length] = '\0'; + String symbol; + symbol.buffer = (char*)wrenReallocate(vm, NULL, 0, + sizeof(char) * (length + 1)); + strncpy(symbol.buffer, name, length); + symbol.buffer[length] = '\0'; + symbol.length = length; - wrenStringBufferWrite(vm, symbols, heapString); + wrenStringBufferWrite(vm, symbols, symbol); return symbols->count - 1; } int wrenSymbolTableEnsure(WrenVM* vm, SymbolTable* symbols, - const char* name, size_t length) + const char* name, size_t length) { // See if the symbol is already defined. int existing = wrenSymbolTableFind(symbols, name, length); @@ -51,9 +53,8 @@ int wrenSymbolTableFind(SymbolTable* symbols, const char* name, size_t length) // TODO: O(n). Do something better. for (int i = 0; i < symbols->count; i++) { - // TODO: strlen() here is gross. Symbol table should store lengths. - if (strlen(symbols->data[i]) == length && - strncmp(symbols->data[i], name, length) == 0) return i; + if (symbols->data[i].length == length && + strncmp(symbols->data[i].buffer, name, length) == 0) return i; } return -1; diff --git a/src/wren_utils.h b/src/wren_utils.h index e5ba483d..977d6c20 100644 --- a/src/wren_utils.h +++ b/src/wren_utils.h @@ -6,6 +6,13 @@ // Reusable data structures and other utility functions. +// A simple structure to keep trace of the string length as long as its data +// (including the null-terminator) +typedef struct { + char *buffer; + int length; +} String; + // We need buffers of a few different types. To avoid lots of casting between // void* and back, we'll use the preprocessor as a poor man's generics and let // it generate a few type-specific ones. @@ -50,7 +57,7 @@ DECLARE_BUFFER(Byte, uint8_t); DECLARE_BUFFER(Int, int); -DECLARE_BUFFER(String, char*); +DECLARE_BUFFER(String, String); // TODO: Change this to use a map. typedef StringBuffer SymbolTable; diff --git a/src/wren_vm.c b/src/wren_vm.c index 0bdf3585..548056f7 100644 --- a/src/wren_vm.c +++ b/src/wren_vm.c @@ -330,7 +330,7 @@ static ObjString* methodNotFound(WrenVM* vm, ObjClass* classObj, int symbol) { // Count the number of spaces to determine the number of parameters the // method expects. - const char* methodName = vm->methodNames.data[symbol]; + const char* methodName = vm->methodNames.data[symbol].buffer; int methodLength = (int)strlen(methodName); int numParams = 0; From 23cc8a5384c3c21bce94064b0b5d03cb5422d8cf Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Thu, 29 Jan 2015 17:08:05 +0100 Subject: [PATCH 04/21] Fixed debug symbols dump. --- src/wren_debug.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/wren_debug.c b/src/wren_debug.c index b855bb4a..3a4bfc77 100644 --- a/src/wren_debug.c +++ b/src/wren_debug.c @@ -92,7 +92,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int global = READ_SHORT(); printf("%-16s %5d '%s'\n", "LOAD_GLOBAL", global, - vm->globalNames.data[global]); + vm->globalNames.data[global].buffer); break; } @@ -100,7 +100,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int global = READ_SHORT(); printf("%-16s %5d '%s'\n", "STORE_GLOBAL", global, - vm->globalNames.data[global]); + vm->globalNames.data[global].buffer); break; } @@ -133,7 +133,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) int numArgs = bytecode[i - 1] - CODE_CALL_0; int symbol = READ_SHORT(); printf("CALL_%-11d %5d '%s'\n", numArgs, symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -158,7 +158,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) int numArgs = bytecode[i - 1] - CODE_SUPER_0; int symbol = READ_SHORT(); printf("SUPER_%-10d %5d '%s'\n", numArgs, symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -230,7 +230,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int symbol = READ_SHORT(); printf("%-16s %5d '%s'\n", "METHOD_INSTANCE", symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } @@ -238,7 +238,7 @@ static int debugPrintInstruction(WrenVM* vm, ObjFn* fn, int i, int* lastLine) { int symbol = READ_SHORT(); printf("%-16s %5d '%s'\n", "METHOD_STATIC", symbol, - vm->methodNames.data[symbol]); + vm->methodNames.data[symbol].buffer); break; } From a54bde729401f8299d6c80d105485b85378c73ed Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Fri, 30 Jan 2015 09:23:30 +0100 Subject: [PATCH 05/21] Fixing style typo. --- src/wren_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_utils.h b/src/wren_utils.h index 977d6c20..dc25e678 100644 --- a/src/wren_utils.h +++ b/src/wren_utils.h @@ -9,7 +9,7 @@ // A simple structure to keep trace of the string length as long as its data // (including the null-terminator) typedef struct { - char *buffer; + char* buffer; int length; } String; From 87b9fdd6202c43ddd3f4e9c620770dfec6ae2d89 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Fri, 30 Jan 2015 09:24:10 +0100 Subject: [PATCH 06/21] Using 'memcmp' in place of 'strncmp'. --- src/wren_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_utils.c b/src/wren_utils.c index 4c7ddadf..f79e6741 100644 --- a/src/wren_utils.c +++ b/src/wren_utils.c @@ -54,7 +54,7 @@ int wrenSymbolTableFind(SymbolTable* symbols, const char* name, size_t length) for (int i = 0; i < symbols->count; i++) { if (symbols->data[i].length == length && - strncmp(symbols->data[i].buffer, name, length) == 0) return i; + memcmp(symbols->data[i].buffer, name, length) == 0) return i; } return -1; From 98df1b2bd14a17e1b346433cb5982e98ba66c50e Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Sat, 31 Jan 2015 10:54:27 -0800 Subject: [PATCH 07/21] Add cast. --- src/wren_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_utils.c b/src/wren_utils.c index f79e6741..c98d7d7c 100644 --- a/src/wren_utils.c +++ b/src/wren_utils.c @@ -30,7 +30,7 @@ int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, sizeof(char) * (length + 1)); strncpy(symbol.buffer, name, length); symbol.buffer[length] = '\0'; - symbol.length = length; + symbol.length = (int)length; wrenStringBufferWrite(vm, symbols, symbol); return symbols->count - 1; From 593e5c8b35ef5e094b80cc3093a804b6431346ee Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Sun, 1 Feb 2015 15:12:37 -0800 Subject: [PATCH 08/21] Clean up how flex arrays are allocated. --- src/wren_value.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/wren_value.c b/src/wren_value.c index 1a0b376d..88723e36 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -32,8 +32,11 @@ DEFINE_BUFFER(Method, Method); #define ALLOCATE(vm, type) \ ((type*)wrenReallocate(vm, NULL, 0, sizeof(type))) -#define ALLOCATE_FLEX(vm, type, extra) \ - ((type*)wrenReallocate(vm, NULL, 0, sizeof(type) + extra)) + +#define ALLOCATE_FLEX(vm, mainType, arrayType, count) \ + ((mainType*)wrenReallocate(vm, NULL, 0, \ + sizeof(mainType) + sizeof(arrayType) * count)) + #define ALLOCATE_ARRAY(vm, type, count) \ ((type*)wrenReallocate(vm, NULL, 0, sizeof(type) * count)) @@ -131,7 +134,7 @@ void wrenBindMethod(WrenVM* vm, ObjClass* classObj, int symbol, Method method) ObjClosure* wrenNewClosure(WrenVM* vm, ObjFn* fn) { ObjClosure* closure = ALLOCATE_FLEX(vm, ObjClosure, - sizeof(Upvalue*) * fn->numUpvalues); + Upvalue*, fn->numUpvalues); initObj(vm, &closure->obj, OBJ_CLOSURE, vm->fnClass); closure->fn = fn; @@ -222,7 +225,7 @@ ObjFn* wrenNewFunction(WrenVM* vm, Value* constants, int numConstants, Value wrenNewInstance(WrenVM* vm, ObjClass* classObj) { ObjInstance* instance = ALLOCATE_FLEX(vm, ObjInstance, - classObj->numFields * sizeof(Value)); + Value, classObj->numFields); initObj(vm, &instance->obj, OBJ_INSTANCE, classObj); // Initialize fields to null. @@ -618,7 +621,7 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length) Value wrenNewUninitializedString(WrenVM* vm, size_t length) { - ObjString* string = ALLOCATE_FLEX(vm, ObjString, length + 1); + ObjString* string = ALLOCATE_FLEX(vm, ObjString, char, length + 1); initObj(vm, &string->obj, OBJ_STRING, vm->stringClass); string->length = (int)length; From 9917c168769ecce93847312b667eeecfb0025dfc Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Sun, 1 Feb 2015 15:26:33 -0800 Subject: [PATCH 09/21] Fix un-Nan-tagging mode. Thanks, Michel Hermier! --- src/wren_value.c | 8 ++++++-- src/wren_value.h | 29 +++++++++++++++++------------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/wren_value.c b/src/wren_value.c index 88723e36..18eb0480 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -404,6 +404,9 @@ static uint32_t hashValue(Value value) case TAG_NAN: return HASH_NAN; case TAG_NULL: return HASH_NULL; case TAG_TRUE: return HASH_TRUE; + default: + UNREACHABLE(); + return 0; } #else switch (value.type) @@ -413,10 +416,11 @@ static uint32_t hashValue(Value value) case VAL_NUM: return hashNumber(AS_NUM(value)); case VAL_TRUE: return HASH_TRUE; case VAL_OBJ: return hashObject(AS_OBJ(value)); + default: + UNREACHABLE(); + return 0; } #endif - UNREACHABLE(); - return 0; } // Inserts [key] and [value] in the array of [entries] with the given diff --git a/src/wren_value.h b/src/wren_value.h index 81915f28..f5c1a24b 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -96,8 +96,10 @@ typedef enum typedef struct { ValueType type; - double num; - Obj* obj; + union { + double num; + Obj* obj; + } as; } Value; #endif @@ -550,7 +552,7 @@ typedef struct #define AS_BOOL(value) ((value).type == VAL_TRUE) // Value -> Obj*. -#define AS_OBJ(v) ((v).obj) +#define AS_OBJ(v) ((v).as.obj) // Determines if [value] is a garbage-collected object or not. #define IS_OBJ(value) ((value).type == VAL_OBJ) @@ -561,10 +563,10 @@ typedef struct #define IS_UNDEFINED(value) ((value).type == VAL_UNDEFINED) // Singleton values. -#define FALSE_VAL ((Value){ VAL_FALSE, 0.0, NULL }) -#define NULL_VAL ((Value){ VAL_NULL, 0.0, NULL }) -#define TRUE_VAL ((Value){ VAL_TRUE, 0.0, NULL }) -#define UNDEFINED_VAL ((Value){ VAL_UNDEFINED, 0.0, NULL }) +#define FALSE_VAL ((Value){ VAL_FALSE }) +#define NULL_VAL ((Value){ VAL_NULL }) +#define TRUE_VAL ((Value){ VAL_TRUE }) +#define UNDEFINED_VAL ((Value){ VAL_UNDEFINED }) #endif @@ -697,8 +699,8 @@ static inline bool wrenValuesSame(Value a, Value b) return a == b; #else if (a.type != b.type) return false; - if (a.type == VAL_NUM) return a.num == b.num; - return a.obj == b.obj; + if (a.type == VAL_NUM) return a.as.num == b.as.num; + return a.as.obj == b.as.obj; #endif } @@ -742,7 +744,7 @@ static inline Value wrenObjectToValue(Obj* obj) #else Value value; value.type = VAL_OBJ; - value.obj = obj; + value.as.obj = obj; return value; #endif } @@ -755,7 +757,7 @@ static inline double wrenValueToNum(Value value) data.bits64 = value; return data.num; #else - return value.num; + return value.as.num; #endif } @@ -767,7 +769,10 @@ static inline Value wrenNumToValue(double num) data.num = num; return data.bits64; #else - return (Value){ VAL_NUM, n, NULL }; + Value value; + value.type = VAL_NUM; + value.as.num = num; + return value; #endif } From 6997b9475df605fbf2e777d68305091aa890533b Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:25:25 +0100 Subject: [PATCH 10/21] Adding '\0' escape sequence for null characters. --- src/wren_compiler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wren_compiler.c b/src/wren_compiler.c index ec96aaac..797c9bba 100644 --- a/src/wren_compiler.c +++ b/src/wren_compiler.c @@ -699,6 +699,7 @@ static void readString(Parser* parser) { case '"': addStringChar(parser, '"'); break; case '\\': addStringChar(parser, '\\'); break; + case '0': addStringChar(parser, '\0'); break; case 'a': addStringChar(parser, '\a'); break; case 'b': addStringChar(parser, '\b'); break; case 'f': addStringChar(parser, '\f'); break; From 866aeb7a3a3ac18c3b811cd7ce2ded470247c48d Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:29:15 +0100 Subject: [PATCH 11/21] Changing string size type to 'uint32_t'. --- src/wren_value.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_value.h b/src/wren_value.h index f5c1a24b..31a85fa9 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -110,7 +110,7 @@ typedef struct { Obj obj; // Does not include the null terminator. - int length; + uint32_t length; char value[FLEXIBLE_ARRAY]; } ObjString; From 875ee9e55a288133c1cb23df9de06be866415501 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:38:15 +0100 Subject: [PATCH 12/21] Replacing 'strstr()' with 'wrenStringFind()' to support 8-bit strings. Internally the Boyer-Moore-Horspool algorithm is used. --- src/wren_core.c | 6 +++--- src/wren_value.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ src/wren_value.h | 5 +++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/wren_core.c b/src/wren_core.c index dca76df8..76373e8b 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -1159,7 +1159,7 @@ DEF_NATIVE(string_contains) // Corner case, the empty string contains the empty string. if (string->length == 0 && search->length == 0) RETURN_TRUE; - RETURN_BOOL(strstr(string->value, search->value) != NULL); + RETURN_BOOL(wrenStringFind(vm, string, search) != string->length); } DEF_NATIVE(string_count) @@ -1191,9 +1191,9 @@ DEF_NATIVE(string_indexOf) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - char* firstOccurrence = strstr(string->value, search->value); + uint32_t index = wrenStringFind(vm, string, search); - RETURN_NUM(firstOccurrence ? firstOccurrence - string->value : -1); + RETURN_NUM(index == string->length ? -1 : (int)index); } DEF_NATIVE(string_iterate) diff --git a/src/wren_value.c b/src/wren_value.c index 18eb0480..6af0cd01 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -670,6 +670,59 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index) return value; } +// Implementing Boyer-Moore-Horspool string matching algorithm. +uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle) +{ + uint32_t length, last_index, index; + uint32_t shift[256]; // Full 8-bit alphabet + char last_char; + // We need to convert the 'char' value to 'unsigned char' in order not + // to get negative indexes. We use a union as an elegant alternative to + // explicit coercion (whose behaviour is not sure across compilers) + union { + char c; + unsigned char uc; + } value; + + // If the needle is longer than the haystack it won't be found. + if (needle->length > haystack->length) + { + return haystack->length; + } + + // Precalculate shifts. + last_index = needle->length - 1; + + for (index = 0; index < 256; index++) + { + shift[index] = needle->length; + } + for (index = 0; index < last_index; index++) + { + value.c = needle->value[index]; + shift[value.uc] = last_index - index; + } + + // Search, left to right. + last_char = needle->value[last_index]; + + length = haystack->length - needle->length; + + for (index = 0; index <= length; ) + { + value.c = haystack->value[index + last_index]; + if ((last_char == value.c) && + memcmp(haystack->value + index, needle->value, last_index) == 0) + { + return index; + } + index += shift[value.uc]; // Convert, same as above. + } + + // Not found. + return haystack->length; +} + Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value) { Upvalue* upvalue = ALLOCATE(vm, Upvalue); diff --git a/src/wren_value.h b/src/wren_value.h index f5c1a24b..ac688d27 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -667,6 +667,11 @@ ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right); // empty string. Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index); +// Search for the first occurence of [needle] within [haystack] and returns its +// zero-based offset. Returns [haystack->length] if [haystack] does not contain +// [needle]. +uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle); + // Creates a new open upvalue pointing to [value] on the stack. Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value); From d3ca27fb9a949b5bc465571e38a7f1b5deca1495 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:44:39 +0100 Subject: [PATCH 13/21] Changing 'wrenStringConcat()' to support null-containing strings. --- src/wren_core.c | 20 ++++++++++++-------- src/wren_value.c | 14 ++++++++------ src/wren_value.h | 7 +++++-- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/wren_core.c b/src/wren_core.c index dca76df8..e6917b9e 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -175,7 +175,7 @@ static bool validateFn(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_FN(args[index]) || IS_CLOSURE(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a function.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a function.", -1)); return false; } @@ -185,7 +185,7 @@ static bool validateNum(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_NUM(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a number.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a number.", -1)); return false; } @@ -196,7 +196,7 @@ static bool validateIntValue(WrenVM* vm, Value* args, double value, { if (trunc(value) == value) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be an integer.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be an integer.", -1)); return false; } @@ -226,7 +226,7 @@ static int validateIndexValue(WrenVM* vm, Value* args, int count, double value, // Check bounds. if (index >= 0 && index < count) return index; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " out of bounds.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " out of bounds.", -1)); return -1; } @@ -263,7 +263,7 @@ static bool validateString(WrenVM* vm, Value* args, int index, { if (IS_STRING(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, " must be a string.")); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a string.", -1)); return false; } @@ -1056,8 +1056,9 @@ DEF_NATIVE(object_toString) else if (IS_INSTANCE(args[0])) { ObjInstance* instance = AS_INSTANCE(args[0]); - RETURN_OBJ(wrenStringConcat(vm, "instance of ", - instance->obj.classObj->name->value)); + ObjString* name = instance->obj.classObj->name; + RETURN_OBJ(wrenStringConcat(vm, "instance of ", -1, + name->value, name->length)); } RETURN_VAL(wrenNewString(vm, "", 8)); @@ -1253,7 +1254,10 @@ DEF_NATIVE(string_toString) DEF_NATIVE(string_plus) { if (!validateString(vm, args, 1, "Right operand")) return PRIM_ERROR; - RETURN_OBJ(wrenStringConcat(vm, AS_CSTRING(args[0]), AS_CSTRING(args[1]))); + ObjString* left = AS_STRING(args[0]); + ObjString* right = AS_STRING(args[1]); + RETURN_OBJ(wrenStringConcat(vm, left->value, left->length, + right->value, right->length)); } DEF_NATIVE(string_subscript) diff --git a/src/wren_value.c b/src/wren_value.c index 18eb0480..2499759f 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -86,7 +86,8 @@ ObjClass* wrenNewClass(WrenVM* vm, ObjClass* superclass, int numFields, wrenPushRoot(vm, (Obj*)name); // Create the metaclass. - ObjString* metaclassName = wrenStringConcat(vm, name->value, " metaclass"); + ObjString* metaclassName = wrenStringConcat(vm, name->value, name->length, + " metaclass", -1); wrenPushRoot(vm, (Obj*)metaclassName); ObjClass* metaclass = wrenNewSingleClass(vm, 0, metaclassName); @@ -632,15 +633,16 @@ Value wrenNewUninitializedString(WrenVM* vm, size_t length) return OBJ_VAL(string); } -ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right) +ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, + const char* right, int rightLength) { - size_t leftLength = strlen(left); - size_t rightLength = strlen(right); + if (leftLength == -1) leftLength = (int)strlen(left); + if (rightLength == -1) rightLength = (int)strlen(right); Value value = wrenNewUninitializedString(vm, leftLength + rightLength); ObjString* string = AS_STRING(value); - strcpy(string->value, left); - strcpy(string->value + leftLength, right); + memcpy(string->value, left, leftLength); + memcpy(string->value + leftLength, right, rightLength); string->value[leftLength + rightLength] = '\0'; return string; diff --git a/src/wren_value.h b/src/wren_value.h index f5c1a24b..293903a6 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -659,8 +659,11 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length); // The caller is expected to fully initialize the buffer after calling. Value wrenNewUninitializedString(WrenVM* vm, size_t length); -// Creates a new string that is the concatenation of [left] and [right]. -ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right); +// Creates a new string that is the concatenation of [left] and [right] (of +// length [leftLength] and [rightLength], respectively). If -1 is passed +// the string length is automatically calculated. +ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, + const char* right, int rightLength); // Creates a new string containing the code point in [string] starting at byte // [index]. If [index] points into the middle of a UTF-8 sequence, returns an From 4c58830684fb9b43de1dfaa8b079ed702fd71458 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:47:09 +0100 Subject: [PATCH 14/21] Fixing minor string length comparison warning. --- src/wren_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_core.c b/src/wren_core.c index dca76df8..50450c69 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -1216,7 +1216,7 @@ DEF_NATIVE(string_iterate) do { index++; - if (index >= string->length) RETURN_FALSE; + if ((uint32_t)index >= string->length) RETURN_FALSE; } while ((string->value[index] & 0xc0) == 0x80); RETURN_NUM(index); From 4d1e0833fb3cb5a5b80f0f087bba886d28d9a98b Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:51:54 +0100 Subject: [PATCH 15/21] Getting rid of "strcpy()/strncpy()" in favour of "memcpy()". --- src/wren_compiler.c | 10 +++++----- src/wren_core.c | 2 +- src/wren_utils.c | 2 +- src/wren_value.c | 8 ++++---- src/wren_vm.c | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/wren_compiler.c b/src/wren_compiler.c index ec96aaac..1a03c889 100644 --- a/src/wren_compiler.c +++ b/src/wren_compiler.c @@ -1264,7 +1264,8 @@ static int copyName(Compiler* compiler, char* name) length = MAX_METHOD_NAME; } - strncpy(name, token->start, length); + memcpy(name, token->start, length); + name[length] = '\0'; return length; } @@ -2004,14 +2005,14 @@ static void super_(Compiler* compiler, bool allowAssignment) int length; if (enclosingClass != NULL) { length = enclosingClass->methodLength; - strncpy(name, enclosingClass->methodName, length); + memcpy(name, enclosingClass->methodName, length); } else { // We get here if super is used outside of a method. In that case, we // have already reported the error, so just stub this out so we can keep // going to try to find later errors. length = 0; - strncpy(name, "", length); } + name[length] = '\0'; // Call the superclass method with the same name. methodCall(compiler, CODE_SUPER_0, name, length); @@ -2097,8 +2098,7 @@ static void new_(Compiler* compiler, bool allowAssignment) methodSymbol(compiler, " instantiate", 12)); // Invoke the constructor on the new instance. - char name[MAX_METHOD_SIGNATURE]; - strcpy(name, "new"); + char name[MAX_METHOD_SIGNATURE] = "new"; methodCall(compiler, CODE_CALL_0, name, 3); } diff --git a/src/wren_core.c b/src/wren_core.c index dca76df8..fb3fa0ee 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -1179,7 +1179,7 @@ DEF_NATIVE(string_endsWith) if (search->length > string->length) RETURN_FALSE; int result = memcmp(string->value + string->length - search->length, - search->value, search->length); + search->value, search->length); RETURN_BOOL(result == 0); } diff --git a/src/wren_utils.c b/src/wren_utils.c index c98d7d7c..e38fac2b 100644 --- a/src/wren_utils.c +++ b/src/wren_utils.c @@ -28,7 +28,7 @@ int wrenSymbolTableAdd(WrenVM* vm, SymbolTable* symbols, String symbol; symbol.buffer = (char*)wrenReallocate(vm, NULL, 0, sizeof(char) * (length + 1)); - strncpy(symbol.buffer, name, length); + memcpy(symbol.buffer, name, length); symbol.buffer[length] = '\0'; symbol.length = (int)length; diff --git a/src/wren_value.c b/src/wren_value.c index 18eb0480..cc7c7fa7 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -199,7 +199,7 @@ ObjFn* wrenNewFunction(WrenVM* vm, Value* constants, int numConstants, // Copy the function's name. debug->name = ALLOCATE_ARRAY(vm, char, debugNameLength + 1); - strncpy(debug->name, debugName, debugNameLength); + memcpy(debug->name, debugName, debugNameLength); debug->name[debugNameLength] = '\0'; debug->sourceLines = sourceLines; @@ -616,7 +616,7 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length) ObjString* string = AS_STRING(wrenNewUninitializedString(vm, length)); // Copy the string (if given one). - if (length > 0) strncpy(string->value, text, length); + if (length > 0) memcpy(string->value, text, length); string->value[length] = '\0'; @@ -639,8 +639,8 @@ ObjString* wrenStringConcat(WrenVM* vm, const char* left, const char* right) Value value = wrenNewUninitializedString(vm, leftLength + rightLength); ObjString* string = AS_STRING(value); - strcpy(string->value, left); - strcpy(string->value + leftLength, right); + memcpy(string->value, left, leftLength); + memcpy(string->value + leftLength, right, rightLength); string->value[leftLength + rightLength] = '\0'; return string; diff --git a/src/wren_vm.c b/src/wren_vm.c index 548056f7..961f2198 100644 --- a/src/wren_vm.c +++ b/src/wren_vm.c @@ -1143,7 +1143,7 @@ static void defineMethod(WrenVM* vm, const char* className, // Create a name for the method, including its arity. char name[MAX_METHOD_SIGNATURE]; - strncpy(name, methodName, length); + memcpy(name, methodName, length); for (int i = 0; i < numParams; i++) { name[length++] = ' '; From 096e5cf33c87dc558c3d5c604c2425d9751e3e8d Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:53:59 +0100 Subject: [PATCH 16/21] Fixing empty search string corner case. --- src/wren_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wren_core.c b/src/wren_core.c index 76373e8b..9fa118cc 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -1156,8 +1156,8 @@ DEF_NATIVE(string_contains) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - // Corner case, the empty string contains the empty string. - if (string->length == 0 && search->length == 0) RETURN_TRUE; + // Corner case, the empty string is always contained. + if (search->length == 0) RETURN_TRUE; RETURN_BOOL(wrenStringFind(vm, string, search) != string->length); } From d73b19967dd087608f2dce779ffdaa7f242ebb18 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:58:16 +0100 Subject: [PATCH 17/21] Adding 8-bit strings test cases. --- test/string/concatenation.wren | 3 +++ test/string/contains.wren | 6 ++++++ test/string/count.wren | 6 ++++++ test/string/ends_with.wren | 6 ++++++ test/string/equality.wren | 5 +++++ test/string/index_of.wren | 7 +++++++ test/string/iterate.wren | 8 ++++++++ test/string/iterator_value.wren | 8 ++++++++ test/string/join.wren | 5 +++++ test/string/starts_with.wren | 5 +++++ test/string/subscript.wren | 7 +++++++ test/string/to_string.wren | 5 +++++ 12 files changed, 71 insertions(+) diff --git a/test/string/concatenation.wren b/test/string/concatenation.wren index a3417b46..60df5588 100644 --- a/test/string/concatenation.wren +++ b/test/string/concatenation.wren @@ -1 +1,4 @@ IO.print("a" + "b") // expect: ab + +// 8-bit clean +IO.print(("a\0b" + "\0c") == "a\0b\0c") // expect: true diff --git a/test/string/contains.wren b/test/string/contains.wren index f86bcf58..301aedd1 100644 --- a/test/string/contains.wren +++ b/test/string/contains.wren @@ -8,3 +8,9 @@ IO.print("something".contains("math")) // expect: false // Non-ASCII. IO.print("søméthîng".contains("méth")) // expect: true IO.print("søméthîng".contains("meth")) // expect: false + +// 8-bit clean +IO.print("a\0b\0c".contains("\0")) // expect: true +IO.print("a\0b\0c".contains("b")) // expect: true +IO.print("a\0b\0c".contains("b\0c")) // expect: true +IO.print("a\0b\0c".contains("bc")) // expect: false diff --git a/test/string/count.wren b/test/string/count.wren index d7af9f3b..686f9b08 100644 --- a/test/string/count.wren +++ b/test/string/count.wren @@ -1,2 +1,8 @@ IO.print("".count) // expect: 0 IO.print("a string".count) // expect: 8 + +// 8-bit clean +IO.print("\0".count) // expect: 1 +IO.print("a\0b".count) // expect: 3 +IO.print("\0c".count) // expect: 2 +IO.print(("a\0b" + "\0c").count) // expect: 5 diff --git a/test/string/ends_with.wren b/test/string/ends_with.wren index a55e6350..9859adce 100644 --- a/test/string/ends_with.wren +++ b/test/string/ends_with.wren @@ -7,3 +7,9 @@ IO.print("abcd".endsWith("")) // expect: true // Non-ASCII. IO.print("søméthîng".endsWith("thîng")) // expect: true IO.print("søméthîng".endsWith("thing")) // expect: false + +// 8-bit clean +IO.print("a\0b\0c".endsWith("\0")) // expect: false +IO.print("a\0b\0c".endsWith("c")) // expect: true +IO.print("a\0b\0c".endsWith("\0c")) // expect: true +IO.print("a\0b\0c".endsWith("\0b")) // expect: false diff --git a/test/string/equality.wren b/test/string/equality.wren index 92a3aef7..91977516 100644 --- a/test/string/equality.wren +++ b/test/string/equality.wren @@ -21,3 +21,8 @@ IO.print("true" != true) // expect: true // Non-ASCII. IO.print("vålue" == "value") // expect: false IO.print("vålue" == "vålue") // expect: true + +// 8-bit clean +IO.print("a\0b\0c" == "a") // expect: false +IO.print("a\0b\0c" == "abc") // expect: false +IO.print("a\0b\0c" == "a\0b\0c") // expect: true diff --git a/test/string/index_of.wren b/test/string/index_of.wren index 8ee67978..b72c9a6a 100644 --- a/test/string/index_of.wren +++ b/test/string/index_of.wren @@ -7,3 +7,10 @@ IO.print("abab".indexOf("ab")) // expect: 0 IO.print("søméஃthîng".indexOf("e")) // expect: -1 IO.print("søméஃthîng".indexOf("m")) // expect: 3 IO.print("søméஃthîng".indexOf("thî")) // expect: 9 + +// 8-bit clean +IO.print("a\0b\0c".indexOf("\0")) // expect: 1 +IO.print("a\0b\0c".indexOf("a")) // expect: 0 +IO.print("a\0b\0c".indexOf("b\0c")) // expect: 2 +IO.print("a\0b\0c".indexOf("a\0b\0c\0d")) // expect: -1 +IO.print("a\0b\0a\0b".indexOf("a\0b")) // expect: 0 diff --git a/test/string/iterate.wren b/test/string/iterate.wren index e79861b6..9a32d570 100644 --- a/test/string/iterate.wren +++ b/test/string/iterate.wren @@ -14,3 +14,11 @@ IO.print(s.iterate(-1)) // expect: false // Nothing to iterate in an empty string. IO.print("".iterate(null)) // expect: false + +// 8-bit clean +IO.print("a\0b\0c".iterate(null)) // expect: 0 +IO.print("a\0b\0c".iterate(0)) // expect: 1 +IO.print("a\0b\0c".iterate(1)) // expect: 2 +IO.print("a\0b\0c".iterate(2)) // expect: 3 +IO.print("a\0b\0c".iterate(3)) // expect: 4 +IO.print("a\0b\0c".iterate(4)) // expect: false diff --git a/test/string/iterator_value.wren b/test/string/iterator_value.wren index 343cee4d..5bc74405 100644 --- a/test/string/iterator_value.wren +++ b/test/string/iterator_value.wren @@ -5,3 +5,11 @@ IO.print(s.iteratorValue(2)) // expect: ç // Iterator value in middle of UTF sequence is an empty string. IO.print(s.iteratorValue(3) == "") // expect: true IO.print(s.iteratorValue(4)) // expect: d + +// 8-bit clean +var t = "a\0b\0c" +IO.print(t.iteratorValue(0) == "a") // expect: true +IO.print(t.iteratorValue(1) == "\0") // expect: true +IO.print(t.iteratorValue(2) == "b") // expect: true +IO.print(t.iteratorValue(3) == "\0") // expect: true +IO.print(t.iteratorValue(4) == "c") // expect: true diff --git a/test/string/join.wren b/test/string/join.wren index 6c1183c1..df71a543 100644 --- a/test/string/join.wren +++ b/test/string/join.wren @@ -3,3 +3,8 @@ var str = "string" IO.print(str.join("") == str) // expect: true IO.print(str.join(", ")) // expect: s, t, r, i, n, g + +// 8-bit clean +var ing = "a\0b\0c" +IO.print(ing.join("") == ing) // expect: true +IO.print(ing.join(", ") == "a, \0, b, \0, c") // expect: true diff --git a/test/string/starts_with.wren b/test/string/starts_with.wren index 856307e5..e537e6ae 100644 --- a/test/string/starts_with.wren +++ b/test/string/starts_with.wren @@ -7,3 +7,8 @@ IO.print("abcd".startsWith("")) // expect: true // Non-ASCII. IO.print("søméthîng".startsWith("sømé")) // expect: true IO.print("søméthîng".startsWith("some")) // expect: false + +// 8-bit clean +IO.print("a\0b\0c".startsWith("a")) // expect: true +IO.print("a\0b\0c".startsWith("a\0")) // expect: true +IO.print("a\0b\0c".startsWith("b\0")) // expect: false diff --git a/test/string/subscript.wren b/test/string/subscript.wren index 8212e32b..4556fcc6 100644 --- a/test/string/subscript.wren +++ b/test/string/subscript.wren @@ -33,3 +33,10 @@ IO.print("søméஃthîng"[7] == "") // expect: true IO.print("søméஃthîng"[8] == "") // expect: true IO.print("søméஃ"[-1] == "") // expect: true IO.print("søméஃ"[-2] == "") // expect: true + +// 8-bit clean +IO.print("a\0b\0c"[0] == "a") // expect: true +IO.print("a\0b\0c"[1] == "\0") // expect: true +IO.print("a\0b\0c"[2] == "b") // expect: true +IO.print("a\0b\0c"[3] == "\0") // expect: true +IO.print("a\0b\0c"[4] == "c") // expect: true diff --git a/test/string/to_string.wren b/test/string/to_string.wren index c7f11fd0..8a583864 100644 --- a/test/string/to_string.wren +++ b/test/string/to_string.wren @@ -1,2 +1,7 @@ IO.print("".toString == "") // expect: true IO.print("blah".toString == "blah") // expect: true + +// 8-bit clean +IO.print("a\0b\0c".toString == "a\0b\0c") // expect: true +IO.print("a\0b\0c".toString == "a") // expect: false +IO.print("a\0b\0c".toString) // expect: a From 8af1a787e2d661e5124a3819dac38026fe9b5424 Mon Sep 17 00:00:00 2001 From: Marco Lizza Date: Wed, 4 Feb 2015 13:59:53 +0100 Subject: [PATCH 18/21] Getting rid of constant unsigned error when compiling for C++ on GCC. --- src/wren_value.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wren_value.c b/src/wren_value.c index 18eb0480..07de5111 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -365,7 +365,7 @@ static uint32_t hashObject(Obj* object) ObjString* string = (ObjString*)object; // FNV-1a hash. See: http://www.isthe.com/chongo/tech/comp/fnv/ - uint32_t hash = 2166136261; + uint32_t hash = 2166136261u; // We want the contents of the string to affect the hash, but we also // want to ensure it runs in constant time. We also don't want to bias From aa9cfffb3479faeae53c09ad23a6767957e97a0c Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Wed, 4 Feb 2015 08:15:35 -0800 Subject: [PATCH 19/21] Keep code within 80 columns. --- src/wren_core.c | 12 ++++++++---- src/wren_value.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/wren_core.c b/src/wren_core.c index 6477bf10..8d5420a1 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -175,7 +175,8 @@ static bool validateFn(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_FN(args[index]) || IS_CLOSURE(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a function.", -1)); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a function.", -1)); return false; } @@ -185,7 +186,8 @@ static bool validateNum(WrenVM* vm, Value* args, int index, const char* argName) { if (IS_NUM(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a number.", -1)); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a number.", -1)); return false; } @@ -196,7 +198,8 @@ static bool validateIntValue(WrenVM* vm, Value* args, double value, { if (trunc(value) == value) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be an integer.", -1)); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be an integer.", -1)); return false; } @@ -263,7 +266,8 @@ static bool validateString(WrenVM* vm, Value* args, int index, { if (IS_STRING(args[index])) return true; - args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, " must be a string.", -1)); + args[0] = OBJ_VAL(wrenStringConcat(vm, argName, -1, + " must be a string.", -1)); return false; } diff --git a/src/wren_value.h b/src/wren_value.h index 293903a6..d2c59cd0 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -659,7 +659,7 @@ Value wrenNewString(WrenVM* vm, const char* text, size_t length); // The caller is expected to fully initialize the buffer after calling. Value wrenNewUninitializedString(WrenVM* vm, size_t length); -// Creates a new string that is the concatenation of [left] and [right] (of +// Creates a new string that is the concatenation of [left] and [right] (with // length [leftLength] and [rightLength], respectively). If -1 is passed // the string length is automatically calculated. ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, From 878be6fb6b7ee1ed3d0b96a7821bfdc716857dcd Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Wed, 4 Feb 2015 20:23:50 -0800 Subject: [PATCH 20/21] Clean up wrenStringFind() a bit. --- src/wren_core.c | 7 ++-- src/wren_value.c | 68 +++++++++++++++++++++------------------ src/wren_value.h | 2 +- test/string/index_of.wren | 8 +++++ 4 files changed, 48 insertions(+), 37 deletions(-) diff --git a/src/wren_core.c b/src/wren_core.c index 68ba963f..001a9c0c 100644 --- a/src/wren_core.c +++ b/src/wren_core.c @@ -1161,10 +1161,7 @@ DEF_NATIVE(string_contains) ObjString* string = AS_STRING(args[0]); ObjString* search = AS_STRING(args[1]); - // Corner case, the empty string is always contained. - if (search->length == 0) RETURN_TRUE; - - RETURN_BOOL(wrenStringFind(vm, string, search) != string->length); + RETURN_BOOL(wrenStringFind(vm, string, search) != UINT32_MAX); } DEF_NATIVE(string_count) @@ -1198,7 +1195,7 @@ DEF_NATIVE(string_indexOf) uint32_t index = wrenStringFind(vm, string, search); - RETURN_NUM(index == string->length ? -1 : (int)index); + RETURN_NUM(index == UINT32_MAX ? -1 : (int)index); } DEF_NATIVE(string_iterate) diff --git a/src/wren_value.c b/src/wren_value.c index 80d167e0..f6cdaa46 100644 --- a/src/wren_value.c +++ b/src/wren_value.c @@ -672,57 +672,63 @@ Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index) return value; } -// Implementing Boyer-Moore-Horspool string matching algorithm. +// Uses the Boyer-Moore-Horspool string matching algorithm. uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle) { - uint32_t length, last_index, index; - uint32_t shift[256]; // Full 8-bit alphabet - char last_char; - // We need to convert the 'char' value to 'unsigned char' in order not - // to get negative indexes. We use a union as an elegant alternative to - // explicit coercion (whose behaviour is not sure across compilers) - union { - char c; - unsigned char uc; - } value; + // Corner case, an empty needle is always found. + if (needle->length == 0) return 0; // If the needle is longer than the haystack it won't be found. - if (needle->length > haystack->length) - { - return haystack->length; - } + if (needle->length > haystack->length) return UINT32_MAX; - // Precalculate shifts. - last_index = needle->length - 1; + // Pre-calculate the shift table. For each character (8-bit value), we + // determine how far the search window can be advanced if that character is + // the last character in the haystack where we are searching for the needle + // and the needle doesn't match there. + uint32_t shift[UINT8_MAX]; + uint32_t needleEnd = needle->length - 1; - for (index = 0; index < 256; index++) + // By default, we assume the character is not the needle at all. In that case + // case, if a match fails on that character, we can advance one whole needle + // width since. + for (uint32_t index = 0; index < UINT8_MAX; index++) { shift[index] = needle->length; } - for (index = 0; index < last_index; index++) + + // Then, for every character in the needle, determine how far it is from the + // end. If a match fails on that character, we can advance the window such + // that it the last character in it lines up with the last place we could + // find it in the needle. + for (uint32_t index = 0; index < needleEnd; index++) { - value.c = needle->value[index]; - shift[value.uc] = last_index - index; + char c = needle->value[index]; + shift[(uint8_t)c] = needleEnd - index; } - // Search, left to right. - last_char = needle->value[last_index]; + // Slide the needle across the haystack, looking for the first match or + // stopping if the needle goes off the end. + char lastChar = needle->value[needleEnd]; + uint32_t range = haystack->length - needle->length; - length = haystack->length - needle->length; - - for (index = 0; index <= length; ) + for (uint32_t index = 0; index <= range; ) { - value.c = haystack->value[index + last_index]; - if ((last_char == value.c) && - memcmp(haystack->value + index, needle->value, last_index) == 0) + // Compare the last character in the haystack's window to the last character + // in the needle. If it matches, see if the whole needle matches. + char c = haystack->value[index + needleEnd]; + if (lastChar == c && + memcmp(haystack->value + index, needle->value, needleEnd) == 0) { + // Found a match. return index; } - index += shift[value.uc]; // Convert, same as above. + + // Otherwise, slide the needle forward. + index += shift[(uint8_t)c]; } // Not found. - return haystack->length; + return UINT32_MAX; } Upvalue* wrenNewUpvalue(WrenVM* vm, Value* value) diff --git a/src/wren_value.h b/src/wren_value.h index c99197b1..b50d1a0e 100644 --- a/src/wren_value.h +++ b/src/wren_value.h @@ -671,7 +671,7 @@ ObjString* wrenStringConcat(WrenVM* vm, const char* left, int leftLength, Value wrenStringCodePointAt(WrenVM* vm, ObjString* string, int index); // Search for the first occurence of [needle] within [haystack] and returns its -// zero-based offset. Returns [haystack->length] if [haystack] does not contain +// zero-based offset. Returns `UINT32_MAX` if [haystack] does not contain // [needle]. uint32_t wrenStringFind(WrenVM* vm, ObjString* haystack, ObjString* needle); diff --git a/test/string/index_of.wren b/test/string/index_of.wren index 8ee67978..7c8ca35b 100644 --- a/test/string/index_of.wren +++ b/test/string/index_of.wren @@ -1,8 +1,16 @@ +IO.print("abcd".indexOf("")) // expect: 0 IO.print("abcd".indexOf("cd")) // expect: 2 IO.print("abcd".indexOf("a")) // expect: 0 +IO.print("abcd".indexOf("abcd")) // expect: 0 IO.print("abcd".indexOf("abcde")) // expect: -1 IO.print("abab".indexOf("ab")) // expect: 0 +// More complex cases. +IO.print("abcdefabcdefg".indexOf("defg")) // expect: 9 +IO.print("abcdabcdabcd".indexOf("dab")) // expect: 3 +IO.print("abcdabcdabcdabcd".indexOf("dabcdabc")) // expect: 3 +IO.print("abcdefg".indexOf("abcdef!")) // expect: -1 + // Non-ASCII. Note that it returns byte indices, not code points. IO.print("søméஃthîng".indexOf("e")) // expect: -1 IO.print("søméஃthîng".indexOf("m")) // expect: 3 From 55caae343b5e12f8f8a06977198a6549aeda95e3 Mon Sep 17 00:00:00 2001 From: Bob Nystrom Date: Wed, 4 Feb 2015 20:26:29 -0800 Subject: [PATCH 21/21] Add "." to comments. :) --- test/string/concatenation.wren | 2 +- test/string/contains.wren | 2 +- test/string/count.wren | 2 +- test/string/ends_with.wren | 2 +- test/string/equality.wren | 2 +- test/string/index_of.wren | 2 +- test/string/iterate.wren | 2 +- test/string/iterator_value.wren | 2 +- test/string/join.wren | 2 +- test/string/starts_with.wren | 2 +- test/string/subscript.wren | 2 +- test/string/to_string.wren | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/string/concatenation.wren b/test/string/concatenation.wren index 60df5588..cbc696ac 100644 --- a/test/string/concatenation.wren +++ b/test/string/concatenation.wren @@ -1,4 +1,4 @@ IO.print("a" + "b") // expect: ab -// 8-bit clean +// 8-bit clean. IO.print(("a\0b" + "\0c") == "a\0b\0c") // expect: true diff --git a/test/string/contains.wren b/test/string/contains.wren index 301aedd1..3151c5ef 100644 --- a/test/string/contains.wren +++ b/test/string/contains.wren @@ -9,7 +9,7 @@ IO.print("something".contains("math")) // expect: false IO.print("søméthîng".contains("méth")) // expect: true IO.print("søméthîng".contains("meth")) // expect: false -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".contains("\0")) // expect: true IO.print("a\0b\0c".contains("b")) // expect: true IO.print("a\0b\0c".contains("b\0c")) // expect: true diff --git a/test/string/count.wren b/test/string/count.wren index 686f9b08..5fe3981a 100644 --- a/test/string/count.wren +++ b/test/string/count.wren @@ -1,7 +1,7 @@ IO.print("".count) // expect: 0 IO.print("a string".count) // expect: 8 -// 8-bit clean +// 8-bit clean. IO.print("\0".count) // expect: 1 IO.print("a\0b".count) // expect: 3 IO.print("\0c".count) // expect: 2 diff --git a/test/string/ends_with.wren b/test/string/ends_with.wren index 9859adce..6fd81c29 100644 --- a/test/string/ends_with.wren +++ b/test/string/ends_with.wren @@ -8,7 +8,7 @@ IO.print("abcd".endsWith("")) // expect: true IO.print("søméthîng".endsWith("thîng")) // expect: true IO.print("søméthîng".endsWith("thing")) // expect: false -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".endsWith("\0")) // expect: false IO.print("a\0b\0c".endsWith("c")) // expect: true IO.print("a\0b\0c".endsWith("\0c")) // expect: true diff --git a/test/string/equality.wren b/test/string/equality.wren index 91977516..56ae4800 100644 --- a/test/string/equality.wren +++ b/test/string/equality.wren @@ -22,7 +22,7 @@ IO.print("true" != true) // expect: true IO.print("vålue" == "value") // expect: false IO.print("vålue" == "vålue") // expect: true -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c" == "a") // expect: false IO.print("a\0b\0c" == "abc") // expect: false IO.print("a\0b\0c" == "a\0b\0c") // expect: true diff --git a/test/string/index_of.wren b/test/string/index_of.wren index b92a5a72..18544278 100644 --- a/test/string/index_of.wren +++ b/test/string/index_of.wren @@ -16,7 +16,7 @@ IO.print("søméஃthîng".indexOf("e")) // expect: -1 IO.print("søméஃthîng".indexOf("m")) // expect: 3 IO.print("søméஃthîng".indexOf("thî")) // expect: 9 -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".indexOf("\0")) // expect: 1 IO.print("a\0b\0c".indexOf("a")) // expect: 0 IO.print("a\0b\0c".indexOf("b\0c")) // expect: 2 diff --git a/test/string/iterate.wren b/test/string/iterate.wren index 9a32d570..e8a5f1d2 100644 --- a/test/string/iterate.wren +++ b/test/string/iterate.wren @@ -15,7 +15,7 @@ IO.print(s.iterate(-1)) // expect: false // Nothing to iterate in an empty string. IO.print("".iterate(null)) // expect: false -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".iterate(null)) // expect: 0 IO.print("a\0b\0c".iterate(0)) // expect: 1 IO.print("a\0b\0c".iterate(1)) // expect: 2 diff --git a/test/string/iterator_value.wren b/test/string/iterator_value.wren index 5bc74405..6dd1d401 100644 --- a/test/string/iterator_value.wren +++ b/test/string/iterator_value.wren @@ -6,7 +6,7 @@ IO.print(s.iteratorValue(2)) // expect: ç IO.print(s.iteratorValue(3) == "") // expect: true IO.print(s.iteratorValue(4)) // expect: d -// 8-bit clean +// 8-bit clean. var t = "a\0b\0c" IO.print(t.iteratorValue(0) == "a") // expect: true IO.print(t.iteratorValue(1) == "\0") // expect: true diff --git a/test/string/join.wren b/test/string/join.wren index df71a543..19a8f215 100644 --- a/test/string/join.wren +++ b/test/string/join.wren @@ -4,7 +4,7 @@ IO.print(str.join("") == str) // expect: true IO.print(str.join(", ")) // expect: s, t, r, i, n, g -// 8-bit clean +// 8-bit clean. var ing = "a\0b\0c" IO.print(ing.join("") == ing) // expect: true IO.print(ing.join(", ") == "a, \0, b, \0, c") // expect: true diff --git a/test/string/starts_with.wren b/test/string/starts_with.wren index e537e6ae..1b22e669 100644 --- a/test/string/starts_with.wren +++ b/test/string/starts_with.wren @@ -8,7 +8,7 @@ IO.print("abcd".startsWith("")) // expect: true IO.print("søméthîng".startsWith("sømé")) // expect: true IO.print("søméthîng".startsWith("some")) // expect: false -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".startsWith("a")) // expect: true IO.print("a\0b\0c".startsWith("a\0")) // expect: true IO.print("a\0b\0c".startsWith("b\0")) // expect: false diff --git a/test/string/subscript.wren b/test/string/subscript.wren index 4556fcc6..70c740aa 100644 --- a/test/string/subscript.wren +++ b/test/string/subscript.wren @@ -34,7 +34,7 @@ IO.print("søméஃthîng"[8] == "") // expect: true IO.print("søméஃ"[-1] == "") // expect: true IO.print("søméஃ"[-2] == "") // expect: true -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c"[0] == "a") // expect: true IO.print("a\0b\0c"[1] == "\0") // expect: true IO.print("a\0b\0c"[2] == "b") // expect: true diff --git a/test/string/to_string.wren b/test/string/to_string.wren index 8a583864..16d60d49 100644 --- a/test/string/to_string.wren +++ b/test/string/to_string.wren @@ -1,7 +1,7 @@ IO.print("".toString == "") // expect: true IO.print("blah".toString == "blah") // expect: true -// 8-bit clean +// 8-bit clean. IO.print("a\0b\0c".toString == "a\0b\0c") // expect: true IO.print("a\0b\0c".toString == "a") // expect: false IO.print("a\0b\0c".toString) // expect: a