From 079cbb8f684de4e462194967e57b3da63b6858ac Mon Sep 17 00:00:00 2001 From: Christoffer Lerno Date: Thu, 27 Jun 2024 13:37:37 +0200 Subject: [PATCH] Updated module mangling, restrict module names. --- releasenotes.md | 2 + src/compiler/compiler_internal.h | 10 +++++ src/compiler/context.c | 22 ++++------ src/compiler/module.c | 25 +++++++++++ src/compiler/sema_casts.c | 1 + src/compiler/sema_decls.c | 75 +------------------------------- src/compiler/sema_expr.c | 5 +++ src/compiler/types.c | 5 +-- src/utils/lib.h | 1 + src/utils/stringutils.c | 16 +++++++ test/src/tester.py | 4 ++ 11 files changed, 76 insertions(+), 90 deletions(-) diff --git a/releasenotes.md b/releasenotes.md index 352980e1e..f9581414e 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -9,6 +9,8 @@ - Trailing body arguments may now be `&ref`, `#hash`, `$const` and `$Type` arguments. - "panic-msg" setting to suppress panic message output. - Require `@export` functions to have `@export` types. +- Disallow leading/trailing/duplicate '_' in module names. +- Updated mangling. ### Fixes - Error with unsigned compare in `@ensure` when early returning 0 #1207. diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index 090883b61..644892ade 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -2300,6 +2300,7 @@ bool lexer_next_token(Lexer *lexer); // --- Module functions +void module_copy_extern_name_to_buffer(Module *module); Decl *module_find_symbol(Module *module, const char *symbol); const char *module_create_object_file_name(Module *module); @@ -3833,4 +3834,13 @@ INLINE const char *section_from_id(SectionId id) return id ? global_context.section_list[id - 1] + SECTION_PREFIX_LEN : NULL; } +INLINE bool check_module_name(Path *path) +{ + if (!str_is_valid_module_name(path->module)) + { + RETURN_PRINT_ERROR_AT(false, path, "A module name may not have any uppercase characters, trailing, leading or double '_'"); + } + return true; +} + extern char swizzle[256]; diff --git a/src/compiler/context.c b/src/compiler/context.c index 3b52c636c..a80ff9ecf 100644 --- a/src/compiler/context.c +++ b/src/compiler/context.c @@ -50,19 +50,25 @@ static bool filename_to_module_in_buffer(const char *path) int name_len = last_dot - last_slash - 1; if (name_len < 1) return false; scratch_buffer_clear(); + bool last_was_underscore = true; for (int i = last_slash + 1; i < last_dot; i++) { char c = path[i]; if (char_is_letter(c) || char_is_digit(c)) { + last_was_underscore = false; c = (char)(char_is_upper(c) ? c + 'a' - 'A' : c); } else { + if (last_was_underscore) continue; c = '_'; + last_was_underscore = true; } scratch_buffer_append_char(c); } + if (last_was_underscore && scratch_buffer.len) scratch_buffer.len--; + if (!scratch_buffer.len) return false; return true; } @@ -78,11 +84,7 @@ bool context_set_module_from_filename(ParseContext *context) } TokenType type = TOKEN_IDENT; - const char *module_name = symtab_add(scratch_buffer.str, - scratch_buffer.len, - fnv1a(scratch_buffer.str, (uint32_t) scratch_buffer.len), - &type); - + const char *module_name = scratch_buffer_interned_as(&type); if (type != TOKEN_IDENT) { print_error(context, "Generating a filename from the file '%s' resulted in a name that is a reserved keyword, " @@ -99,10 +101,7 @@ bool context_set_module_from_filename(ParseContext *context) bool context_set_module(ParseContext *context, Path *path, const char **generic_parameters) { - if (!str_has_no_uppercase(path->module)) - { - RETURN_PRINT_ERROR_AT(false, path, "A module name may not have any uppercase characters."); - } + if (!check_module_name(path)) return false; return create_module_or_check_name(context->unit, path, generic_parameters); } @@ -267,10 +266,7 @@ bool unit_add_import(CompilationUnit *unit, Path *path, bool private_import) { DEBUG_LOG("SEMA: Add import of '%s'.", path->module); - if (!str_has_no_uppercase(path->module)) - { - RETURN_PRINT_ERROR_AT(false, path, "A module is not expected to have any uppercase characters, please change it."); - } + if (!check_module_name(path)) return false; Decl *import = decl_calloc(); import->span = path->span; diff --git a/src/compiler/module.c b/src/compiler/module.c index 45f509950..502271b3a 100644 --- a/src/compiler/module.c +++ b/src/compiler/module.c @@ -9,6 +9,31 @@ Decl *module_find_symbol(Module *module, const char *symbol) return htable_get(&module->symbols, (void*)symbol); } +void module_copy_extern_name_to_buffer(Module *module) +{ + if (module->extname) + { + scratch_buffer_append(module->extname); + return; + } + const char *name = module->name->module; + char c; + while ((c = *(name++)) != 0) + { + switch (c) + { + case ':': + assert(name[0] == ':'); + scratch_buffer_append_char('_'); + name++; + break; + default: + scratch_buffer_append_char(c); + break; + } + } +} + const char *module_create_object_file_name(Module *module) { scratch_buffer_clear(); diff --git a/src/compiler/sema_casts.c b/src/compiler/sema_casts.c index 0a58ac39f..60f6f103f 100644 --- a/src/compiler/sema_casts.c +++ b/src/compiler/sema_casts.c @@ -155,6 +155,7 @@ void cast_to_int_to_max_bit_size(SemaContext *context, Expr *lhs, Expr *rhs, Typ // Lhs is smaller than rhs, so widen it using the right type if (bit_size_left < bit_size_right) { + Type *to = lhs->type->type_kind < TYPE_U8 ? type_int_signed_by_bitsize(bit_size_right) : type_int_unsigned_by_bitsize(bit_size_right); diff --git a/src/compiler/sema_decls.c b/src/compiler/sema_decls.c index af54a3f5f..0be99c464 100644 --- a/src/compiler/sema_decls.c +++ b/src/compiler/sema_decls.c @@ -28,7 +28,7 @@ static inline bool sema_analyse_operator_element_at(SemaContext *context, Decl * static inline bool sema_analyse_operator_element_set(SemaContext *context, Decl *method); static inline bool sema_analyse_operator_len(Decl *method, SemaContext *context); static bool sema_check_operator_method_validity(SemaContext *context, Decl *method); -static void sema_recursively_import(Type *type); + static inline const char *method_name_by_decl(Decl *method_like); static bool sema_analyse_struct_union(SemaContext *context, Decl *decl, bool *erase_decl); @@ -243,7 +243,6 @@ static inline bool sema_analyse_struct_member(SemaContext *context, Decl *parent TypeInfo *type_info = type_infoptr(decl->var.type_info); if (!sema_resolve_type_info(context, type_info, RESOLVE_TYPE_ALLOW_FLEXIBLE)) return decl_poison(decl); Type *type = type_info->type; - if (is_export) sema_recursively_import(type); switch (type_storage_type(type)) { case STORAGE_NORMAL: @@ -949,71 +948,6 @@ ERROR: return decl_poison(decl); } -static void sema_recursively_import(Type *type) -{ - if (!type) return; - Decl *decl = type_no_export(type); - if (!decl) return; - decl->is_export = true; - if (decl->resolve_status != RESOLVE_DONE) return; - decl->extname = NULL; - assert(!decl->has_extname); - decl_set_external_name(decl); - switch (decl->decl_kind) - { - case DECL_POISONED: - case DECL_ATTRIBUTE: - case DECL_BODYPARAM: - case DECL_CT_ASSERT: - case DECL_CT_ECHO: - case DECL_CT_EXEC: - case DECL_CT_INCLUDE: - case DECL_DECLARRAY: - case DECL_DEFINE: - case DECL_ENUM_CONSTANT: - case DECL_ERASED: - case DECL_FAULTVALUE: - case DECL_FUNC: - case DECL_GLOBALS: - case DECL_IMPORT: - case DECL_LABEL: - case DECL_MACRO: - case DECL_INTERFACE: - case DECL_VAR: - UNREACHABLE - case DECL_BITSTRUCT: - sema_recursively_import(decl->bitstruct.base_type->type); - return; - case DECL_DISTINCT: - sema_recursively_import(decl->distinct->type); - return; - case DECL_ENUM: - sema_recursively_import(decl->enums.type_info->type); - FOREACH_BEGIN(Decl *param, decl->enums.parameters) - sema_recursively_import(param->type); - FOREACH_END(); - return; - case DECL_TYPEDEF: - sema_recursively_import(type->canonical); - return; - case DECL_FAULT: - return; - case DECL_FNTYPE: - sema_recursively_import(type_infoptr(decl->fntype_decl.rtype)->type); - FOREACH_BEGIN(Decl *param, decl->fntype_decl.params) - sema_recursively_import(param->type); - FOREACH_END(); - return; - case DECL_STRUCT: - case DECL_UNION: - FOREACH_BEGIN(Decl *param, decl->strukt.members) - sema_recursively_import(param->type); - FOREACH_END(); - return; - } - UNREACHABLE -} - static inline bool sema_analyse_signature(SemaContext *context, Signature *sig, TypeInfoId type_parent, bool is_export) { Variadic variadic_type = sig->variadic; @@ -1032,7 +966,6 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig, is_macro ? RESOLVE_TYPE_ALLOW_INFER : RESOLVE_TYPE_DEFAULT)) return false; rtype = rtype_info->type; - if (is_export) sema_recursively_import(rtype); if (sig->attrs.nodiscard) { if (type_is_void(rtype)) @@ -1069,8 +1002,6 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig, if (!sema_resolve_type_info(context, method_parent, is_macro ? RESOLVE_TYPE_MACRO_METHOD : RESOLVE_TYPE_FUNC_METHOD)) return false; } - if (is_export && method_parent) sema_recursively_import(method_parent->type); - // Fill in the type if the first parameter is lacking a type. if (method_parent && params && params[0] && !params[0]->var.type_info) { @@ -1142,7 +1073,6 @@ static inline bool sema_analyse_signature(SemaContext *context, Signature *sig, if (!sema_resolve_type_info(context, type_info, is_macro ? RESOLVE_TYPE_ALLOW_INFER : RESOLVE_TYPE_DEFAULT)) return decl_poison(param); - if (is_export) sema_recursively_import(type_info->type); param->type = type_info->type; } switch (var_kind) @@ -1313,7 +1243,6 @@ static inline bool sema_analyse_typedef(SemaContext *context, Decl *decl, bool * } TypeInfo *info = decl->typedef_decl.type_info; if (!sema_resolve_type_info(context, info, RESOLVE_TYPE_DEFAULT)) return false; - if (is_export) sema_recursively_import(info->type); decl->type->canonical = info->type->canonical; // Do we need anything else? return true; @@ -1337,8 +1266,6 @@ static inline bool sema_analyse_distinct(SemaContext *context, Decl *decl, bool // Optional isn't allowed of course. if (type_is_optional(info->type)) RETURN_SEMA_ERROR(decl, "You cannot create a distinct type from an optional."); - if (decl->is_export) sema_recursively_import(info->type); - // Distinct types drop the canonical part. info->type = info->type->canonical; return true; diff --git a/src/compiler/sema_expr.c b/src/compiler/sema_expr.c index 04cc78cc7..c602ccf0a 100644 --- a/src/compiler/sema_expr.c +++ b/src/compiler/sema_expr.c @@ -6088,6 +6088,11 @@ static bool sema_expr_analyse_comp(SemaContext *context, Expr *expr, Expr *left, type_quoted_error_string(left->type), type_quoted_error_string(right->type)); } + if (max->type_kind == TYPE_VECTOR && !is_equality_type_op) + { + RETURN_SEMA_ERROR(expr, "Vector types can only be tested for equality, for other comparison, use vector comparison functions."); + } + if (!type_is_comparable(max)) { if (type_is_user_defined(max->canonical)) diff --git a/src/compiler/types.c b/src/compiler/types.c index 569fe4728..e66038620 100644 --- a/src/compiler/types.c +++ b/src/compiler/types.c @@ -633,9 +633,8 @@ void type_mangle_introspect_name_to_buffer(Type *type) type = type->function.prototype->raw_type; if (type->function.decl) { - Module *module = decl_module(type->function.decl); - scratch_buffer_append(module->extname ? module->extname : module->name->module); - scratch_buffer_append_char('$'); + module_copy_extern_name_to_buffer(decl_module(type->function.decl)); + scratch_buffer_append("$"); scratch_buffer_append(type->name); } else diff --git a/src/utils/lib.h b/src/utils/lib.h index 7b0929808..c100d8fbb 100644 --- a/src/utils/lib.h +++ b/src/utils/lib.h @@ -143,6 +143,7 @@ bool str_eq(const char *str1, const char *str2); bool str_is_type(const char *string); bool str_is_integer(const char *string); bool str_has_no_uppercase(const char *string); +bool str_is_valid_module_name(const char *name); char *str_copy(const char *start, size_t str_len); StringSlice slice_next_token(StringSlice *slice, char separator); diff --git a/src/utils/stringutils.c b/src/utils/stringutils.c index f00c33d09..6e46fa44f 100644 --- a/src/utils/stringutils.c +++ b/src/utils/stringutils.c @@ -19,6 +19,22 @@ int str_findlist(const char *value, unsigned count, const char** elements) return -1; } + +bool str_is_valid_module_name(const char *name) +{ + char c = *name++; + if (c == '_' || char_is_upper(c)) return false; + while ((c = *(name++)) != '\0') + { + if (c == '_') + { + // Prevent trailing `_` and `__` + if (name[-2] == ':' || name[0] == '_' || name[0] == ':' || name[0] == '\0') return false; + } + if (char_is_upper(c)) return false; + } + return true; +} bool str_has_no_uppercase(const char *string) { char c; diff --git a/test/src/tester.py b/test/src/tester.py index 7442f2d8a..677b41799 100644 --- a/test/src/tester.py +++ b/test/src/tester.py @@ -257,7 +257,11 @@ class Issues: if current_line >= len(lines): self.set_failed() print(file.filename + " did not contain: \"" + line + "\"") + print(""); + print("File dump: --------------------------------------------------->") print("\n".join(lines) + "\n") + print("<---------------------------------------------------- " + file.filename + " ends.") + print(""); return if line in lines[current_line]: current_line += 1