From 0be291e0d72b32db21e26bc48f638fbbf8ca5083 Mon Sep 17 00:00:00 2001 From: Manu Linares Date: Fri, 6 Feb 2026 10:01:49 -0300 Subject: [PATCH] compiler: added `c3c fetch-msvc` command (#2854) * compiler: added `c3c fetch-msvc` command ported `msvc_build_libraries.py` to built-in compiler 'fetch-msvc' subcommand. * fix alpine build * fix windows build * fix WinHTTP download to correctly handle full URLs by splitting hostname and path. * fix old `cp` and escape args for `file_copy_file` also cleanup the temp directory when we are finished it would be better to use a cache :| * remove msiextract dependency * Auto-fetch `msvc_sdk` if missing - auto trigger `msvc_sdk` download when compiling for Windows if no SDK is detected. - make native Windows SDK detection non-fatal to allow fallback to the portable SDK - add debug logging for diagnostics * removed the LZX decompression logic - moved `msvc_sdk` to user cache locations to avoid permission issues. - refined terminal messaging for better accuracy and clarity. * use %LOCALAPPDATA% on Windows * Add support for C3_MSVC_SDK * Update release notes. 
--------- Co-authored-by: Christoffer Lerno --- CMakeLists.txt | 2 + releasenotes.md | 1 + src/build/build.h | 6 + src/build/build_options.c | 75 +++ src/build/builder.c | 2 + src/compiler/compiler_internal.h | 1 + src/compiler/linker.c | 51 +- src/compiler/windows_support.c | 41 +- src/main.c | 5 + src/utils/common.h | 5 + src/utils/fetch_msvc.c | 718 ++++++++++++++++++++++++++ src/utils/file_utils.c | 9 +- src/utils/find_msvc.c | 21 +- src/utils/http.c | 98 ++-- src/utils/json.c | 1 + src/utils/msi.c | 839 +++++++++++++++++++++++++++++++ src/utils/msi.h | 15 + 17 files changed, 1820 insertions(+), 70 deletions(-) create mode 100644 src/utils/fetch_msvc.c create mode 100644 src/utils/msi.c create mode 100644 src/utils/msi.h diff --git a/CMakeLists.txt b/CMakeLists.txt index cc588d10e..bf0e2f790 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -464,10 +464,12 @@ add_executable(c3c src/utils/whereami.c src/utils/cpus.c src/utils/unzipper.c + src/utils/msi.c src/compiler/c_codegen.c src/compiler/decltable.c src/compiler/methodtable.c src/compiler/mac_support.c + src/utils/fetch_msvc.c src/compiler/windows_support.c src/compiler/codegen_asm.c src/compiler/asm_target.c diff --git a/releasenotes.md b/releasenotes.md index afb5376a8..5a2c9495e 100644 --- a/releasenotes.md +++ b/releasenotes.md @@ -6,6 +6,7 @@ - Method resolution and `$define` now works together well unless definitions are out of order for real. - Improve error message when using functions as values #2856 - Improve support for Android with Termux. +- Integrated download of the MSVC SDK when compiling for Windows. ### Stdlib changes - Summarize sort macros as generic function wrappers to reduce the amount of generated code. 
#2831 diff --git a/src/build/build.h b/src/build/build.h index af8d2499f..a2a8d7e4a 100644 --- a/src/build/build.h +++ b/src/build/build.h @@ -48,6 +48,7 @@ typedef enum COMMAND_UNIT_TEST, COMMAND_PRINT_SYNTAX, COMMAND_PROJECT, + COMMAND_FETCH_MSVC, } CompilerCommand; typedef enum @@ -593,6 +594,10 @@ typedef struct BuildOptions_ bool print_input; bool run_once; bool suppress_run; + bool msvc_accept_license; + bool msvc_show_versions; + const char *msvc_version_override; + const char *msvc_sdk_version_override; bool old_slice_copy; bool old_enums; bool old_compact_eq; @@ -917,6 +922,7 @@ extern const int manifest_target_keys_count; extern const char *arch_os_target[ARCH_OS_TARGET_LAST + 1]; extern LinuxLibc default_libc; +void fetch_msvc(BuildOptions *options); BuildOptions parse_arguments(int argc, const char *argv[]); ArchOsTarget arch_os_target_from_string(const char *target); bool command_accepts_files(CompilerCommand command); diff --git a/src/build/build_options.c b/src/build/build_options.c index b337641df..d2ab4084b 100644 --- a/src/build/build_options.c +++ b/src/build/build_options.c @@ -45,6 +45,7 @@ const char *arch_os_target[ARCH_OS_TARGET_LAST + 1]; #define FAIL_WITH_ERR(string, ...) do { fprintf(stderr, "Error: " string "\n\n", ##__VA_ARGS__); usage(false); exit_compiler(EXIT_FAILURE); } while (0) /* NOLINT */ #define FAIL_WITH_ERR_LONG(string, ...) do { fprintf(stderr, "Error: " string "\n\n", ##__VA_ARGS__); usage(true); exit_compiler(EXIT_FAILURE); } while (0) /* NOLINT */ #define PROJECT_FAIL_WITH_ERR(string, ...) do { fprintf(stderr, "Error: " string "\n\n", ##__VA_ARGS__); project_usage(); exit_compiler(EXIT_FAILURE); } while (0) /* NOLINT */ +#define FETCH_MSVC_FAIL_WITH_ERR(string, ...) 
do { fprintf(stderr, "Error: " string "\n\n", ##__VA_ARGS__); fetch_msvc_usage(); exit_compiler(EXIT_FAILURE); } while (0) /* NOLINT */ static void usage(bool full) { @@ -70,6 +71,7 @@ static void usage(bool full) print_cmd("dynamic-lib [ ...]", "Compile files without a project into a dynamic library."); print_cmd("vendor-fetch ...", "Fetches one or more libraries from the vendor collection."); print_cmd("project ...", "Manipulate or view project files."); + print_cmd("fetch-msvc []", "Fetches the MSVC SDK required for cross-compiling."); PRINTF(""); full ? PRINTF("Options:") : PRINTF("Common options:"); print_opt("-h -hh --help", "Print the help, -h for the normal options, -hh for the full help."); @@ -233,6 +235,23 @@ static void usage(bool full) } } +static void fetch_msvc_usage() +{ + PRINTF("Usage: %s fetch-msvc []", args[0]); + PRINTF(""); + PRINTF("Fetches the MSVC SDK required for cross-compiling to Windows."); + PRINTF(""); + PRINTF("Options:"); + print_opt("--accept-license", "Automatically accept the MSVC license."); + print_opt("--show-versions", + "Show available MSVC and Windows SDK versions."); + print_opt("--msvc-version ", + "Specify a particular MSVC version to fetch."); + print_opt("--sdk-version ", + "Specify a particular Windows SDK version to fetch."); + PRINTF(""); +} + static void project_usage() { PRINTF("Usage: %s [] project []", args[0]); @@ -495,6 +514,62 @@ static void parse_command(BuildOptions *options) parse_project_options(options); return; } + if (arg_match("fetch-msvc")) + { + options->command = COMMAND_FETCH_MSVC; + while (!at_end() && next_is_opt()) + { + next_arg(); + if (match_longopt("accept-license")) + { + options->msvc_accept_license = true; + continue; + } + if (match_longopt("show-versions")) + { + options->msvc_show_versions = true; + continue; + } + if (match_longopt("msvc-version")) + { + if (at_end() || next_is_opt()) + error_exit("error: msvc-version needs a version."); + options->msvc_version_override = next_arg(); 
+ continue; + } + if (match_longopt("sdk-version")) + { + if (at_end() || next_is_opt()) + error_exit("error: sdk-version needs a version."); + options->msvc_sdk_version_override = next_arg(); + continue; + } + if (current_arg[0] == '-' && current_arg[1] == 'v') + { + options->verbosity_level = 1; + continue; + } + if (match_shortopt("q")) + { + options->verbosity_level = -1; + continue; + } + if (match_longopt("help") || match_shortopt("h")) + { + fetch_msvc_usage(); + exit_compiler(COMPILER_SUCCESS_EXIT); + } + FETCH_MSVC_FAIL_WITH_ERR("Unknown option '%s' for fetch-msvc", current_arg); + } + if (!at_end()) + { + next_arg(); + FETCH_MSVC_FAIL_WITH_ERR("fetch-msvc does not accept arguments, " + "only flags. Failed on: %s.", + current_arg); + } + return; + } FAIL_WITH_ERR("Cannot process the unknown command \"%s\".", current_arg); } diff --git a/src/build/builder.c b/src/build/builder.c index dcd26f957..5e8776538 100644 --- a/src/build/builder.c +++ b/src/build/builder.c @@ -132,6 +132,7 @@ bool command_accepts_files(CompilerCommand command) case COMMAND_TEST: case COMMAND_VENDOR_FETCH: case COMMAND_PROJECT: + case COMMAND_FETCH_MSVC: return false; } UNREACHABLE @@ -164,6 +165,7 @@ bool command_passes_args(CompilerCommand command) case COMMAND_PRINT_SYNTAX: case COMMAND_VENDOR_FETCH: case COMMAND_PROJECT: + case COMMAND_FETCH_MSVC: return false; } UNREACHABLE diff --git a/src/compiler/compiler_internal.h b/src/compiler/compiler_internal.h index b3f3b4e35..2f475b91e 100644 --- a/src/compiler/compiler_internal.h +++ b/src/compiler/compiler_internal.h @@ -2640,6 +2640,7 @@ bool arch_is_wasm(ArchType type); const char *macos_sysroot(void); MacSDK *macos_sysroot_sdk_information(const char *sdk_path); WindowsSDK *windows_get_sdk(void); +// This string may be in the scratch buffer const char *windows_cross_compile_library(void); void c_abi_func_create(Signature *sig, FunctionPrototype *proto, Expr **vaargs); diff --git a/src/compiler/linker.c b/src/compiler/linker.c index 
32dc1eada..c73fd31e5 100644 --- a/src/compiler/linker.c +++ b/src/compiler/linker.c @@ -95,31 +95,44 @@ static void linker_setup_windows(const char ***args_ref, Linker linker_type, con if (!compiler.build.win.sdk && !compiler.build.win.vs_dirs) { const char *path = windows_cross_compile_library(); + if (!path && !windows_get_sdk()) + { + BuildOptions options = { .verbosity_level = (compiler.build.silent || compiler.build.quiet) ? -1 : 0 }; + fetch_msvc(&options); + path = windows_cross_compile_library(); + } + // Note that path here may be allocated on the string scratch buffer. if (path) { + if (!compiler.build.quiet && !compiler.build.silent) + { + OUTF("Using MSVC SDK at: %s\n", path); + } + + const char *suffix = NULL; switch (compiler.platform.arch) { - case ARCH_TYPE_ARM: - scratch_buffer_append("/arm"); - break; - case ARCH_TYPE_AARCH64: - scratch_buffer_append("/arm64"); - break; - case ARCH_TYPE_X86_64: - scratch_buffer_append("/x64"); - break; - case ARCH_TYPE_X86: - scratch_buffer_append("/x86"); - break; - default: - UNREACHABLE_VOID + case ARCH_TYPE_ARM: suffix = "arm"; break; + case ARCH_TYPE_AARCH64: suffix = "arm64"; break; + case ARCH_TYPE_X86_64: suffix = "x64"; break; + case ARCH_TYPE_X86: suffix = "x86"; break; + default: break; } - if (file_exists(scratch_buffer_to_string())) + + if (suffix) { - compiler.build.win.sdk = scratch_buffer_copy(); - // If we only use the msvc cross compile on windows, we - // avoid linking with dynamic debug dlls. - link_with_dynamic_debug_libc = false; + char *full_path = file_append_path(path, suffix); + if (file_exists(full_path)) + { + compiler.build.win.sdk = full_path; + // If we only use the msvc cross compile on windows, we + // avoid linking with dynamic debug dlls. 
+ link_with_dynamic_debug_libc = false; + } + else + { + free(full_path); + } } } } diff --git a/src/compiler/windows_support.c b/src/compiler/windows_support.c index 5e6a0c8d9..248f41d40 100644 --- a/src/compiler/windows_support.c +++ b/src/compiler/windows_support.c @@ -24,7 +24,10 @@ WindowsSDK *windows_get_sdk(void) if (!sdk) { loaded = get_windows_paths(); - sdk = &loaded; + if (loaded.windows_sdk_path && loaded.vs_library_path) + { + sdk = &loaded; + } } return sdk; } @@ -40,5 +43,39 @@ WindowsSDK *windows_get_sdk(void) const char *windows_cross_compile_library(void) { - return find_rel_exe_dir("msvc_sdk"); + const char *local = find_rel_exe_dir("msvc_sdk"); + if (local && file_is_dir((char *)local)) return local; + + char *msvc_sdk = getenv("C3_MSVC_SDK"); + if (msvc_sdk && file_is_dir(msvc_sdk)) return msvc_sdk; + +#if PLATFORM_WINDOWS + char *app_data = getenv("LOCALAPPDATA"); + if (app_data) + { + scratch_buffer_clear(); + scratch_buffer_printf("%s/c3/msvc_sdk", app_data); + const char *path = scratch_buffer_to_string(); + if (file_is_dir(path)) return path; + } +#else + char *cache_home = getenv("XDG_CACHE_HOME"); + if (cache_home) + { + scratch_buffer_clear(); + scratch_buffer_printf("%s/c3/msvc_sdk", cache_home); + const char *path = scratch_buffer_to_string(); + if (file_is_dir(path)) return path; + } + + char *home = getenv("HOME"); + if (home) + { + scratch_buffer_clear(); + scratch_buffer_printf("%s/.cache/c3/msvc_sdk", home); + const char *path = scratch_buffer_to_string(); + if (file_is_dir(path)) return path; + } +#endif + return NULL; } \ No newline at end of file diff --git a/src/main.c b/src/main.c index 0f221915a..57bd98a07 100644 --- a/src/main.c +++ b/src/main.c @@ -131,6 +131,11 @@ int main_real(int argc, const char *argv[]) break; } break; + case COMMAND_FETCH_MSVC: + { + fetch_msvc(&build_options); + break; + } case COMMAND_MISSING: UNREACHABLE } diff --git a/src/utils/common.h b/src/utils/common.h index cb7901999..ed7d65915 100644 --- 
a/src/utils/common.h +++ b/src/utils/common.h @@ -36,9 +36,14 @@ #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 ) #define PLATFORM_WINDOWS 1 #define PLATFORM_POSIX 0 +#define STRCASECMP _stricmp +#define STRNCASECMP _strnicmp #else #define PLATFORM_WINDOWS 0 #define PLATFORM_POSIX 1 +#include +#define STRCASECMP strcasecmp +#define STRNCASECMP strncasecmp #endif #ifndef USE_PTHREAD diff --git a/src/utils/fetch_msvc.c b/src/utils/fetch_msvc.c new file mode 100644 index 000000000..2b3f2043a --- /dev/null +++ b/src/utils/fetch_msvc.c @@ -0,0 +1,718 @@ +#include +#include +#include + +#if defined(_WIN32) || defined(_WIN64) + #define WIN32_LEAN_AND_MEAN + #define TokenType WindowsTokenType + #define MAX_PRIORITY WindowsMAX_PRIORITY + #include + #undef TokenType + #undef MAX_PRIORITY + #define STRCASECMP _stricmp + #define STRNCASECMP _strnicmp +#else + #include + #include + #define STRCASECMP strcasecmp + #define STRNCASECMP strncasecmp +#endif + +#include "../compiler/compiler_internal.h" +#include "json.h" +#include "msi.h" +#include "whereami.h" + +#ifndef MAX_PATH + #if defined(PATH_MAX) + #define MAX_PATH PATH_MAX + #elif defined(_MAX_PATH) + #define MAX_PATH _MAX_PATH + #else + #define MAX_PATH 260 + #endif +#endif +#define MAX_PATH_ZIP_FILENAME 512 + +#define MANIFEST_URL "https://aka.ms/vs/17/release/channel" +#define VS_MANIFEST_ID "Microsoft.VisualStudio.Manifests.VisualStudio" +#define BUILD_TOOLS_ID "Microsoft.VisualStudio.Product.BuildTools" + +static char *get_sdk_output_path(void) +{ + char *env_path = NULL; +#if PLATFORM_WINDOWS + env_path = getenv("LOCALAPPDATA"); +#else + env_path = getenv("XDG_CACHE_HOME"); +#endif + + if (env_path) + { + return file_append_path(env_path, "c3/msvc_sdk"); + } + +#if !PLATFORM_WINDOWS + char *home = getenv("HOME"); + if (home) return file_append_path(home, ".cache/c3/msvc_sdk"); +#endif + + const char *path = find_executable_path(); + return file_append_path(path, "msvc_sdk"); +} + +static int 
verbose_level = 0; + +// Minimal dirent-like structure for Windows +#if PLATFORM_WINDOWS +struct dirent +{ + char d_name[MAX_PATH]; +}; +typedef struct +{ + HANDLE handle; + WIN32_FIND_DATAW data; + struct dirent entry; + bool first; +} DIR; + +static DIR *opendir(const char *name) +{ + DIR *dir = calloc(1, sizeof(DIR)); + char *search_path = str_printf("%s\\*", name); + uint16_t *wpath = win_utf8to16(search_path); + dir->handle = FindFirstFileW(wpath, &dir->data); + free(wpath); + if (dir->handle == INVALID_HANDLE_VALUE) + { + free(dir); + return NULL; + } + dir->first = true; + return dir; +} + +static struct dirent *readdir(DIR *dir) +{ + if (!dir->first && !FindNextFileW(dir->handle, &dir->data)) return NULL; + dir->first = false; + char *name = win_utf16to8(dir->data.cFileName); + strncpy(dir->entry.d_name, name, MAX_PATH); + free(name); + return &dir->entry; +} + +static void closedir(DIR *dir) +{ + if (dir) FindClose(dir->handle); + free(dir); +} +#endif + +static int version_compare(const char *v1, const char *v2) +{ + while (*v1 && *v2) + { + int n1 = atoi(v1); + int n2 = atoi(v2); + if (n1 > n2) return 1; + if (n1 < n2) return -1; + while (char_is_digit(*v1)) + v1++; + while (char_is_digit(*v2)) + v2++; + if (*v1 == '.') v1++; + if (*v2 == '.') v2++; + } + if (*v1) return 1; + if (*v2) return -1; + return 0; +} + +static char *pick_max_version(JSONObject *map) +{ + char *max_v = NULL; + FOREACH(const char *, key, map->keys) + { + if (!max_v || version_compare(key, max_v) > 0) max_v = (char *)key; + } + return max_v; +} + +static bool my_strcasestr(const char *h, const char *n) +{ + size_t nl = strlen(n); + for (; *h; h++) + if (STRNCASECMP(h, n, nl) == 0) return true; + return false; +} + +static char *find_folder_inf(const char *root, const char *pattern, bool exact) +{ + DIR *d = opendir(root); + if (!d) return NULL; + struct dirent *de; + char *found = NULL; + while ((de = readdir(d))) + { + if (de->d_name[0] == '.') continue; + char *path = (char 
*)file_append_path(root, de->d_name); + if (file_is_dir(path)) + { + bool match = exact ? (STRCASECMP(de->d_name, pattern) == 0) + : (my_strcasestr(de->d_name, pattern)); + if (match) + { + found = path; + break; + } + found = find_folder_inf(path, pattern, exact); + if (found) break; + } + } + closedir(d); + return found; +} + +static bool download_with_verification(const char *url, const char *name, + const char *dst) +{ + if (verbose_level >= 1) + { + printf("%s ... downloading", name); + fflush(stdout); + } + else if (verbose_level == 0) + { + printf("."); + fflush(stdout); + } + const char *err = download_file(url, "", dst); + if (err) + { + if (verbose_level >= 1) printf(" ... failed.\n"); + if (verbose_level >= 0) + eprintf("\nWarning: Download failed for %s: %s\n", name, err); + return false; + } + if (verbose_level >= 1) printf(" ... done.\n"); + return true; +} + +static void copy_to_msvc_sdk(const char *src, const char *dst) +{ + DIR *d = opendir(src); + if (!d) return; + dir_make_recursive((char *)dst); + struct dirent *de; + while ((de = readdir(d))) + { + if (de->d_name[0] == '.') continue; + char *s_path = file_append_path(src, de->d_name); + char *low_name = str_dup(de->d_name); + for (char *p = low_name; *p; p++) + *p = (char)tolower((unsigned char)*p); + char *d_path = file_append_path(dst, low_name); + + if (file_is_dir(s_path)) + { + copy_to_msvc_sdk(s_path, d_path); + } + else + { + if (str_eq(low_name, "msvcrt.lib")) + file_copy_file(s_path, (char *)file_append_path(dst, "MSVCRT.lib"), true); + else if (str_eq(low_name, "oldnames.lib")) + file_copy_file(s_path, (char *)file_append_path(dst, "OLDNAMES.lib"), true); + + file_copy_file(s_path, d_path, true); + } + } + closedir(d); +} + +static void extract_msvc_zip(const char *zip_path, const char *out_root) +{ + FILE *f = file_open_read(zip_path); + if (!f) error_exit("Failed to open %s", zip_path); + ZipDirIterator iter; + const char *err = zip_dir_iterator(f, &iter); + if (err) error_exit("Zip 
error: %s", err); + + ZipFile zfile; + const char *zip_content_prefix = "Contents/"; + size_t prefix_len = strlen(zip_content_prefix); + while (iter.current_file < iter.files) + { + err = zip_dir_iterator_next(&iter, &zfile); + if (err) error_exit("Zip iteration error: %s", err); + if (str_start_with(zfile.name, zip_content_prefix)) + { + char original_name[MAX_PATH_ZIP_FILENAME]; + memcpy(original_name, zfile.name, MAX_PATH_ZIP_FILENAME); + char *name = zfile.name; + memmove(name, name + prefix_len, strlen(name) - prefix_len + 1); + for (char *p = name; *p; p++) + { + if (*p == '\\') *p = '/'; + *p = (char)tolower((unsigned char)*p); + } + zip_file_write(f, &zfile, out_root, true); + memcpy(zfile.name, original_name, MAX_PATH_ZIP_FILENAME); + } + } + fclose(f); +} + +static void get_msi_cab_list(const char *msi_path, const char ***cabs) +{ + const size_t ext_len = 4; + const size_t guid_len = 32; + const size_t filename_len = guid_len + ext_len; + + size_t size = (size_t)-1; + unsigned char *buf = (unsigned char *)file_read_binary(msi_path, &size); + if (!buf) return; + for (size_t i = 0; i < size - ext_len; i++) + { + if (STRNCASECMP((char *)buf + i, ".cab", ext_len) == 0 && i >= guid_len) + { + char cab[128]; + memcpy(cab, buf + i - guid_len, filename_len); + cab[filename_len] = 0; + bool valid = true; + for (int j = 0; j < (int)guid_len; j++) + if (!char_is_hex(cab[j])) + { + valid = false; + break; + } + if (valid) + { + bool exists = false; + FOREACH(const char *, existing, *cabs) + { + if (STRCASECMP(existing, cab) == 0) + { + exists = true; + break; + } + } + if (!exists) vec_add(*cabs, str_dup(cab)); + } + } + } +} + +static void print_msvc_version(JSONObject *pkg, char out[128]) +{ + const char *id = json_map_get(pkg, "id")->str; + StringSlice slice = slice_from_string(id); + const int id_prefix_segments = 4; + for (int i = 0; i < id_prefix_segments; i++) + slice_next_token(&slice, '.'); + out[0] = 0; + const int max_version_segments = 20; + for (int i = 
0; i < max_version_segments; i++) + { + StringSlice v = slice_next_token(&slice, '.'); + if (slice_strcmp(v, "x86")) break; + if (i > 0) strcat(out, "."); + strncat(out, v.ptr, v.len); + } +} + +static void extract_msi(const char *mpath, const char *out_root, + const char *dl_root) +{ + if (verbose_level >= 1) + { + printf("Extracting MSI: %s\n", mpath); + fflush(stdout); + } + if (!msi_extract(mpath, out_root, dl_root, verbose_level >= 1)) + { + fprintf(stderr, "Failed to extract MSI: %s\n", mpath); + } +} + +static bool is_english_package(JSONObject *pkg) +{ + JSONObject *lang = json_map_get(pkg, "language"); + return !lang || STRCASECMP(lang->str, "en-US") == 0; +} + +static JSONObject *find_package_by_id(JSONObject *pkgs, const char *id) +{ + FOREACH(JSONObject *, pkg, pkgs->elements) + { + if (STRCASECMP(json_map_get(pkg, "id")->str, id) == 0) + { + if (is_english_package(pkg)) return pkg; + } + } + return NULL; +} + +static void collect_versions(JSONObject *pkgs, JSONObject **msvc_vers_out, + JSONObject **sdk_paths_out) +{ + JSONObject *msvc_vers = json_new_object(J_OBJECT); + JSONObject *sdk_paths = json_new_object(J_OBJECT); + + FOREACH(JSONObject *, pkg, pkgs->elements) + { + JSONObject *id_obj = json_map_get(pkg, "id"); + if (!id_obj) continue; + const char *id = id_obj->str; + if (str_start_with(id, "Microsoft.VisualStudio.Component.VC.") && + strstr(id, ".x86.x64")) + { + StringSlice slice = slice_from_string(id); + const int id_prefix_segments = 4; + for (int i = 0; i < id_prefix_segments; i++) + slice_next_token(&slice, '.'); + StringSlice v4 = slice_next_token(&slice, '.'); + if (!v4.len || !char_is_digit(v4.ptr[0])) continue; + StringSlice v5 = slice_next_token(&slice, '.'); + char *vkey = str_printf("%.*s.%.*s", (int)v4.len, v4.ptr, + (int)v5.len, v5.ptr); + json_map_set(msvc_vers, vkey, pkg); + } + else if (str_start_with(id, "Microsoft.VisualStudio.Component.Windows10SDK.") || + str_start_with(id, 
"Microsoft.VisualStudio.Component.Windows11SDK.")) + { + const char *last_dot = strrchr(id, '.'); + if (last_dot && char_is_digit(last_dot[1])) + json_map_set(sdk_paths, last_dot + 1, pkg); + } + } + *msvc_vers_out = msvc_vers; + *sdk_paths_out = sdk_paths; +} + +static JSONObject *load_manifest(const char *url, const char *path, const char *description) +{ + if (verbose_level >= 1) + { + printf("Downloading %s manifest...\n", description); + } + + const char *err = download_file(url, "", path); + if (err) error_exit("Failed to download %s manifest: %s", description, err); + if (verbose_level >= 1) + { + printf(" Done.\n"); + } + + size_t size; + char *json_str = file_read_all(path, &size); + JsonParser parser; + json_init_string(&parser, json_str); + JSONObject *obj = json_parse(&parser); + if (!obj || obj->type == J_ERROR) error_exit("Failed to parse %s manifest", description); + return obj; +} + +static void select_versions(BuildOptions *options, JSONObject *msvc_vers, JSONObject *sdk_paths, + char **msvc_key_out, char **sdk_key_out) +{ + char *msvc_key = (char *)options->msvc_version_override; + if (!msvc_key) + { + msvc_key = pick_max_version(msvc_vers); + } + else if (!json_map_get(msvc_vers, msvc_key)) + { + bool found = false; + FOREACH(const char *, key, msvc_vers->keys) + { + char full_v[128]; + print_msvc_version(json_map_get(msvc_vers, key), full_v); + if (str_eq(full_v, msvc_key)) + { + msvc_key = (char *)key; + found = true; + break; + } + } + if (!found) error_exit("Could not find MSVC version '%s'", options->msvc_version_override); + } + + char *sdk_key = (char *)options->msvc_sdk_version_override; + if (!sdk_key) sdk_key = pick_max_version(sdk_paths); + if (!json_map_get(sdk_paths, sdk_key)) error_exit("Could not find SDK version '%s'", sdk_key); + + *msvc_key_out = msvc_key; + *sdk_key_out = sdk_key; +} + +static bool check_license(JSONObject *rj1_channel_items, bool accept_all) +{ + if (accept_all) return true; + + JSONObject *tools = NULL; + 
FOREACH(JSONObject *, item, rj1_channel_items->elements) + { + JSONObject *id = json_map_get(item, "id"); + if (id && str_eq(id->str, BUILD_TOOLS_ID)) + { + tools = item; + break; + } + } + + const char *lic = ""; + if (tools) + { + JSONObject *res = json_map_get(tools, "localizedResources"); + FOREACH(JSONObject *, r, res->elements) + { + JSONObject *lang = json_map_get(r, "language"); + if (lang && (STRCASECMP(lang->str, "en-us") == 0 || STRCASECMP(lang->str, "en-US") == 0)) + { + lic = json_map_get(r, "license")->str; + break; + } + } + } + + printf("Do you accept the license %s? [y/N]: ", lic); + + char c = (char)getchar(); + return (c == 'y' || c == 'Y'); +} + +void fetch_msvc(BuildOptions *options) +{ + verbose_level = options->verbosity_level; + const char *tmp_dir_base = dir_make_temp_dir(); + if (!tmp_dir_base) error_exit("Failed to create temp directory"); + if (verbose_level >= 1) printf("Temp dir: %s\n", tmp_dir_base); + + const char *m1_path = file_append_path(tmp_dir_base, "vschannel.json"); + JSONObject *rj1 = load_manifest(MANIFEST_URL, m1_path, "channel"); + + JSONObject *vsm = NULL; + JSONObject *rj1_channel_items = json_map_get(rj1, "channelItems"); + FOREACH(JSONObject *, item, rj1_channel_items->elements) + { + JSONObject *id = json_map_get(item, "id"); + if (id && str_eq(id->str, VS_MANIFEST_ID)) + { + vsm = item; + break; + } + } + if (!vsm) error_exit("Could not find VS manifest entry in channel file"); + + JSONObject *payloads = json_map_get(vsm, "payloads"); + const char *vsu = json_map_get(payloads->elements[0], "url")->str; + const char *vs_path_manifest = file_append_path(tmp_dir_base, "vs_manifest.json"); + JSONObject *vsroot = load_manifest(vsu, vs_path_manifest, "VS packages"); + + + + JSONObject *pkgs = json_map_get(vsroot, "packages"); + JSONObject *msvc_vers, *sdk_paths; + collect_versions(pkgs, &msvc_vers, &sdk_paths); + + if (options->msvc_show_versions) + { + printf("Available MSVC versions:\n"); + FOREACH(const char *, key, 
msvc_vers->keys) + { + char full_v[128]; + print_msvc_version(json_map_get(msvc_vers, key), full_v); + printf(" %s (%s)\n", full_v, key); + } + printf("\nAvailable Windows SDK versions:\n"); + FOREACH(const char *, key, sdk_paths->keys) { printf(" %s\n", key); } + return; + } + + char *msvc_key, *sdk_key; + select_versions(options, msvc_vers, sdk_paths, &msvc_key, &sdk_key); + + JSONObject *msvc_pkg_obj = json_map_get(msvc_vers, msvc_key); + char full_msvc_v[128]; + print_msvc_version(msvc_pkg_obj, full_msvc_v); + + char *sdk_output = get_sdk_output_path(); + + if (verbose_level >= 1) printf("Selected: MSVC %s, SDK %s\n", full_msvc_v, sdk_key); + + if (!options->msvc_accept_license) + { +#if PLATFORM_WINDOWS + printf("To target windows-x64 you need the MSVC SDK.\n"); +#else + printf("To cross-compile to windows-x64 you need the MSVC SDK.\n"); +#endif + printf("Downloading version %s to %s.\n", full_msvc_v, sdk_output); + } + + if (!check_license(rj1_channel_items, options->msvc_accept_license)) + { + exit_compiler(EXIT_FAILURE); + } + + char *out_root = (char *)file_append_path(tmp_dir_base, "OUTPUT"); + char *dl_root = (char *)file_append_path(tmp_dir_base, "DL"); + dir_make_recursive(out_root); + dir_make_recursive(dl_root); + + if (verbose_level == 0) + { + printf("Downloading and extracting packages"); + fflush(stdout); + } + + const char *suffixes[] = {"asan.headers.base", "crt.x64.desktop.base", "crt.x64.store.base", "asan.x64.base"}; + for (int i = 0; i < ELEMENTLEN(suffixes); i++) + { + char *pid_part = str_printf("microsoft.vc.%s.%s", full_msvc_v, suffixes[i]); + JSONObject *best_pkg = find_package_by_id(pkgs, pid_part); + if (best_pkg) + { + JSONObject *payloads_arr = json_map_get(best_pkg, "payloads"); + FOREACH_IDX(j, JSONObject *, payload, payloads_arr->elements) + { + char *zpath = (char *)file_append_path(dl_root, str_printf("p%d_%lu.zip", i, (unsigned long)j)); + if (download_with_verification(json_map_get(payload, "url")->str, pid_part, zpath)) + { 
+ extract_msvc_zip(zpath, out_root); + } + } + if (verbose_level == 0) + { + printf("."); + fflush(stdout); + } + } + } + + JSONObject *sdk_comp = json_map_get(sdk_paths, sdk_key); + const char **sdk_pkg_ids = NULL; + JSONObject *deps_obj = json_map_get(sdk_comp, "dependencies"); + if (deps_obj && deps_obj->type == J_OBJECT) + { + FOREACH(const char *, dep, deps_obj->keys) vec_add(sdk_pkg_ids, dep); + } + + const char *msi_names[] = { + "Windows SDK for Windows Store Apps Libs-x86_en-us.msi", + "Windows SDK Desktop Libs x64-x86_en-us.msi", + "Universal CRT Headers Libraries and Sources-x86_en-us.msi"}; + const char **cab_list = NULL; + JSONObject **checked_pkgs = NULL; + + FOREACH(const char *, sid, sdk_pkg_ids) + { + JSONObject *pkg = find_package_by_id(pkgs, sid); + if (pkg) + { + vec_add(checked_pkgs, pkg); + JSONObject *p_deps = json_map_get(pkg, "dependencies"); + if (p_deps && p_deps->type == J_OBJECT) + { + FOREACH(const char *, pd_id, p_deps->keys) + { + JSONObject *ppkg = find_package_by_id(pkgs, pd_id); + if (ppkg) vec_add(checked_pkgs, ppkg); + } + } + } + } + + for (int i = 0; i < ELEMENTLEN(msi_names); i++) + { + FOREACH(JSONObject *, pkg, checked_pkgs) + { + JSONObject *pls = json_map_get(pkg, "payloads"); + if (!pls) continue; + FOREACH(JSONObject *, pl, pls->elements) + { + const char *f_name = json_map_get(pl, "fileName")->str; + if (STRCASECMP(filename(f_name), msi_names[i]) == 0) + { + char *mpath = (char *)file_append_path(dl_root, msi_names[i]); + if (download_with_verification(json_map_get(pl, "url")->str, msi_names[i], mpath)) + { + get_msi_cab_list(mpath, &cab_list); + } + goto NEXT_MSI; + } + } + } + NEXT_MSI:; + } + + FOREACH(const char *, cab, cab_list) + { + FOREACH(JSONObject *, pkg, checked_pkgs) + { + JSONObject *pls = json_map_get(pkg, "payloads"); + if (!pls) continue; + FOREACH(JSONObject *, pl, pls->elements) + { + const char *p_fname = json_map_get(pl, "fileName")->str; + if (STRCASECMP(filename(p_fname), cab) == 0) + { + 
download_with_verification(json_map_get(pl, "url")->str, cab, (char *)file_append_path(dl_root, cab)); + goto NEXT_CAB; + } + } + } + NEXT_CAB:; + } + + for (int i = 0; i < ELEMENTLEN(msi_names); i++) + { + char *mpath = (char *)file_append_path(dl_root, msi_names[i]); + if (file_exists(mpath)) + { + extract_msi(mpath, out_root, dl_root); + if (verbose_level == 0) + { + printf("."); + fflush(stdout); + } + } + } + + if (verbose_level == 0) + { + printf(" Done.\n"); + fflush(stdout); + } + + if (verbose_level >= 1) printf("Finalizing SDK\n"); + char *s_vc_root = find_folder_inf(out_root, "vc", false); + char *s_msvc_base = s_vc_root ? find_folder_inf(s_vc_root, "msvc", false) : NULL; + char *s_msvc = s_msvc_base ? find_folder_inf(s_msvc_base, "lib", true) : NULL; + + char *s_kits = find_folder_inf(out_root, "windows kits", false); + char *s_lib = s_kits ? find_folder_inf(s_kits, "lib", true) : NULL; + char *s_sdk_v = s_lib ? find_folder_inf(s_lib, sdk_key, false) : NULL; + char *s_ucrt = s_sdk_v ? find_folder_inf(s_sdk_v, "ucrt", true) : NULL; + char *s_um = s_sdk_v ? find_folder_inf(s_sdk_v, "um", true) : NULL; + + if (!s_ucrt || !s_um || !s_msvc) + { + if (verbose_level >= 0) + eprintf("UCRT: %s, UM: %s, MSVC: %s\n", s_ucrt ? "OK" : "MISSING", s_um ? "OK" : "MISSING", s_msvc ? 
"OK" : "MISSING"); + error_exit("Missing library components"); + } + + char *sdk_x64 = (char *)file_append_path(sdk_output, "x64"); + dir_make_recursive(sdk_x64); + copy_to_msvc_sdk(file_append_path(s_ucrt, "x64"), sdk_x64); + copy_to_msvc_sdk(file_append_path(s_um, "x64"), sdk_x64); + copy_to_msvc_sdk(file_append_path(s_msvc, "x64"), sdk_x64); + + if (verbose_level >= 0) printf("The 'msvc_sdk' directory was successfully generated at %s.\n", sdk_output); + + if (verbose_level == 0) file_delete_dir(tmp_dir_base); +} diff --git a/src/utils/file_utils.c b/src/utils/file_utils.c index d4954c6f9..71ed38d2d 100644 --- a/src/utils/file_utils.c +++ b/src/utils/file_utils.c @@ -637,8 +637,13 @@ void file_copy_file(const char *src_path, const char *dst_path, bool overwrite) #if (_MSC_VER) CopyFileW(win_utf8to16(src_path), win_utf8to16(dst_path), !overwrite); #else - const char *cmd = "cp %s %s %s"; - execute_cmd(str_printf(cmd, !overwrite ? "--update=none" : "--update=all", src_path, dst_path), true, NULL, 2048); + scratch_buffer_clear(); + scratch_buffer_append("cp "); + if (!overwrite) scratch_buffer_append("-u "); + scratch_buffer_append_cmd_argument(src_path); + scratch_buffer_append_char(' '); + scratch_buffer_append_cmd_argument(dst_path); + execute_cmd(scratch_buffer_to_string(), true, NULL, 2048); #endif } diff --git a/src/utils/find_msvc.c b/src/utils/find_msvc.c index 64541b55f..9bdf56351 100644 --- a/src/utils/find_msvc.c +++ b/src/utils/find_msvc.c @@ -20,7 +20,8 @@ WindowsSDK get_windows_paths() if (!root) { - error_exit("Failed to find windows kit root."); + DEBUG_LOG("Failed to find windows kit root."); + return out; } out.windows_sdk_path = find_best_version(root, "Lib"); @@ -28,7 +29,8 @@ WindowsSDK get_windows_paths() if (!out.windows_sdk_path) { free(root); - error_exit("Failed to find Lib dir in windows kit root."); + DEBUG_LOG("Failed to find Lib dir in windows kit root."); + return out; } char *windows_sdk_include_root = find_best_version(root, 
"Include"); @@ -36,10 +38,17 @@ WindowsSDK get_windows_paths() if (!windows_sdk_include_root) { free(root); - error_exit("Failed to find Include dir in windows kit root."); + DEBUG_LOG("Failed to find Include dir in windows kit root."); + return out; } char *vs_path = find_visual_studio(); + if (!vs_path) + { + free(root); + DEBUG_LOG("Failed to find Visual Studio installation."); + return out; + } scratch_buffer_clear(); scratch_buffer_printf("%s\\lib\\x64", vs_path); @@ -78,7 +87,8 @@ static char *find_visual_studio(void) // Call vswhere.exe if (!execute_cmd_failable(scratch_buffer_to_string(), &install_path, NULL, 0)) { - error_exit("Failed to find vswhere.exe to detect MSVC."); + DEBUG_LOG("Failed to find vswhere.exe to detect MSVC."); + return NULL; } // Find and read the version file. @@ -91,7 +101,8 @@ static char *find_visual_studio(void) if (version) version = str_trim(version); if (!version || strlen(version) == 0) { - error_exit("Failed to detect MSVC, could not read %s.", scratch_buffer_to_string()); + DEBUG_LOG("Failed to detect MSVC, could not read %s.", scratch_buffer_to_string()); + return NULL; } // We have the version, so we're done with the path: diff --git a/src/utils/http.c b/src/utils/http.c index ee102d59e..94a52196e 100644 --- a/src/utils/http.c +++ b/src/utils/http.c @@ -19,58 +19,66 @@ static inline wchar_t *char_to_wchar(const char *str) const char *download_file(const char *url, const char *resource, const char *file_path) { - - LPSTR pszOutBuffer; - bool results = false; - HINTERNET hSession = NULL, - hConnect = NULL, - hRequest = NULL; - + HINTERNET hSession = NULL, hConnect = NULL, hRequest = NULL; bool is_https = memcmp("https://", url, 8) == 0; - url = url + (is_https ? 8 : 7); + const char *hostname_and_path = url + (is_https ? 
8 : 7); + const char *slash = strchr(hostname_and_path, '/'); + const char *hostname = hostname_and_path; + const char *url_path = ""; + + if (slash) + { + hostname = str_copy(hostname_and_path, slash - hostname_and_path); + url_path = slash; + } // Use WinHttpOpen to obtain a session handle. - HINTERNET session = WinHttpOpen(L"C3C/1.0", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, - WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); + hSession = WinHttpOpen(L"C3C/1.0", WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, + WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); - if (!session) error_exit("Failed to create http session."); - // Specify an HTTP server. + if (!hSession) error_exit("Failed to create http session."); - wchar_t *wurl = char_to_wchar(url); - HINTERNET connect = WinHttpConnect(session, wurl, is_https ? INTERNET_DEFAULT_HTTPS_PORT : INTERNET_DEFAULT_HTTP_PORT, - 0); - if (!connect) error_exit("Failed to connect to '%s'", url); + DWORD redirect_policy = WINHTTP_OPTION_REDIRECT_POLICY_ALWAYS; + WinHttpSetOption(hSession, WINHTTP_OPTION_REDIRECT_POLICY, &redirect_policy, + sizeof(redirect_policy)); + + wchar_t *wurl = char_to_wchar(hostname); + hConnect = WinHttpConnect( + hSession, wurl, + is_https ? INTERNET_DEFAULT_HTTPS_PORT : INTERNET_DEFAULT_HTTP_PORT, 0); + if (!hConnect) error_exit("Failed to connect to '%s'", url); free(wurl); // Create an HTTP request handle. - wchar_t *wresource = char_to_wchar(resource); - HINTERNET request = WinHttpOpenRequest(connect, L"GET", wresource, NULL, - WINHTTP_NO_REFERER, - WINHTTP_DEFAULT_ACCEPT_TYPES, is_https ? WINHTTP_FLAG_SECURE : 0); + char *full_resource = str_cat(url_path, resource); + wchar_t *wresource = char_to_wchar(full_resource); + hRequest = WinHttpOpenRequest(hConnect, L"GET", wresource, NULL, + WINHTTP_NO_REFERER, + WINHTTP_DEFAULT_ACCEPT_TYPES, is_https ? 
WINHTTP_FLAG_SECURE : 0); - if (!connect) error_exit("Failed to connect to '%s'.", url); + if (!hRequest) error_exit("Failed to create request for '%s'.", url); free(wresource); - FILE* file = fopen(file_path, "w+b"); + FILE *file = fopen(file_path, "w+b"); if (!file) return str_printf("Failed to open file '%s' for output", file_path); - // Send a request. - bool result = WinHttpSendRequest(request, WINHTTP_NO_ADDITIONAL_HEADERS, 0, + bool result = WinHttpSendRequest(hRequest, WINHTTP_NO_ADDITIONAL_HEADERS, 0, WINHTTP_NO_REQUEST_DATA, 0, 0, 0); bool success = false; - if (!result || !WinHttpReceiveResponse(request, NULL)) goto END; - DWORD dwStatusCode = 0; + + if (!result || !WinHttpReceiveResponse(hRequest, NULL)) goto END; + DWORD dwSize = sizeof(dwStatusCode); - if (!WinHttpQueryHeaders(request, - WINHTTP_QUERY_STATUS_CODE | WINHTTP_QUERY_FLAG_NUMBER, - WINHTTP_HEADER_NAME_BY_INDEX, - &dwStatusCode, &dwSize, WINHTTP_NO_HEADER_INDEX)) + if (!WinHttpQueryHeaders(hRequest, + WINHTTP_QUERY_STATUS_CODE | WINHTTP_QUERY_FLAG_NUMBER, + WINHTTP_HEADER_NAME_BY_INDEX, + &dwStatusCode, &dwSize, WINHTTP_NO_HEADER_INDEX)) { error_exit("Failed to get status code when requesting 'http%s://%s%s'\n", - (is_https ? "s": ""), url, resource); + (is_https ? 
"s": ""), url, resource); } if (dwStatusCode != 200) goto END; @@ -79,17 +87,16 @@ const char *download_file(const char *url, const char *resource, const char *fil while (1) { DWORD dw_size = 0; - if (!WinHttpReadData(request, (LPVOID)buffer, sizeof(buffer), &dw_size)) goto END; + if (!WinHttpReadData(hRequest, (LPVOID)buffer, sizeof(buffer), &dw_size)) goto END; + if (dw_size == 0) break; fwrite(buffer, (size_t)dw_size, (size_t)1, file); - if (!WinHttpQueryDataAvailable(request, &dw_size)) goto END; - if (!dw_size) break; } success = true; END: fclose(file); - WinHttpCloseHandle(request); - WinHttpCloseHandle(connect); - WinHttpCloseHandle(session); + WinHttpCloseHandle(hRequest); + WinHttpCloseHandle(hConnect); + WinHttpCloseHandle(hSession); if (!success) { remove(file_path); @@ -110,8 +117,13 @@ const char *download_file(const char *url, const char *resource, const char *fil { CURL *curl_handle = curl_easy_init(); if (!curl_handle) error_exit("Could not initialize cURL subsystem."); - FILE* file = fopen(file_path, "w+b"); - if (!file) return str_printf("Failed to open file '%s' for output", file_path); + FILE *file = fopen(file_path, "w+b"); + if (!file) + { + curl_easy_cleanup(curl_handle); + return str_printf("Failed to open file '%s' for output", file_path); + } + const char *total_url = str_printf("%s%s", url, resource); curl_easy_setopt(curl_handle, CURLOPT_URL, total_url); curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L); @@ -122,14 +134,16 @@ const char *download_file(const char *url, const char *resource, const char *fil curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data); curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, file); CURLcode result = curl_easy_perform(curl_handle); - if (curl_easy_perform(curl_handle) != CURLE_OK) + if (result != CURLE_OK) { - fclose(file); remove(file_path); - return curl_easy_strerror(result); + const char *err_msg = str_dup(curl_easy_strerror(result)); + curl_easy_cleanup(curl_handle); + return 
err_msg; } fclose(file); + curl_easy_cleanup(curl_handle); return NULL; } diff --git a/src/utils/json.c b/src/utils/json.c index b890dcd06..44104f5aa 100644 --- a/src/utils/json.c +++ b/src/utils/json.c @@ -335,6 +335,7 @@ void json_map_set(JSONObject *obj, const char *key, JSONObject *value) if (str_eq(a_key, key)) { obj->members[i] = value; + return; } } vec_add(obj->members, value); diff --git a/src/utils/msi.c b/src/utils/msi.c new file mode 100644 index 000000000..cab84c7a3 --- /dev/null +++ b/src/utils/msi.c @@ -0,0 +1,839 @@ +#include "lib.h" +#include "json.h" +#include +#include +#include +#include +#include "miniz.h" + +#define OLE2_MAGIC "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" +#define OLE2_MINI_STREAM_CUTOFF 4096 + +typedef struct +{ + FILE *f; + size_t size; + uint32_t sector_size; + uint32_t mini_sector_size; + uint32_t *fat; + uint32_t fat_entries; + uint32_t *mini_fat; + uint32_t mini_fat_entries; + uint8_t *mini_stream; + size_t mini_stream_size; + uint32_t directory_first_sector; +} Ole2; + +static uint32_t read4(const uint8_t *d) { return d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); } +static uint16_t read2(const uint8_t *d) { return d[0] | (d[1] << 8); } + +static void ole2_free(Ole2 *ole) +{ + if (!ole) return; + if (ole->f) fclose(ole->f); + free(ole->fat); + free(ole->mini_fat); + free(ole->mini_stream); + free(ole); +} + +static uint8_t *ole2_read_stream(Ole2 *ole, uint32_t first_sector, uint32_t stream_size, size_t *out_size) +{ + if (stream_size == 0) + { + *out_size = 0; + return NULL; + } + uint8_t *res = cmalloc(stream_size); + if (!res) + { + *out_size = 0; + return NULL; + } + uint32_t remaining = stream_size; + uint32_t current = first_sector; + uint8_t *p = res; + + if (stream_size >= OLE2_MINI_STREAM_CUTOFF) + { + while (remaining > 0 && current < 0xFFFFFFFD) + { + if (current >= ole->fat_entries) break; + uint32_t to_read = (remaining < ole->sector_size) ? 
remaining : ole->sector_size; + fseek(ole->f, (long)(current + 1) * (long)ole->sector_size, SEEK_SET); + if (fread(p, 1, to_read, ole->f) != to_read) break; + p += to_read; + remaining -= to_read; + current = ole->fat[current]; + } + } + else + { + while (remaining > 0 && current < 0xFFFFFFFD) + { + if (current >= ole->mini_fat_entries) break; + uint32_t to_read = (remaining < ole->mini_sector_size) ? remaining : ole->mini_sector_size; + size_t offset = (size_t)current * ole->mini_sector_size; + if (offset + to_read <= ole->mini_stream_size) + { + memcpy(p, ole->mini_stream + offset, to_read); + } + p += to_read; + remaining -= to_read; + current = ole->mini_fat[current]; + } + } + *out_size = stream_size - remaining; + return res; +} + +static Ole2 *ole2_open(const char *path) +{ + FILE *f = file_open_read(path); + if (!f) return NULL; + + uint8_t header[512]; + if (fread(header, 1, 512, f) != 512 || memcmp(header, OLE2_MAGIC, 8) != 0) + { + fclose(f); + return NULL; + } + + Ole2 *ole = ccalloc(1, sizeof(Ole2)); + if (!ole) + { + fclose(f); + return NULL; + } + ole->f = f; + fseek(f, 0, SEEK_END); + ole->size = (size_t)ftell(f); + ole->sector_size = 1U << read2(header + 30); + ole->mini_sector_size = 1U << read2(header + 32); + + if (ole->sector_size < 512 || ole->sector_size > 4096) + { + ole2_free(ole); + return NULL; + } + + uint32_t fat_sectors = read4(header + 44); + ole->directory_first_sector = read4(header + 48); + uint32_t mini_fat_first_sector = read4(header + 60); + uint32_t mini_fat_sectors = read4(header + 64); + uint32_t difat_first_sector = read4(header + 68); + uint32_t difat_sectors = read4(header + 72); + + uint32_t entries_per_sector = ole->sector_size / 4; + size_t total_possible_sectors = ole->size / ole->sector_size + 1; + ole->fat_entries = (uint32_t)total_possible_sectors; + if (ole->fat_entries < 109 * entries_per_sector) ole->fat_entries = 109 * entries_per_sector; + + ole->fat = cmalloc(ole->fat_entries * 4); + if (!ole->fat) + { + 
ole2_free(ole); + return NULL; + } + memset(ole->fat, 0xFF, ole->fat_entries * 4); + + uint32_t fat_idx = 0; + for (int i = 0; i < 109 && fat_idx < fat_sectors; i++) + { + uint32_t s = read4(header + 76 + i * 4); + if (s >= 0xFFFFFFFD) continue; + fseek(f, (long)(s + 1) * (long)ole->sector_size, SEEK_SET); + uint32_t target_idx = fat_idx * entries_per_sector; + if (target_idx + entries_per_sector <= ole->fat_entries) + { + if (fread(ole->fat + target_idx, 1, ole->sector_size, f) != ole->sector_size) break; + fat_idx++; + } + } + + uint32_t curr_difat_s = difat_first_sector; + uint8_t *difat_buf = cmalloc(ole->sector_size); + while (curr_difat_s < 0xFFFFFFFD && fat_idx < fat_sectors && difat_buf) + { + fseek(f, (long)(curr_difat_s + 1) * (long)ole->sector_size, SEEK_SET); + if (fread(difat_buf, 1, ole->sector_size, f) != ole->sector_size) break; + for (uint32_t i = 0; i < entries_per_sector - 1 && fat_idx < fat_sectors; i++) + { + uint32_t s = read4(difat_buf + i * 4); + if (s >= 0xFFFFFFFD) continue; + fseek(f, (long)(s + 1) * (long)ole->sector_size, SEEK_SET); + uint32_t target_idx = fat_idx * entries_per_sector; + if (target_idx + entries_per_sector <= ole->fat_entries) + { + if (fread(ole->fat + target_idx, 1, ole->sector_size, f) != ole->sector_size) break; + fat_idx++; + } + } + curr_difat_s = read4(difat_buf + ole->sector_size - 4); + } + free(difat_buf); + + ole->mini_fat_entries = mini_fat_sectors * entries_per_sector; + if (ole->mini_fat_entries > 0) + { + ole->mini_fat = cmalloc(ole->mini_fat_entries * 4); + if (ole->mini_fat) + { + memset(ole->mini_fat, 0xFF, ole->mini_fat_entries * 4); + uint32_t m_fat_idx = 0; + uint32_t curr = mini_fat_first_sector; + while (curr < 0xFFFFFFFD && m_fat_idx < mini_fat_sectors) + { + if (curr >= ole->fat_entries) break; + fseek(f, (long)(curr + 1) * (long)ole->sector_size, SEEK_SET); + if (fread(ole->mini_fat + (size_t)m_fat_idx * entries_per_sector, 1, ole->sector_size, f) != ole->sector_size) break; + m_fat_idx++; + 
curr = ole->fat[curr]; + } + } + } + + fseek(f, (long)(ole->directory_first_sector + 1) * (long)ole->sector_size, SEEK_SET); + uint8_t root_entry[128]; + if (fread(root_entry, 1, 128, f) == 128) + { + uint32_t mini_stream_first_sector = read4(root_entry + 116); + uint32_t mini_stream_size = read4(root_entry + 120); + if (mini_stream_size > 0) + { + size_t actual; + ole->mini_stream = ole2_read_stream(ole, mini_stream_first_sector, mini_stream_size, &actual); + ole->mini_stream_size = actual; + } + } + + return ole; +} + +static int msi_mime2utf(int x) +{ + if (x < 10) return x + '0'; + if (x < 36) return x - 10 + 'A'; + if (x < 62) return x - 36 + 'a'; + if (x == 62) return '.'; + if (x == 63) return '_'; + return 0; +} + +static void decode_msi_stream_name(const uint16_t *in, char *out) +{ + while (*in) + { + uint16_t ch = *in++; + if (ch >= 0x3800 && ch < 0x4800) + { + *out++ = (char)msi_mime2utf((ch - 0x3800) & 0x3f); + *out++ = (char)msi_mime2utf(((ch - 0x3800) >> 6) & 0x3f); + } + else if (ch >= 0x4800 && ch < 0x4840) + { + *out++ = (char)msi_mime2utf((ch - 0x4800) & 0x3f); + } + else + { + *out++ = (char)ch; + } + } + *out = 0; +} + +typedef struct { + char name[256]; + uint32_t first_sector; + uint32_t size; +} StreamInfo; + +static int msi_find_streams(Ole2 *ole, StreamInfo *infos, int max_infos) +{ + uint32_t curr = ole->directory_first_sector; + int count = 0; + uint8_t *sec = cmalloc(ole->sector_size); + if (!sec) return 0; + while (curr < 0xFFFFFFFD && count < max_infos) + { + if (curr >= ole->fat_entries) break; + fseek(ole->f, (long)(curr + 1) * (long)ole->sector_size, SEEK_SET); + if (fread(sec, 1, ole->sector_size, ole->f) != ole->sector_size) break; + for (int i = 0; i < (int)(ole->sector_size / 128); i++) + { + uint8_t *entry = sec + i * 128; + uint16_t name_len = read2(entry + 64); + if (name_len > 2 && (entry[66] == 2 || entry[66] == 1)) + { + uint16_t wname[64]; + memcpy(wname, entry, 64); + decode_msi_stream_name(wname, infos[count].name); + 
infos[count].first_sector = read4(entry + 116); + infos[count].size = read4(entry + 120); + count++; + if (count >= max_infos) break; + } + } + curr = ole->fat[curr]; + } + free(sec); + return count; +} + +typedef struct +{ + char *name; + uint32_t usize; + uint32_t uoffset; + uint16_t folder_idx; +} CabFileInfo; + +typedef struct +{ + uint32_t data_offset; + uint16_t data_blocks; + uint16_t comp_type; +} CabFolderInfo; + +static bool cab_extract_buffer(uint8_t *data, size_t size, const char *out_root, JSONObject *name_map, bool verbose) +{ + if (size < 36 || memcmp(data, "MSCF", 4) != 0) return false; + + uint16_t num_folders = (uint16_t)read2(data + 26); + uint16_t num_files = (uint16_t)read2(data + 28); + uint32_t file_offset = read4(data + 16); + uint32_t header_ptr = 36; + + uint16_t cbCFData = 0; + uint8_t cbCFFolder = 0; + + uint16_t flags = (uint16_t)read2(data + 30); + if (flags & 4) + { + if (header_ptr + 4 > size) return false; + uint16_t cbCFHeader = (uint16_t)read2(data + header_ptr); + cbCFFolder = data[header_ptr + 2]; + cbCFData = data[header_ptr + 3]; + header_ptr += 4 + cbCFHeader; + } + + if (num_folders > 0x1000 || (size_t)header_ptr + num_folders * (8 + cbCFFolder) > size) return false; + CabFolderInfo *folders = cmalloc(num_folders * sizeof(CabFolderInfo)); + if (!folders) return false; + for (int i = 0; i < num_folders; i++) + { + folders[i].data_offset = read4(data + header_ptr); + folders[i].data_blocks = (uint16_t)read2(data + header_ptr + 4); + folders[i].comp_type = (uint16_t)read2(data + header_ptr + 6); + header_ptr += 8 + cbCFFolder; + } + + const char *main_algo = "unknown"; + if (num_folders > 0) + { + int comp_type = folders[0].comp_type & 0x0F; + if (comp_type == 0) main_algo = "none"; + else if (comp_type == 1) main_algo = "MSZIP"; + else if (comp_type == 3) main_algo = "LZX (unsupported)"; + } + if (verbose) printf(" CAB: %u folders, %u files, algo: %s\n", num_folders, num_files, main_algo); + + if (file_offset >= size) + { + 
free(folders); + return false; + } + CabFileInfo *files = cmalloc(num_files * sizeof(CabFileInfo)); + if (!files) + { + free(folders); + return false; + } + uint32_t f_ptr = file_offset; + for (int i = 0; i < num_files; i++) + { + if (f_ptr + 16 >= size) + { + num_files = (uint16_t)i; + break; + } + files[i].usize = read4(data + f_ptr); + files[i].uoffset = read4(data + f_ptr + 4); + files[i].folder_idx = (uint16_t)read2(data + f_ptr + 8); + files[i].name = str_dup((char *)data + f_ptr + 16); + f_ptr += 16 + (uint32_t)strlen(files[i].name) + 1; + } + + for (int i = 0; i < num_folders; i++) + { + uint32_t total_usize = 0; + for (int j = 0; j < num_files; j++) + { + if (files[j].folder_idx == i) + { + uint32_t end = files[j].uoffset + files[j].usize; + if (end > total_usize) total_usize = end; + } + } + if (total_usize == 0 || total_usize > 0x7FFFFFFF) continue; + uint8_t *ubuf = cmalloc(total_usize); + if (!ubuf) continue; + uint32_t uptr = 0; + uint32_t d_ptr = folders[i].data_offset; + int comp_type = folders[i].comp_type & 0x0F; + + // lzx_decomp_state *lzx = NULL; + if (comp_type == 3) + { + // LZX not supported anymore + } + + for (int b = 0; b < folders[i].data_blocks; b++) + { + if (d_ptr + 8 > size) break; + uint16_t csize = (uint16_t)read2(data + d_ptr + 4); + uint16_t usize = (uint16_t)read2(data + d_ptr + 6); + if ((size_t)d_ptr + 8 + cbCFData + csize > size || uptr + usize > total_usize) break; + + if (comp_type == 1) + { // MSZIP + if (csize < 2) break; + tinfl_decompressor inflator; + tinfl_init(&inflator); + size_t in_sz = csize - 2; + size_t out_sz = usize; + tinfl_status status = tinfl_decompress(&inflator, + data + d_ptr + 8 + cbCFData + 2, &in_sz, + ubuf, ubuf + uptr, &out_sz, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if (status < 0) + { + if (verbose) printf(" MSZIP decompression failed at folder %d block %d, error %d\n", i, b, (int)status); + break; + } + } + else if (comp_type == 3) + { // LZX + if (verbose) printf(" LZX decompression 
(unsupported) at block %d\n", b); + break; + } + else if (comp_type == 0) + { // NO COMP + memcpy(ubuf + uptr, data + d_ptr + 8 + cbCFData, usize); + } + uptr += usize; + d_ptr += 8 + cbCFData + csize; + } + + for (int j = 0; j < num_files; j++) + { + if (files[j].folder_idx == i) + { + JSONObject *entry = name_map ? json_map_get(name_map, files[j].name) : NULL; + if (entry) + { + const char *real_path = entry->str; + char *norm_path = str_dup(real_path); + for (char *p = norm_path; *p; p++) + if (*p == '\\') *p = '/'; + char *full_dst = (char *)file_append_path(out_root, norm_path); + file_create_folders(full_dst); + if (files[j].uoffset + files[j].usize <= total_usize) + { + file_write_all(full_dst, (const char *)ubuf + files[j].uoffset, files[j].usize); + } + } + } + } + free(ubuf); + } + free(files); + free(folders); + return true; +} + +static bool cab_extract(const char *path, const char *out_root, JSONObject *name_map, bool verbose) +{ + FILE *f = file_open_read(path); + if (!f) return false; + fseek(f, 0, SEEK_END); + size_t size = (size_t)ftell(f); + fseek(f, 0, SEEK_SET); + uint8_t *data = cmalloc(size); + if (data) + { + if (fread(data, 1, size, f) == size) + { + cab_extract_buffer(data, size, out_root, name_map, verbose); + } + } + free(data); + fclose(f); + return true; +} + +typedef struct +{ + char **strings; + uint32_t count; +} StringTable; + +static void free_string_table(StringTable *st) +{ + if (!st) return; + for (uint32_t i = 0; i < st->count; i++) + free(st->strings[i]); + free(st->strings); + free(st); +} + +static StringTable *msi_load_string_table(Ole2 *ole, const StreamInfo *infos, int stream_count, int *bytes_per_strref_out) +{ + uint8_t *pool_data = NULL; + size_t pool_size = 0; + uint8_t *str_data = NULL; + size_t str_size = 0; + + for (int i = 0; i < stream_count; i++) + { + if (strcmp(infos[i].name, "@_StringPool") == 0) + { + pool_data = ole2_read_stream(ole, infos[i].first_sector, infos[i].size, &pool_size); + } + else if 
(strcmp(infos[i].name, "@_StringData") == 0) + { + str_data = ole2_read_stream(ole, infos[i].first_sector, infos[i].size, &str_size); + } + } + + if (!pool_data || !str_data) + { + free(pool_data); + free(str_data); + return NULL; + } + + int bytes_per_strref = 2; + if (pool_size > 4 && (read2(pool_data + 2) & 0x8000)) + { + bytes_per_strref = 4; + } + if (bytes_per_strref_out) *bytes_per_strref_out = bytes_per_strref; + + uint32_t count = (uint32_t)pool_size / 4; + if (count == 0 || count > 0x1000000) + { + free(pool_data); + free(str_data); + return NULL; + } + + StringTable *st = ccalloc(1, sizeof(StringTable)); + st->count = count; + st->strings = ccalloc(count, sizeof(char *)); + + uint32_t offset = 0; + uint32_t n = 1; + for (uint32_t i = 1; i < count;) + { + uint16_t len = read2(pool_data + i * 4); + uint16_t refs = read2(pool_data + i * 4 + 2); + if (len == 0 && refs == 0) + { + i++; + n++; + continue; + } + if (len == 0) + { + if ((i + 1) * 4 + 4 <= pool_size) + { + uint32_t len_high = read2(pool_data + (i + 1) * 4 + 2); + uint32_t len_low = read2(pool_data + (i + 1) * 4); + len = (uint16_t)((len_high << 16) | len_low); + i += 2; + } + else + { + i++; + } + } + else + { + i += 1; + } + if (offset + len <= str_size && n < count) + { + st->strings[n] = cmalloc((size_t)len + 1); + memcpy(st->strings[n], str_data + offset, len); + st->strings[n][len] = 0; + offset += len; + } + n++; + } + free(pool_data); + free(str_data); + return st; +} + +static const char *st_get(StringTable *st, uint32_t i) +{ + if (!st || i >= st->count) return NULL; + return st->strings[i]; +} + +static char *msi_get_filename(const char *msi_val) +{ + if (!msi_val) return NULL; + const char *p = strchr(msi_val, '|'); + return str_dup(p ? 
p + 1 : msi_val); +} + +typedef struct +{ + uint8_t *f_data, *d_data, *c_data, *med_data; + size_t f_size, d_size, c_size, med_size; + bool is_compressed; + int s_sz; + StringTable *st; +} MsiTables; + +static void parse_directory_table(MsiTables *t, JSONObject *dir_map) +{ + uint32_t d_row_sz = 3 * t->s_sz; + uint32_t d_N = (uint32_t)(t->d_size / d_row_sz); + if (d_N == 0) return; + + for (int pass = 0; pass < 20; pass++) + { + for (uint32_t i = 0; i < d_N; i++) + { + uint32_t id_i, par_i, val_i; + if (t->is_compressed) + { + id_i = (t->s_sz == 4) ? read4(t->d_data + (0 * d_N + i) * 4) : read2(t->d_data + (0 * d_N + i) * 2); + par_i = (t->s_sz == 4) ? read4(t->d_data + (1 * d_N + i) * 4) : read2(t->d_data + (1 * d_N + i) * 2); + val_i = (t->s_sz == 4) ? read4(t->d_data + (2 * d_N + i) * 4) : read2(t->d_data + (2 * d_N + i) * 2); + } + else + { + uint8_t *r = t->d_data + i * d_row_sz; + id_i = (t->s_sz == 4) ? read4(r) : read2(r); + par_i = (t->s_sz == 4) ? read4(r + t->s_sz) : read2(r + t->s_sz); + val_i = (t->s_sz == 4) ? read4(r + 2 * t->s_sz) : read2(r + 2 * t->s_sz); + } + const char *id = st_get(t->st, id_i); + const char *parent = st_get(t->st, par_i); + const char *val = st_get(t->st, val_i); + if (id && val) + { + char *name = msi_get_filename(val); + if (par_i == 0 || (parent && strcmp(id, parent) == 0) || strcmp(id, "TARGETDIR") == 0 || strcmp(id, "SourceDir") == 0) + { + if (!json_map_get(dir_map, id)) json_map_set(dir_map, id, json_new_string(".")); + } + else if (parent) + { + JSONObject *p_path = json_map_get(dir_map, parent); + if (p_path) + { + if (!json_map_get(dir_map, id)) + { + char *full = (strcmp(p_path->str, ".") == 0) ? str_dup(name) : str_printf("%s/%s", p_path->str, name); + json_map_set(dir_map, id, json_new_string(full)); + } + } + } + } + } + } +} + +static void parse_component_table(MsiTables *t, JSONObject *dir_map, JSONObject *comp_map) +{ + uint32_t c_row_sz = (t->s_sz == 2) ? 
12 : 20; + uint32_t c_N = (uint32_t)(t->c_size / c_row_sz); + if (c_N == 0) return; + + for (uint32_t i = 0; i < c_N; i++) + { + uint32_t id_i, dir_i; + if (t->is_compressed) + { + id_i = (t->s_sz == 4) ? read4(t->c_data + (0 * c_N + i) * 4) : read2(t->c_data + (0 * c_N + i) * 2); + dir_i = (t->s_sz == 4) ? read4(t->c_data + (2 * c_N + i) * 4) : read2(t->c_data + (2 * c_N + i) * 2); + } + else + { + uint8_t *r = t->c_data + i * c_row_sz; + id_i = (t->s_sz == 4) ? read4(r) : read2(r); + dir_i = (t->s_sz == 4) ? read4(r + 2 * t->s_sz) : read2(r + 2 * t->s_sz); + } + const char *id = st_get(t->st, id_i); + const char *dir_id = st_get(t->st, dir_i); + if (id && dir_id) + { + JSONObject *dir_path = json_map_get(dir_map, dir_id); + if (dir_path) json_map_set(comp_map, id, json_new_string(dir_path->str)); + } + } +} + +static void parse_file_table(MsiTables *t, JSONObject *comp_map, JSONObject *name_map) +{ + uint32_t f_row_sz = (t->s_sz == 2) ? 20 : 32; + uint32_t f_N = (uint32_t)(t->f_size / f_row_sz); + if (f_N == 0) return; + + for (uint32_t i = 0; i < f_N; i++) + { + uint32_t id_idx, comp_idx, name_idx; + if (t->is_compressed) + { + id_idx = (t->s_sz == 4) ? read4(t->f_data + (0 * f_N + i) * 4) : read2(t->f_data + (0 * f_N + i) * 2); + comp_idx = (t->s_sz == 4) ? read4(t->f_data + (1 * f_N + i) * 4) : read2(t->f_data + (1 * f_N + i) * 2); + name_idx = (t->s_sz == 4) ? read4(t->f_data + (2 * f_N + i) * 4) : read2(t->f_data + (2 * f_N + i) * 2); + } + else + { + uint8_t *r = t->f_data + i * f_row_sz; + id_idx = (t->s_sz == 4) ? read4(r) : read2(r); + comp_idx = (t->s_sz == 4) ? read4(r + t->s_sz) : read2(r + t->s_sz); + name_idx = (t->s_sz == 4) ? 
read4(r + 2 * t->s_sz) : read2(r + 2 * t->s_sz); + } + const char *id = st_get(t->st, id_idx); + const char *comp_id = st_get(t->st, comp_idx); + const char *file_val = st_get(t->st, name_idx); + if (id && comp_id && file_val) + { + char *fname = msi_get_filename(file_val); + JSONObject *c_path = json_map_get(comp_map, comp_id); + if (c_path) + { + char *full = str_printf("%s/%s", c_path->str, fname); + json_map_set(name_map, id, json_new_string(full)); + const char *p = strchr(file_val, '|'); + if (p) + { + char *short_name = str_copy(file_val, p - file_val); + json_map_set(name_map, short_name, json_new_string(full)); + } + } + } + } +} + +bool msi_extract(const char *msi_path, const char *out_root, const char *cab_dir, bool verbose) +{ + if (file_is_dir(msi_path)) return false; + Ole2 *ole = ole2_open(msi_path); + if (!ole) return false; + + StreamInfo *streams = cmalloc(8192 * sizeof(StreamInfo)); + if (!streams) + { + ole2_free(ole); + return false; + } + int stream_count = msi_find_streams(ole, streams, 8192); + + int bytes_per_strref = 2; + StringTable *st = msi_load_string_table(ole, streams, stream_count, &bytes_per_strref); + if (!st) + { + free(streams); + ole2_free(ole); + return false; + } + + MsiTables t = {0}; + t.s_sz = bytes_per_strref; + t.st = st; + JSONObject *name_map = json_new_object(J_OBJECT); + + for (int i = 0; i < stream_count; i++) + { + const char *name = streams[i].name; + const char *pname = name; + if (*pname == '@' || *pname == '!') + { + t.is_compressed = (*pname == '@'); + pname++; + } + while (*pname && !isalnum(*pname) && *pname != '_') + pname++; + + if (strcmp(pname, "File") == 0) t.f_data = ole2_read_stream(ole, streams[i].first_sector, streams[i].size, &t.f_size); + else if (strcmp(pname, "Directory") == 0) t.d_data = ole2_read_stream(ole, streams[i].first_sector, streams[i].size, &t.d_size); + else if (strcmp(pname, "Component") == 0) t.c_data = ole2_read_stream(ole, streams[i].first_sector, streams[i].size, &t.c_size); + 
else if (strcmp(pname, "Media") == 0) t.med_data = ole2_read_stream(ole, streams[i].first_sector, streams[i].size, &t.med_size); + } + + if (t.f_data && t.d_data && t.c_data) + { + JSONObject *dir_map = json_new_object(J_OBJECT); + parse_directory_table(&t, dir_map); + + JSONObject *comp_map = json_new_object(J_OBJECT); + parse_component_table(&t, dir_map, comp_map); + + parse_file_table(&t, comp_map, name_map); + } + + if (t.med_data && cab_dir) + { + uint32_t m_row_sz = (t.s_sz == 2) ? 14 : 26; + uint32_t m_N = (uint32_t)(t.med_size / m_row_sz); + for (uint32_t i = 0; i < m_N; i++) + { + uint32_t cab_i; + if (t.is_compressed) + { + cab_i = (t.s_sz == 4) ? read4(t.med_data + (10 * m_N + i * 4)) : read2(t.med_data + (8 * m_N + i * 2)); + } + else + { + cab_i = (t.s_sz == 4) ? read4(t.med_data + i * m_row_sz + 12) : read2(t.med_data + i * m_row_sz + 8); + } + const char *cab_name = st_get(st, cab_i); + if (cab_name && cab_name[0] != '#') + { + char *cp = (char *)file_append_path(cab_dir, cab_name); + if (!file_exists(cp)) + { + char *with_ext = str_printf("%s.cab", cp); + if (file_exists(with_ext)) + { + cp = with_ext; + } + } + if (file_exists(cp)) + { + if (verbose) printf(" Extracting external CAB: %s\n", cab_name); + cab_extract(cp, out_root, name_map, verbose); + } + } + } + } + + for (int i = 0; i < stream_count; i++) + { + size_t s_size; + uint8_t *s_data = ole2_read_stream(ole, streams[i].first_sector, streams[i].size, &s_size); + if (s_data && s_size > 4 && memcmp(s_data, "MSCF", 4) == 0) + { + if (verbose) printf(" Extracting embedded CAB from stream: %s\n", streams[i].name); + cab_extract_buffer(s_data, s_size, out_root, name_map, verbose); + } + free(s_data); + } + + free(t.f_data); + free(t.d_data); + free(t.c_data); + free(t.med_data); + free_string_table(st); + free(streams); + ole2_free(ole); + return true; +} diff --git a/src/utils/msi.h b/src/utils/msi.h new file mode 100644 index 000000000..436e37a2d --- /dev/null +++ b/src/utils/msi.h @@ -0,0 
+1,15 @@ +#ifndef C3_MSI_H +#define C3_MSI_H + +#include + +/** + * Extracts files from an MSI package. + * @param msi_path Path to the .msi file. + * @param out_root Root directory to extract to. + * @param cab_dir Directory where external .cab files are located. + * @return true on success, false on failure. + */ +bool msi_extract(const char *msi_path, const char *out_root, const char *cab_dir, bool verbose); + +#endif // C3_MSI_H