// retoor

// Include the public header when it is reachable so the prototype of
// json_repair_string() is checked against this definition. The guard lets the
// translation unit also be compiled on its own (e.g. for isolated unit tests).
#if defined(__has_include)
#  if __has_include("json_repair.h")
#    include "json_repair.h"
#  endif
#else
#  include "json_repair.h"
#endif

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Returns a newly allocated copy of `src` (caller frees), or NULL on
// allocation failure. Local replacement for strdup(), which is not part of
// ISO C11.
static char *duplicate_string(const char *src)
{
    size_t size = strlen(src) + 1;
    char *copy = malloc(size);
    if (copy) {
        memcpy(copy, src, size);
    }
    return copy;
}

// Returns true for characters accepted inside an unquoted object key.
static bool is_key_char(unsigned char c)
{
    return isalnum(c) || c == '_' || c == '-';
}

// Removes `// ...`, `/* ... */` and `# ...` comments that appear outside
// double-quoted strings. Line comments stop before the newline, which is kept.
// An unterminated block comment swallows the rest of the input.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL or
// allocation fails.
static char *strip_comments(const char *src)
{
    if (!src) {
        return NULL;
    }

    // Output is never longer than the input: characters are copied or dropped.
    char *result = malloc(strlen(src) + 1);
    if (!result) {
        return NULL;
    }

    char *dst = result;
    const char *p = src;
    bool in_string = false;
    bool escaped = false;

    while (*p) {
        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_string = !in_string;
            *dst++ = *p++;
            continue;
        }
        if (!in_string) {
            if (p[0] == '/' && p[1] == '/') {
                while (*p && *p != '\n') {
                    p++;
                }
                continue;
            }
            if (p[0] == '/' && p[1] == '*') {
                p += 2;
                while (*p && !(p[0] == '*' && p[1] == '/')) {
                    p++;
                }
                if (*p) {
                    p += 2; // skip the closing "*/"
                }
                continue;
            }
            if (*p == '#') {
                while (*p && *p != '\n') {
                    p++;
                }
                continue;
            }
        }
        *dst++ = *p++;
    }

    *dst = '\0';
    return result;
}

// Normalizes quoting so that later passes only have to deal with `"`:
//   - UTF-8 curly double quotes (E2 80 9C / E2 80 9D) become `"`;
//   - UTF-8 curly single quotes (E2 80 98 / E2 80 99) become `'` (they are
//     copied out as a plain apostrophe and not treated as string delimiters);
//   - outside double-quoted strings, '...' is rewritten as "...": embedded `"`
//     is escaped, `\'` is unescaped to `'` (JSON has no `\'` escape), and a
//     string left open at end of input is closed.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL,
// the size computation would overflow, or allocation fails.
static char *normalize_quotes(const char *src)
{
    if (!src) {
        return NULL;
    }

    size_t len = strlen(src);
    if (len > (SIZE_MAX - 1) / 2) {
        return NULL;
    }

    // Every input byte yields at most two output bytes (`"` -> `\"`); the
    // synthesized closing quote is paid for by the opening quote (1 -> 1).
    char *result = malloc(len * 2 + 1);
    if (!result) {
        return NULL;
    }

    char *dst = result;
    const char *p = src;
    bool in_double_string = false;
    bool escaped = false;

    while (*p) {
        // Reading u[1]/u[2] is safe: each is only examined after the previous
        // byte was confirmed non-NUL.
        const unsigned char *u = (const unsigned char *)p;
        if (u[0] == 0xE2 && u[1] == 0x80) {
            if (u[2] == 0x9C || u[2] == 0x9D) {
                *dst++ = '"';
                p += 3;
                continue;
            }
            if (u[2] == 0x98 || u[2] == 0x99) {
                *dst++ = '\'';
                p += 3;
                continue;
            }
        }

        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_double_string = !in_double_string;
            *dst++ = *p++;
            continue;
        }

        if (!in_double_string && *p == '\'') {
            *dst++ = '"';
            p++;
            while (*p && *p != '\'') {
                if (*p == '\\') {
                    if (p[1] == '\0') {
                        // Dangling backslash: drop it so it cannot escape the
                        // closing quote added below.
                        p++;
                        break;
                    }
                    if (p[1] == '\'') {
                        *dst++ = '\'';
                        p += 2;
                        continue;
                    }
                    *dst++ = *p++;
                    *dst++ = *p++;
                } else if (*p == '"') {
                    *dst++ = '\\';
                    *dst++ = '"';
                    p++;
                } else {
                    *dst++ = *p++;
                }
            }
            if (*p == '\'') {
                p++;
            }
            // Always close, even when the input ended inside the string.
            *dst++ = '"';
            continue;
        }

        *dst++ = *p++;
    }

    *dst = '\0';
    return result;
}

// Drops a comma (and the whitespace after it) when the next non-whitespace
// character is `]` or `}`. Commas inside double-quoted strings are untouched.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL or
// allocation fails.
static char *remove_trailing_commas(const char *src)
{
    if (!src) {
        return NULL;
    }

    char *result = malloc(strlen(src) + 1);
    if (!result) {
        return NULL;
    }

    char *dst = result;
    const char *p = src;
    bool in_string = false;
    bool escaped = false;

    while (*p) {
        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_string = !in_string;
            *dst++ = *p++;
            continue;
        }
        if (!in_string && *p == ',') {
            const char *next = p + 1;
            while (*next && isspace((unsigned char)*next)) {
                next++;
            }
            if (*next == ']' || *next == '}') {
                p = next; // resume at the closer; comma and whitespace are dropped
                continue;
            }
        }
        *dst++ = *p++;
    }

    *dst = '\0';
    return result;
}

// Wraps bare object keys in double quotes. A run of [A-Za-z0-9_-] outside a
// string is treated as a key when the previous non-whitespace character is
// `{` or `,` and the next non-whitespace character after the run is `:`.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL,
// the size computation would overflow, or allocation fails.
static char *quote_unquoted_keys(const char *src)
{
    if (!src) {
        return NULL;
    }

    size_t len = strlen(src);
    if (len > (SIZE_MAX - 1) / 2) {
        return NULL;
    }

    // Each quoted key adds two bytes and needs at least three input bytes
    // (its `{`/`,`, one key character, its `:`), so 2 * len is ample.
    char *result = malloc(len * 2 + 1);
    if (!result) {
        return NULL;
    }

    char *dst = result;
    const char *p = src;
    bool in_string = false;
    bool escaped = false;

    while (*p) {
        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_string = !in_string;
            *dst++ = *p++;
            continue;
        }

        if (!in_string && is_key_char((unsigned char)*p)) {
            // Look back by index rather than by pointer so we never form a
            // pointer before the start of `src`.
            size_t before = (size_t)(p - src);
            while (before > 0 && isspace((unsigned char)src[before - 1])) {
                before--;
            }

            if (before > 0 &&
                (src[before - 1] == '{' || src[before - 1] == ',')) {
                const char *end = p;
                while (*end && is_key_char((unsigned char)*end)) {
                    end++;
                }
                const char *after = end;
                while (*after && isspace((unsigned char)*after)) {
                    after++;
                }
                if (*after == ':') {
                    *dst++ = '"';
                    while (p < end) {
                        *dst++ = *p++;
                    }
                    *dst++ = '"';
                    continue;
                }
            }
        }

        *dst++ = *p++;
    }

    *dst = '\0';
    return result;
}

// Makes `{}`/`[]` nesting consistent outside double-quoted strings:
//   - a closer with no matching opener anywhere on the stack is dropped;
//   - a closer whose opener is deeper on the stack first closes the openers
//     above it (e.g. `{"a":[1}` becomes `{"a":[1]}`);
//   - a string still open at end of input is terminated;
//   - closers for every opener still open are appended.
// Nesting depth is limited only by the input length.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL,
// the size computation would overflow, or allocation fails.
static char *balance_brackets(const char *src)
{
    if (!src) {
        return NULL;
    }

    size_t len = strlen(src);
    if (len > (SIZE_MAX - 2) / 2) {
        return NULL;
    }

    // Worst case: every input byte copied (len) + one synthesized closer per
    // pushed opener (<= len) + one synthesized closing quote + NUL.
    char *result = malloc(len * 2 + 2);
    // At most one stack entry per input byte.
    char *stack = malloc(len + 1);
    if (!result || !stack) {
        free(result);
        free(stack);
        return NULL;
    }

    size_t top = 0;
    char *dst = result;
    const char *p = src;
    bool in_string = false;
    bool escaped = false;

    while (*p) {
        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_string = !in_string;
            *dst++ = *p++;
            continue;
        }

        if (!in_string) {
            if (*p == '{' || *p == '[') {
                stack[top++] = *p;
            } else if (*p == '}' || *p == ']') {
                char opener = (*p == '}') ? '{' : '[';

                // Find the innermost open bracket this closer can match.
                size_t match = top;
                while (match > 0 && stack[match - 1] != opener) {
                    match--;
                }
                if (match == 0) {
                    p++; // stray closer: nothing to close, drop it
                    continue;
                }

                // Close everything opened after the matching bracket.
                while (top > match) {
                    *dst++ = (stack[--top] == '{') ? '}' : ']';
                }
                top--; // pop the matching opener; the closer is copied below
            }
        }

        *dst++ = *p++;
    }

    if (in_string) {
        if (escaped) {
            dst--; // drop a dangling backslash so it cannot escape the quote
        }
        *dst++ = '"';
    }

    while (top > 0) {
        *dst++ = (stack[--top] == '{') ? '}' : ']';
    }
    *dst = '\0';

    free(stack);
    return result;
}

// Removes all whitespace that is outside double-quoted strings.
//
// Returns a newly allocated string (caller frees), or NULL if `src` is NULL or
// allocation fails.
static char *compact_json(const char *src)
{
    if (!src) {
        return NULL;
    }

    char *result = malloc(strlen(src) + 1);
    if (!result) {
        return NULL;
    }

    char *dst = result;
    const char *p = src;
    bool in_string = false;
    bool escaped = false;

    while (*p) {
        if (escaped) {
            *dst++ = *p++;
            escaped = false;
            continue;
        }
        if (*p == '\\') {
            *dst++ = *p++;
            escaped = true;
            continue;
        }
        if (*p == '"') {
            in_string = !in_string;
            *dst++ = *p++;
            continue;
        }
        if (!in_string && isspace((unsigned char)*p)) {
            p++;
            continue;
        }
        *dst++ = *p++;
    }

    *dst = '\0';
    return result;
}

// Cuts `s` in place right after the first top-level object/array closes, so
// trailing text after the JSON value is discarded. Brackets inside
// double-quoted strings are ignored.
static void truncate_after_first_value(char *s)
{
    size_t depth = 0;
    bool in_string = false;
    bool escaped = false;

    for (char *p = s; *p; p++) {
        if (escaped) {
            escaped = false;
        } else if (*p == '\\') {
            escaped = true;
        } else if (*p == '"') {
            in_string = !in_string;
        } else if (!in_string) {
            if (*p == '{' || *p == '[') {
                depth++;
            } else if ((*p == '}' || *p == ']') && depth > 0) {
                if (--depth == 0) {
                    p[1] = '\0';
                    return;
                }
            }
        }
    }
}

// Best-effort repair of JSON-like text.
//
// Everything before the first `{` or `[` is discarded, then the text is run
// through: comment stripping, quote normalization, quoting of bare keys,
// trailing-comma removal, bracket balancing, truncation after the first
// complete top-level value, and whitespace compaction.
//
// If `src` contains neither `{` nor `[`, an unmodified copy of `src` is
// returned.
//
// Returns a newly allocated string that the caller must free(), or NULL if
// `src` is NULL or any allocation fails.
char *json_repair_string(const char *src)
{
    if (!src) {
        return NULL;
    }

    const char *start = src;
    while (*start && *start != '{' && *start != '[') {
        start++;
    }
    if (!*start) {
        return duplicate_string(src); // no JSON structure found
    }

    // Every stage returns NULL for NULL input, so an allocation failure
    // anywhere simply propagates down the chain; free(NULL) is a no-op.
    char *stripped = strip_comments(start);

    char *quoted = normalize_quotes(stripped);
    free(stripped);

    char *keyed = quote_unquoted_keys(quoted);
    free(quoted);

    char *decommaed = remove_trailing_commas(keyed);
    free(keyed);

    char *balanced = balance_brackets(decommaed);
    free(decommaed);

    if (!balanced) {
        return NULL;
    }

    truncate_after_first_value(balanced);

    char *compacted = compact_json(balanced);
    free(balanced);
    return compacted;
}