/* Source: Ground-Zerro / HydraRoute (public GitHub repository). */
#include "../include/geodat.h"
#include "../include/ipset_nl.h"
#include "../include/log.h"
#include "../include/util.h"
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/*
 * Decode one base-128 varint from an in-memory buffer.
 *
 * data, len : buffer and its size in bytes
 * pos       : offset of the first varint byte
 * out_val   : receives the decoded value; written only on success
 *
 * Returns the number of bytes consumed, or -1 when the buffer ends before a
 * byte with a clear high bit is seen, or when more than ten bytes would be
 * needed (shift reaches 64).
 */
static int read_varint(const uint8_t *data, int len, int pos, uint64_t *out_val) {
    uint64_t acc = 0;
    int consumed = 0;

    for (int shift = 0; shift < 64; shift += 7) {
        if (pos + consumed >= len) return -1;

        const uint8_t byte = data[pos + consumed];
        consumed++;
        acc |= (uint64_t)(byte & 0x7F) << shift;

        /* High bit clear marks the final byte of the varint. */
        if ((byte & 0x80) == 0) {
            *out_val = acc;
            return consumed;
        }
    }
    return -1;
}

/*
 * Decode one base-128 varint directly from a stdio stream.
 *
 * f       : stream positioned at the first varint byte
 * out_val : receives the decoded value; written only on success
 *
 * Returns the number of bytes read, or -1 on EOF before the terminating byte
 * or when the encoding would need more than ten bytes.
 */
static int read_varint_stream(FILE *f, uint64_t *out_val) {
    uint64_t acc = 0;
    int nread = 0;

    for (int shift = 0; shift < 64; shift += 7) {
        const int ch = fgetc(f);
        if (ch == EOF) return -1;

        nread++;
        acc |= (uint64_t)(ch & 0x7F) << shift;

        /* High bit clear marks the final byte of the varint. */
        if ((ch & 0x80) == 0) {
            *out_val = acc;
            return nread;
        }
    }
    return -1;
}

/*
 * Parse one length-delimited CIDR sub-message into *entry.
 *
 * Recognised fields:
 *   tag 0x0A - address bytes, must be exactly 4 or 16 bytes long
 *   tag 0x10 - prefix length (varint)
 * Any other varint (wire type 0) or length-delimited (wire type 2) field is
 * skipped; every other wire type is treated as a parse error.
 *
 * data, len : sub-message bytes
 * entry     : zeroed first, then filled in
 *
 * Returns 0 on success, -1 on malformed input or when the resulting address
 * is all zero bytes.
 */
static int parse_cidr_body(const uint8_t *data, int len, geoip_entry_t *entry) {
    memset(entry, 0, sizeof(*entry));
    int pos = 0;

    while (pos < len) {
        uint8_t tag = data[pos++];

        if (tag == 0x0A) {
            uint64_t ip_len;
            int br = read_varint(data, len, pos, &ip_len);
            if (br < 0) return -1;
            pos += br;
            if (ip_len != 4 && ip_len != 16) return -1;
            /* Compare in 64 bits: len - pos is >= 0 after a successful read. */
            if (ip_len > (uint64_t)(len - pos)) return -1;
            memcpy(entry->ip, data + pos, ip_len);
            entry->ip_len = (uint8_t)ip_len;
            pos += (int)ip_len;
        } else if (tag == 0x10) {
            uint64_t prefix;
            int br = read_varint(data, len, pos, &prefix);
            if (br < 0) return -1;
            pos += br;
            entry->prefix = (uint32_t)prefix;
        } else {
            int wire_type = tag & 0x07;
            if (wire_type == 0) {
                uint64_t dummy;
                int br = read_varint(data, len, pos, &dummy);
                if (br < 0) return -1;
                pos += br;
            } else if (wire_type == 2) {
                uint64_t flen;
                int br = read_varint(data, len, pos, &flen);
                if (br < 0) return -1;
                pos += br;
                /* Reject a field that overruns the buffer. Checking before the
                 * int cast stops a huge length from truncating to a negative
                 * value and moving pos backwards. */
                if (flen > (uint64_t)(len - pos)) return -1;
                pos += (int)flen;
            } else {
                return -1;
            }
        }
    }

    /* An address consisting solely of zero bytes (or no address field at all)
     * is rejected. */
    int all_zero = 1;
    for (int i = 0; i < 16; i++) {
        if (entry->ip[i] != 0) { all_zero = 0; break; }
    }
    if (all_zero) return -1;

    return 0;
}

/*
 * Walk a GeoIP message body and append every parsable CIDR sub-message
 * (tag 0x12) to the growable array *entries.
 *
 * data, len : message bytes
 * entries   : heap array, reallocated (doubled) when full
 * count     : number of used elements, updated in place
 * capacity  : number of allocated elements, updated in place
 *
 * Parsing stops quietly at the first malformed field or allocation failure;
 * entries collected up to that point are kept. Always returns 0.
 */
static int parse_geoip_body(const uint8_t *data, int len,
                            geoip_entry_t **entries, int *count, int *capacity) {
    int pos = 0;

    while (pos < len) {
        uint8_t tag = data[pos++];

        if (tag == 0x12) {
            uint64_t cidr_len;
            int br = read_varint(data, len, pos, &cidr_len);
            if (br < 0) break;
            pos += br;
            /* Compare in 64 bits so an oversized length cannot truncate to a
             * negative int and slip past the bounds check. */
            if (cidr_len > (uint64_t)(len - pos)) break;

            geoip_entry_t entry;
            if (parse_cidr_body(data + pos, (int)cidr_len, &entry) == 0) {
                if (*count >= *capacity) {
                    /* Doubling 0 would stay 0, so start from a small size. */
                    int new_cap = (*capacity > 0) ? *capacity * 2 : 16;
                    geoip_entry_t *tmp = realloc(*entries, (size_t)new_cap * sizeof(geoip_entry_t));
                    if (!tmp) break;
                    *entries = tmp;
                    *capacity = new_cap;
                }
                (*entries)[(*count)++] = entry;
            }
            pos += (int)cidr_len;
        } else {
            int wire_type = tag & 0x07;
            if (wire_type == 0) {
                uint64_t dummy;
                int br = read_varint(data, len, pos, &dummy);
                if (br < 0) break;
                pos += br;
            } else if (wire_type == 2) {
                uint64_t flen;
                int br = read_varint(data, len, pos, &flen);
                if (br < 0) break;
                pos += br;
                if (flen > (uint64_t)(len - pos)) break;
                pos += (int)flen;
            } else {
                break;
            }
        }
    }
    return 0;
}

/*
 * Count the CIDR sub-messages (tag 0x12) in a GeoIP message body without
 * storing them.
 *
 * data, len : message bytes
 * ipv4      : incremented for every parsable entry with a 4-byte address
 * ipv6      : incremented for every parsable entry with a 16-byte address
 *
 * Counting stops quietly at the first malformed field.
 */
static void count_geoip_body(const uint8_t *data, int len, int *ipv4, int *ipv6) {
    int pos = 0;
    while (pos < len) {
        uint8_t tag = data[pos++];
        if (tag == 0x12) {
            uint64_t cidr_len;
            int br = read_varint(data, len, pos, &cidr_len);
            if (br < 0) break;
            pos += br;
            /* Compare in 64 bits so an oversized length cannot truncate to a
             * negative int and slip past the bounds check. */
            if (cidr_len > (uint64_t)(len - pos)) break;
            geoip_entry_t entry;
            if (parse_cidr_body(data + pos, (int)cidr_len, &entry) == 0) {
                if (entry.ip_len == 4) (*ipv4)++;
                else if (entry.ip_len == 16) (*ipv6)++;
            }
            pos += (int)cidr_len;
        } else {
            int wire_type = tag & 0x07;
            if (wire_type == 0) {
                uint64_t dummy;
                int br = read_varint(data, len, pos, &dummy);
                if (br < 0) break;
                pos += br;
            } else if (wire_type == 2) {
                uint64_t flen;
                int br = read_varint(data, len, pos, &flen);
                if (br < 0) break;
                pos += br;
                if (flen > (uint64_t)(len - pos)) break;
                pos += (int)flen;
            } else {
                break;
            }
        }
    }
}

/*
 * Parse one GeoSite domain sub-message.
 *
 * Recognised fields:
 *   tag 0x08 - domain type (varint)
 *   tag 0x12 - domain value; copied into a freshly allocated,
 *              NUL-terminated string
 * Other varint / length-delimited fields are skipped; any other wire type is
 * an error.
 *
 * Returns 0 when a value was found (the caller then owns domain->value and
 * must free it), -1 otherwise. On -1 nothing is left allocated and
 * domain->value is NULL.
 */
static int parse_geosite_domain(const uint8_t *data, int len, geosite_domain_t *domain) {
    domain->type = 0;
    domain->value = NULL;
    int pos = 0;

    while (pos < len) {
        uint8_t tag = data[pos++];

        if (tag == 0x08) {
            uint64_t type_val;
            int br = read_varint(data, len, pos, &type_val);
            if (br < 0) goto fail;
            pos += br;
            domain->type = (uint32_t)type_val;
        } else if (tag == 0x12) {
            uint64_t val_len;
            int br = read_varint(data, len, pos, &val_len);
            if (br < 0) goto fail;
            pos += br;
            /* Compare in 64 bits so an oversized length cannot truncate to a
             * negative int and slip past the bounds check. */
            if (val_len > (uint64_t)(len - pos)) goto fail;
            /* A repeated value field replaces the earlier one; release the
             * earlier copy instead of leaking it. */
            free(domain->value);
            domain->value = malloc((size_t)val_len + 1);
            if (!domain->value) return -1;
            memcpy(domain->value, data + pos, val_len);
            domain->value[val_len] = 0;
            pos += (int)val_len;
        } else {
            int wire_type = tag & 0x07;
            if (wire_type == 0) {
                uint64_t dummy;
                int br = read_varint(data, len, pos, &dummy);
                if (br < 0) goto fail;
                pos += br;
            } else if (wire_type == 2) {
                uint64_t flen;
                int br = read_varint(data, len, pos, &flen);
                if (br < 0) goto fail;
                pos += br;
                if (flen > (uint64_t)(len - pos)) goto fail;
                pos += (int)flen;
            } else {
                goto fail;
            }
        }
    }
    return (domain->value != NULL) ? 0 : -1;

fail:
    /* Callers do not free the value of a rejected entry, so do it here. */
    free(domain->value);
    domain->value = NULL;
    return -1;
}

/*
 * Walk a GeoSite message body and append every parsable domain sub-message
 * (tag 0x12) to the growable array *domains.
 *
 * data, len : message bytes
 * domains   : heap array, reallocated (doubled) when full
 * count     : number of used elements, updated in place
 * capacity  : number of allocated elements, updated in place
 *
 * Each appended element owns its heap-allocated value string. Parsing stops
 * quietly at the first malformed field or allocation failure; elements
 * collected up to that point are kept. Always returns 0.
 */
static int parse_geosite_body(const uint8_t *data, int len,
                              geosite_domain_t **domains, int *count, int *capacity) {
    int pos = 0;

    while (pos < len) {
        uint8_t tag = data[pos++];

        if (tag == 0x12) {
            uint64_t domain_len;
            int br = read_varint(data, len, pos, &domain_len);
            if (br < 0) break;
            pos += br;
            /* Compare in 64 bits so an oversized length cannot truncate to a
             * negative int and slip past the bounds check. */
            if (domain_len > (uint64_t)(len - pos)) break;

            geosite_domain_t entry;
            if (parse_geosite_domain(data + pos, (int)domain_len, &entry) == 0) {
                if (*count >= *capacity) {
                    /* Doubling 0 would stay 0, so start from a small size. */
                    int new_cap = (*capacity > 0) ? *capacity * 2 : 16;
                    geosite_domain_t *tmp = realloc(*domains, (size_t)new_cap * sizeof(geosite_domain_t));
                    if (!tmp) {
                        free(entry.value);
                        break;
                    }
                    *domains = tmp;
                    *capacity = new_cap;
                }
                (*domains)[(*count)++] = entry;
            }
            pos += (int)domain_len;
        } else {
            int wire_type = tag & 0x07;
            if (wire_type == 0) {
                uint64_t dummy;
                int br = read_varint(data, len, pos, &dummy);
                if (br < 0) break;
                pos += br;
            } else if (wire_type == 2) {
                uint64_t flen;
                int br = read_varint(data, len, pos, &flen);
                if (br < 0) break;
                pos += br;
                if (flen > (uint64_t)(len - pos)) break;
                pos += (int)flen;
            } else {
                break;
            }
        }
    }
    return 0;
}

/*
 * Copy src into dst, converting ASCII 'a'..'z' to upper case.
 *
 * At most dst_size - 1 characters are copied and dst is always
 * NUL-terminated. When dst_size is 0 nothing is written at all (the buffer
 * has no room even for the terminator).
 */
static void upcase_buf(char *dst, size_t dst_size, const char *src) {
    if (dst_size == 0) return;

    size_t n = 0;
    for (; n + 1 < dst_size && src[n] != '\0'; n++) {
        char c = src[n];
        if (c >= 'a' && c <= 'z') c -= 32;
        dst[n] = c;
    }
    dst[n] = '\0';
}

/* Callback invoked by scan_dat_file with the bytes that follow the matching
 * code string inside a top-level entry. */
typedef void (*dat_body_visitor_t)(const uint8_t *body, int body_len, void *ctx);

/*
 * Stream through a .dat file made of top-level length-delimited entries
 * (tag 0x0A). Each entry is expected to start with a length-delimited code
 * string (tag 0x0A); that code is upper-cased (ASCII, truncated to 63 bytes)
 * and compared with target_upper. For every match, visit() receives the rest
 * of the entry.
 *
 * file_path    : file to read
 * target_upper : code to look for, already upper-cased by the caller
 * visit, ctx   : callback and its opaque argument
 *
 * Returns -1 if the file cannot be opened, 0 otherwise. Scanning stops
 * quietly at the first unexpected top-level tag, short read, oversized entry
 * or allocation failure; entries without a well-formed leading code string
 * are skipped.
 */
static int scan_dat_file(const char *file_path, const char *target_upper,
                          dat_body_visitor_t visit, void *ctx) {
    FILE *f = fopen(file_path, "rb");
    if (!f) return -1;
    setvbuf(f, NULL, _IOFBF, 64 * 1024);

    while (1) {
        int top_tag = fgetc(f);
        if (top_tag == EOF) break;
        if (top_tag != 0x0A) break;

        uint64_t body_len;
        if (read_varint_stream(f, &body_len) < 0) break;
        /* Offsets below are ints; an entry that does not fit cannot be
         * handled and indicates a corrupt file. */
        if (body_len > (uint64_t)INT_MAX) break;

        /* Never request 0 bytes: malloc(0) may legitimately return NULL,
         * which would be mistaken for an allocation failure. */
        uint8_t *body = malloc(body_len ? (size_t)body_len : 1);
        if (!body) break;
        if (fread(body, 1, (size_t)body_len, f) != body_len) { free(body); break; }

        if (body_len < 2 || body[0] != 0x0A) { free(body); continue; }

        uint64_t code_len;
        int br = read_varint(body, (int)body_len, 1, &code_len);
        if (br < 0) { free(body); continue; }
        int code_start = 1 + br;
        /* Compare in 64 bits so an oversized length cannot truncate to a
         * negative int and slip past the bounds check. */
        if (code_len > body_len - (uint64_t)code_start) { free(body); continue; }

        char code[64] = {0};
        size_t clen = code_len < sizeof(code) - 1 ? code_len : sizeof(code) - 1;
        memcpy(code, body + code_start, clen);
        for (char *p = code; *p; p++)
            if (*p >= 'a' && *p <= 'z') *p -= 32;

        if (strcmp(code, target_upper) == 0) {
            int data_pos = code_start + (int)code_len;
            visit(body + data_pos, (int)body_len - data_pos, ctx);
        }
        free(body);
    }

    fclose(f);
    return 0;
}

/* Pair of output counters handed to count_geoip_visitor through its opaque
 * ctx pointer. */
typedef struct {
    int *ipv4;
    int *ipv6;
} count_ctx_t;

/* dat_body_visitor_t adapter: forwards the body to count_geoip_body using
 * the counters stored in ctx. */
static void count_geoip_visitor(const uint8_t *body, int len, void *ctx) {
    const count_ctx_t *counters = ctx;

    count_geoip_body(body, len, counters->ipv4, counters->ipv6);
}

static void count_geoip_cidrs_all_files(
    const char (*geoip_files)[512], int geoip_count,
    const char *tag, int *out_ipv4, int *out_ipv6)
{
    *out_ipv4 = 0;
    *out_ipv6 = 0;

    char target[64];
    upcase_buf(target, sizeof(target), tag);

    count_ctx_t ctx = { out_ipv4, out_ipv6 };
    for (int gi = 0; gi < geoip_count; gi++)
        scan_dat_file(geoip_files[gi], target, count_geoip_visitor, &ctx);
}

/* Growable-array state handed to extract_geoip_visitor through its opaque
 * ctx pointer. */
typedef struct {
    geoip_entry_t **entries;
    int *count;
    int *capacity;
} extract_geoip_ctx_t;

/* dat_body_visitor_t adapter: forwards the body to parse_geoip_body using
 * the array state stored in ctx. */
static void extract_geoip_visitor(const uint8_t *body, int len, void *ctx) {
    const extract_geoip_ctx_t *state = ctx;

    parse_geoip_body(body, len, state->entries, state->count, state->capacity);
}

/*
 * Collect every CIDR entry stored under country_code in one GeoIP file.
 *
 * file_path    : GeoIP .dat file
 * country_code : code to look for (upper-cased here before matching)
 * out_entries  : receives a heap array the caller must free() on success
 * out_count    : receives the number of entries
 *
 * Returns 0 on success (possibly with zero entries), -1 on allocation
 * failure or when the file cannot be opened. On -1 *out_entries is NULL and
 * *out_count is 0, so callers have nothing to release.
 */
static int extract_geoip_cidrs(const char *file_path, const char *country_code,
                               geoip_entry_t **out_entries, int *out_count) {
    char target[64];
    upcase_buf(target, sizeof(target), country_code);

    int capacity = 4096;
    *out_count = 0;
    *out_entries = malloc(capacity * sizeof(geoip_entry_t));
    if (!*out_entries) return -1;

    extract_geoip_ctx_t ctx = { out_entries, out_count, &capacity };
    int rc = scan_dat_file(file_path, target, extract_geoip_visitor, &ctx);
    if (rc != 0) {
        LOG_WARN("GeoIP file not found: %s", file_path);
        /* Do not hand a buffer back together with an error code. */
        free(*out_entries);
        *out_entries = NULL;
    }
    return rc;
}

/* Growable-array state handed to extract_geosite_visitor through its opaque
 * ctx pointer. */
typedef struct {
    geosite_domain_t **domains;
    int *count;
    int *capacity;
} extract_geosite_ctx_t;

/* dat_body_visitor_t adapter: forwards the body to parse_geosite_body using
 * the array state stored in ctx. */
static void extract_geosite_visitor(const uint8_t *body, int len, void *ctx) {
    const extract_geosite_ctx_t *state = ctx;

    parse_geosite_body(body, len, state->domains, state->count, state->capacity);
}

/*
 * Collect every domain entry stored under `tag` in one GeoSite file.
 *
 * file_path   : GeoSite .dat file
 * tag         : tag to look for (upper-cased here before matching)
 * out_domains : receives a heap array on success; the caller must free each
 *               element's value and then the array itself
 * out_count   : receives the number of elements
 *
 * Returns 0 on success (possibly with zero elements), -1 on allocation
 * failure or when the file cannot be opened. On -1 *out_domains is NULL and
 * *out_count is 0, so callers have nothing to release.
 */
static int extract_geosite_domains(const char *file_path, const char *tag,
                                   geosite_domain_t **out_domains, int *out_count) {
    char target[64];
    upcase_buf(target, sizeof(target), tag);

    int capacity = 4096;
    *out_count = 0;
    *out_domains = malloc(capacity * sizeof(geosite_domain_t));
    if (!*out_domains) return -1;

    extract_geosite_ctx_t ctx = { out_domains, out_count, &capacity };
    int rc = scan_dat_file(file_path, target, extract_geosite_visitor, &ctx);
    if (rc != 0) {
        LOG_WARN("GeoSite file not found: %s", file_path);
        /* Do not hand a buffer back together with an error code. */
        free(*out_domains);
        *out_domains = NULL;
    }
    return rc;
}

/* Sort key used while de-duplicating domains: the index of the domain in the
 * input array and the number of '.' characters in its value. */
typedef struct {
    int idx;
    int dots;
} dedup_entry_t;

/* qsort comparator: orders dedup_entry_t records by ascending dot count. */
static int compare_dots(const void *a, const void *b) {
    const dedup_entry_t *lhs = a;
    const dedup_entry_t *rhs = b;

    return lhs->dots - rhs->dots;
}

/*
 * Drop Domain/Full entries that are already covered by a shorter accepted
 * Domain entry (e.g. "a.example.com" when "example.com" is present).
 *
 * Only GEOSITE_TYPE_DOMAIN and GEOSITE_TYPE_FULL entries are considered; all
 * other types are left out of the result. Candidates are processed in order
 * of ascending dot count so that parents are seen before their subdomains.
 *
 * domains, count : input array (not modified)
 * output         : receives a heap array of shallow copies (the value
 *                  pointers still belong to `domains`), or NULL when empty;
 *                  the caller frees the array only
 * out_count      : receives the number of elements in *output
 *
 * Returns 0 on success, -1 on allocation failure (with *output == NULL and
 * *out_count == 0).
 */
static int deduplicate_domains(const geosite_domain_t *domains, int count,
                               geosite_domain_t **output, int *out_count) {
    *output = NULL;
    *out_count = 0;
    if (count <= 0) return 0;

    dedup_entry_t *entries = malloc((size_t)count * sizeof(dedup_entry_t));
    if (!entries) return -1;
    int entry_count = 0;

    for (int i = 0; i < count; i++) {
        if (domains[i].type != GEOSITE_TYPE_DOMAIN && domains[i].type != GEOSITE_TYPE_FULL)
            continue;
        if (!domains[i].value) continue;
        int dots = 0;
        for (const char *p = domains[i].value; *p; p++) {
            if (*p == '.') dots++;
        }
        entries[entry_count++] = (dedup_entry_t){i, dots};
    }

    if (entry_count == 0) {
        free(entries);
        return 0;
    }

    qsort(entries, entry_count, sizeof(dedup_entry_t), compare_dots);

    domain_hashtable_t *accepted = ht_create();
    geosite_domain_t *kept = malloc((size_t)entry_count * sizeof(geosite_domain_t));
    if (!accepted || !kept) {
        if (accepted) ht_destroy(accepted);
        free(kept);
        free(entries);
        return -1;
    }

    int kept_count = 0;
    for (int i = 0; i < entry_count; i++) {
        const geosite_domain_t *cand = &domains[entries[i].idx];
        const char *val = cand->value;
        int covered = 0;

        /* Test every proper parent suffix ("b.c" and "c" for "a.b.c"). */
        const char *p = val;
        while (*p) {
            const char *dot = strchr(p, '.');
            if (!dot) break;
            p = dot + 1;
            if (ht_lookup(accepted, p, strlen(p))) {
                covered = 1;
                break;
            }
        }

        if (!covered) {
            /* Only Domain entries can cover subdomains. The caller inserts
             * Full entries with a different flag than Domain entries
             * (presumably exact match only - confirm against ht_insert), so
             * a Full entry must not cause its subdomains to be dropped. */
            if (cand->type == GEOSITE_TYPE_DOMAIN)
                ht_insert(accepted, val, strlen(val), "1", 0);
            kept[kept_count++] = *cand;
        }
    }

    ht_destroy(accepted);
    free(entries);
    *output = kept;
    *out_count = kept_count;
    return 0;
}

/*
 * Read "geosite:" rules from a watchlist file.
 *
 * Each non-empty line that does not start with '#' and contains a '/' is
 * split at its LAST '/': the text after it (up to the first comma, trailing
 * spaces/tabs removed) is the policy name, and the text before it is a
 * comma-separated token list. Every token starting with "geosite:" yields
 * one rule holding the upper-cased tag and the policy name.
 *
 * watchlist_path : file to read
 * rules          : output array
 * max_rules      : capacity of `rules`
 *
 * Returns the number of rules stored, or -1 if the file cannot be opened.
 */
int parse_geosite_rules(const char *watchlist_path,
                        geosite_rule_t *rules, int max_rules) {
    FILE *f = fopen(watchlist_path, "r");
    if (!f) return -1;

    int count = 0;
    char *line = NULL;
    size_t cap = 0;

    while (getline(&line, &cap, f) != -1 && count < max_rules) {
        /* Strip the line terminator, then leading blanks. */
        line[strcspn(line, "\n\r")] = 0;
        char *trimmed = line;
        while (*trimmed == ' ' || *trimmed == '\t') trimmed++;
        if (!*trimmed || *trimmed == '#') continue;

        char *last_slash = strrchr(trimmed, '/');
        if (!last_slash) continue;

        /* Copy the policy name out BEFORE the line is cut at the slash;
         * policy[] is zero-filled, so the 63-byte strncpy stays terminated. */
        char policy[64] = {0};
        strncpy(policy, last_slash + 1, 63);
        char *comma = strchr(policy, ',');
        if (comma) *comma = 0;
        int plen = strlen(policy);
        while (plen > 0 && (policy[plen-1] == ' ' || policy[plen-1] == '\t'))
            policy[--plen] = 0;
        if (!plen) continue;

        /* Terminate the token list at the slash so strtok_r only sees it. */
        *last_slash = 0;

        char *saveptr;
        char *token = strtok_r(trimmed, ",", &saveptr);
        while (token && count < max_rules) {
            while (*token == ' ' || *token == '\t') token++;
            if (strncmp(token, "geosite:", 8) == 0) {
                char *tag_str = token + 8;
                while (*tag_str == ' ') tag_str++;
                /* tlen excludes trailing spaces/tabs without modifying the token. */
                int tlen = strlen(tag_str);
                while (tlen > 0 && (tag_str[tlen-1] == ' ' || tag_str[tlen-1] == '\t'))
                    tlen--;
                if (tlen > 0) {
                    memset(&rules[count], 0, sizeof(geosite_rule_t));
                    int copy_len = tlen < MAX_TAG_LEN - 1 ? tlen : MAX_TAG_LEN - 1;
                    memcpy(rules[count].tag, tag_str, copy_len);
                    rules[count].tag[copy_len] = 0;
                    /* Upper-case the stored tag (ASCII only). */
                    for (char *p = rules[count].tag; *p; p++) {
                        if (*p >= 'a' && *p <= 'z') *p -= 32;
                    }
                    /* The rule was zeroed above, so the copy stays terminated. */
                    strncpy(rules[count].policy_name, policy, MAX_POLICY_NAME - 1);
                    count++;
                }
            }
            token = strtok_r(NULL, ",", &saveptr);
        }
    }

    free(line);
    fclose(f);
    return count;
}

/*
 * Populate `ht` with the domains selected by each geosite rule.
 *
 * For every rule, the rule's tag is looked up in every configured file, the
 * results are merged, Plain/Regex entries are counted and skipped, the
 * remaining Domain/Full entries are de-duplicated and inserted into `ht`
 * under the rule's policy name (last ht_insert argument 0 for Full entries,
 * 1 for Domain entries).
 *
 * file_paths, file_count : GeoSite .dat files
 * rules, rule_count      : parsed rules
 * ht                     : destination table
 *
 * Always returns 0; per-rule problems are logged and the rule is skipped.
 */
int build_geosite_domain_map(const char (*file_paths)[512], int file_count,
                             const geosite_rule_t *rules, int rule_count,
                             domain_hashtable_t *ht) {
    if (rule_count == 0 || file_count == 0) return 0;

    int total_plain_skipped = 0;
    int total_regex_skipped = 0;

    for (int r = 0; r < rule_count; r++) {
        geosite_domain_t *all_domains = NULL;
        int all_count = 0;
        int found = 0;

        for (int fi = 0; fi < file_count; fi++) {
            geosite_domain_t *domains = NULL;
            int domain_count = 0;

            if (extract_geosite_domains(file_paths[fi], rules[r].tag,
                                        &domains, &domain_count) != 0) {
                /* Release anything the extractor left allocated on failure. */
                for (int i = 0; i < domain_count; i++) free(domains[i].value);
                free(domains);
                continue;
            }

            /* Nothing to merge. Skipping here also keeps a zero-sized
             * realloc() from ever being issued below: such a call may free
             * the accumulated buffer and return NULL, leaving all_domains
             * dangling. */
            if (domain_count == 0) {
                free(domains);
                continue;
            }
            found = 1;

            if (all_domains == NULL) {
                all_domains = domains;
                all_count = domain_count;
            } else {
                geosite_domain_t *tmp = realloc(all_domains,
                                                (size_t)(all_count + domain_count) * sizeof(geosite_domain_t));
                if (!tmp) {
                    LOG_WARN("geosite:%s: out of memory while merging %s", rules[r].tag, file_paths[fi]);
                    for (int i = 0; i < domain_count; i++) free(domains[i].value);
                    free(domains);
                } else {
                    all_domains = tmp;
                    /* Shallow move: ownership of the value strings passes to
                     * all_domains, so only the old array shell is freed. */
                    memcpy(all_domains + all_count, domains, domain_count * sizeof(geosite_domain_t));
                    all_count += domain_count;
                    free(domains);
                }
            }
        }

        if (!found) {
            LOG_WARN("GeoSite: tag '%s' not found in any configured file", rules[r].tag);
            free(all_domains);
            continue;
        }

        int plain_skipped = 0, regex_skipped = 0, before_count = 0;
        for (int i = 0; i < all_count; i++) {
            if (all_domains[i].type == GEOSITE_TYPE_PLAIN) plain_skipped++;
            else if (all_domains[i].type == GEOSITE_TYPE_REGEX) regex_skipped++;
            else before_count++;
        }
        total_plain_skipped += plain_skipped;
        total_regex_skipped += regex_skipped;

        if (plain_skipped > 0)
            LOG_WARN("geosite:%s: %d Plain-type entries skipped", rules[r].tag, plain_skipped);
        if (regex_skipped > 0)
            LOG_WARN("geosite:%s: %d Regex-type entries skipped (not implemented)", rules[r].tag, regex_skipped);

        geosite_domain_t *deduped = NULL;
        int dedup_count = 0;
        if (deduplicate_domains(all_domains, all_count, &deduped, &dedup_count) != 0)
            LOG_WARN("geosite:%s: de-duplication failed, rule skipped", rules[r].tag);

        LOG_DEBUG("geosite:%s: %d entries total, %d Domain/Full before dedup, %d after dedup",
                  rules[r].tag, all_count, before_count, dedup_count);

        for (int i = 0; i < dedup_count; i++) {
            const char *val = deduped[i].value;
            size_t vlen = strlen(val);

            if (deduped[i].type == GEOSITE_TYPE_FULL) {
                ht_insert(ht, val, vlen, rules[r].policy_name, 0);
            } else if (deduped[i].type == GEOSITE_TYPE_DOMAIN) {
                ht_insert(ht, val, vlen, rules[r].policy_name, 1);
            }
        }

        /* deduped holds shallow copies; the strings are freed exactly once
         * through all_domains. */
        for (int i = 0; i < all_count; i++) {
            free(all_domains[i].value);
        }
        free(all_domains);
        free(deduped);
    }

    if (total_plain_skipped > 0 || total_regex_skipped > 0)
        LOG_WARN("GeoSite total: %d Plain and %d Regex entries skipped",
                 total_plain_skipped, total_regex_skipped);

    return 0;
}

/*
 * Parse a textual "address/prefix" CIDR into *out.
 *
 * The address part selects the family: it is parsed as IPv6 when it contains
 * a ':' and as IPv4 otherwise. The prefix must start with a decimal number in
 * the range 0..32 (IPv4) or 0..128 (IPv6); as before, any text following the
 * number is ignored.
 *
 * Returns 0 on success, -1 when the slash is missing, the address part is
 * empty or too long, the address does not parse, or the prefix is missing or
 * out of range. *out is zeroed first in every case.
 */
static int parse_cidr_str(const char *str, parsed_cidr_t *out) {
    memset(out, 0, sizeof(*out));

    const char *slash = strchr(str, '/');
    if (!slash) return -1;

    char ip_str[64];
    int ip_len = (int)(slash - str);
    if (ip_len <= 0 || ip_len >= 64) return -1;
    memcpy(ip_str, str, ip_len);
    ip_str[ip_len] = 0;

    /* strtol instead of atoi: "1.2.3.4/" or "1.2.3.4/x" must be rejected
     * rather than silently becoming a /0 that matches everything. */
    char *end = NULL;
    errno = 0;
    long prefix = strtol(slash + 1, &end, 10);
    if (end == slash + 1 || errno == ERANGE) return -1;

    int is_v6 = strchr(ip_str, ':') != NULL;
    if (prefix < 0 || prefix > (is_v6 ? 128 : 32)) return -1;

    if (is_v6) {
        uint8_t buf[16];
        if (inet_pton(AF_INET6, ip_str, buf) != 1) return -1;
        memcpy(out->ip, buf, 16);
        out->prefix = (int)prefix;
        out->family = AF_INET6;
    } else {
        uint8_t buf[4];
        if (inet_pton(AF_INET, ip_str, buf) != 1) return -1;
        memcpy(out->ip, buf, 4);
        out->prefix = (int)prefix;
        out->family = AF_INET;
    }
    return 0;
}

/* A GeoIP tag together with separate IPv4 and IPv6 entry counters. */
typedef struct {
    char tag[MAX_TAG_LEN];   /* tag text, NUL-terminated */
    int  ipv4;               /* number of IPv4 entries counted for the tag */
    int  ipv6;               /* number of IPv6 entries counted for the tag */
} geoip_tag_count_t;

/* Per-set bookkeeping record, looked up by set_name.
 * NOTE(review): count presumably tracks entries added to the set and warned
 * whether a limit warning was already emitted - confirm against the users of
 * this struct, which are outside this chunk. */
typedef struct {
    char set_name[64];       /* NUL-terminated set name (at most 63 chars) */
    int  count;
    int  warned;
} ipset_usage_t;

/* Open-addressed (linear probing) hash index mapping a name's FNV-1a hash to
 * an array index in batches[] or usage[]. It replaces an O(n) linear scan.
 * The table stays at most half full: callers cap the number of entries at
 * MAX_POLICY_ORDER * 2 = 128, and there are NAME_INDEX_SLOTS = 256 slots.
 * The slot count must remain a power of two because probing masks with
 * NAME_INDEX_SLOTS - 1. */
#define NAME_INDEX_SLOTS 256

typedef struct {
    int      slot_idx[NAME_INDEX_SLOTS];   /* array index, or -1 when the slot is empty */
    uint32_t slot_hash[NAME_INDEX_SLOTS];  /* full hash of the stored name; valid only for used slots */
} name_index_t;

/* Mark every slot as empty. slot_hash is left untouched because it is only
 * read for slots whose slot_idx is non-negative. */
static void name_index_init(name_index_t *ni) {
    int *slot = ni->slot_idx;
    int *const end = slot + NAME_INDEX_SLOTS;

    while (slot != end) {
        *slot++ = -1;
    }
}

static int usage_index_lookup(const name_index_t *ni, const ipset_usage_t *usage,
                               uint32_t hash, const char *name) {
    uint32_t mask = NAME_INDEX_SLOTS - 1;
    for (uint32_t probe = 0; probe < NAME_INDEX_SLOTS; probe++) {
        uint32_t slot = (hash + probe) & mask;
        if (ni->slot_idx[slot] < 0) return -1;
        if (ni->slot_hash[slot] == hash &&
            strcmp(usage[ni->slot_idx[slot]].set_name, name) == 0)
            return ni->slot_idx[slot];
    }
    return -1;
}

/*
 * Store `idx` under `hash` in the first free slot of the probe sequence that
 * starts at hash & (NAME_INDEX_SLOTS - 1). No duplicate check is performed.
 * When every slot is occupied the call does nothing.
 */
static void name_index_insert(name_index_t *ni, uint32_t hash, int idx) {
    uint32_t pos = hash & (NAME_INDEX_SLOTS - 1);

    for (int tries = 0; tries < NAME_INDEX_SLOTS; tries++) {
        if (ni->slot_idx[pos] < 0) {
            ni->slot_hash[pos] = hash;
            ni->slot_idx[pos] = idx;
            return;
        }
        pos = (pos + 1) & (NAME_INDEX_SLOTS - 1);
    }
}

/*
 * Return the usage[] index for `name`, appending a fresh zero-count record
 * when the name is not known yet.
 *
 * usage : record array
 * n     : number of records in use; incremented when a record is added
 * idx   : hash index over usage[]
 * name  : set name (stored truncated to 63 characters)
 *
 * Returns the record index, or -1 when the name is new and the array already
 * holds MAX_POLICY_ORDER * 2 records.
 */
static int usage_find_or_add(ipset_usage_t *usage, int *n, name_index_t *idx,
                              const char *name) {
    const uint32_t name_hash = fnv1a_hash(name, strlen(name));

    const int found = usage_index_lookup(idx, usage, name_hash, name);
    if (found >= 0) return found;

    const int slot = *n;
    if (slot >= MAX_POLICY_ORDER * 2) return -1;

    ipset_usage_t *rec = &usage[slot];
    strncpy(rec->set_name, name, 63);
    rec->set_name[63] = 0;
    rec->count = 0;
    rec->warned = 0;

    *n = slot + 1;
    name_index_insert(idx, name_hash, slot);
    return slot;
}

/* Limits used by the CIDRfile migration: maximum number of lines held in
 * memory and maximum number of "/Name" blocks tracked. */
#define CIDR_MIGRATE_MAX_LINES  16384
#define CIDR_MIGRATE_MAX_BLOCKS 512

/* One line of the CIDRfile as held in memory during migration. */
typedef struct {
    char *text;       /* heap copy of the line without its terminator */
    int   keep;       /* non-zero: write the line back out */
    int   block_id;   /* block the line belongs to, or -1 for none */
} cidr_line_t;

/* Release the text of the first n lines and then the array itself. */
static void free_cidr_lines(cidr_line_t *lines, int n) {
    cidr_line_t *cur = lines;

    while (n-- > 0) {
        free(cur->text);
        cur++;
    }
    free(lines);
}

/*
 * Rewrite the CIDRfile so that "geoip:<tag>" lines naming an oversized tag
 * are removed from active blocks and listed instead in a disabled
 * "#/Too-big-geoip-tag" block appended at the end of the file.
 *
 * A "/Name" block that has no remaining content lines is dropped together
 * with its header and the blank/"##" lines directly above it. The result is
 * written to "<path>.tmp" and renamed over the original only after every
 * write succeeded.
 *
 * path                        : CIDRfile to rewrite
 * oversized, oversized_count  : tags (upper case) to move out
 *
 * Returns 0 on success, -1 on any error. The original file is left untouched
 * on error; in particular a file with more than CIDR_MIGRATE_MAX_LINES lines
 * is refused rather than rewritten truncated.
 */
static int cidrfile_migrate_oversized(
    const char *path,
    const geoip_tag_count_t *oversized, int oversized_count)
{
    FILE *f = fopen(path, "r");
    if (!f) {
        LOG_ERROR("Cannot open CIDRfile for migration: %s: %s", path, strerror(errno));
        return -1;
    }

    cidr_line_t *lines = malloc(CIDR_MIGRATE_MAX_LINES * sizeof(cidr_line_t));
    if (!lines) { fclose(f); return -1; }

    /* Pass 0: load the whole file into memory. */
    int line_count = 0;
    int too_long = 0;
    char *buf = NULL;
    size_t cap = 0;
    while (getline(&buf, &cap, f) != -1) {
        if (line_count >= CIDR_MIGRATE_MAX_LINES) { too_long = 1; break; }
        buf[strcspn(buf, "\n\r")] = 0;
        lines[line_count].text = strdup(buf);
        if (!lines[line_count].text) { free(buf); free_cidr_lines(lines, line_count); fclose(f); return -1; }
        lines[line_count].keep = 1;
        lines[line_count].block_id = -1;
        line_count++;
    }
    free(buf);
    fclose(f);

    if (too_long) {
        /* Writing back only the lines that fit would silently delete the
         * rest of the user's file. */
        LOG_ERROR("CIDRfile %s exceeds %d lines; migration aborted", path, CIDR_MIGRATE_MAX_LINES);
        free_cidr_lines(lines, line_count);
        return -1;
    }

    int block_active[CIDR_MIGRATE_MAX_BLOCKS];   /* content lines kept per block */
    int block_header[CIDR_MIGRATE_MAX_BLOCKS];   /* line index of "/Name", -1 if unset */
    memset(block_active, 0, sizeof(block_active));
    memset(block_header, -1, sizeof(block_header));

    int current_block = -1;
    int next_block = 0;
    int in_active = 0;

    /* Pass 1: assign lines to blocks and drop oversized geoip: lines. */
    for (int i = 0; i < line_count; i++) {
        char *t = lines[i].text;
        while (*t == ' ' || *t == '\t') t++;

        /* Blank lines and "##" comments end the current block. */
        if (t[0] == '\0' || strncmp(t, "##", 2) == 0) {
            current_block = -1;
            in_active = 0;
            continue;
        }
        /* "#/Name" starts a disabled block: its content is left alone. */
        if (strncmp(t, "#/", 2) == 0) {
            current_block = -1;
            in_active = 0;
            continue;
        }
        if (t[0] == '/') {
            if (next_block >= CIDR_MIGRATE_MAX_BLOCKS) {
                LOG_ERROR("CIDRfile %s has more than %d blocks; migration aborted", path, CIDR_MIGRATE_MAX_BLOCKS);
                free_cidr_lines(lines, line_count);
                return -1;
            }
            current_block = next_block++;
            in_active = 1;
            block_header[current_block] = i;
            lines[i].block_id = current_block;
            continue;
        }
        if (!in_active || current_block < 0) continue;

        lines[i].block_id = current_block;

        if (strncmp(t, "geoip:", 6) == 0) {
            char *country = t + 6;
            while (*country == ' ') country++;
            int clen = strlen(country);
            while (clen > 0 && country[clen - 1] == ' ') clen--;

            char tag_upper[MAX_TAG_LEN] = {0};
            int tcopy = clen < MAX_TAG_LEN - 1 ? clen : MAX_TAG_LEN - 1;
            memcpy(tag_upper, country, tcopy);
            for (char *p = tag_upper; *p; p++)
                if (*p >= 'a' && *p <= 'z') *p -= 32;

            int is_over = 0;
            for (int o = 0; o < oversized_count; o++) {
                if (strcmp(oversized[o].tag, tag_upper) == 0) { is_over = 1; break; }
            }
            if (is_over)
                lines[i].keep = 0;
            else
                block_active[current_block]++;
        } else {
            block_active[current_block]++;
        }
    }

    /* Pass 2: remove blocks that ended up empty, including the blank/"##"
     * lines immediately preceding their header. */
    for (int bid = 0; bid < next_block; bid++) {
        if (block_active[bid] > 0 || block_header[bid] < 0) continue;
        lines[block_header[bid]].keep = 0;
        for (int i = block_header[bid] - 1; i >= 0; i--) {
            char *t = lines[i].text;
            while (*t == ' ' || *t == '\t') t++;
            if (t[0] == '\0' || strncmp(t, "##", 2) == 0) {
                lines[i].keep = 0;
            } else {
                break;
            }
        }
    }

    char tmp_path[MAX_PATH_LEN + 16];
    snprintf(tmp_path, sizeof(tmp_path), "%s.tmp", path);

    FILE *out = fopen(tmp_path, "w");
    if (!out) {
        LOG_ERROR("Cannot create temp CIDRfile %s: %s", tmp_path, strerror(errno));
        free_cidr_lines(lines, line_count);
        return -1;
    }

    /* Any failed write must prevent the rename below, otherwise a partial
     * file (e.g. on a full disk) would replace the original. */
    int write_failed = 0;

    for (int i = 0; i < line_count; i++) {
        if (lines[i].keep && fprintf(out, "%s\n", lines[i].text) < 0)
            write_failed = 1;
    }
    free_cidr_lines(lines, line_count);

    if (fprintf(out, "\n##impossible to use\n") < 0) write_failed = 1;
    if (fprintf(out, "#/Too-big-geoip-tag\n") < 0) write_failed = 1;

    /* Zero-filled so that every strncpy'd tag below is NUL-terminated even
     * when it is MAX_TAG_LEN - 1 characters long. */
    char written[256][MAX_TAG_LEN];
    memset(written, 0, sizeof(written));
    int written_count = 0;
    for (int o = 0; o < oversized_count; o++) {
        int already = 0;
        for (int w = 0; w < written_count; w++) {
            if (strcmp(written[w], oversized[o].tag) == 0) { already = 1; break; }
        }
        if (already || written_count >= 256) continue;
        strncpy(written[written_count++], oversized[o].tag, MAX_TAG_LEN - 1);
        char lower[MAX_TAG_LEN] = {0};
        strncpy(lower, oversized[o].tag, MAX_TAG_LEN - 1);
        for (char *p = lower; *p; p++)
            if (*p >= 'A' && *p <= 'Z') *p += 32;
        if (fprintf(out, "geoip:%s\n", lower) < 0) write_failed = 1;
    }
    /* fclose flushes buffered data, so its result matters too. */
    if (fclose(out) != 0) write_failed = 1;

    if (write_failed) {
        LOG_ERROR("Failed to write temp CIDRfile %s", tmp_path);
        unlink(tmp_path);
        return -1;
    }

    if (rename(tmp_path, path) != 0) {
        LOG_ERROR("Failed to rename %s to %s: %s", tmp_path, path, strerror(errno));
        unlink(tmp_path);
        return -1;
    }
    return 0;
}

typedef struct {
    char name[64];
    int  has_v4;
    int  has_v6;
} cidr_block_t;

typedef void (*cidr_entry_fn)(const cidr_block_t *blk, const char *entry, void *ctx);

/*
 * Block-aware CIDRfile scanner.
 *
 * Line kinds (after skipping leading spaces/tabs):
 *   empty or "##..."  - ends the current block
 *   "#/Name"          - a disabled block; its content lines are skipped
 *   "/Name"           - starts block Name; the block is active when an ipset
 *                       named "Name" or "Namev6" exists
 *   anything else     - content line; passed to on_entry when the current
 *                       block is active
 *
 * path     : CIDRfile to read
 * mgr      : ipset manager used for the existence checks
 * on_entry : callback for content lines (may be NULL)
 * ctx      : opaque pointer forwarded to on_entry
 * verbose  : non-zero to emit LOG_DEBUG lines at block boundaries
 *
 * Returns -1 if the file cannot be opened, 0 otherwise.
 */
static int scan_cidrfile_blocks(const char *path, ipset_manager_t *mgr,
                                 cidr_entry_fn on_entry, void *ctx, int verbose) {
    FILE *fp = fopen(path, "r");
    if (fp == NULL) return -1;

    cidr_block_t cur;
    memset(&cur, 0, sizeof(cur));

    char *buf = NULL;
    size_t buf_cap = 0;
    int active = 0;   /* current block resolves to at least one ipset */
    int inside = 0;   /* a block header (enabled or disabled) has been seen */

    while (getline(&buf, &buf_cap, fp) != -1) {
        buf[strcspn(buf, "\n\r")] = 0;
        char *text = buf + strspn(buf, " \t");

        if (text[0] == '\0' || strncmp(text, "##", 2) == 0) {
            /* Separator: close whatever block was open. */
            if (verbose && inside && cur.name[0]) {
                LOG_DEBUG("End of CIDR block: %s", cur.name);
            }
            cur.name[0] = 0;
            cur.has_v4 = 0;
            cur.has_v6 = 0;
            active = 0;
            inside = 0;
        } else if (strncmp(text, "#/", 2) == 0) {
            /* Disabled block header. */
            if (verbose && inside && cur.name[0]) {
                LOG_DEBUG("End of CIDR block: %s", cur.name);
            }
            if (verbose) {
                LOG_DEBUG("Disabled CIDR block: %s (skipping)", text + 2);
            }
            cur.name[0] = 0;
            cur.has_v4 = 0;
            cur.has_v6 = 0;
            active = 0;
            inside = 1;
        } else if (text[0] == '/') {
            /* Enabled block header. */
            if (verbose && inside && cur.name[0]) {
                LOG_DEBUG("End of CIDR block: %s", cur.name);
            }

            /* Name = header text after '/', without surrounding blanks. */
            const char *raw = text + 1;
            raw += strspn(raw, " ");
            strncpy(cur.name, raw, sizeof(cur.name) - 1);
            cur.name[sizeof(cur.name) - 1] = 0;
            for (size_t end = strlen(cur.name);
                 end > 0 && (cur.name[end - 1] == ' ' || cur.name[end - 1] == '\t');
                 ) {
                cur.name[--end] = 0;
            }

            char v6_name[64];
            snprintf(v6_name, sizeof(v6_name), "%.60sv6", cur.name);

            cur.has_v4 = ipset_set_exists(mgr, cur.name);
            cur.has_v6 = ipset_set_exists(mgr, v6_name);
            active = cur.has_v4 || cur.has_v6;

            if (verbose) {
                if (active) {
                    LOG_DEBUG("CIDR block start: %s (IPv4 %s, IPv6 %s)", cur.name,
                              cur.has_v4 ? "Y" : "N", cur.has_v6 ? "Y" : "N");
                } else {
                    LOG_DEBUG("CIDR block start: %s (skipped - no ipsets exist)", cur.name);
                }
            }
            inside = 1;
        } else if (active && cur.name[0] && on_entry) {
            /* Content line of an active block. */
            on_entry(&cur, text, ctx);
        }
    }

    free(buf);
    fclose(fp);
    return 0;
}

typedef struct {
    geoip_tag_count_t *tag_cache;
    int               *tag_cache_count;
    int                tag_cache_max;
    const char (*geoip_files)[512];
    int                geoip_count;
} phase1_ctx_t;

/*
 * Phase-1 scanner callback: collect every distinct "geoip:<tag>" directive
 * and record how many IPv4/IPv6 CIDRs that tag holds across the configured
 * GeoIP files.
 *
 * blk   - enclosing block (unused in this phase).
 * entry - one entry line from the CIDR file.
 * ctx   - phase1_ctx_t describing the tag cache and the GeoIP inputs.
 *
 * Entries that are not geoip directives are ignored, as are tags already
 * cached and tags that arrive after the cache is full.
 */
static void phase1_on_entry(const cidr_block_t *blk, const char *entry, void *ctx) {
    (void)blk;

    const size_t kw_len = sizeof("geoip:") - 1;
    if (strncmp(entry, "geoip:", kw_len) != 0) return;

    phase1_ctx_t *p1 = (phase1_ctx_t *)ctx;

    /* Strip the spaces surrounding the tag (only ' ' is treated as padding). */
    const char *name = entry + kw_len;
    name += strspn(name, " ");
    int name_len = strlen(name);
    while (name_len > 0 && name[name_len - 1] == ' ')
        name_len--;
    if (name_len > MAX_TAG_LEN - 1)
        name_len = MAX_TAG_LEN - 1;

    /* Upper-case ASCII letters into a zero-padded buffer. */
    char tag[MAX_TAG_LEN] = {0};
    for (int i = 0; i < name_len; i++) {
        char c = name[i];
        tag[i] = (c >= 'a' && c <= 'z') ? (char)(c - 32) : c;
    }

    /* Each tag is counted once, no matter how many blocks reference it. */
    int known = *p1->tag_cache_count;
    for (int i = 0; i < known; i++) {
        if (strcmp(p1->tag_cache[i].tag, tag) == 0) return;
    }
    if (known >= p1->tag_cache_max) return;

    geoip_tag_count_t *slot = &p1->tag_cache[known];
    *p1->tag_cache_count = known + 1;

    /* tag is zero-padded, so this leaves slot->tag terminated and padded. */
    memcpy(slot->tag, tag, MAX_TAG_LEN - 1);
    slot->tag[MAX_TAG_LEN - 1] = 0;
    slot->ipv4 = 0;
    slot->ipv6 = 0;
    count_geoip_cidrs_all_files(p1->geoip_files, p1->geoip_count, tag,
                                &slot->ipv4, &slot->ipv6);
}

/*
 * Growable list of parsed CIDRs destined for a single ipset.
 * `entries` is heap-allocated by batch_find_or_add() and grown by
 * batch_push(); add_cidr_to_ipsets() frees it after flushing.
 */
typedef struct {
    char         set_name[64];   /* NUL-terminated target ipset name */
    parsed_cidr_t *entries;      /* heap array of queued CIDRs */
    int           count;         /* number of used elements in entries */
    int           capacity;      /* number of allocated elements in entries */
} batch_t;

/*
 * Context handed to phase2_on_entry() through the scanner's opaque `ctx`
 * pointer. Groups the inputs of the second pass (ipset manager, GeoIP files,
 * element limit, phase-1 results) with the mutable state it builds
 * (per-set batches and per-set usage counters).
 */
typedef struct {
    ipset_manager_t *mgr;               /* queried with ipset_set_exists() */
    const char (*geoip_files)[512];     /* array of fixed-size GeoIP file name buffers */
    int              geoip_count;       /* number of entries in geoip_files */
    uint32_t         effective_limit;   /* maximum number of elements allowed per set */

    const geoip_tag_count_t *oversized; /* tags whose geoip directives are skipped entirely */
    int                      oversized_count;
    const geoip_tag_count_t *tag_cache; /* per-tag IPv4/IPv6 counts gathered in phase 1 */
    int                      tag_cache_count;

    batch_t      *batches;              /* caller-owned array of per-set batches */
    int          *batch_count;          /* number of used slots in batches (updated in place) */
    int           batch_max;            /* capacity of batches */
    name_index_t *batch_index;          /* set name -> index into batches */

    ipset_usage_t *usage;               /* per-set element counters used for limit checks */
    int           *usage_count;         /* number of used slots in usage (updated in place) */
    name_index_t  *usage_index;         /* set name -> index into usage */
} phase2_ctx_t;

static int batches_index_lookup(const name_index_t *ni, const batch_t *batches,
                                 uint32_t hash, const char *name) {
    uint32_t mask = NAME_INDEX_SLOTS - 1;
    for (uint32_t probe = 0; probe < NAME_INDEX_SLOTS; probe++) {
        uint32_t slot = (hash + probe) & mask;
        if (ni->slot_idx[slot] < 0) return -1;
        if (ni->slot_hash[slot] == hash &&
            strcmp(batches[ni->slot_idx[slot]].set_name, name) == 0)
            return ni->slot_idx[slot];
    }
    return -1;
}

/*
 * Return the position of the batch for `set_name`, creating it if needed.
 *
 * batches     - batch array.
 * count       - in/out number of used batches.
 * max         - capacity of `batches`.
 * idx         - name index kept in sync with `batches`.
 * set_name    - target ipset name (stored truncated to 63 characters).
 * initial_cap - number of entries to allocate for a newly created batch.
 *
 * Returns the batch position, or -1 when the array is full or the entry
 * buffer cannot be allocated.
 */
static int batch_find_or_add(batch_t *batches, int *count, int max,
                              name_index_t *idx, const char *set_name, int initial_cap) {
    const uint32_t name_hash = fnv1a_hash(set_name, strlen(set_name));

    int found = batches_index_lookup(idx, batches, name_hash, set_name);
    if (found >= 0)
        return found;

    if (*count >= max)
        return -1;

    /* Fill the next free slot; it only becomes visible once count is bumped. */
    batch_t *slot = &batches[*count];
    strncpy(slot->set_name, set_name, 63);
    slot->set_name[63] = 0;
    slot->capacity = initial_cap;
    slot->entries = malloc(initial_cap * sizeof(parsed_cidr_t));
    slot->count = 0;
    if (slot->entries == NULL)
        return -1;

    int created = *count;
    *count = created + 1;
    name_index_insert(idx, name_hash, created);
    return created;
}

/*
 * Append one CIDR to a batch, growing its entry buffer when it is full.
 *
 * b    - batch to append to.
 * cidr - entry to copy into the batch.
 *
 * Returns 0 on success, or -1 when the buffer cannot be grown. On failure
 * the batch is left untouched and remains valid.
 */
static int batch_push(batch_t *b, const parsed_cidr_t *cidr) {
    enum {
        BATCH_MIN_CAPACITY = 16,       /* starting size for an empty batch */
        BATCH_MAX_CAPACITY = 1 << 30   /* keeps capacity * 2 inside int range */
    };

    if (b->count >= b->capacity) {
        int new_cap;

        if (b->capacity <= 0) {
            /* Doubling zero would request realloc(p, 0), which may free the
             * buffer and return NULL, leaving b->entries dangling. */
            new_cap = BATCH_MIN_CAPACITY;
        } else if (b->capacity > BATCH_MAX_CAPACITY / 2) {
            return -1;
        } else {
            new_cap = b->capacity * 2;
        }

        /* Reject sizes whose byte count would wrap size_t (32-bit targets). */
        if ((size_t)new_cap > (size_t)-1 / sizeof(*b->entries))
            return -1;

        parsed_cidr_t *tmp = realloc(b->entries, (size_t)new_cap * sizeof(*b->entries));
        if (!tmp) return -1;
        b->entries = tmp;
        b->capacity = new_cap;
    }
    b->entries[b->count++] = *cidr;
    return 0;
}

static void phase2_on_entry(const cidr_block_t *blk, const char *entry, void *ctx) {
    phase2_ctx_t *cx = (phase2_ctx_t *)ctx;
    const char *cur = blk->name;

    if (strncmp(entry, "geoip:", 6) == 0) {
        char country[MAX_TAG_LEN] = {0};
        const char *src = entry + 6;
        while (*src == ' ') src++;
        int clen = strlen(src);
        while (clen > 0 && src[clen - 1] == ' ') clen--;
        int ccopy = clen < MAX_TAG_LEN - 1 ? clen : MAX_TAG_LEN - 1;
        memcpy(country, src, ccopy);
        country[ccopy] = 0;

        if (cx->geoip_count == 0 || !cx->geoip_files) {
            LOG_WARN("GeoIP directive 'geoip:%s' found but GeoIPFile not configured", country);
            return;
        }

        char tag_upper[MAX_TAG_LEN] = {0};
        strncpy(tag_upper, country, MAX_TAG_LEN - 1);
        for (char *p = tag_upper; *p; p++)
            if (*p >= 'a' && *p <= 'z') *p -= 32;

        for (int o = 0; o < cx->oversized_count; o++) {
            if (strcmp(cx->oversized[o].tag, tag_upper) == 0) return;
        }

        int cached_ipv4 = -1, cached_ipv6 = -1;
        for (int k = 0; k < cx->tag_cache_count; k++) {
            if (strcmp(cx->tag_cache[k].tag, tag_upper) == 0) {
                cached_ipv4 = cx->tag_cache[k].ipv4;
                cached_ipv6 = cx->tag_cache[k].ipv6;
                break;
            }
        }

        char v4_target[64], v6_target[64];
        strncpy(v4_target, cur, 63);
        v4_target[63] = 0;
        snprintf(v6_target, sizeof(v6_target), "%.60sv6", cur);

        int allow_v4 = 1, allow_v6 = 1;

        if (cached_ipv4 > 0) {
            int ui = usage_find_or_add(cx->usage, cx->usage_count, cx->usage_index, v4_target);
            if (ui >= 0 && (uint32_t)(cx->usage[ui].count + cached_ipv4) > cx->effective_limit) {
                LOG_WARN("geoip:%s skipped for %s: %d + %d would exceed limit %u",
                         country, v4_target, cx->usage[ui].count, cached_ipv4, cx->effective_limit);
                allow_v4 = 0;
            }
        }
        if (cached_ipv6 > 0) {
            int ui = usage_find_or_add(cx->usage, cx->usage_count, cx->usage_index, v6_target);
            if (ui >= 0 && (uint32_t)(cx->usage[ui].count + cached_ipv6) > cx->effective_limit) {
                LOG_WARN("geoip:%s skipped for %s: %d + %d would exceed limit %u",
                         country, v6_target, cx->usage[ui].count, cached_ipv6, cx->effective_limit);
                allow_v6 = 0;
            }
        }
        if (!allow_v4 && !allow_v6) return;

        for (int gi = 0; gi < cx->geoip_count; gi++) {
            geoip_entry_t *entries = NULL;
            int entry_count = 0;

            if (extract_geoip_cidrs(cx->geoip_files[gi], country, &entries, &entry_count) != 0)
                continue;

            for (int e = 0; e < entry_count; e++) {
                char target_set[64];
                parsed_cidr_t cidr;
                memset(&cidr, 0, sizeof(cidr));

                if (entries[e].ip_len == 4) {
                    if (!allow_v4 || !ipset_set_exists(cx->mgr, cur)) continue;
                    strncpy(target_set, cur, 63);
                    target_set[63] = 0;
                    memcpy(cidr.ip, entries[e].ip, 4);
                    cidr.prefix = entries[e].prefix;
                    cidr.family = AF_INET;
                } else {
                    if (!allow_v6 || !ipset_set_exists(cx->mgr, v6_target)) continue;
                    strncpy(target_set, v6_target, 63);
                    target_set[63] = 0;
                    memcpy(cidr.ip, entries[e].ip, 16);
                    cidr.prefix = entries[e].prefix;
                    cidr.family = AF_INET6;
                }

                int bi = batch_find_or_add(cx->batches, cx->batch_count, cx->batch_max,
                                            cx->batch_index, target_set, 4096);
                if (bi >= 0) batch_push(&cx->batches[bi], &cidr);
            }
            free(entries);
        }

        if (allow_v4 && cached_ipv4 > 0) {
            int ui = usage_find_or_add(cx->usage, cx->usage_count, cx->usage_index, v4_target);
            if (ui >= 0) cx->usage[ui].count += cached_ipv4;
        }
        if (allow_v6 && cached_ipv6 > 0) {
            int ui = usage_find_or_add(cx->usage, cx->usage_count, cx->usage_index, v6_target);
            if (ui >= 0) cx->usage[ui].count += cached_ipv6;
        }
        return;
    }

    parsed_cidr_t cidr;
    if (parse_cidr_str(entry, &cidr) != 0) {
        LOG_WARN("Invalid CIDR: %s", entry);
        return;
    }

    char target_set[64];
    if (cidr.family == AF_INET) {
        if (!ipset_set_exists(cx->mgr, cur)) return;
        strncpy(target_set, cur, 63);
        target_set[63] = 0;
    } else {
        snprintf(target_set, sizeof(target_set), "%.60sv6", cur);
        if (!ipset_set_exists(cx->mgr, target_set)) return;
    }

    int ui = usage_find_or_add(cx->usage, cx->usage_count, cx->usage_index, target_set);
    if (ui >= 0 && (uint32_t)(cx->usage[ui].count + 1) > cx->effective_limit) {
        if (!cx->usage[ui].warned) {
            LOG_WARN("Static CIDR entries for %s reached limit %u, further entries skipped",
                     target_set, cx->effective_limit);
            cx->usage[ui].warned = 1;
        }
        return;
    }

    int bi = batch_find_or_add(cx->batches, cx->batch_count, cx->batch_max,
                                cx->batch_index, target_set, 256);
    if (bi >= 0 && batch_push(&cx->batches[bi], &cidr) == 0) {
        if (ui >= 0) cx->usage[ui].count++;
    }
}

/*
 * Load the CIDR file into the existing ipsets.
 *
 * mgr            - ipset manager used for set queries and batch insertion.
 * cidr_path      - path of the CIDR file to scan.
 * pairs          - IPv4/IPv6 set-name pairs that get a cached timeout.
 * pair_count     - number of elements in `pairs`.
 * enable_timeout - when non-zero and `timeout` > 0, the timeout is cached
 *                  for every existing set listed in `pairs`.
 * timeout        - timeout value passed to ipset_cache_timeout_for_set().
 * geoip_files    - array of GeoIP file name buffers (may be NULL).
 * geoip_count    - number of entries in `geoip_files`.
 * maxelem        - per-set element limit; 0 selects IPSET_DEFAULT_MAXELEM.
 *
 * Phase 1 (only when GeoIP files are configured) counts the CIDRs behind
 * every referenced geoip tag and hands tags that exceed the limit on their
 * own to cidrfile_migrate_oversized(). Phase 2 rescans the file, groups all
 * entries into per-set batches and submits each batch with
 * ipset_add_batch().
 *
 * Returns 0 on success, or -1 when the second scan of the file fails.
 */
int add_cidr_to_ipsets(ipset_manager_t *mgr, const char *cidr_path,
                       const ipset_pair_t *pairs, int pair_count,
                       int enable_timeout, int timeout,
                       const char (*geoip_files)[512], int geoip_count,
                       uint32_t maxelem)
{
    uint32_t effective_limit = (maxelem > 0) ? maxelem : IPSET_DEFAULT_MAXELEM;

    geoip_tag_count_t tag_cache[256];
    const int tag_cache_max = (int)(sizeof(tag_cache) / sizeof(tag_cache[0]));
    int tag_cache_count = 0;

    geoip_tag_count_t oversized[256];
    const int oversized_max = (int)(sizeof(oversized) / sizeof(oversized[0]));
    int oversized_count = 0;

    /* NOTE(review): this scan runs before ipset_refresh_set_list(), yet the
     * scanner uses ipset_set_exists() to decide which blocks are active.
     * Confirm the caller guarantees a fresh set list at this point. */
    if (geoip_count > 0 && geoip_files) {
        phase1_ctx_t p1 = {
            .tag_cache = tag_cache,
            .tag_cache_count = &tag_cache_count,
            .tag_cache_max = tag_cache_max,
            .geoip_files = geoip_files,
            .geoip_count = geoip_count,
        };
        if (scan_cidrfile_blocks(cidr_path, mgr, phase1_on_entry, &p1, 0) == 0) {
            for (int k = 0; k < tag_cache_count; k++) {
                if ((uint32_t)tag_cache[k].ipv4 > effective_limit ||
                    (uint32_t)tag_cache[k].ipv6 > effective_limit) {
                    LOG_WARN("geoip:%s: %d IPv4 + %d IPv6 CIDR exceeds limit %u, "
                             "migrating to disabled block",
                             tag_cache[k].tag, tag_cache[k].ipv4,
                             tag_cache[k].ipv6, effective_limit);
                    if (oversized_count < oversized_max)
                        oversized[oversized_count++] = tag_cache[k];
                }
            }

            if (oversized_count > 0) {
                if (cidrfile_migrate_oversized(cidr_path, oversized, oversized_count) == 0)
                    LOG_INFO("CIDRfile updated: %d oversized tag(s) moved to disabled block",
                             oversized_count);
            }
        }
    }

    ipset_refresh_set_list(mgr);

    if (enable_timeout && timeout > 0) {
        uint32_t to = (uint32_t)timeout;
        for (int i = 0; i < pair_count; i++) {
            if (ipset_set_exists(mgr, pairs[i].ipv4))
                ipset_cache_timeout_for_set(mgr, pairs[i].ipv4, 1, to);
            if (ipset_set_exists(mgr, pairs[i].ipv6))
                ipset_cache_timeout_for_set(mgr, pairs[i].ipv6, 1, to);
        }
    }

    batch_t batches[MAX_POLICY_ORDER * 2];
    int batch_count = 0;
    name_index_t batch_index;
    name_index_init(&batch_index);

    ipset_usage_t usage[MAX_POLICY_ORDER * 2];
    int usage_count = 0;
    name_index_t usage_index;
    name_index_init(&usage_index);

    phase2_ctx_t p2 = {
        .mgr = mgr,
        .geoip_files = geoip_files,
        .geoip_count = geoip_count,
        .effective_limit = effective_limit,
        .oversized = oversized,
        .oversized_count = oversized_count,
        .tag_cache = tag_cache,
        .tag_cache_count = tag_cache_count,
        .batches = batches,
        .batch_count = &batch_count,
        .batch_max = MAX_POLICY_ORDER * 2,
        .batch_index = &batch_index,
        .usage = usage,
        .usage_count = &usage_count,
        .usage_index = &usage_index,
    };

    if (scan_cidrfile_blocks(cidr_path, mgr, phase2_on_entry, &p2, 1) != 0) {
        LOG_WARN("CIDR file not found: %s", cidr_path);
        /* Release any batches created before the scan failed. */
        for (int i = 0; i < batch_count; i++)
            free(batches[i].entries);
        return -1;
    }

    for (int i = 0; i < batch_count; i++) {
        if (batches[i].count > 0) {
            LOG_INFO("Adding %d entries to ipset %s", batches[i].count, batches[i].set_name);
            int new_count = 0;
            int new_indices[1] = {0};
            ipset_add_batch(mgr, batches[i].set_name,
                            batches[i].entries, batches[i].count,
                            0, &new_count, new_indices);
        }
        free(batches[i].entries);
    }

    LOG_INFO("CIDR processing complete (processed %d ipsets)", batch_count);
    return 0;
}