newechoes/wasm/search/src/lib.rs

use wasm_bindgen::prelude::*;
use utils_common::compression::from_compressed_with_max_version;
use crate::models::{ArticleSearchIndex, SearchRequest, SearchResult, SearchResultItem, HeadingNode, HeadingIndexEntry, SuggestionCandidate, SearchSuggestion, SuggestionType};
use std::collections::{HashMap, HashSet};
use web_sys;
pub mod models;
pub mod builder;

/// WASM入口点 - 搜索文章
#[wasm_bindgen]
pub fn search_articles(index_data: &[u8], request_json: &str) -> Result<String, JsValue> {
    // 捕获Rust panic并转换为JS错误
    std::panic::set_hook(Box::new(console_error_panic_hook::hook));
    
    let start_time = web_sys::window()
        .and_then(|w| w.performance())
        .map(|p| p.now())
        .unwrap_or(0.0);
    
    // 解析搜索请求
    let req: SearchRequest = match serde_json::from_str(request_json) {
        Ok(r) => r,
        Err(e) => {
            return Err(JsValue::from_str(&format!("解析搜索请求失败: {}", e)));
        }
    };
    
    // 解压缩搜索索引
    let search_index = match from_compressed_with_max_version::<ArticleSearchIndex>(index_data, 9) {
        Ok(idx) => idx,
        Err(e) => {
            return Err(JsValue::from_str(&format!("解压搜索索引失败: {}", e)));
        }
    };
    
    // 执行搜索
    let mut result = match req.search_type.as_str() {
        "autocomplete" => perform_autocomplete(&search_index, &req),
        _ => perform_search(&search_index, &req),
    };
    
    // 计算执行时间
    let end_time = web_sys::window()
        .and_then(|w| w.performance())
        .map(|p| p.now())
        .unwrap_or(0.0);
    
    let time_ms = (end_time - start_time) as usize;
    result.time_ms = time_ms;
    
    // 序列化结果
    match serde_json::to_string(&result) {
        Ok(json) => Ok(json),
        Err(e) => Err(JsValue::from_str(&format!("序列化搜索结果失败: {}", e))),
    }
}

/// 分割查询为词条
fn split_query_to_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    
    // 添加原始查询作为第一个词条
    let clean_query = query.trim().to_lowercase();
    if !clean_query.is_empty() {
        terms.push(clean_query.clone());
    }
    
    // 不再分割查询为多个词条，简化搜索逻辑
    // 如果需要，可以在这里添加分词逻辑
    
    terms
}

/// 获取搜索建议
fn get_search_suggestions(search_index: &ArticleSearchIndex, query: &str) -> Vec<SearchSuggestion> {
    let query = query.trim().to_lowercase();
    
    // 如果查询为空，返回热门词汇
    if query.is_empty() {
        let mut common_terms: Vec<(String, usize)> = search_index.common_terms
            .iter()
            .map(|(term, freq)| (term.clone(), *freq))
            .collect();
        
        common_terms.sort_by(|a, b| b.1.cmp(&a.1)); // 按频率降序排序
        
        return common_terms.iter().take(10).map(|(term, _)| {
            SearchSuggestion {
                text: term.clone(),
                suggestion_type: SuggestionType::Completion,
                matched_text: String::new(),
                suggestion_text: term.clone(),
            }
        }).collect();
    }
    
    // 保存所有候选建议
    let mut candidates: Vec<SuggestionCandidate> = Vec::new();
    
    // 第1步: 标题完全匹配
    for (_, article) in search_index.articles.iter().enumerate() {
        let title_lower = article.title.to_lowercase();
        
        if title_lower == query {
            // 找到完全匹配标题的文章，不返回完全相同的建议
            continue;
        } else if title_lower.starts_with(&query) {
            // 标题以查询开头，作为前缀补全
            candidates.push(SuggestionCandidate {
                text: article.title.clone(),
                score: 100,
                suggestion_type: SuggestionType::Completion,
                frequency: 100
            });
        } else if title_lower.contains(&query) {
            // 标题包含查询，作为纠正建议
            candidates.push(SuggestionCandidate {
                text: article.title.clone(),
                score: 90,
                suggestion_type: SuggestionType::Correction,
                frequency: 90
            });
        }
    }
    
    // 第2步: 独立词汇匹配
    for (term, freq) in &search_index.common_terms {
        let term_lower = term.to_lowercase();
        
        // 跳过与查询完全相同的词汇
        if term_lower == query {
            continue;
        }
        
        if term_lower.starts_with(&query) {
            // 前缀匹配，作为补全建议
            candidates.push(SuggestionCandidate {
                text: term.clone(),
                score: 95,
                suggestion_type: SuggestionType::Completion,
                frequency: *freq
            });
        } else if term_lower.contains(&query) {
            // 包含关系，作为纠正建议
            candidates.push(SuggestionCandidate {
                text: term.clone(),
                score: 85,
                suggestion_type: SuggestionType::Correction,
                frequency: *freq
            });
        }
    }
    
    // 第3步: 编辑距离匹配
    if candidates.len() < 5 {
        for (term, freq) in &search_index.common_terms {
            let term_lower = term.to_lowercase();
            
            // 跳过已添加的词汇和完全相同的词汇
            if term_lower == query || candidates.iter().any(|s| s.text.to_lowercase() == term_lower) {
                continue;
            }
            
            // 计算编辑距离
            let distance = levenshtein_distance(&query, &term_lower);
            
            // 只考虑编辑距离较小的词
            let max_allowed_distance = query.len().min(3);
            if distance <= max_allowed_distance as i32 {
                // 编辑距离分数: 基础分80,减去距离值
                let edit_score = 80 - distance * 5;
                
                candidates.push(SuggestionCandidate {
                    text: term.clone(),
                    score: edit_score,
                    suggestion_type: SuggestionType::Correction,
                    frequency: *freq
                });
            }
        }
    }
    
    // 首先按分数和频率排序
    candidates.sort_by(|a, b| {
        match b.score.cmp(&a.score) {
            std::cmp::Ordering::Equal => b.frequency.cmp(&a.frequency),
            other => other
        }
    });
    
    // 转换为SearchSuggestion格式并截取前10个结果
    candidates.iter()
        .take(10)
        .map(|candidate| {
            let text_lower = candidate.text.to_lowercase();
            
            let (matched_text, suggestion_text) = match candidate.suggestion_type {
                SuggestionType::Completion if text_lower.starts_with(&query) => {
                    // 前缀匹配：分离已匹配部分和建议部分，保留原始大小写
                    let original_case_matched = &candidate.text[..query.len()];
                    let original_case_suggestion = &candidate.text[query.len()..];
                    (original_case_matched.to_string(), original_case_suggestion.to_string())
                },
                _ => {
                    // 纠正建议：用户输入作为匹配部分，完整建议作为建议部分
                    (query.to_string(), candidate.text.clone())
                }
            };
            
            SearchSuggestion {
                text: candidate.text.clone(),
                suggestion_type: candidate.suggestion_type.clone(),
                matched_text,
                suggestion_text,
            }
        })
        .collect()
}

/// 计算两个字符串之间的Levenshtein编辑距离
fn levenshtein_distance(s1: &str, s2: &str) -> i32 {
    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();
    
    let m = s1_chars.len();
    let n = s2_chars.len();
    
    // 如果任何一个字符串为空，编辑距离就是另一个的长度
    if m == 0 { return n as i32; }
    if n == 0 { return m as i32; }
    
    // 为动态规划创建距离矩阵
    let mut matrix = vec![vec![0; n + 1]; m + 1];
    
    // 初始化第一行和第一列
    for i in 0..=m {
        matrix[i][0] = i as i32;
    }
    for j in 0..=n {
        matrix[0][j] = j as i32;
    }
    
    // 填充剩余的矩阵
    for i in 1..=m {
        for j in 1..=n {
            let cost = if s1_chars[i-1] == s2_chars[j-1] { 0 } else { 1 };
            
            matrix[i][j] = std::cmp::min(
                std::cmp::min(
                    matrix[i-1][j] + 1,     // 删除
                    matrix[i][j-1] + 1      // 插入
                ),
                matrix[i-1][j-1] + cost     // 替换
            );
        }
    }
    
    // 矩阵右下角的值就是编辑距离
    matrix[m][n]
}

/// 执行自动补全
fn perform_autocomplete(search_index: &ArticleSearchIndex, req: &SearchRequest) -> SearchResult {
    let query = req.query.to_lowercase();
    
    // 如果查询为空，返回空结果
    if query.is_empty() {
        return SearchResult {
            items: Vec::new(),
            total: 0,
            page: 1,
            page_size: 10,
            total_pages: 0,
            time_ms: 0,
            query: query.clone(),
            suggestions: Vec::new(),
        };
    }
    
    // 使用与普通搜索相同的建议生成逻辑
    let suggestions = get_search_suggestions(search_index, &query);
    
    SearchResult {
        items: Vec::new(), // 自动补全不需要返回结果项
        total: suggestions.len(),
        page: 1,
        page_size: suggestions.len(),
        total_pages: 1,
        time_ms: 0, // 由外部函数填充
        query: query.clone(),
        suggestions,
    }
}

/// 执行搜索
fn perform_search(search_index: &ArticleSearchIndex, req: &SearchRequest) -> SearchResult {
    let query = req.query.to_lowercase();
    
    // 如果查询为空，返回空结果
    if query.is_empty() {
        return SearchResult {
            items: Vec::new(),
            total: 0,
            page: req.page,
            page_size: req.page_size,
            total_pages: 0,
            time_ms: 0,
            query: query.clone(),
            suggestions: Vec::new(),
        };
    }
    
    // 分词 - 第一个词是完整查询
    let terms = split_query_to_terms(&query);
    if terms.is_empty() {
        return SearchResult {
            items: Vec::new(),
            total: 0,
            page: req.page,
            page_size: req.page_size,
            total_pages: 0,
            time_ms: 0,
            query: query.clone(),
            suggestions: Vec::new(),
        };
    }
    
    // 找到匹配的文章ID及其得分 - 已按匹配优先级排序
    let matched_articles = find_matched_articles(search_index, &terms);
    
    // 处理每个匹配的文章
    let mut all_items = Vec::new();
    
    for (article_id, base_score) in matched_articles {
        if article_id >= search_index.articles.len() {
            continue;
        }
        
        let article = &search_index.articles[article_id];
        
        // 构建标题树和匹配内容
        let heading_tree = build_heading_tree_with_matches(article, &terms, search_index);
        
        // 高亮处理文章标题
        let highlighted_title = if !terms.is_empty() {
            highlight_title(&article.title, &terms[0])
        } else {
            article.title.clone()
        };
        
        // 创建搜索结果项
        let result_item = SearchResultItem {
            id: article.id.clone(),
            title: highlighted_title,
            summary: article.summary.clone(),
            url: article.url.clone(),
            score: base_score,
            heading_tree,
            page_type: article.page_type.clone(),
        };
        
        all_items.push(result_item);
    }
    
    // 分页处理
    let total = all_items.len();
    let total_pages = (total + req.page_size - 1) / req.page_size;
    let start_idx = (req.page - 1) * req.page_size;
    let end_idx = std::cmp::min(start_idx + req.page_size, total);
    
    let paged_results = if start_idx < total {
        all_items[start_idx..end_idx].to_vec()
    } else {
        Vec::new()
    };
    
    // 生成搜索建议
    let suggestions = get_search_suggestions(search_index, &query);
    
    SearchResult {
        items: paged_results,
        total,
        page: req.page,
        page_size: req.page_size,
        total_pages,
        time_ms: 0, // 由外部函数填充
        query: query.clone(),
        suggestions,
    }
}

/// 高亮处理标题文本
fn highlight_title(title: &str, query: &str) -> String {
    if title.is_empty() || query.is_empty() {
        return title.to_string();
    }

    let title_lower = title.to_lowercase();
    let query_lower = query.to_lowercase();
    
    // 查找所有匹配位置
    let mut term_positions = Vec::new();
    let mut start_idx = 0;
    
    while start_idx < title_lower.len() {
        if let Some(found_idx) = title_lower[start_idx..].find(&query_lower) {
            let abs_idx = start_idx + found_idx;
            let match_end = abs_idx + query_lower.len();
            
            // 确保索引位于字符边界上
            let valid_abs_idx = find_char_boundary(title, abs_idx);
            let valid_match_end = find_char_boundary(title, match_end);
            
            // 添加匹配位置
            if valid_match_end > valid_abs_idx {
                term_positions.push((valid_abs_idx, valid_match_end));
            }
            
            start_idx = if valid_match_end > start_idx { valid_match_end } else { start_idx + 1 };
        } else {
            break;
        }
    }
    
    // 如果没有找到匹配，返回原始标题
    if term_positions.is_empty() {
        return title.to_string();
    }
    
    // 按位置排序
    term_positions.sort_by_key(|&(start, _)| start);
    
    // 构建高亮标题
    let mut highlighted = String::new();
    let mut last_pos = 0;
    
    for (start, end) in term_positions {
        // 添加匹配前的文本
        if start > last_pos {
            highlighted.push_str(&title[last_pos..start]);
        }
        
        // 添加高亮标记
        highlighted.push_str("<mark>");
        highlighted.push_str(&title[start..end]);
        highlighted.push_str("</mark>");
        
        last_pos = end;
    }
    
    // 添加最后一部分
    if last_pos < title.len() {
        highlighted.push_str(&title[last_pos..]);
    }
    
    highlighted
}

/// 查找匹配的文章ID并按优先级排序
fn find_matched_articles(search_index: &ArticleSearchIndex, terms: &[String]) -> Vec<(usize, f64)> {
    // 确保有搜索词
    if terms.is_empty() {
        return Vec::new();
    }
    
    let query = &terms[0].to_lowercase();
    let mut result_with_scores: Vec<(usize, f64)> = Vec::new();
    let mut seen_articles = HashSet::new();
    
    // 第1步: 查找以查询开头的标题 (如"wasm入门指南")
    for (article_id, article) in search_index.articles.iter().enumerate() {
        let title_lower = article.title.to_lowercase();
        
        if title_lower.starts_with(query) && title_lower != *query {
            result_with_scores.push((article_id, 115.0));
            seen_articles.insert(article_id);
        }
    }
    
    // 第2步: 查找包含查询的标题 (如"使用wasm")
    for (article_id, article) in search_index.articles.iter().enumerate() {
        if seen_articles.contains(&article_id) {
            continue;
        }
        
        let title_lower = article.title.to_lowercase();
        
        if title_lower.contains(query) {
            // 标题中包含查询词
            result_with_scores.push((article_id, 99.0));
            seen_articles.insert(article_id);
        }
    }
    
    // 第3步: 查找标题与查询完全匹配的文章 (如只有"wasm")
    for (article_id, article) in search_index.articles.iter().enumerate() {
        if seen_articles.contains(&article_id) {
            continue;
        }
        
        let title_lower = article.title.to_lowercase();
        
        if title_lower == *query {
            result_with_scores.push((article_id, 90.0));
            seen_articles.insert(article_id);
        }
    }
    
    // 第4步: 从索引中查找匹配
    if let Some(article_ids) = search_index.title_term_index.get(query) {
        for &article_id in article_ids {
            if seen_articles.contains(&article_id) {
                continue;
            }
            
            result_with_scores.push((article_id, 85.0));
            seen_articles.insert(article_id);
        }
    }
    
    // 第5步: 从标题关键词索引中查找
    if let Some(heading_ids) = search_index.heading_term_index.get(query) {
        for heading_id in heading_ids {
            if let Some(article_id) = extract_article_id_from_heading(heading_id) {
                if seen_articles.contains(&article_id) || article_id >= search_index.articles.len() {
                    continue;
                }
                
                result_with_scores.push((article_id, 80.0));
                seen_articles.insert(article_id);
            }
        }
    }
    
    // 第6步: 从内容索引中查找
    if let Some(article_ids) = search_index.content_term_index.get(query) {
        for &article_id in article_ids {
            if seen_articles.contains(&article_id) || article_id >= search_index.articles.len() {
                continue;
            }
            
            result_with_scores.push((article_id, 75.0));
            seen_articles.insert(article_id);
        }
    }
    
    // 第7步: 如果没有找到任何匹配，尝试更宽松的匹配
    if result_with_scores.is_empty() {
        // 对所有文章内容进行更宽松的搜索
        for (article_id, article) in search_index.articles.iter().enumerate() {
            let content_lower = article.content.to_lowercase();
            
            if content_lower.contains(query) {
                result_with_scores.push((article_id, 50.0));
            }
        }
    }
    
    // 按分数降序排序
    result_with_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    
    result_with_scores
}

/// 从标题ID中提取文章ID
fn extract_article_id_from_heading(heading_id: &str) -> Option<usize> {
    // 标题ID的格式为 "article_id:heading_index"
    if let Some(colon_pos) = heading_id.find(':') {
        if let Some(article_id_str) = heading_id.get(0..colon_pos) {
            return article_id_str.parse::<usize>().ok();
        }
    }
    None
}

/// 在文章内容中查找匹配词，并提取段落上下文
fn find_matches_in_paragraph(article: &utils_common::models::ArticleMetadata, heading: &HeadingIndexEntry, terms: &[String]) -> Option<(String, Vec<String>)> {
    // 提取标题下的内容，确保位置在有效的字符边界上
    let mut content_start = heading.start_position + heading.text.len() + heading.level + 1; // +1 for the space
    let mut content_end = heading.end_position;
    
    // 确保起始位置是有效的字符边界
    if content_start < article.content.len() {
        content_start = find_char_boundary(&article.content, content_start);
    }
    
    // 确保结束位置是有效的字符边界
    if content_end > article.content.len() {
        content_end = article.content.len();
    }
    content_end = find_char_boundary(&article.content, content_end);
    
    // 确保有效的内容
    if content_start >= content_end || content_start >= article.content.len() {
        return None;
    }
    
    // 提取标题下的内容
    let content = &article.content[content_start..content_end];
    
    // 如果内容为空，则返回None
    if content.trim().is_empty() {
        return None;
    }
    
    // 在内容中查找匹配
    let content_lower = content.to_lowercase();
    let mut matched_terms = Vec::new();
    let mut term_positions = Vec::new();
    
    // 仅匹配完整查询
    if !terms.is_empty() {
        let complete_query = &terms[0].to_lowercase();
        
        // 查找完整查询在内容中的所有位置
        let mut start_idx = 0;
        while start_idx < content_lower.len() {
            if let Some(found_idx) = content_lower[start_idx..].find(complete_query) {
                let abs_idx = start_idx + found_idx;
                let match_end = abs_idx + complete_query.len();
                
                // 确保索引位于字符边界上
                let valid_abs_idx = find_char_boundary(content, abs_idx);
                let valid_match_end = find_char_boundary(content, match_end);
                
                // 确保匹配区域有效
                if valid_match_end > valid_abs_idx {
                    // 添加匹配位置
                    term_positions.push((valid_abs_idx, valid_match_end, 1));
                    matched_terms.push(terms[0].clone());
                }
                
                // 继续搜索
                start_idx = if valid_match_end > start_idx { valid_match_end } else { start_idx + 1 };
            } else {
                // 没有更多匹配
                break;
            }
        }
    }
    
    // 如果没有匹配，返回None
    if term_positions.is_empty() {
        return None;
    }
    
    // 对匹配位置排序（按位置）
    term_positions.sort_by_key(|&(start, _, _)| start);
    
    // 去除重复的匹配词
    matched_terms.sort();
    matched_terms.dedup();
    
    // 格式化匹配内容
    let highlighted_content = format_matched_content(content, &term_positions);
    
    Some((highlighted_content, matched_terms))
}

/// 格式化匹配内容，高亮显示匹配词
fn format_matched_content(content: &str, term_positions: &[(usize, usize, i32)]) -> String {
    // 如果没有匹配，返回原始内容
    if term_positions.is_empty() || content.is_empty() {
        return content.to_string();
    }
    
    let mut highlighted_content = String::new();
    
    // 如果段落太长，我们只提取匹配词周围的上下文
    if content.len() > 300 {
        // 查找第一个高优先级匹配（通常是完整查询）
        let primary_matches: Vec<&(usize, usize, i32)> = term_positions.iter()
            .filter(|(_, _, prio)| *prio == 1)
            .collect();
        
        // 如果有高优先级匹配，使用它；否则使用第一个匹配
        let (first_start, first_end, _) = if !primary_matches.is_empty() {
            **primary_matches.first().unwrap()
        } else {
            term_positions[0]
        };
        
        // 安全地计算上下文起始位置，确保位于字符边界上
        let mut ctx_start = if first_start > 150 { first_start - 150 } else { 0 };
        ctx_start = find_char_boundary(content, ctx_start); // 确保在字符边界上
        
        // 安全地计算上下文结束位置，确保位于字符边界上
        let mut ctx_end = std::cmp::min(first_end + 150, content.len());
        ctx_end = find_char_boundary(content, ctx_end); // 确保在字符边界上
        
        // 获取上下文
        let context = &content[ctx_start..ctx_end];
        
        // 在上下文中高亮匹配词
        let mut last_pos = 0;
        // 只处理在上下文范围内的匹配
        let visible_matches: Vec<(usize, usize)> = term_positions.iter()
            .filter(|&&(s, e, _)| s >= ctx_start && e <= ctx_end)
            .map(|&(s, e, _)| (s - ctx_start, e - ctx_start)) // 调整为相对位置
            .collect();
        
        for (rel_start, rel_end) in visible_matches {
            // 添加匹配前的文本
            if rel_start > last_pos && rel_start <= context.len() {
                // 确保所有边界都是有效的
                let safe_last_pos = find_char_boundary(context, last_pos);
                let safe_rel_start = find_char_boundary(context, rel_start);
                
                if safe_rel_start > safe_last_pos {
                    highlighted_content.push_str(&context[safe_last_pos..safe_rel_start]);
                }
            }
            
            // 添加带标记的匹配文本
            if rel_end <= context.len() {
                let safe_rel_start = find_char_boundary(context, rel_start);
                let safe_rel_end = find_char_boundary(context, rel_end);
                
                if safe_rel_end > safe_rel_start {
                    highlighted_content.push_str("<mark>");
                    highlighted_content.push_str(&context[safe_rel_start..safe_rel_end]);
                    highlighted_content.push_str("</mark>");
                }
                
                last_pos = safe_rel_end;
            }
        }
        
        // 添加最后一个匹配后的文本
        if last_pos < context.len() {
            let safe_last_pos = find_char_boundary(context, last_pos);
            highlighted_content.push_str(&context[safe_last_pos..]);
        }
        
        // 如果上下文前后有截断，添加省略号
        if ctx_start > 0 {
            highlighted_content = format!("...{}", highlighted_content);
        }
        if ctx_end < content.len() {
            highlighted_content = format!("{}...", highlighted_content);
        }
    } else {
        // 对于短段落，显示整个内容
        let mut last_pos = 0;
        for &(start, end, _) in term_positions {
            // 确保索引在有效范围内
            if start < content.len() {
                // 确保边界安全
                let safe_start = find_char_boundary(content, start);
                let safe_end = find_char_boundary(content, end.min(content.len()));
                let safe_last_pos = find_char_boundary(content, last_pos);
                
                // 添加匹配前的文本
                if safe_start > safe_last_pos {
                    highlighted_content.push_str(&content[safe_last_pos..safe_start]);
                }
                
                // 添加带标记的匹配文本
                if safe_end > safe_start {
                    highlighted_content.push_str("<mark>");
                    highlighted_content.push_str(&content[safe_start..safe_end]);
                    highlighted_content.push_str("</mark>");
                }
                
                last_pos = safe_end;
            }
        }
        
        // 添加最后一个匹配后的文本
        if last_pos < content.len() {
            let safe_last_pos = find_char_boundary(content, last_pos);
            highlighted_content.push_str(&content[safe_last_pos..]);
        }
    }
    
    // 如果由于某种原因结果为空，返回原始内容的一部分
    if highlighted_content.is_empty() && !content.is_empty() {
        // 安全返回内容的前300个字符
        let safe_end = find_char_boundary(content, content.len().min(300));
        return format!("{}...", &content[0..safe_end]);
    }
    
    highlighted_content
}

/// 辅助函数：确保索引位于有效的字符边界上
fn find_char_boundary(s: &str, index: usize) -> usize {
    // 确保边界值
    if s.is_empty() {
        return 0;
    }
    if index >= s.len() {
        return s.len();
    }
    
    // 如果索引已经在字符边界上，直接返回
    if s.is_char_boundary(index) {
        return index;
    }
    
    // 否则，找到最近的字符边界
    // 先向前查找
    let mut previous = index;
    while previous > 0 && !s.is_char_boundary(previous) {
        previous -= 1;
    }
    
    // 向后查找
    let mut next = index;
    while next < s.len() && !s.is_char_boundary(next) {
        next += 1;
    }
    
    // 返回最近的边界（前向或后向）
    if index - previous <= next - index {
        previous
    } else {
        next
    }
}

/// 构建带匹配内容的标题树
fn build_heading_tree_with_matches(
    article: &utils_common::models::ArticleMetadata, 
    terms: &[String],
    search_index: &ArticleSearchIndex
) -> Option<HeadingNode> {
    // 如果没有搜索词或内容为空，返回None
    if terms.is_empty() || article.content.is_empty() {
        return None;
    }

    // 获取与文章相关的所有标题
    let article_id_str = article.id.to_string();
    let heading_map: HashMap<String, &HeadingIndexEntry> = search_index.heading_index.iter()
        .filter(|(id, _)| id.starts_with(&format!("{}:", article_id_str)))
        .map(|(id, entry)| (id.clone(), entry))
        .collect();
    
    if heading_map.is_empty() {
        // 如果没有标题结构，创建一个根节点
        let root_heading = HeadingIndexEntry {
            id: format!("{}:root", article.id),
            level: 0,
            text: article.title.clone(),
            start_position: 0,
            end_position: article.content.len(),
            parent_id: None,
            children_ids: Vec::new(),
        };
        
        // 查找全文匹配
        if let Some((highlighted_content, matched_terms)) = find_matches_in_paragraph(article, &root_heading, terms) {
        return Some(HeadingNode {
                id: root_heading.id,
                text: root_heading.text,
                level: root_heading.level,
                content: Some(highlighted_content),
                matched_terms: Some(matched_terms),
            children: Vec::new(),
        });
    }
    
        return None;
    }
    
    // 查找根标题（没有父标题的标题）
    let mut root_headings: Vec<&&HeadingIndexEntry> = heading_map.values()
        .filter(|entry| entry.parent_id.is_none())
        .collect();
    
    // 如果没有根标题，返回None
    if root_headings.is_empty() {
        return None;
    }
    
    // 排序根标题，确保始终以相同的顺序处理
    root_headings.sort_by_key(|entry| entry.start_position);
    
    // 创建一个虚拟的根节点来包含所有顶级标题
            let root_heading = HeadingIndexEntry {
                id: format!("{}:root", article.id),
                level: 0,
                text: article.title.clone(),
                start_position: 0,
                end_position: article.content.len(),
                parent_id: None,
        children_ids: root_headings.iter().map(|entry| entry.id.clone()).collect(),
    };
    
    // 先查找每个段落中的匹配
    let mut heading_matches: HashMap<String, (String, Vec<String>)> = HashMap::new();
    
    // 处理所有标题下的匹配
    for (heading_id, heading) in &heading_map {
        if let Some((highlighted_content, matched_terms)) = find_matches_in_paragraph(article, heading, terms) {
            heading_matches.insert(heading_id.clone(), (highlighted_content, matched_terms));
        }
    }
    
    // 处理根节点下的直接内容（不属于任何标题的部分）
    let root_content = if let Some((highlighted_content, matched_terms)) = find_matches_in_paragraph(article, &root_heading, terms) {
        Some((highlighted_content, matched_terms))
        } else {
        None
    };
    
    // 创建根节点
    let mut root_node = HeadingNode {
        id: root_heading.id,
        text: root_heading.text,
        level: root_heading.level,
        content: root_content.as_ref().map(|(content, _)| content.clone()),
        matched_terms: root_content.as_ref().map(|(_, terms)| terms.clone()),
        children: Vec::new(),
    };
    
    // 递归构建子标题树
    for child_id in &root_heading.children_ids {
        if let Some(heading) = heading_map.get(child_id) {
            let mut child_node = HeadingNode {
                id: child_id.clone(),
                text: heading.text.clone(),
                level: heading.level,
        content: None,
        matched_terms: None,
                children: Vec::new(),
            };
            
            // 填充子节点的匹配内容和子节点
            if let Some((content, terms)) = heading_matches.get(child_id) {
                child_node.content = Some(content.clone());
                child_node.matched_terms = Some(terms.clone());
            }
            
            // 递归处理子标题
            if !heading.children_ids.is_empty() {
                for grandchild_id in &heading.children_ids {
                    if let Some(grandchild) = heading_map.get(grandchild_id) {
                        let mut grandchild_node = HeadingNode {
                            id: grandchild_id.clone(),
                            text: grandchild.text.clone(),
                            level: grandchild.level,
                            content: None,
                            matched_terms: None,
                children: Vec::new(),
            };
            
                        // 填充孙节点的匹配内容
                        if let Some((content, terms)) = heading_matches.get(grandchild_id) {
                            grandchild_node.content = Some(content.clone());
                            grandchild_node.matched_terms = Some(terms.clone());
                        }
                        
                        // 对于更深层次的节点，采用相同的处理方式
                        if !grandchild.children_ids.is_empty() {
                            process_deeper_nodes(&mut grandchild_node, grandchild, &heading_map, &heading_matches);
                        }
                        
                        child_node.children.push(grandchild_node);
                    }
                }
                
                // 按标题文本排序子节点，保持一致性
                child_node.children.sort_by(|a, b| a.text.cmp(&b.text));
            }
            
            root_node.children.push(child_node);
        }
    }
    
    // 按级别和文本排序子节点
    root_node.children.sort_by(|a, b| {
        match a.level.cmp(&b.level) {
            std::cmp::Ordering::Equal => a.text.cmp(&b.text),
            other => other
        }
    });
    
    Some(root_node)
}

/// 处理更深层次的标题节点
fn process_deeper_nodes(
    parent: &mut HeadingNode,
    heading: &HeadingIndexEntry,
    heading_map: &HashMap<String, &HeadingIndexEntry>,
    heading_matches: &HashMap<String, (String, Vec<String>)>
) {
    for child_id in &heading.children_ids {
        if let Some(child) = heading_map.get(child_id) {
            let mut child_node = HeadingNode {
                id: child_id.clone(),
                text: child.text.clone(),
                level: child.level,
                content: None,
                matched_terms: None,
                children: Vec::new(),
            };
            
            // 填充匹配内容
            if let Some((content, terms)) = heading_matches.get(child_id) {
                child_node.content = Some(content.clone());
                child_node.matched_terms = Some(terms.clone());
            }
            
            // 继续处理子节点
            if !child.children_ids.is_empty() {
                process_deeper_nodes(&mut child_node, child, heading_map, heading_matches);
            }
            
            parent.children.push(child_node);
        }
    }
    
    // 按级别和文本排序子节点
    parent.children.sort_by(|a, b| {
        match a.level.cmp(&b.level) {
            std::cmp::Ordering::Equal => a.text.cmp(&b.text),
            other => other
        }
    });
}