Source code for kicad_sch_api.discovery.search_index

"""
SQLite-based search index for fast component discovery.

This module creates and maintains a lightweight SQLite database for fast
multi-field component searches, built from the existing SymbolDefinition cache.
"""

import logging
import sqlite3
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple

from ..library.cache import SymbolDefinition, get_symbol_cache

logger = logging.getLogger(__name__)


class ComponentSearchIndex:
    """Fast SQLite-based search index for KiCAD components.

    The index lives in ``search_index.db`` inside ``cache_dir`` and consists of
    a ``components`` table plus ``components_fts``, an FTS5 table declared with
    ``content=components`` (i.e. an external-content table whose text is read
    from ``components`` by rowid).
    """

    # Number of rows written per ``executemany`` call during a rebuild.
    _BATCH_SIZE = 100

    # Columns returned for every search hit (``match_score`` is appended).
    _RESULT_COLUMNS = (
        "lib_id, name, library, description, keywords, "
        "reference_prefix, pin_count, category"
    )

    # Reference prefix -> category for prefixes that need no further analysis.
    _PREFIX_CATEGORIES = {
        "R": "resistor",
        "C": "capacitor",
        "L": "inductor",
        "D": "diode",
        "LED": "diode",
        "Q": "transistor",
        "J": "connector",
        "SW": "switch",
        "S": "switch",
        "Y": "crystal",
        "TP": "test_point",
    }

    # For "U" parts: (category, description keywords), checked in this order.
    _IC_CATEGORIES = (
        ("microcontroller", ("microcontroller", "mcu", "processor")),
        ("amplifier", ("amplifier", "op-amp", "opamp")),
        ("regulator", ("regulator", "ldo", "buck", "boost")),
    )

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize the search index.

        Args:
            cache_dir: Directory that holds ``search_index.db``. Defaults to
                ``~/.cache/kicad-sch-api``. The directory is created if it
                does not exist. A plain string path is accepted as well.
        """
        self.cache_dir = (
            Path(cache_dir) if cache_dir else Path.home() / ".cache" / "kicad-sch-api"
        )
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = self.cache_dir / "search_index.db"
        self._init_database()

    @contextmanager
    def _connection(self, row_dicts: bool = False) -> Iterator[sqlite3.Connection]:
        """Open a connection, run one transaction on it, and always close it.

        ``with sqlite3.connect(...)`` on its own only commits or rolls back;
        it does not close the connection, so the close happens here.

        Args:
            row_dicts: When true, rows are ``sqlite3.Row`` objects so they can
                be converted with ``dict(row)``.
        """
        conn = sqlite3.connect(str(self.db_path))
        try:
            if row_dicts:
                conn.row_factory = sqlite3.Row
            with conn:  # commit on success, roll back on error
                yield conn
        finally:
            conn.close()

    def _init_database(self):
        """Create the tables and indexes if they do not exist yet."""
        statements = (
            """
            CREATE TABLE IF NOT EXISTS components (
                lib_id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                library TEXT NOT NULL,
                description TEXT DEFAULT '',
                keywords TEXT DEFAULT '',
                reference_prefix TEXT DEFAULT 'U',
                pin_count INTEGER DEFAULT 0,
                category TEXT DEFAULT '',
                last_updated REAL DEFAULT 0
            )
            """,
            # Search indexes for fast queries
            """
            CREATE INDEX IF NOT EXISTS idx_name
            ON components(name COLLATE NOCASE)
            """,
            """
            CREATE INDEX IF NOT EXISTS idx_description
            ON components(description COLLATE NOCASE)
            """,
            """
            CREATE INDEX IF NOT EXISTS idx_library
            ON components(library)
            """,
            """
            CREATE INDEX IF NOT EXISTS idx_category
            ON components(category)
            """,
            # Full-text search virtual table for advanced queries
            """
            CREATE VIRTUAL TABLE IF NOT EXISTS components_fts
            USING fts5(lib_id, name, description, keywords, content=components)
            """,
        )
        with self._connection() as conn:
            for statement in statements:
                conn.execute(statement)

        logger.debug("Initialized search index database")

    def rebuild_index(self, progress_callback: Optional[Callable[[str], None]] = None) -> int:
        """Rebuild the search index from the symbol cache.

        Args:
            progress_callback: Optional callable invoked with a human-readable
                progress message after each library is loaded and after each
                batch is written.

        Returns:
            The number of symbols read from the symbol cache.
        """
        start_time = time.time()

        symbols = self._load_all_symbols(progress_callback)
        self._store_symbols(symbols, progress_callback)

        elapsed = time.time() - start_time
        logger.info(f"Rebuilt search index with {len(symbols)} components in {elapsed:.2f}s")
        return len(symbols)

    def _load_all_symbols(self, progress_callback: Optional[Callable[[str], None]]) -> list:
        """Collect the symbols of every library known to the symbol cache."""
        symbol_cache = get_symbol_cache()
        symbols = []
        for lib_name in symbol_cache._library_index.keys():
            try:
                lib_symbols = symbol_cache.get_library_symbols(lib_name)
                symbols.extend(lib_symbols)
            except Exception as e:
                # Best effort: one unreadable library must not abort the rebuild.
                logger.warning(f"Failed to load library {lib_name}: {e}")
            else:
                if progress_callback:
                    progress_callback(f"Indexing {lib_name}: {len(lib_symbols)} symbols")
        return symbols

    def _store_symbols(
        self, symbols: list, progress_callback: Optional[Callable[[str], None]]
    ) -> None:
        """Replace the contents of the index with ``symbols`` in one transaction."""
        indexed_at = time.time()
        total = len(symbols)

        with self._connection() as conn:
            conn.execute("DELETE FROM components")

            for start in range(0, total, self._BATCH_SIZE):
                rows = [
                    (
                        symbol.lib_id,
                        symbol.name,
                        symbol.library,
                        symbol.description or "",
                        symbol.keywords or "",
                        symbol.reference_prefix,
                        len(symbol.pins),
                        self._categorize_component(symbol),
                        indexed_at,
                    )
                    for symbol in symbols[start : start + self._BATCH_SIZE]
                ]
                conn.executemany(
                    """
                    INSERT OR REPLACE INTO components
                    (lib_id, name, library, description, keywords,
                     reference_prefix, pin_count, category, last_updated)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    rows,
                )
                if progress_callback:
                    progress_callback(
                        f"Indexed {min(start + self._BATCH_SIZE, total)}/{total} components"
                    )

            # components_fts is an external-content table: its index must be
            # regenerated from `components` so that FTS rowids match the
            # content rowids. 'rebuild' discards the old index and does that.
            conn.execute("INSERT INTO components_fts(components_fts) VALUES('rebuild')")

    def search(
        self,
        query: str,
        library: Optional[str] = None,
        category: Optional[str] = None,
        limit: int = 20,
    ) -> List[Dict[str, Any]]:
        """Search components using multiple strategies.

        Strategies run from most to least specific (exact name, name prefix,
        substring of name/description, full-text). Results are concatenated in
        that order, de-duplicated by ``lib_id`` and truncated to ``limit``.

        Args:
            query: Text to search for.
            library: If given, only components of this library are returned.
            category: If given, only components of this category are returned.
            limit: Maximum number of results; values <= 0 yield no results.

        Returns:
            A list of row dictionaries, each with a ``match_score`` entry.
        """
        if limit <= 0:
            return []

        strategies = (
            self._search_exact_match,
            self._search_prefix_match,
            self._search_contains,
            self._search_fts,
        )

        results: List[Dict[str, Any]] = []
        seen_ids = set()
        for strategy in strategies:
            try:
                # Ask for the full limit: a later strategy usually re-finds rows
                # that an earlier one already returned, and those duplicates
                # must not use up the remaining quota.
                candidates = strategy(query, library, category, limit)
            except Exception as e:
                # Best effort: a failing strategy must not break the search.
                logger.debug(f"Search strategy failed: {e}")
                continue

            for row in candidates:
                if row["lib_id"] not in seen_ids:
                    seen_ids.add(row["lib_id"])
                    results.append(row)

            if len(results) >= limit:
                break

        return results[:limit]

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE wildcards so ``text`` is matched literally (ESCAPE '\\')."""
        return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    def _select_components(
        self,
        conditions: List[str],
        params: List[Any],
        score: float,
        library: Optional[str],
        category: Optional[str],
        limit: int,
        order_by: str = "name",
        order_params: Tuple[Any, ...] = (),
    ) -> List[Dict[str, Any]]:
        """Run a filtered SELECT on ``components`` and return rows as dicts.

        ``conditions``/``params`` describe the match itself; the optional
        library and category filters are appended here. ``score`` is a trusted
        constant that is emitted as the ``match_score`` column.
        """
        conditions = list(conditions)
        params = list(params)
        if library:
            conditions.append("library = ?")
            params.append(library)
        if category:
            conditions.append("category = ?")
            params.append(category)

        sql = f"""
            SELECT {self._RESULT_COLUMNS}, {score} as match_score
            FROM components
            WHERE {' AND '.join(conditions)}
            ORDER BY {order_by}
            LIMIT ?
        """
        params.extend(order_params)
        params.append(limit)

        with self._connection(row_dicts=True) as conn:
            return [dict(row) for row in conn.execute(sql, params)]

    def _search_exact_match(
        self, query: str, library: Optional[str], category: Optional[str], limit: int
    ) -> List[Dict[str, Any]]:
        """Search for exact (case-insensitive) name matches; score 1.0."""
        return self._select_components(
            ["name = ? COLLATE NOCASE"], [query], 1.0, library, category, limit
        )

    def _search_prefix_match(
        self, query: str, library: Optional[str], category: Optional[str], limit: int
    ) -> List[Dict[str, Any]]:
        """Search for components whose name starts with query; score 0.8."""
        pattern = f"{self._escape_like(query)}%"
        return self._select_components(
            ["name LIKE ? ESCAPE '\\'"], [pattern], 0.8, library, category, limit
        )

    def _search_contains(
        self, query: str, library: Optional[str], category: Optional[str], limit: int
    ) -> List[Dict[str, Any]]:
        """Search for query inside name or description; score 0.6.

        Rows whose name matches are listed before description-only matches.
        """
        pattern = f"%{self._escape_like(query)}%"
        return self._select_components(
            ["(name LIKE ? ESCAPE '\\' OR description LIKE ? ESCAPE '\\')"],
            [pattern, pattern],
            0.6,
            library,
            category,
            limit,
            order_by="CASE WHEN name LIKE ? ESCAPE '\\' THEN 1 ELSE 2 END, name",
            order_params=(pattern,),
        )

    def _search_fts(
        self, query: str, library: Optional[str], category: Optional[str], limit: int
    ) -> List[Dict[str, Any]]:
        """Full-text search using FTS5; ``match_score`` is the FTS5 rank.

        Every whitespace-separated term becomes a quoted prefix query. Returns
        an empty list when the query has no terms or FTS5 rejects it.
        """
        terms = query.split()
        if not terms:
            return []

        # Inside an FTS5 string a double quote is escaped by doubling it.
        fts_query = " ".join('"{}"*'.format(term.replace('"', '""')) for term in terms)

        # Join on rowid: for an external-content table the FTS rowid is the
        # rowid of the content row.
        sql = """
            SELECT c.lib_id, c.name, c.library, c.description, c.keywords,
                   c.reference_prefix, c.pin_count, c.category,
                   components_fts.rank as match_score
            FROM components_fts
            JOIN components c ON c.rowid = components_fts.rowid
            WHERE components_fts MATCH ?
        """
        params: List[Any] = [fts_query]

        if library:
            sql += " AND c.library = ?"
            params.append(library)

        if category:
            sql += " AND c.category = ?"
            params.append(category)

        sql += " ORDER BY components_fts.rank LIMIT ?"
        params.append(limit)

        try:
            with self._connection(row_dicts=True) as conn:
                return [dict(row) for row in conn.execute(sql, params)]
        except sqlite3.OperationalError as e:
            # FTS query failed, return empty results
            logger.debug(f"FTS query failed: {e}")
            return []

    def get_libraries(self) -> List[Dict[str, Any]]:
        """Get all available libraries with component counts, sorted by name."""
        sql = """
            SELECT library, COUNT(*) as component_count
            FROM components
            GROUP BY library
            ORDER BY library
        """
        with self._connection(row_dicts=True) as conn:
            return [dict(row) for row in conn.execute(sql)]

    def get_categories(self) -> List[Dict[str, Any]]:
        """Get all non-empty component categories, most populated first."""
        sql = """
            SELECT category, COUNT(*) as component_count
            FROM components
            WHERE category != ''
            GROUP BY category
            ORDER BY component_count DESC
        """
        with self._connection(row_dicts=True) as conn:
            return [dict(row) for row in conn.execute(sql)]

    def validate_component(self, lib_id: str) -> Optional[Dict[str, Any]]:
        """Return the indexed row for ``lib_id``, or ``None`` if it is unknown."""
        sql = f"""
            SELECT {self._RESULT_COLUMNS}
            FROM components
            WHERE lib_id = ?
        """
        with self._connection(row_dicts=True) as conn:
            row = conn.execute(sql, [lib_id]).fetchone()
            return dict(row) if row else None

    def get_stats(self) -> Dict[str, Any]:
        """Get search index statistics.

        Returns:
            A dict with the total component and library counts, the ten
            largest libraries, the database path and its size in MiB.
        """
        with self._connection() as conn:
            total_components = conn.execute("SELECT COUNT(*) FROM components").fetchone()[0]
            total_libraries = conn.execute(
                "SELECT COUNT(DISTINCT library) FROM components"
            ).fetchone()[0]

            # Get library breakdown
            library_stats = conn.execute(
                """
                SELECT library, COUNT(*) as count
                FROM components
                GROUP BY library
                ORDER BY count DESC
                LIMIT 10
                """
            ).fetchall()

        return {
            "total_components": total_components,
            "total_libraries": total_libraries,
            "top_libraries": [{"library": lib, "count": count} for lib, count in library_stats],
            "database_path": str(self.db_path),
            "database_size_mb": round(self.db_path.stat().st_size / (1024 * 1024), 2),
        }

    def _categorize_component(self, symbol: "SymbolDefinition") -> str:
        """Categorize a component from its reference prefix and description.

        "U" parts are refined by keywords found in the description; every
        prefix without a mapping is reported as ``"other"``.
        """
        prefix = (symbol.reference_prefix or "").upper()

        if prefix == "U":
            desc_lower = (symbol.description or "").lower()
            for category, keywords in self._IC_CATEGORIES:
                if any(term in desc_lower for term in keywords):
                    return category
            return "integrated_circuit"

        return self._PREFIX_CATEGORIES.get(prefix, "other")
# Module-wide singleton; stays None until get_search_index() is first called.
_global_search_index: Optional[ComponentSearchIndex] = None


def get_search_index() -> ComponentSearchIndex:
    """Return the shared ComponentSearchIndex, creating it on first use."""
    global _global_search_index
    if _global_search_index is not None:
        return _global_search_index
    _global_search_index = ComponentSearchIndex()
    return _global_search_index
def ensure_index_built(rebuild: bool = False) -> int:
    """Ensure the search index is built and return its component count.

    Args:
        rebuild: Force a rebuild even when the index already has content.

    Returns:
        The number of components reported by the rebuild, or the number
        already stored when no rebuild was necessary.
    """
    index = get_search_index()

    if not index.db_path.exists():
        # The file was removed after the index object was created; recreate
        # the schema so the rebuild below has tables to write to.
        index._init_database()
    elif not rebuild:
        # Constructing the index always creates the database file, so file
        # existence cannot tell whether the index was ever populated; check
        # the stored row count instead. An empty index falls through and is
        # (re)built.
        stats = index.get_stats()
        if stats["total_components"] > 0:
            logger.info(f"Search index ready: {stats['total_components']} components")
            return stats["total_components"]

    logger.info("Building component search index...")
    return index.rebuild_index()