#!/usr/bin/env python3
"""
Script to count messages containing specific terms in Cursor chat history.

Currently searches for "absolutely right" and "great question".
Searches through the SQLite database where Cursor stores chat data.
"""
|
|
|
import json
import os
import sqlite3
import sys
from contextlib import closing
from pathlib import Path
|
|
|
def _json_roundtrip(text):
    """Return *text* parsed and re-serialized as JSON, or None if it is not JSON.

    Re-serializing with ``ensure_ascii=False`` decodes ``\\uXXXX`` escape
    sequences into real characters, so a non-ASCII search term can match a
    payload that stored the same text in escaped form.
    """
    try:
        return json.dumps(json.loads(text), ensure_ascii=False)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; RecursionError covers
        # pathologically deep nesting. Either way: not searchable as JSON.
        return None


def search_blob_for_text(data, search_text, debug=False):
    """Search for text in data, handling both strings and bytes.

    The comparison is case-insensitive (both sides are lower-cased), except
    for the final raw-bytes check, which compares against the blob as stored.

    Args:
        data: The value to search. ``str`` is searched directly and then as
            normalized JSON. ``bytes`` (or ``bytearray``/``memoryview``) is
            decoded as UTF-8, tried as JSON, then decoded as UTF-16 and
            Latin-1, and finally searched as raw bytes. Any other type is
            reported as not found.
        search_text: The text to look for.
        debug: When true, print the first 200 characters of each decoded
            representation that is inspected.

    Returns:
        A ``(found, reason)`` tuple where ``found`` is a bool and ``reason``
        is a short human-readable description of where the text was (or was
        not) found.
    """
    if not data:
        return False, "No data"

    needle = search_text.lower()

    # Strings: search directly, then fall back to the normalized JSON form.
    if isinstance(data, str):
        if debug:
            print(f"String data (first 200 chars): {data[:200]}")
        if needle in data.lower():
            return True, "Found in string data"

        json_str = _json_roundtrip(data)
        if json_str is not None:
            if debug:
                print(f"JSON data (first 200 chars): {json_str[:200]}")
            if needle in json_str.lower():
                return True, "Found in JSON data"

        return False, "Not found in string"

    # Treat every bytes-like blob uniformly; memoryview has no .decode().
    if isinstance(data, (bytearray, memoryview)):
        data = bytes(data)
    if not isinstance(data, bytes):
        # Numbers and other non-text values cannot contain the search text.
        return False, "Not found"

    # errors='ignore' means decoding never raises; undecodable bytes are dropped.
    text = data.decode('utf-8', errors='ignore')
    if debug:
        print(f"Decoded text (first 200 chars): {text[:200]}")
    if needle in text.lower():
        return True, "Found in UTF-8 text"

    json_str = _json_roundtrip(text)
    if json_str is not None and needle in json_str.lower():
        return True, "Found in JSON"

    # Try other encodings
    for encoding in ['utf-16', 'latin-1']:
        decoded = data.decode(encoding, errors='ignore')
        if needle in decoded.lower():
            return True, f"Found in {encoding} encoding"

    # Last resort: look for the UTF-8 encoded term in the raw bytes.
    try:
        if needle.encode('utf-8') in data:
            return True, "Found in raw bytes"
    except UnicodeEncodeError:
        # The search text contains characters that cannot be encoded
        # (e.g. lone surrogates); there is nothing to compare against.
        pass

    return False, "Not found"
|
|
|
def count_messages_with_text(db_path, search_text, show_matches=False):
    """Count messages containing the search text.

    Scans the ``cursorDiskKV`` table for rows whose key starts with one of
    the chat-related prefixes and counts the rows whose value contains
    ``search_text`` according to ``search_blob_for_text``. Progress and
    totals are printed to stdout.

    Args:
        db_path: Filesystem path of the SQLite database to read.
        search_text: The text to look for in each stored value.
        show_matches: When true, print the (truncated) key of every match
            together with the reason reported by ``search_blob_for_text``.

    Returns:
        The number of matching rows, or 0 if the database does not exist or
        any error occurs while reading it (the error is printed).
    """
    if not os.path.exists(db_path):
        print(f"Database not found: {db_path}")
        return 0

    # Key prefixes (SQL LIKE patterns) of the chat-related entries.
    chat_patterns = [
        'bubbleId:%',
        'messageRequestContext:%',
        'composerData:%',
        'checkpointId:%'
    ]

    try:
        # closing() guarantees the connection is released even when a query
        # or the search itself raises; sqlite3's own context manager only
        # manages transactions and does not close the connection.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()

            total_count = 0
            total_entries = 0

            print(f"Searching for '{search_text}' in Cursor chat history...")
            print("-" * 50)

            for pattern in chat_patterns:
                cursor.execute("SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", (pattern,))

                pattern_count = 0
                # Stream rows from the cursor instead of materializing them
                # all with fetchall(); stored chat values can be large.
                for key, value in cursor:
                    total_entries += 1
                    found, reason = search_blob_for_text(value, search_text, debug=False)
                    if found:
                        pattern_count += 1
                        if show_matches:
                            print(f"Found in {key[:50]}{'...' if len(key) > 50 else ''} ({reason})")

                if pattern_count > 0:
                    print(f"Pattern '{pattern}': {pattern_count} matches")
                    total_count += pattern_count

            print("-" * 50)
            print(f"Total entries searched: {total_entries}")
            print(f"Total matches for '{search_text}': {total_count}")

            return total_count

    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return 0
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and let the
        # caller continue with the remaining search terms.
        print(f"Error: {e}")
        return 0
|
|
|
def _default_cursor_db_path():
    """Return the expected location of Cursor's global state database.

    The macOS location is the one this script has always used. The Windows
    and Linux branches use the per-user application-data directory.
    NOTE(review): the non-macOS locations assume Cursor follows the usual
    per-user config layout of VS Code-style apps -- confirm on a real install.
    """
    relative = Path("Cursor") / "User" / "globalStorage" / "state.vscdb"

    if sys.platform == "darwin":
        return Path.home() / "Library/Application Support" / relative

    if sys.platform == "win32":
        base = os.environ.get("APPDATA") or Path.home() / "AppData" / "Roaming"
        return Path(base) / relative

    # Linux and other Unix-likes: honour XDG_CONFIG_HOME, defaulting to ~/.config.
    base = os.environ.get("XDG_CONFIG_HOME") or Path.home() / ".config"
    return Path(base) / relative


def main():
    """Search the Cursor chat database for each term and print a summary.

    Exits with status 1 if the database file does not exist.
    """
    # Path to the Cursor state database
    db_path = _default_cursor_db_path()

    search_terms = ["absolutely right", "great question"]

    print("Cursor Chat History Search Tool")
    print("=" * 50)
    print(f"Database: {db_path}")
    print(f"Search terms: {search_terms}")
    print()

    if not db_path.exists():
        print(f"Error: Database file not found at {db_path}")
        print("Make sure Cursor is installed and has been used.")
        sys.exit(1)

    results = {}

    for i, search_term in enumerate(search_terms):
        # Visually separate the per-term reports after the first one.
        if i > 0:
            print("\n" + "=" * 70 + "\n")

        count = count_messages_with_text(str(db_path), search_term, show_matches=False)
        results[search_term] = count

    print("\n" + "=" * 70)
    print("FINAL RESULTS SUMMARY")
    print("=" * 70)

    for search_term, count in results.items():
        if count > 0:
            print(f"✓ '{search_term}': {count} messages")
        else:
            print(f"✗ '{search_term}': No messages found")

    total_matches = sum(results.values())
    print(f"\nTotal matches across all terms: {total_matches}")
|
|
|
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()