# Source: Hugging Face Space "medwa126/vpr-interactive-demo" (status: Sleeping)
# File: vpr-interactive-demo / plot_interactive_loader.py (21 kB)
# Last commit: "add loader" by n10212884 (8b6367e, 8 months ago)

	"""
	Data loading and management for VPR plotting system.

	This module handles loading experiment data from JSON files and provides
	a clean interface for accessing results, statistics, and visualization data.
	"""

	import json
	import os
	import numpy as np

	def print_info(message):
	    """Print an informational message to stdout, prefixed with ``[INFO]``.

	    Args:
	        message: Text (or any formattable object) to display.
	    """
	    print(f"[INFO] {message}")

	def print_key(message):
	    """Print a key (warning/error level) message to stdout, prefixed with ``[KEY]``.

	    Args:
	        message: Text (or any formattable object) to display.
	    """
	    print(f"[KEY] {message}")


	class ExperimentDataLoader:
	    """Load and manage VPR experiment data from JSON files.

	    Constructing a loader immediately reads every result file it can find in
	    ``output_dir``. ``results_summary.json`` is the only required file; every
	    other file is optional, and a failure to read one is reported through
	    ``print_key`` without raising.
	    """

	    # Tolerance used when the experiment info does not provide one.
	    DEFAULT_TOLERANCE = 25

	    # CSV files probed, in this order, for GPS coordinates.
	    GPS_FILE_CANDIDATES = (
	        'summary_data_log.csv',
	        'full_data_log.csv',
	        'gps_data.csv',
	        'coordinates.csv',
	    )

	    def __init__(self, output_dir):
	        """Create the loader and load everything available in ``output_dir``.

	        Args:
	            output_dir: Directory containing the experiment's JSON outputs.
	        """
	        self.output_dir = output_dir
	        self.results_summary = None         # parsed results_summary.json
	        self.visualization_data = None      # parsed visualization_data.json
	        self.statistical_comparison = None  # parsed statistical_comparison.json
	        self.advanced_analysis = None       # parsed advanced_analysis.json
	        self.distance_matrices = {}         # method_key -> np.ndarray
	        self.similarity_scores = {}         # method_key -> np.ndarray
	        self.gps_data = None                # GPS metadata dict, or None

	        # Order matters: the per-method loaders derive their method keys from
	        # the visualization data, and GPS extraction reads the results summary.
	        self._load_results_summary()
	        self._load_visualization_data()
	        self._load_statistical_comparison()
	        self._load_advanced_analysis()
	        self._load_matrix_data()
	        self._load_similarity_data()
	        self._extract_gps_data()

	    def load_all_data(self):
	        """(Re)load all experiment data from disk.

	        Returns:
	            bool: True when the required results summary was loaded; the
	            optional files never affect the result.
	        """
	        success = bool(self._load_results_summary())

	        # Optional data: missing or unreadable files are not an error.
	        self._load_visualization_data()
	        self._load_statistical_comparison()
	        self._load_advanced_analysis()
	        self._load_matrix_data()
	        self._load_similarity_data()

	        return success

	    def _load_results_summary(self):
	        """Load the required ``results_summary.json``.

	        Returns:
	            bool: True on success, False when the file is missing or cannot
	            be read/parsed (the reason is reported through ``print_key``).
	        """
	        filepath = os.path.join(self.output_dir, "results_summary.json")
	        if not os.path.exists(filepath):
	            print_key(f"Error: Results summary not found: {filepath}")
	            return False

	        try:
	            with open(filepath, 'r', encoding='utf-8') as f:
	                self.results_summary = json.load(f)
	        except (OSError, ValueError) as e:
	            # ValueError covers both JSON syntax errors and decoding errors.
	            print_key(f"Error loading results summary: {e}")
	            return False

	        print_info("✓ Results summary loaded")
	        return True

	    def _load_optional_json(self, attribute, filename, loaded_label, warning_label):
	        """Load an optional JSON file into ``self.<attribute>`` if it exists.

	        A missing file is silently ignored. A file that cannot be read or
	        parsed leaves the attribute untouched and emits a warning.

	        Args:
	            attribute: Name of the instance attribute receiving the data.
	            filename: File name relative to ``output_dir``.
	            loaded_label: Wording used in the success message.
	            warning_label: Wording used in the warning message.
	        """
	        filepath = os.path.join(self.output_dir, filename)
	        if not os.path.exists(filepath):
	            return

	        try:
	            with open(filepath, 'r', encoding='utf-8') as f:
	                setattr(self, attribute, json.load(f))
	        except (OSError, ValueError) as e:
	            print_key(f"Warning: Could not load {warning_label}: {e}")
	        else:
	            print_info(f"✓ {loaded_label} loaded")

	    def _load_visualization_data(self):
	        """Load ``visualization_data.json`` when present."""
	        self._load_optional_json(
	            'visualization_data', "visualization_data.json",
	            "Visualization data", "visualization data",
	        )

	    def _load_statistical_comparison(self):
	        """Load ``statistical_comparison.json`` when present."""
	        self._load_optional_json(
	            'statistical_comparison', "statistical_comparison.json",
	            "Statistical comparison data", "statistical comparison",
	        )

	    def _load_advanced_analysis(self):
	        """Load ``advanced_analysis.json`` when present."""
	        self._load_optional_json(
	            'advanced_analysis', "advanced_analysis.json",
	            "Advanced analysis data", "advanced analysis",
	        )

	    def _load_method_arrays(self, payload_key, target, loaded_label, warning_label):
	        """Load ``<payload_key>_<method_key>.json`` for every known method.

	        Each file is expected to hold ``{payload_key: {'data': [...]}}``; the
	        ``data`` entry is converted to a NumPy array and stored in ``target``
	        under the method key.

	        Args:
	            payload_key: File-name prefix and top-level JSON key.
	            target: Dict that receives ``method_key -> np.ndarray``.
	            loaded_label: Wording used in the success message.
	            warning_label: Wording used in the warning message.
	        """
	        # get_method_keys() returns a fresh list, so filling ``target`` while
	        # iterating is safe even when ``target`` is self.similarity_scores.
	        for method_key in self.get_method_keys():
	            filepath = os.path.join(self.output_dir, f"{payload_key}_{method_key}.json")
	            if not os.path.exists(filepath):
	                continue
	            try:
	                with open(filepath, 'r', encoding='utf-8') as f:
	                    data = json.load(f)
	                target[method_key] = np.array(data[payload_key]['data'])
	            except (OSError, ValueError, KeyError, TypeError) as e:
	                print_key(f"Warning: Could not load {warning_label} for {method_key}: {e}")
	            else:
	                print_info(f"✓ {loaded_label} loaded for {method_key}")

	    def _load_matrix_data(self):
	        """Load the distance matrix of each method into ``distance_matrices``."""
	        self._load_method_arrays(
	            'distance_matrix', self.distance_matrices,
	            "Distance matrix", "distance matrix",
	        )

	    def _load_similarity_data(self):
	        """Load the similarity scores of each method into ``similarity_scores``."""
	        self._load_method_arrays(
	            'similarity_scores', self.similarity_scores,
	            "Similarity scores", "similarity scores",
	        )

	    def _uses_meter_distances(self):
	        """Return True when the experiment info declares meter-based distances.

	        Either a truthy ``use_meter_distances`` flag or
	        ``distance_calculation == 'meters'`` qualifies.
	        """
	        exp_info = self.get_experiment_info()
	        return (bool(exp_info.get('use_meter_distances', False))
	                or exp_info.get('distance_calculation') == 'meters')

	    def _extract_gps_data(self):
	        """Initialise ``gps_data`` when the experiment uses meter distances.

	        Only metadata is stored here; coordinates are loaded lazily by
	        ``_load_gps_coordinates_from_dataset``.
	        """
	        if not self.results_summary:
	            return

	        if not self._uses_meter_distances():
	            print_info("Experiment does not use GPS-based distances")
	            return

	        dataset_name = self.get_experiment_info().get('dataset', '')
	        if not dataset_name:
	            print_key("Warning: No dataset name found in experiment info")
	            return

	        self.gps_data = {
	            'use_meter_distances': True,
	            'available': True,
	            'dataset': dataset_name,
	            'distance_unit': 'meters',
	        }
	        print_info("✓ GPS data structure initialized (coordinates embedded in predictions)")

	    def _find_dataset_dir(self, dataset_name):
	        """Return the first existing ``data/<dataset_name>`` directory, or None.

	        Searched relative to the parent and grandparent of ``output_dir`` and
	        finally relative to the current working directory.
	        """
	        parent = os.path.dirname(self.output_dir)
	        candidates = (
	            os.path.join(parent, 'data', dataset_name),
	            os.path.join(os.path.dirname(parent), 'data', dataset_name),
	            os.path.join('data', dataset_name),
	        )
	        return next((path for path in candidates if os.path.isdir(path)), None)

	    @staticmethod
	    def _read_gps_coordinates(gps_path):
	        """Parse ``(latitude, longitude)`` pairs from a CSV file.

	        The header row is searched case-insensitively for columns whose name
	        contains ``longitude`` / ``latitude``; when several match, the last
	        one wins. Rows that are too short or non-numeric are skipped.

	        Args:
	            gps_path: Path of the CSV file.

	        Returns:
	            list[tuple[float, float]] of coordinates (empty when the file has
	            fewer than two rows), or None when the header lacks a longitude
	            or latitude column.

	        Raises:
	            OSError: If the file cannot be opened or read.
	        """
	        import csv  # local import: the module header does not import csv

	        # errors='replace' keeps stray non-UTF-8 bytes in unrelated columns
	        # from aborting the whole file.
	        with open(gps_path, 'r', newline='', encoding='utf-8', errors='replace') as f:
	            rows = list(csv.reader(f))

	        if len(rows) < 2:
	            return []

	        lon_idx = None
	        lat_idx = None
	        for i, col in enumerate(rows[0]):
	            name = col.lower()
	            if 'longitude' in name:
	                lon_idx = i
	            elif 'latitude' in name:
	                lat_idx = i

	        if lon_idx is None or lat_idx is None:
	            return None

	        last_needed = max(lon_idx, lat_idx)
	        coords = []
	        for parts in rows[1:]:
	            if len(parts) <= last_needed:
	                continue
	            try:
	                coords.append((float(parts[lat_idx]), float(parts[lon_idx])))
	            except ValueError:
	                continue
	        return coords

	    def _load_gps_coordinates_from_dataset(self):
	        """Load GPS coordinates from the dataset directory into ``gps_data``.

	        On success ``gps_data['database_coords']`` and
	        ``gps_data['query_coords']`` are set. Nothing happens when GPS data is
	        unavailable or the coordinates have already been loaded.
	        """
	        if not self.gps_data or not self.gps_data.get('available'):
	            return

	        # Avoid re-reading the CSV on every get_data_dict() call.
	        if 'database_coords' in self.gps_data and 'query_coords' in self.gps_data:
	            return

	        dataset_name = self.gps_data.get('dataset', '')
	        if not dataset_name:
	            return

	        dataset_path = self._find_dataset_dir(dataset_name)
	        if not dataset_path:
	            print_key(f"Warning: Could not find dataset directory for {dataset_name}")
	            return

	        for gps_file in self.GPS_FILE_CANDIDATES:
	            gps_path = os.path.join(dataset_path, gps_file)
	            if not os.path.exists(gps_path):
	                continue

	            try:
	                coords = self._read_gps_coordinates(gps_path)
	            except Exception as e:
	                # Best-effort boundary: report and try the next candidate.
	                print_key(f"Warning: Could not load GPS file {gps_file}: {e}")
	                continue

	            if coords is None:
	                print_key(f"Warning: Could not find GPS columns in {gps_file}")
	                continue
	            if not coords:
	                continue

	            # NOTE(review): assumes the first half of the rows are
	            # reference/database images and the second half are query
	            # images -- confirm against the dataset layout.
	            split_point = len(coords) // 2
	            self.gps_data['database_coords'] = coords[:split_point]
	            self.gps_data['query_coords'] = coords[split_point:]

	            print_info(f"✓ GPS coordinates loaded from {gps_file}: {len(self.gps_data['database_coords'])} database, {len(self.gps_data['query_coords'])} query")
	            return

	        print_key(f"Warning: No valid GPS files found in {dataset_path}")

	    def get_method_keys(self):
	        """Return the available method keys, without duplicates.

	        Keys come from ``visualization_data['successful_methods']`` followed
	        by the keys of ``similarity_scores``; first-seen order is preserved so
	        the result is stable between runs.
	        """
	        ordered = {}  # dict used as an insertion-ordered set

	        if self.visualization_data and 'successful_methods' in self.visualization_data:
	            for method in self.visualization_data['successful_methods']:
	                ordered.setdefault(method.get('method_key', 'unknown'), None)

	        for key in self.similarity_scores:
	            ordered.setdefault(key, None)

	        return list(ordered)

	    @staticmethod
	    def _index_in_range(index, length):
	        """Return True when ``0 <= index < length``; non-numeric indices are invalid."""
	        try:
	            return 0 <= index < length
	        except TypeError:
	            return False

	    @staticmethod
	    def _haversine_distance(lat1, lon1, lat2, lon2):
	        """Return the great-circle distance in meters between two points.

	        Args:
	            lat1, lon1: First point, in degrees.
	            lat2, lon2: Second point, in degrees.
	        """
	        import math  # local import: the module header does not import math

	        earth_radius_m = 6371000
	        lat1_rad, lon1_rad = math.radians(lat1), math.radians(lon1)
	        lat2_rad, lon2_rad = math.radians(lat2), math.radians(lon2)
	        dlat, dlon = lat2_rad - lat1_rad, lon2_rad - lon1_rad
	        a = (math.sin(dlat / 2) ** 2
	             + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2) ** 2)
	        # Rounding can push ``a`` marginally outside [0, 1], which would make
	        # sqrt/asin raise for (near-)identical or antipodal points.
	        a = min(1.0, max(0.0, a))
	        return earth_radius_m * 2 * math.asin(math.sqrt(a))

	    def _enhance_predictions_with_gps(self, predictions, method_key):
	        """Return copies of ``predictions`` annotated with GPS information.

	        For every prediction whose ``query_index`` / ``predicted_index`` are
	        valid positions in the loaded coordinate lists, the copy gains
	        ``distance_error`` (meters), ``is_correct`` (error below the
	        experiment tolerance) and a ``gps_coordinates`` dict. Other
	        predictions are copied unchanged. The input dicts are never mutated.

	        Args:
	            predictions: List of prediction dicts.
	            method_key: Method name, used only in warning messages.

	        Returns:
	            list: The enhanced copies, or ``predictions`` itself when no GPS
	            coordinates are available.
	        """
	        if not self.gps_data or not predictions:
	            return predictions

	        database_coords = self.gps_data.get('database_coords', [])
	        query_coords = self.gps_data.get('query_coords', [])

	        if not database_coords or not query_coords:
	            return predictions

	        tolerance = self.get_tolerance()
	        if tolerance is None:
	            tolerance = self.DEFAULT_TOLERANCE

	        enhanced_predictions = []
	        for pred in predictions:
	            enhanced_pred = pred.copy()
	            query_idx = pred.get('query_index', 0)
	            predicted_idx = pred.get('predicted_index', 0)

	            # Negative indices are rejected so they cannot wrap around.
	            if (self._index_in_range(query_idx, len(query_coords))
	                    and self._index_in_range(predicted_idx, len(database_coords))):
	                try:
	                    query_coord = query_coords[query_idx]
	                    predicted_coord = database_coords[predicted_idx]

	                    # NOTE(review): ground truth is taken as the database
	                    # entry at the query's own index (falling back to the
	                    # prediction); this presumes index-aligned traverses and
	                    # is not a nearest-neighbour search -- confirm.
	                    gt_idx = query_idx if query_idx < len(database_coords) else predicted_idx
	                    gt_coord = database_coords[gt_idx]

	                    distance_error = self._haversine_distance(
	                        query_coord[0], query_coord[1],
	                        predicted_coord[0], predicted_coord[1]
	                    )

	                    enhanced_pred.update({
	                        'distance_error': distance_error,
	                        'is_correct': distance_error < tolerance,
	                        'gps_coordinates': {
	                            'query_lat': query_coord[0],
	                            'query_lon': query_coord[1],
	                            'predicted_lat': predicted_coord[0],
	                            'predicted_lon': predicted_coord[1],
	                            'ground_truth_lat': gt_coord[0],
	                            'ground_truth_lon': gt_coord[1]
	                        }
	                    })
	                except (TypeError, ValueError, IndexError) as e:
	                    print_key(f"Warning: Could not add GPS coordinates for {method_key} prediction {query_idx}: {e}")

	            enhanced_predictions.append(enhanced_pred)

	        return enhanced_predictions

	    def get_method_config(self, method_key):
	        """Return ``results_summary['method_results'][method_key]``, or None."""
	        if (self.results_summary and
	                'method_results' in self.results_summary and
	                method_key in self.results_summary['method_results']):
	            return self.results_summary['method_results'][method_key]
	        return None

	    def get_experiment_info(self):
	        """Return ``results_summary['experiment_info']``, or ``{}`` when absent."""
	        if self.results_summary and 'experiment_info' in self.results_summary:
	            return self.results_summary['experiment_info']
	        return {}

	    def get_distance_matrix(self, method_key):
	        """Return the distance matrix loaded for ``method_key``, or None."""
	        return self.distance_matrices.get(method_key)

	    def get_similarity_scores(self, method_key):
	        """Return the similarity scores loaded for ``method_key``, or None."""
	        return self.similarity_scores.get(method_key)

	    def get_method_data(self):
	        """Return per-method entries from visualization and similarity data.

	        Entries from the visualization data come first. Every method present
	        only in ``similarity_scores`` gets a synthesized entry whose
	        predictions pick, for each column of the similarity matrix, the row
	        with the highest score.

	        Returns:
	            list[dict]: Method entries (``method_key``, ``config``,
	            ``predictions``, ...).
	        """
	        method_data = []

	        if self.visualization_data:
	            # Only the first matching key is used.
	            # NOTE(review): 'method results' contains a space -- confirm
	            # whether 'method_results' was intended.
	            for key in ('successful_methods', 'method results', 'methods'):
	                if key in self.visualization_data:
	                    method_data.extend(self.visualization_data[key])
	                    break

	        existing_method_keys = {
	            method.get('method_key')
	            for method in method_data
	            if isinstance(method, dict)
	        }

	        for method_key, similarity_data in self.similarity_scores.items():
	            if method_key in existing_method_keys:
	                continue

	            default_config = {
	                'name': method_key,
	                'description': method_key.title() + ' (from similarity scores)',
	                'color': 'blue'  # Default color
	            }

	            # _load_similarity_data stores bare arrays; a dict payload with
	            # 'config' / 'similarity_scores' entries is accepted as well.
	            sim_matrix = None
	            if isinstance(similarity_data, dict):
	                config = similarity_data.get('config', default_config)
	                raw_scores = similarity_data.get('similarity_scores')
	                if isinstance(raw_scores, dict) and 'data' in raw_scores:
	                    sim_matrix = np.array(raw_scores['data'])
	            else:
	                config = default_config
	                sim_matrix = np.asarray(similarity_data)

	            method_entry = {
	                'method_key': method_key,
	                'config': config,
	                'predictions': []  # Will be populated by plots that need it
	            }

	            # argmax needs a 2-D matrix with at least one row.
	            if sim_matrix is not None and sim_matrix.ndim == 2 and sim_matrix.shape[0] > 0:
	                best_rows = np.argmax(sim_matrix, axis=0)
	                predictions = [
	                    {
	                        'query_index': query_idx,
	                        'predicted_index': int(predicted_idx),
	                        'method_key': method_key
	                    }
	                    for query_idx, predicted_idx in enumerate(best_rows)
	                ]
	                method_entry['predictions'] = self._enhance_predictions_with_gps(predictions, method_key)

	            method_data.append(method_entry)
	            print_info(f"✓ Added {method_key} from similarity scores")

	        return method_data

	    def get_statistical_comparison_data(self):
	        """Return the loaded statistical comparison data (None when not loaded)."""
	        return self.statistical_comparison

	    def get_advanced_analysis_data(self, method_key=None):
	        """Return advanced analysis data.

	        Args:
	            method_key: When given, return only
	                ``advanced_analysis['methods'][method_key]`` (None if absent).

	        Returns:
	            The whole analysis, one method's entry, or None when nothing was
	            loaded.
	        """
	        if self.advanced_analysis is None:
	            return None

	        if method_key:
	            methods = self.advanced_analysis.get('methods', {})
	            return methods.get(method_key)

	        return self.advanced_analysis

	    def has_gps_data(self):
	        """Return True when the experiment used GPS-based (meter) distances."""
	        return self._uses_meter_distances()

	    def get_tolerance(self):
	        """Return the experiment's ``tolerance`` (``DEFAULT_TOLERANCE`` when unset)."""
	        exp_info = self.get_experiment_info()
	        return exp_info.get('tolerance', self.DEFAULT_TOLERANCE)

	    def get_distance_unit(self):
	        """Return ``'meters'`` for GPS-based experiments, otherwise ``'frames'``."""
	        if self.has_gps_data():
	            return 'meters'
	        return 'frames'

	    def get_data_dict(self):
	        """Return all loaded data as the dict expected by the plotting functions.

	        GPS coordinates are loaded from the dataset first (when available) so
	        that the generated method data can include them.
	        """
	        if self.gps_data and self.gps_data.get('available'):
	            self._load_gps_coordinates_from_dataset()

	        return {
	            'output_dir': self.output_dir,
	            'results_summary': self.results_summary,
	            'visualization_data': self.visualization_data,
	            'statistical_comparison': self.statistical_comparison,
	            'advanced_analysis': self.advanced_analysis,
	            'distance_matrices': self.distance_matrices,
	            'similarity_scores': self.similarity_scores,
	            'gps_data': self.gps_data,
	            'method_data': self.get_method_data(),
	            'experiment_info': self.get_experiment_info(),
	            'method_keys': self.get_method_keys(),
	            'has_gps_data': self.has_gps_data(),
	            'tolerance': self.get_tolerance(),
	            'distance_unit': self.get_distance_unit()
	        }


	def load_experiment_data(output_dir):
	    """
	    Load experiment data from an output directory.

	    Args:
	        output_dir: Directory containing experiment results

	    Returns:
	        dict: Dictionary containing all loaded data, or None if the directory
	        does not exist or the required results summary could not be loaded
	    """
	    if not os.path.isdir(output_dir):
	        print_key(f"Error: Output directory does not exist: {output_dir}")
	        return None

	    # The constructor already loads every available file. Calling
	    # load_all_data() afterwards would read (and log) everything a second
	    # time, so the outcome of the constructor's load is checked instead.
	    loader = ExperimentDataLoader(output_dir)

	    if loader.results_summary is None:
	        print_key("Failed to load required experiment data")
	        return None

	    return loader.get_data_dict()