# Source code for kerb.multimodal.utilities

"""Multimodal utilities.

This module provides utility functions for media type detection, validation,
and file operations.
"""

import hashlib
import mimetypes
import os
from pathlib import Path
from typing import Optional

from .types import MediaType


def detect_media_type(file_path: str) -> MediaType:
    """Classify a file as image, audio, or video from its extension.

    Only the final suffix of the path is inspected; the file is never
    opened and does not need to exist. Matching is case-insensitive.

    Args:
        file_path: Path to the media file

    Returns:
        MediaType: ``IMAGE``, ``AUDIO`` or ``VIDEO`` when the extension is
        in the corresponding known set, otherwise ``UNKNOWN``

    Examples:
        >>> detect_media_type("photo.jpg")
        MediaType.IMAGE

        >>> detect_media_type("audio.mp3")
        MediaType.AUDIO
    """
    known_images = {"jpg", "jpeg", "png", "webp", "gif", "bmp", "tiff", "tif", "svg"}
    known_audio = {"mp3", "wav", "m4a", "flac", "ogg", "opus", "aac"}
    known_video = {"mp4", "avi", "mov", "mkv", "webm", "flv", "wmv", "mpeg", "mpg"}

    # Normalise ".JPG" -> "jpg" so lookups ignore case and the leading dot.
    suffix = Path(file_path).suffix
    extension = suffix.lower().lstrip(".")

    if extension in known_images:
        return MediaType.IMAGE
    if extension in known_audio:
        return MediaType.AUDIO
    if extension in known_video:
        return MediaType.VIDEO
    return MediaType.UNKNOWN
def get_mime_type(file_path: str) -> str:
    """Guess the MIME type of a file from its name.

    The lookup is delegated to :func:`mimetypes.guess_type`; when no type
    can be guessed, the generic binary type is returned instead.

    Args:
        file_path: Path to the file

    Returns:
        str: MIME type (e.g., "image/jpeg"), or
        "application/octet-stream" when the type cannot be guessed

    Examples:
        >>> get_mime_type("photo.jpg")
        'image/jpeg'
    """
    # guess_type returns (type, encoding); only the type is of interest.
    guessed = mimetypes.guess_type(file_path)[0]
    if guessed:
        return guessed
    return "application/octet-stream"
def validate_media_file(
    file_path: str, expected_type: Optional[MediaType] = None
) -> bool:
    """Validate that a media file exists and is of the expected type.

    The path must refer to a regular file (symlinks to files are
    followed); missing paths and directories are rejected. The type check
    is based on the file extension via ``detect_media_type`` and does not
    inspect the file contents.

    Args:
        file_path: Path to the media file
        expected_type: Expected media type (None to skip type check)

    Returns:
        bool: True if the path is an existing file and, when
        ``expected_type`` is given, its detected type matches; False
        otherwise

    Examples:
        >>> validate_media_file("photo.jpg", MediaType.IMAGE)  # file exists
        True
    """
    # isfile() rather than exists(): a directory (e.g. one named
    # "frames.jpg") exists on disk but is not a media file.
    if not os.path.isfile(file_path):
        return False

    if expected_type is None:
        return True

    return detect_media_type(file_path) == expected_type
def calculate_file_checksum(file_path: str, algorithm: str = "md5") -> str:
    """Compute a hex digest of a file's contents.

    The file is read in binary mode in 8192-byte chunks, so it is never
    loaded into memory as a whole.

    Args:
        file_path: Path to the file
        algorithm: Hash algorithm name passed to ``hashlib.new``
            ("md5", "sha1", "sha256")

    Returns:
        str: Hexadecimal checksum string

    Examples:
        >>> checksum = calculate_file_checksum("video.mp4")
        >>> len(checksum)
        32
    """
    # The hasher is built before the file is opened, so an unsupported
    # algorithm name fails without touching the filesystem.
    digest = hashlib.new(algorithm)
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops on the empty bytes returned at EOF.
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()