fusionagi/gpu/tensor_scoring.py

"""GPU-accelerated hypothesis scoring for reasoning pipelines.

Provides batched scoring of hypotheses against atomic semantic units
using GPU-accelerated tensor operations. Replaces the CPU-bound
ThreadPoolExecutor-based scoring in multi_path.py.
"""

from __future__ import annotations

from fusionagi._logger import logger
from fusionagi.gpu.backend import TensorBackend, get_backend
from fusionagi.reasoning.tot import ThoughtNode
from fusionagi.schemas.atomic import AtomicSemanticUnit


def gpu_score_hypotheses(
    hypotheses: list[str],
    units: list[AtomicSemanticUnit],
    backend: TensorBackend | None = None,
) -> list[tuple[ThoughtNode, float]]:
    """Score hypotheses against atomic units using batched embedding similarity.

    Each hypothesis is embedded and compared with every unit that has
    non-empty ``content``. Its score is the average of its mean cosine
    similarity (reported as ``coherence`` in the node metadata) and its
    maximum cosine similarity across those units, clipped to ``[0.0, 1.0]``.

    Args:
        hypotheses: List of hypothesis text strings.
        units: List of atomic semantic units for reference. Units with empty
            content are ignored for scoring; the ids of the first ten units
            are attached to every returned node as ``unit_refs``.
        backend: TensorBackend to use. When omitted, ``get_backend()``
            chooses one.

    Returns:
        List of (ThoughtNode, score) tuples sorted by score descending.
        Returns an empty list when ``hypotheses`` is empty. When no unit has
        usable content, every hypothesis gets the neutral score ``0.5`` and
        the input order is kept.
    """
    if not hypotheses:
        return []

    # The referenced unit ids are identical for every node, so build them once.
    # Each node still receives its own copy so nodes never share a list.
    unit_refs = [u.unit_id for u in units[:10]]

    unit_texts = [u.content for u in units if u.content]
    if not unit_texts:
        # Nothing to compare against: hand back neutral scores without
        # acquiring a backend or embedding anything.
        return [
            (
                ThoughtNode(
                    thought=h,
                    trace=[h],
                    unit_refs=list(unit_refs),
                    score=0.5,
                ),
                0.5,
            )
            for h in hypotheses
        ]

    # Imported lazily so that importing this module does not require NumPy.
    import numpy as np

    be = backend or get_backend()

    hyp_embeddings = be.embed_texts(hypotheses)
    unit_embeddings = be.embed_texts(unit_texts)

    # Rows are indexed by hypothesis below, columns reduced per hypothesis;
    # assumes the backend returns shape (len(hypotheses), len(unit_texts)).
    sim_matrix = be.to_numpy(be.cosine_similarity_matrix(hyp_embeddings, unit_embeddings))

    coherence_scores = np.mean(sim_matrix, axis=1)
    consistency_scores = np.max(sim_matrix, axis=1)

    # Cosine similarity can be negative; clip so scores stay in [0, 1].
    combined_scores = np.clip(
        0.5 * coherence_scores + 0.5 * consistency_scores, 0.0, 1.0
    )

    results: list[tuple[ThoughtNode, float]] = []
    for i, h in enumerate(hypotheses):
        score = float(combined_scores[i])
        node = ThoughtNode(
            thought=h,
            trace=[h],
            unit_refs=list(unit_refs),
            score=score,
            metadata={"gpu_scored": True, "coherence": float(coherence_scores[i])},
        )
        results.append((node, score))

    # list.sort is stable, so equally scored hypotheses keep their input order.
    results.sort(key=lambda x: x[1], reverse=True)

    logger.debug(
        "GPU hypothesis scoring complete",
        extra={
            "hypotheses": len(hypotheses),
            "units": len(units),
            "best_score": results[0][1] if results else 0.0,
            "backend": be.name,
        },
    )
    return results


def gpu_score_claims_against_reference(
    claims: list[str],
    reference: str,
    weights: list[float] | None = None,
    backend: TensorBackend | None = None,
) -> list[float]:
    """Score a batch of claims against a single reference using batch_score.

    The claims and the reference are embedded with the backend, and the
    backend's ``batch_score`` produces the per-claim scores.

    Args:
        claims: List of claim texts.
        reference: Reference text to score against.
        weights: Optional per-dimension weights. Entries beyond the embedding
            dimension are ignored; missing trailing entries default to 1.0.
            When ``None``, no weight tensor is passed to ``batch_score``.
        backend: TensorBackend to use. When omitted, ``get_backend()``
            chooses one.

    Returns:
        Flat list with one score per claim, in the order of ``claims``.
        Returns an empty list when ``claims`` is empty.
    """
    if not claims:
        return []

    # Imported lazily so that importing this module does not require NumPy.
    import numpy as np

    be = backend or get_backend()

    claim_emb = be.embed_texts(claims)

    # A single host copy of the reference embedding provides both the
    # embedding dimension and the reference row.
    ref_matrix = be.to_numpy(be.embed_texts([reference]))
    ref_vector = ref_matrix[0]

    weight_tensor = None
    if weights is not None:
        dim = ref_matrix.shape[-1]
        w = np.ones(dim, dtype=np.float32)
        given = weights[:dim]
        w[: len(given)] = given
        weight_tensor = be.from_numpy(w)

    scores = be.to_numpy(
        be.batch_score(claim_emb, be.from_numpy(ref_vector), weight_tensor)
    )

    # ravel covers a scalar result (single claim) as well as a column-shaped
    # (n, 1) result, so callers always receive a flat list of floats.
    return np.ravel(scores).tolist()
feat: GPU/TensorCore integration — TensorFlow backend, GPU-accelerated reasoning, training, and memory

- New fusionagi/gpu/ module with TensorBackend protocol abstraction
- TensorFlowBackend: GPU-accelerated ops with TensorCore mixed-precision
- NumPyBackend: CPU fallback (always available, no extra deps)
- Auto-selects best available backend at runtime
- GPU-accelerated operations:
  - Cosine similarity matrix (batched, XLA-compiled)
  - Multi-head attention for consensus scoring
  - Batch hypothesis scoring on GPU
  - Semantic similarity search (pairwise, nearest-neighbor, deduplication)
- New TensorFlowAdapter (fusionagi/adapters/):
  - LLMAdapter for local TF/Keras model inference
  - TensorCore mixed-precision support
  - GPU-accelerated embedding synthesis fallback
- Reasoning pipeline integration:
  - gpu_scoring.py: drop-in GPU replacement for multi_path scoring
  - Super Big Brain: use_gpu config flag, GPU scoring when available
- Memory integration:
  - gpu_search.py: GPU-accelerated semantic search for SemanticGraphMemory
- Self-improvement integration:
  - gpu_training.py: gradient-based heuristic weight optimization
  - Reflective memory training loop with loss tracking
- Dependencies: gpu extra (tensorflow>=2.16, numpy>=1.26)
- 64 new tests (276 total), all passing
- Architecture spec: docs/gpu_tensorcore_integration.md

Co-Authored-By: Nakamoto, S <defi@defi-oracle.io>
2026-04-28 05:05:50 +00:00

			`"""GPU-accelerated hypothesis scoring for reasoning pipelines.`

			`Provides batched scoring of hypotheses against atomic semantic units`
			`using GPU-accelerated tensor operations. Replaces the CPU-bound`
			`ThreadPoolExecutor-based scoring in multi_path.py.`
			`"""`

			`from __future__ import annotations`

			`from fusionagi._logger import logger`
			`from fusionagi.gpu.backend import TensorBackend, get_backend`
			`from fusionagi.reasoning.tot import ThoughtNode`
			`from fusionagi.schemas.atomic import AtomicSemanticUnit`


			`def gpu_score_hypotheses(`
			`hypotheses: list[str],`
			`units: list[AtomicSemanticUnit],`
			`backend: TensorBackend \| None = None,`
			`) -> list[tuple[ThoughtNode, float]]:`
			`"""Score hypotheses against atomic units using GPU-accelerated similarity.`

			`Replaces the CPU-based generate_and_score_parallel with batched GPU operations.`

			`Args:`
			`hypotheses: List of hypothesis text strings.`
			`units: List of atomic semantic units for reference.`
			`backend: TensorBackend to use.`

			`Returns:`
			`List of (ThoughtNode, score) tuples sorted by score descending.`
			`"""`
			`if not hypotheses:`
			`return []`

			`be = backend or get_backend()`
			`import numpy as np`

			`hyp_embeddings = be.embed_texts(hypotheses)`

			`unit_texts = [u.content for u in units if u.content]`
			`if not unit_texts:`
			`nodes = []`
			`for h in hypotheses:`
			`node = ThoughtNode(`
			`thought=h,`
			`trace=[h],`
			`unit_refs=[u.unit_id for u in units[:10]],`
			`score=0.5,`
			`)`
			`nodes.append((node, 0.5))`
			`return nodes`

			`unit_embeddings = be.embed_texts(unit_texts)`

			`sim_matrix = be.to_numpy(be.cosine_similarity_matrix(hyp_embeddings, unit_embeddings))`

			`coherence_scores = np.mean(sim_matrix, axis=1)`

			`max_sim = np.max(sim_matrix, axis=1)`
			`consistency_scores = max_sim`

			`combined_scores = 0.5 * coherence_scores + 0.5 * consistency_scores`
			`combined_scores = np.clip(combined_scores, 0.0, 1.0)`

			`results: list[tuple[ThoughtNode, float]] = []`
			`for i, h in enumerate(hypotheses):`
			`score = float(combined_scores[i])`
			`node = ThoughtNode(`
			`thought=h,`
			`trace=[h],`
			`unit_refs=[u.unit_id for u in units[:10]],`
			`score=score,`
			`metadata={"gpu_scored": True, "coherence": float(coherence_scores[i])},`
			`)`
			`results.append((node, score))`

			`results.sort(key=lambda x: x[1], reverse=True)`

			`logger.debug(`
			`"GPU hypothesis scoring complete",`
			`extra={`
			`"hypotheses": len(hypotheses),`
			`"units": len(units),`
			`"best_score": results[0][1] if results else 0.0,`
			`"backend": be.name,`
			`},`
			`)`
			`return results`


			`def gpu_score_claims_against_reference(`
			`claims: list[str],`
			`reference: str,`
			`weights: list[float] \| None = None,`
			`backend: TensorBackend \| None = None,`
			`) -> list[float]:`
			`"""Score a batch of claims against a single reference using GPU batch_score.`

			`Args:`
			`claims: List of claim texts.`
			`reference: Reference text to score against.`
			`weights: Optional per-dimension weights.`
			`backend: TensorBackend to use.`

			`Returns:`
			`List of scores for each claim.`
			`"""`
			`if not claims:`
			`return []`

			`be = backend or get_backend()`

			`claim_emb = be.embed_texts(claims)`
			`ref_emb = be.embed_texts([reference])`

			`weight_tensor = None`
			`if weights is not None:`
			`import numpy as np`

			`dim = be.to_numpy(ref_emb).shape[-1]`
			`w = np.ones(dim, dtype=np.float32)`
			`for i, wt in enumerate(weights[:dim]):`
			`w[i] = wt`
			`weight_tensor = be.from_numpy(w)`

			`import numpy as np`

			`ref_squeezed = be.to_numpy(ref_emb)[0]`
			`scores = be.to_numpy(`
			`be.batch_score(claim_emb, be.from_numpy(ref_squeezed), weight_tensor)`
			`)`

			`scores = np.atleast_1d(scores)`
			`return list(scores.tolist())`