98 lines
3.0 KiB
Python
98 lines
3.0 KiB
Python
|
|
"""Tests for fusionagi.gpu.tensor_scoring and reasoning.gpu_scoring."""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from fusionagi.gpu.backend import reset_backend, get_backend
|
||
|
|
from fusionagi.gpu.tensor_scoring import (
|
||
|
|
gpu_score_hypotheses,
|
||
|
|
gpu_score_claims_against_reference,
|
||
|
|
)
|
||
|
|
from fusionagi.reasoning.gpu_scoring import (
|
||
|
|
generate_and_score_gpu,
|
||
|
|
score_claims_gpu,
|
||
|
|
deduplicate_claims_gpu,
|
||
|
|
)
|
||
|
|
from fusionagi.schemas.atomic import AtomicSemanticUnit, AtomicUnitType
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture(autouse=True)
def _use_numpy():
    """Select the ``"numpy"`` backend around every test in this module.

    Being ``autouse``, the fixture wraps each test automatically: the backend
    is reset and then requested with ``force="numpy"`` before the test body
    runs, and reset once more afterwards.
    """
    # Setup: start from a reset state, then request the NumPy backend.
    # NOTE(review): presumably reset_backend() discards any cached backend
    # selection -- confirm against fusionagi.gpu.backend.
    reset_backend()
    get_backend(force="numpy")

    yield

    # Teardown: reset again so the forced choice does not outlive the test.
    reset_backend()
|
||
|
|
|
||
|
|
|
||
|
|
def _make_unit(content: str) -> AtomicSemanticUnit:
    """Build a FACT-typed ``AtomicSemanticUnit`` wrapping *content*.

    The ``unit_id`` is derived from a CRC-32 of the UTF-8 encoded content.
    The built-in ``hash()`` is deliberately avoided: string hashes are salted
    per interpreter process, so IDs based on it differ from run to run, and
    reducing them to a small range makes collisions between distinct
    contents likely enough to cause hard-to-reproduce failures.

    Args:
        content: Text stored in the unit; also the seed for its identifier.

    Returns:
        A unit of type ``AtomicUnitType.FACT`` with ``confidence=1.0``.
    """
    import zlib  # function-scope import keeps this helper self-contained

    # CRC-32 is stable across processes and platforms for the same bytes.
    checksum = zlib.crc32(content.encode("utf-8"))
    return AtomicSemanticUnit(
        unit_id=f"u_{checksum}",
        content=content,
        type=AtomicUnitType.FACT,
        confidence=1.0,
    )
|
||
|
|
|
||
|
|
|
||
|
|
class TestGPUScoreHypotheses:
    """Checks on the list of ``(node, score)`` pairs from ``gpu_score_hypotheses``."""

    def test_empty(self):
        """No hypotheses and no units give back an empty list."""
        scored = gpu_score_hypotheses([], [])
        assert scored == []

    def test_basic(self):
        """One hypothesis yields one pair whose score lies within [0, 1]."""
        evidence = [_make_unit("the sky is blue"), _make_unit("water is wet")]
        scored = gpu_score_hypotheses(["the sky is blue"], evidence)
        assert len(scored) == 1
        node, score = scored[0]
        # The node must carry the hypothesis text it was built from.
        assert node.thought == "the sky is blue"
        assert 0.0 <= score <= 1.0

    def test_multiple_hypotheses(self):
        """Three hypotheses yield three pairs ordered by descending score."""
        evidence = [_make_unit("python is great")]
        scored = gpu_score_hypotheses(
            ["python is great", "java is better", "rust is fast"],
            evidence,
        )
        assert len(scored) == 3
        # Should be sorted by score descending
        ordered_scores = [score for _node, score in scored]
        assert ordered_scores == sorted(ordered_scores, reverse=True)

    def test_no_units(self):
        """With no units to compare against, the single score is exactly 0.5."""
        scored = gpu_score_hypotheses(["test hypothesis"], [])
        assert len(scored) == 1
        first = scored[0]
        assert first[1] == 0.5

    def test_gpu_metadata(self):
        """The returned node's metadata has ``gpu_scored`` set to ``True``."""
        evidence = [_make_unit("test content")]
        scored = gpu_score_hypotheses(["test content"], evidence)
        node, _ = scored[0]
        gpu_flag = node.metadata.get("gpu_scored")
        assert gpu_flag is True
|
||
|
|
|
||
|
|
|
||
|
|
class TestGPUScoreClaimsAgainstReference:
    """Checks on the score list from ``gpu_score_claims_against_reference``."""

    def test_empty(self):
        """An empty claim list gives back an empty score list."""
        scored = gpu_score_claims_against_reference([], "ref")
        assert scored == []

    def test_basic(self):
        """Two claims produce two scores, each a Python ``float``."""
        claim_scores = gpu_score_claims_against_reference(
            ["claim one", "claim two"],
            "claim one reference",
        )
        assert len(claim_scores) == 2
        # Every entry must be a float instance.
        for value in claim_scores:
            assert isinstance(value, float)
|
||
|
|
|
||
|
|
|
||
|
|
class TestReasoningGPUScoring:
    """Smoke tests for the helpers imported from ``fusionagi.reasoning.gpu_scoring``."""

    def test_generate_and_score_gpu(self):
        """Two hypotheses scored against two units give two results."""
        evidence = [_make_unit("hello world"), _make_unit("testing gpu")]
        scored = generate_and_score_gpu(["hello world", "testing gpu"], evidence)
        assert len(scored) == 2

    def test_score_claims_gpu(self):
        """A single claim produces a single ``float`` score."""
        claim_scores = score_claims_gpu(["test claim"], "reference text")
        assert len(claim_scores) == 1
        only_score = claim_scores[0]
        assert isinstance(only_score, float)

    def test_deduplicate_claims_gpu(self):
        """Flattened and sorted, the groups contain exactly indices 0, 1 and 2."""
        groups = deduplicate_claims_gpu(["a", "b", "c"])
        # Flatten the groups; each index must appear exactly once overall.
        flattened = []
        for group in groups:
            for idx in group:
                flattened.append(idx)
        assert sorted(flattened) == [0, 1, 2]
|