# Source code for phylo2vec.stats.treewise

"""
Distance metrics between phylogenetic trees.
"""

import numpy as np

from phylo2vec import _phylo2vec_core as core


def robinson_foulds(
    tree1: np.ndarray,
    tree2: np.ndarray,
    normalize: bool = False,
) -> float:
    """
    Robinson-Foulds (RF) distance between two tree topologies.

    The RF distance is the number of bipartitions (splits) present in one
    tree but not in the other; the smaller the value, the more alike the
    two topologies are.

    Parameters
    ----------
    tree1 : np.ndarray
        First tree, given either as a Phylo2Vec vector (1D) or as a
        Phylo2Vec matrix (2D). Branch lengths are ignored: for a matrix,
        only column 0 (the topology) is used.
    tree2 : np.ndarray
        Second tree, in the same format as ``tree1``.
    normalize : bool, default=False
        If True, return the normalized distance, in the range [0.0, 1.0].

    Returns
    -------
    float
        RF distance. Integer-valued when ``normalize=False``, otherwise a
        float in [0, 1].

    Raises
    ------
    AssertionError
        If trees have different numbers of leaves.

    Examples
    --------
    >>> import numpy as np
    >>> from phylo2vec.stats import robinson_foulds
    >>> v1 = np.array([0, 1, 2, 3], dtype=np.int16)
    >>> v2 = np.array([0, 0, 1, 2], dtype=np.int16)
    >>> robinson_foulds(v1, v1)  # Identical trees
    0.0
    >>> robinson_foulds(v1, v2)  # Different trees
    2.0

    See Also
    --------
    ete3.Tree.robinson_foulds : Reference implementation in ete3
    ape::dist.topo : Reference implementation in R's ape package
    """
    topologies = []
    for tree in (tree1, tree2):
        if tree.ndim == 2:
            # Matrix input: column 0 carries the topology; cast it to int
            # before converting to a plain Python list.
            tree = tree[:, 0].astype(int)
        topologies.append(tree.tolist())

    first, second = topologies
    # The distance itself is computed by the core extension module.
    return core.robinson_foulds(first, second, normalize)