
     iW                         d Z ddlmZmZmZmZ ddlZddlZddl	Z
ddlmZ ddlmZmZ ddlmZmZ eee         ef         Z	  G d d	e          Z G d
 de          Z G d de          Z G d de          ZdS )a  
Updated nucleic acid analysis --- :mod:`MDAnalysis.analysis.nucleicacids`
=========================================================================

:Author: Alia Lescoulie
:Year: 2022-2023
:copyright: LGPLv2.1

The module provides classes for analyzing nucleic acids structures.
This is an updated, higher performance version of previous nucleic acid tools.
For applications see :footcite:p:`Denning2011,Denning2012`.

.. rubric:: References

.. footbibliography::

Distances
---------

.. autoclass:: NucPairDist
    :members:
    :inherited-members:

.. autoclass:: WatsonCrickDist
    :members:
    :exclude-members: select_strand_atoms
    :inherited-members:

.. autoclass:: MinorPairDist
    :members:
    :exclude-members: select_strand_atoms
    :inherited-members:

.. autoclass:: MajorPairDist
    :members:
    :exclude-members: select_strand_atoms
    :inherited-members:

.. versionadded 2.2.0

    )ListDictTupleUnionN   )
calc_bonds)AnalysisBaseResultsGroup)ResidueResidueGroupc                   P    e Zd ZU dZdZed             Zej        e	d<   ej        e	d<   e
e	d<   deej                 deej                 d	d
f fdZe	 	 	 	 	 ddededededededededed	eeej                 eej                 f         fd            Zd dZd dZd dZd Z xZS )!NucPairDista  Atom pair distance calculation base class.

    Takes two lists of :class:`~MDAnalysis.core.groups.AtomGroup` and
    computes the distances between them over a trajectory. Used as a
    superclass for the other nucleic acid distances classes. The distance
    will be measured between atoms sharing an index in the two lists of
    :class:`~MDAnalysis.core.groups.AtomGroup`.

    Parameters
    ----------
    selection1: List[AtomGroup]
        List of :class:`~MDAnalysis.core.groups.AtomGroup` containing an atom
        of each nucleic acid being analyzed.
    selection2: List[AtomGroup]
        List of :class:`~MDAnalysis.core.groups.AtomGroup` containing an atom
        of each nucleic acid being analyzed.
    kwargs: dict
        Arguments for :class:`~MDAnalysis.analysis.base.AnalysisBase`


    Attributes
    ----------
    results.pair_distances: numpy.ndarray
        2D array of pair distances. First dimension is simulation time,
        second dimension contains the pair distances for each each entry
        pair in selection1 and selection2.

        .. versionadded:: 2.4.0

        .. note::
            `results.pair_distances` is slated for deprecation in
            version 3.0.0, use `results.distances` instead.
        .. deprecated:: 2.7.0
            `results.pair_distances` will be removed in
            version 3.0.0, use :attr:`results.distances` instead.

    results.distances: numpy.ndarray
        stored in a 2d numpy array with first index selecting the
        Residue pair, and the second index selecting the frame number
        Distances are stored in a 2d numpy array with axis 0 (first index)
        indexing the trajectory frame and axis 1 (second index) selecting the
        Residue pair.

        .. versionadded:: 2.7.0

    times: numpy.ndarray
        Simulation times for analysis.


    Raises
    ------
    ValueError
        If the selections given are not the same length
    ValueError
        An :class:`~MDAnalysis.core.groups.AtomGroup` in one of the
        strands not a valid nucleic acid
    ValueError
        If a given residue pair from the provided strands returns an empty
        :class:`~MDAnalysis.core.groups.AtomGroup` when selecting the atom
        pairs used in the distance calculations


    *Version Info*

    .. versionchanged:: 2.5.0
       The ability to access by passing selection indices to :attr:`results`
       is now removed as of MDAnalysis version 2.5.0. Please use
       :attr:`results.pair_distances` instead.
       The :attr:`results.times` was deprecated and is now removed as of
       MDAnalysis 2.5.0.
       Please use the class attribute :attr:`times` instead.

    .. versionchanged:: 2.7.0
        Added static method :attr:`select_strand_atoms` as a
        helper for selecting atom pairs for distance analysis.

    .. versionchanged:: 2.9.0
       Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
       backends; use the new method :meth:`get_supported_backends` to see all
       supported backends.
    Tc                     dS )N)serialmultiprocessingdask )clss    j/srv/www/vhosts/g4struct/public_html/venv/lib/python3.11/site-packages/MDAnalysis/analysis/nucleicacids.pyget_supported_backendsz"NucPairDist.get_supported_backends   s    44    _s1_s2_n_sel
selection1
selection2returnNc                     t          t          |           j        |d         j        j        fi | t          |          t          |          k    rt          d          t          |          | _        |d         | _        |d         | _	        t          d| j                  D ].}| xj        ||         z  c_        | xj	        ||         z  c_	        /d S )Nr   zSelections must be same lengthr   )superr   __init__universe
trajectorylen
ValueErrorr   r   r   range)selfr   r   kwargsi	__class__s        r   r    zNucPairDist.__init__   s     	*k4  )qM"-	
 	
17	
 	
 	
 z??c*oo--=>>>z??a=a=q$+&& 	& 	&AHH
1%HHHH
1%HHH	& 	&r   GAUTCstrand1strand2a1_namea2_nameg_namea_nameu_namet_namec_namec	                    |||g}	||g}
g }g }t          | j        |j                  D ]}|d         j        d         |	v r||}}n-|d         j        d         |
v r||}}nt          d| d          |d         j                            d|           }|d         j                            d|           }t          d ||fD                       s?t          |          dk    r
|d         |fn	|d         |f}t          |d          d          |                    |           |                    |           ||fS )ar  
        A helper method for nucleic acid pair distance analyses.
        Used for selecting specific atoms from two strands of nucleic acids.


        Parameters
        ----------
        strand1: List[Residue]
            The first nucleic acid strand
        strand2: List[Residue]
            The second nucleic acid strand
        a1_name: str
            The selection for the purine base of the strand pair
        a2_name: str
            the selection for the pyrimidine base of the strand pair
        g_name: str (optional)
            Name of Guanine in topology, by default assigned to G
        a_name: str (optional)
            Name of Adenine in topology, by default assigned to A
        u_name: str (optional)
            Name of Uracil in topology, by default assigned to U
        t_name: str (optional)
            Name of Thymine in topology, by default assigned to T
        c_name: str (optional)
            Name of Cytosine in topology, by default assigned to C

        Returns
        -------
        Tuple[List[AtomGroup], List[AtomGroup]]
            returns a tuple containing two lists of
            :class:`~MDAnalysis.core.groups.AtomGroup`\s
            corresponding to the provided selections from each strand.

        Raises
        ------
        ValueError:
            An :class:`~MDAnalysis.core.groups.AtomGroup`
            in one of the strands not a valid nucleic acid
        ValueError:
            An :class:`~MDAnalysis.core.groups.Residue` returns an empty
            :class:`~MDAnalysis.core.groups.AtomGroup`
            with the provided selection


        .. versionadded:: 2.7.0
        r   zAtomGroup in z is not a valid nucleic acidzname r   c              3   <   K   | ]}t          |          d k    V  dS )r   N)r#   ).0ags     r   	<genexpr>z2NucPairDist.select_strand_atoms.<locals>.<genexpr>  s,      88rs2ww{888888r   z< returns an empty AtomGroupwith selection string "name {a2}")	zipresiduesresnamer$   atomsselect_atomsallr#   append)r/   r0   r1   r2   r3   r4   r5   r6   r7   pyrimidinespurinessel1sel2paira1a2ag1ag2err_infos                      r   select_strand_atomszNucPairDist.select_strand_atoms   s   t #)&&!9$f-$&$&('*:;; 	 	DAwq![00 'Ba#w.. 'B FDFFF   q'-,,\R\\::Cq'-,,\R\\::C88c3Z88888 
%(XX]]T!WbMMa"  !#A; < < <   KKKKd|r   c                 Z    t          j        | j        | j        g          | j        _        d S N)npzerosn_framesr   results	distancesr&   s    r   _preparezNucPairDist._prepare'  s*    -/X]DK(.
 .
r   c                 ~    t          | j        j        | j        j                  }|| j        j        | j        d d f<   d S rP   )r   r   	positionsr   rT   rU   _frame_index)r&   dists     r   _single_framezNucPairDist._single_frame,  s<    %dh&8$(:LMM7;t0!!!3444r   c                 0    | j         d         | j         d<   d S )NrU   pair_distances)rT   rV   s    r   	_concludezNucPairDist._conclude1  s    )-k)B%&&&r   c                 :    t          dt           j        i          S )NrU   )lookup)r
   ndarray_vstackrV   s    r   _get_aggregatorzNucPairDist._get_aggregator5  s&    \8
 
 
 	
r   )r*   r+   r,   r-   r.   )r   N)__name__
__module____qualname____doc__%_analysis_algorithm_is_parallelizableclassmethodr   mda	AtomGroup__annotations__intr   r    staticmethodr   strr   rN   rW   r\   r_   rc   __classcell__r)   s   @r   r   r   V   s        P Pd -1)5 5 [5 
	KKK&'& '&
 
& & & & & &,  [ [[[ [ 	[
 [ [ [ [ [ 
tCM"D$77	8[ [ [ \[z
 
 
 

< < < <
C C C C
 
 
 
 
 
 
r   r   c                   \     e Zd ZdZ	 	 	 	 	 	 	 dd	ed
ededededededededdf fdZ xZS )WatsonCrickDista  
    Watson-Crick base pair distance for selected
    residues over a trajectory.

    Takes two :class:`~MDAnalysis.core.groups.ResidueGroup`
    objects or two lists of :class:`~MDAnalysis.core.groups.Residue`
    and calculates the distance between the nitrogen atoms in the
    Watson-Crick hydrogen bond over the trajectory. Bases are matched
    either by their index in the two
    :class:`~MDAnalysis.core.groups.ResidueGroup` provided as arguments,
    or based on the indices of the provided lists of
    :class:`~MDAnalysis.core.groups.Residue` objects depending
    on which is provided.

    .. note::
        Support for :class:`~MDAnalysis.core.groups.Residue` is slated for
        deprecation and will raise a warning when used. It still works but
        :class:`~MDAnalysis.core.groups.ResidueGroup` is preferred.

    Parameters
    ----------
    strand1: ResidueClass
        First list of bases

        .. deprecated:: 2.7.0
           Using a list of :class:`~MDAnalysis.core.groups.Residue` will
           be removed in 3.0.0. Use a
           :class:`~MDAnalysis.core.groups.ResidueGroup`.

    strand2: ResidueClass
        Second list of bases

        .. deprecated:: 2.7.0
           Using a list of :class:`~MDAnalysis.core.groups.Residue` will
           be removed in 3.0.0. Use a
           :class:`~MDAnalysis.core.groups.ResidueGroup`.

    n1_name: str (optional)
        Name of Nitrogen 1 of nucleic acids, by default assigned to "N1"
    n3_name: str (optional)
        Name of Nitrogen 3 of nucleic acids, by default assigned to "N3"
    g_name: str (optional)
        Name of Guanine in topology, by default assigned to "G"
    a_name: str (optional)
        Name of Adenine in topology, by default assigned to "A"
    u_name: str (optional)
        Name of Uracil in topology, by default assigned to "U"
    t_name: str (optional)
        Name of Thymine in topology, by default assigned to "T"
    c_name: str (optional)
        Name of Cytosine in topology, by default assigned to C
    **kwargs: dict
        Key word arguments for
        :class:`~MDAnalysis.analysis.base.AnalysisBase`

    Attributes
    ----------
    results.distances: numpy.ndarray
        Distances are stored in a 2d numpy array with axis 0 (first index)
        indexing the trajectory frame and axis 1 (second index) selecting the
        Residue pair.

        .. versionadded:: 2.7.0

    results.pair_distances: numpy.ndarray
        2D array of pair distances. First dimension is
        simulation time, second dimension contains the
        pair distances for each each entry pair in
        selection1 and selection2.

        .. versionadded:: 2.4.0

        .. deprecated:: 2.7.0
            `results.pair_distances` will be removed in version 3.0.0,
            use :attr:`results.distances` instead.

    times: numpy.ndarray
        Simulation times for analysis.

    Raises
    ------
    TypeError
        If the provided list of :class:`~MDAnalysis.core.Residue` contains
        non-Residue elements

        .. deprecated:: 2.7.0
           Starting with version 3.0.0, this exception will no longer
           be raised because only
           :class:`~MDAnalysis.core.groups.ResidueGroup` will be allowed.

    ValueError
        If `strand1` and `strand2` are not the same length
    ValueError:
        An :class:`~MDAnalysis.core.groups.AtomGroup`
        in one of the strands not a valid nucleic acid
    ValueError
        If a given residue pair from the provided strands returns an empty
        :class:`~MDAnalysis.core.groups.AtomGroup` when selecting the atom
        pairs used in the distance calculations


    *Version Info*

    .. versionchanged:: 2.5.0
       Accessing results by passing strand indices to :attr:`results`
       was deprecated and is now removed as of MDAnalysis version 2.5.0.
       Please use :attr:`results.pair_distances` instead.
       The :attr:`results.times` was deprecated and is now removed as of
       MDAnalysis 2.5.0. Please use the class attribute
       :attr:`times` instead.

    .. versionchanged:: 2.7.0
        `strand1` and `strand2` now also accept a
        :class:`~MDAnalysis.core.groups.ResidueGroup` as input.
        The previous input type, ``List[Residue]`` is still supported,
        but it is **deprecated** and will be removed in release 3.0.0.
    N1N3r*   r+   r,   r-   r.   r/   r0   n1_namen3_namer3   r4   r5   r6   r7   r   Nc
                     dt           dt          fd} ||          } ||          }|                     |||||||||		  	        } t          t          |           j        |d         |d         fi |
 d S )Nstrandr   c                     t          | t                    r_t          d | D                       st          |  d          t	          j        t          d|  d                     t          |           } | S )Nc              3   @   K   | ]}t          |t                    V  d S rP   )
isinstancer   )r:   resids     r   r<   zBWatsonCrickDist.__init__.<locals>.verify_strand.<locals>.<genexpr>  s,      JJ%:eW55JJJJJJr   z contains non-Residue elementsz ResidueGroup should be used for z  insteadof giving a Residue list)r|   listrB   	TypeErrorwarningswarnDeprecationWarningr   )ry   s    r   verify_strandz/WatsonCrickDist.__init__.<locals>.verify_strand  s     &$'' <JJ6JJJJJ O#v$M$M$MNNN&7v 7 7 7    (4F';';Mr   r3   r4   r6   r5   r7   r   r   )ResidueClassr   rN   r   rs   r    )r&   r/   r0   rv   rw   r3   r4   r5   r6   r7   r'   r   strand_atomgroupsr)   s                r   r    zWatsonCrickDist.__init__  s    	, 	< 	 	 	 	* !.g 6 6 -g 6 6 $$ % 
 
 	 	.ot$$-a "3A"6	
 	
:@	
 	
 	
 	
 	
r   )rt   ru   r*   r+   r,   r-   r.   )rd   re   rf   rg   r   ro   r    rp   rq   s   @r   rs   rs   =  s        t tt 6
 6
6
 6
 	6

 6
 6
 6
 6
 6
 6
 
6
 6
 6
 6
 6
 6
 6
 6
 6
 6
r   rs   c                   \     e Zd ZdZ	 	 	 	 	 	 	 dd	ed
ededededededededdf fdZ xZS )MinorPairDista  Minor-Pair basepair distance for selected residues over a trajectory.

    Takes two :class:`~MDAnalysis.core.groups.ResidueGroup` objects and
    calculates the Minor-groove hydrogen bond length between the
    nitrogen and oxygen atoms over the trajectory. Bases are
    matched by their index in the two
    :class:`~MDAnalysis.core.groups.ResidueGroup` provided as arguments.

    Parameters
    ----------
    strand1: List[Residue]
        First list of bases
    strand2: List[Residue]
        Second list of bases
    o2_name: str (optional)
        Name of Oxygen 2 of nucleic acids;
        by default assigned to "O2";
    c2_name: str (optional)
        Name of Carbon 2 of nucleic acids;
        by default assigned to "C2";
    g_name: str (optional)
        Name of Guanine in topology;
        by default assigned to "G";
    a_name: str (optional)
        Name of Adenine in topology
        by default assigned to "A";
    u_name: str (optional)
        Name of Uracil in topology;
        by default assigned to "U";
    t_name: str (optional)
        Name of Thymine in topology;
        by default assigned to "T";
    c_name: str (optional)
        Name of Cytosine in topology;
        by default assigned to "C";
    **kwargs:
        keyword arguments for
        :class:`~MDAnalysis.analysis.base.AnalysisBase`

    Attributes
    ----------
    results.distances: numpy.ndarray
        stored in a 2d numpy array with first index selecting
        the Residue pair, and the second index selecting the frame number
    times: numpy.ndarray
        Simulation times for analysis.

    Raises
    ------
    ValueError
        If the selections given are not the same length
        A :class:`~MDAnalysis.core.Residue` in
        one of the strands not a valid nucleic acid
    ValueError
        If a given residue pair from the provided strands returns an empty
        :class:`~MDAnalysis.core.groups.AtomGroup` when selecting the atom
        pairs used in the distance calculations


    .. versionadded:: 2.7.0
    O2C2r*   r+   r,   r-   r.   r/   r0   o2_namec2_namer3   r4   r5   r6   r7   r   Nc
                     |                      |||||||||		  	        } t          t          |           j        |d         |d         fi |
 d S Nr   r   r   )rN   r   r   r    )r&   r/   r0   r   r   r3   r4   r5   r6   r7   r'   
selectionsr)   s               r   r    zMinorPairDist.__init__,       $$ % 
 
 	 	,mT""+qM:a=	
 	
,2	
 	
 	
 	
 	
r   )r   r   r*   r+   r,   r-   r.   rd   re   rf   rg   r   ro   r    rp   rq   s   @r   r   r     s        < <D 
 

 
 	

 
 
 
 
 
 
 

 
 
 
 
 
 
 
 
 
r   r   c                   \     e Zd ZdZ	 	 	 	 	 	 	 dd	ed
ededededededededdf fdZ xZS )MajorPairDista$  Minor-Pair base pair distance for
    selected residues over a trajectory.

    Takes two :class:`~MDAnalysis.core.groups.ResidueGroup` objects and
    calculates the Major-groove hydrogen bond length between the nitrogen
    and oxygen atoms over the trajectory. Bases are matched by their index
    in the two :class:`~MDAnalysis.core.groups.ResidueGroup`
    provided as arguments.

    Parameters
    ----------
    strand1: List[Residue]
        First list of bases
    strand2: List[Residue]
        Second list of bases
    o6_name: str (optional)
        Name of Oxygen 6 of nucleic acids;
        by default assigned to "O6"
    n4_name: str (optional)
        Name of Nitrogen 4 of nucleic acids;
        by default assigned to "N4"
    g_name: str (optional)
        Name of Guanine in topology;
        by default assigned to "G"
    a_name: str (optional)
        Name of Adenine in topology;
        by default assigned to "A"
    u_name: str (optional)
        Name of Uracil in topology;
        by default assigned to "U"
    t_name: str (optional)
        Name of Thymine in topology;
        by default assigned to "T"
    c_name: str (optional)
        Name of Cytosine in topology;
        by default assigned to "C"
    **kwargs:
        arguments for :class:`~MDAnalysis.analysis.base.AnalysisBase`

    Attributes
    ----------
    results.distances: numpy.ndarray
        Distances are stored in a 2d numpy array with axis 0 (first index)
        indexing the trajectory frame and axis 1 (second index) selecting the
        Residue pair.
    times: numpy.ndarray
        Simulation times for analysis.

    Raises
    ------
    ValueError
        A :class:`~MDAnalysis.core.Residue`
        in one of the strands not a valid nucleic acid
    ValueError
        If a given residue pair from the provided strands returns an empty
        :class:`~MDAnalysis.core.groups.AtomGroup` when selecting the atom
        pairs used in the distance calculations
    ValueError
        if the selections given are not the same length


    .. versionadded:: 2.7.0
    N4O6r*   r+   r,   r-   r.   r/   r0   n4_nameo6_namer3   r4   r5   r6   r7   r   Nc
                     |                      |||||||||		  	        } t          t          |           j        |d         |d         fi |
 d S r   )rN   r   r   r    )r&   r/   r0   r   r   r3   r4   r5   r6   r7   r'   r   r)   s               r   r    zMajorPairDist.__init__  r   r   )r   r   r*   r+   r,   r-   r.   r   rq   s   @r   r   r   M  s        > >H 
 

 
 	

 
 
 
 
 
 
 

 
 
 
 
 
 
 
 
 
r   r   )rg   typingr   r   r   r   r   numpyrQ   
MDAnalysisrj   rU   r   baser	   r
   MDAnalysis.core.groupsr   r   r   r   rs   r   r   r   r   r   <module>r      s  0( (T , + + + + + + + + + + +          ! ! ! ! ! ! , , , , , , , , 8 8 8 8 8 8 8 8
 T']L01d
 d
 d
 d
 d
, d
 d
 d
Nm
 m
 m
 m
 m
k m
 m
 m
`]
 ]
 ]
 ]
 ]
K ]
 ]
 ]
@_
 _
 _
 _
 _
K _
 _
 _
 _
 _
r   