
     i I              	          d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ  ej        d
          Z d Z!dZ"e"#                                Z$ e%d  e&e" e'd                    D                       Z( e%d  e&e$ e'd                    D                       Z)d Z*d Z+ G d de
          Z,d Z-dS )a  
PDB Topology Parser
=========================================================================

This topology parser uses a standard PDB file to build a minimum
internal structure representation (list of atoms).

The topology reader reads a PDB file line by line and ignores atom
numbers but only reads residue numbers up to 9,999 correctly. If you
have systems containing at least 10,000 residues then you need to use
a different file format (e.g. the "extended" PDB, *XPDB* format, see
:mod:`~MDAnalysis.topology.ExtendedPDBParser`) that can handle residue
numbers up to 99,999.

.. note::

    Atomtypes will be created from elements if they are present and valid.
    Otherwise, they will be guessed on Universe creation.
    By default, masses will also be guessed on Universe creation.
    This may change in release 3.0.
    See :ref:`Guessers` for more information.


.. Note::

   The parser processes atoms and their names.
   Partial charges are not set. Elements are parsed if they are
   valid. If partially missing or incorrect, empty records are assigned.

See Also
--------
* :mod:`MDAnalysis.topology.ExtendedPDBParser`
* :class:`MDAnalysis.coordinates.PDB.PDBReader`
* :class:`MDAnalysis.core.universe.Universe`


Classes
-------

.. autoclass:: PDBParser
   :members:
   :inherited-members:

    N   )SYMB2Z)util   )TopologyReaderBasechange_squash)Topology)	AtomnamesAtomidsAltLocsBondsChainIDs	AtomtypesElementsICodesOccupanciesRecordTypesResidsResnamesResnumsSegidsTempfactorsFormalChargeszMDAnalysis.topology.PDBParserc                 F    	 t          |           S # t          $ r |cY S w xY wN)float
ValueError)valdefaults     g/srv/www/vhosts/g4struct/public_html/venv/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.pyfloat_or_defaultr!   c   s7    Szz   s      $0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZc                     g | ]}|S  r$   .0pairs     r    
<listcomp>r(   l       JJJTDJJJ    $   c                     g | ]}|S r$   r$   r%   s     r    r(   r(   m   r)   r*   c                 R    d}t          |           }|D ]}||z  }|| |         z  }|S )a  Decodes the string s using the digit, value associations for each
    character.

    Parameters
    ----------
    digits_values: dict
        A dictionary containing the base-10 numbers that each hexadecimal
        number corresponds to.
    s: str
        The contents of the pdb index columns.

    Returns
    -------
    The integer in base-10 corresponding to traditional base-36.
    r   )len)digits_valuessresultncs        r    decode_purer4   p   sD      FMA # #!-""Mr*   c                 x   t          |          | k    r|d         }|dk    s|dk    s|                                rt          |          S |t          v r(t	          t          |          dd| dz
  z  z  z
  d| z  z   S |t
          v r(t	          t
          |          dd| dz
  z  z  z   d| z  z   S t          d	          )
aC  
    Decodes base-10/upper-case base-36/lower-case base-36 hybrid.

    Parameters
    ----------
    width: int
        The number of columns in the pdb file store atom index.
    s: str
        The contents of the pdb index columns.

    Returns
    -------
    int
        Base-10 integer corresponding to hybrid36.
    r   - )r/   r0   
   r+   r      zinvalid number literal.)r.   isdigitintDIGITS_UPPER_VALUESr4   DIGITS_LOWER_VALUESr   )widthr0   fs      r    
hy36decoder@      s      	A%aDHHSAIIKKq66M&&&-@!"$ $ $&(2%!)+<&<=?AU{K K&&&-@!"$ $ $&(2%!)+<&<=?AU{K K
.
/
//r*   c                   ,    e Zd ZdZddgZd Zd Zd ZdS )	PDBParseraQ  Parser that obtains a list of atoms from a standard PDB file.

    Creates the following Attributes (if present):
     - names
     - chainids
     - tempfactors
     - occupancies
     - record_types (ATOM/HETATM)
     - resids
     - resnames
     - segids
     - elements
     - bonds
     - formalcharges

    Note that `PDBParser` accepts an optional keyword argument
    ``force_chainids_to_segids``. If set to ``True``, the chain IDs (even if
    empty values are in the chain ID column in the file) will forcibly be used
    instead of the segment IDs for creating segments.

    See Also
    --------
    :class:`MDAnalysis.coordinates.PDB.PDBReader`


    .. versionadded:: 0.8
    .. versionchanged:: 0.18.0
       Added parsing of Record types
    .. versionchanged:: 1.0.0
       Added parsing of valid Elements
    .. versionchanged:: 2.0.0
       Bonds attribute is not added if no bonds are present in PDB file.
       If elements are invalid or partially missing, empty elements records
       are now assigned (Issue #2422).
       Aliased ``bfactors`` topologyattribute to ``tempfactors``.
       ``bfactors`` is deprecated and will be removed in 3.0 (Issue #1901)
    .. versionchanged:: 2.3.0
       Formal charges are now read from PDB files if present. No formalcharge
       attribute is created if no formal charges are present in the PDB file.
       Any formal charges not set are assumed to have a value of 0.
       Raise `UserWarning` instead `RuntimeError`
       when CONECT records are corrupt.
    .. versionchanged:: 2.5.0
       Formal charges will not be populated if an unknown entry is encountered,
       instead a UserWarning is emitted.
    .. versionchanged:: 2.8.0
        Removed type and mass guessing (attributes guessing takes place now
        through universe.guess_TopologyAttrs() API).
    .. versionchanged:: 2.10.0
        segID is read from 73-76 instead of 67-76 and added the
        `force_chainids_to_segids` keyword argument. Some infos in logger will
        be generated if the segids is not present or if the chainids are not
        completely equal to segids.
    PDBENTc                     | j         di |}	 |                     |j        j                  }|r|                    |           nC# t
          $ r t          j        d           Y n#t          $ r t          j        d           Y nw xY w|S )zqParse atom information from PDB file

        Returns
        -------
        MDAnalysis Topology object
        ;Invalid atom serials were present, bonds will not be parsedz4CONECT records was corrupt, bonds will not be parsedr$   )	_parseatoms_parsebondsidsvaluesadd_TopologyAttrAttributeErrorwarningswarnRuntimeError)selfkwargstopbondss       r    parsezPDBParser.parse   s     d((((	,$$SW^44E  ,$$U+++  	6 	6 	6M 5 6 6 6 6 6 	6 	6 	6M 5 6 6 6 6 6	6 
s   A B&BBc           	      D   d}g }g }g }g }g }g }g }	g }
g }g }g }g }g }d| _         d}t          j        | j                  5 }|D ]}|                                }|s|                    d          r nw|                    d          sH|                    |dd                                                    	 t          |dd                   }n?#  	 t          d	|dd                   }n# t          $ r d
| _         |}|dz  }Y nw xY wY nxY w|                    |           n# |                    |           w xY w|                    |dd                                                    |                    |dd                                                    |                    |dd                                                    |                    |dd                                                    |                    |dd                                                    |                    |dd                                                    	 | j
        dk    rt          |dd                   }n0t          |dd                   }||z
  dk     r|dz  }||z
  dk     |}n&# t          $ r t          j        d           d}Y nw xY w|                    |           |                    |dd                                                    nI# |                    |           |                    |dd                                                    w xY w|
                    t          |dd         d                     |	                    t          |dd         d                     |                    |dd                                                    ddd           n# 1 swxY w Y   | j         rt          j        d            t          d! t          ||          D                       rt                               d"           t          |          st                               d#           |}|                    d$d          rt                               d%           |}t)          |          }g }|t*          t,          f|t.          t,          f|t0          t,          f|t2          t,          f|t4          t6          j        f|	t:          t6          j        f|
t>          t6          j        ffD ]8\  }}}|                     |t7          j         ||&                               9t          |          r|}|                    tC          t7          j         |t,          &                               g }|D ]t}|"                                tF          v r(|                    |"                                           Ed'| d(}t          j        |           |                    d)           u|                    tI          t7          j         |t,          &                               nt          j        d*           t          |          r	 tK          |          D ]G\  }}|d)k    s7|d+k    rd||<   d,|v sd-|v rt          |ddd.                   ||<   ;t          d||<   H	 |                    tM          t7          j         |t          &                               n*# t          $ r d/| d0}t          j        |           Y nw xY wt7          j         |t6          j        &          }t7          j         |t,          &          }| j
        dk    rd)g|z  }t7          j         |t,          &          }|'                                } t7          j         |t,          &          }tQ          ||||f|||| |f          \  }!\  }}}} }t)          |          }"|                    tS          |                      |                    tU          |                     |                    tW          |                     |                    tY          |                     |                    d$d          s(t          |          rbt          d1 |D                       sItQ          |f|f          \  }#\  }t)          |          }$|                    t[          |                     nZd}$|                    t[          t7          j         d2gt,          &                               d}#t                               d3           t]          ||"|$||!|#4          }%|%S )5z"Create the initial Topology objectr   Fi END)ATOMHETATMN         Tr      r9            L   N   P   XPDB      ixi'  z8PDB file is missing resid information.  Defaulted to '1'6   <   g        B   g      ?H   zCSerial numbers went over 100,000.  Higher serials have been guessedc                      g | ]\  }}||k    S r$   r$   )r&   abs      r    r(   z)PDBParser._parseatoms.<locals>.<listcomp>G  s     99941aQ999r*   z3Segment IDs and Chain IDs are not completely equal.zESetting segids from chainIDs because no segids found in the PDB file.force_chainids_to_segidsz@force_chainids_to_segids is set. Using chain IDs as segment IDs.)dtypezUnknown element z found for some atoms. These have been given an empty element record. If needed they can be guessed using universe.guess_TopologyAttrs(context='default', to_guess=['elements']). zElement information is missing, elements attribute will not be populated. If needed these can be guessed using universe.guess_TopologyAttrs(context='default', to_guess=['elements']).0+r6   zUnknown entry z encountered in formal charge field. This likely indicates that the PDB file is not fully standard compliant. The formalcharges attribute will not be populated.c              3      K   | ]}|d u V  	d S r   r$   )r&   r   s     r    	<genexpr>z(PDBParser._parseatoms.<locals>.<genexpr>  s&      $C$CSSD[$C$C$C$C$C$Cr*   SYSTEMzDSegment/chain ID is empty, setting segids to default value 'SYSTEM'.)attrsatom_resindexresidue_segindex)/_wrapped_serialsr   openanyfilenamestrip
startswithappendr;   r@   r   formatrM   rN   r!   anyziploggerdebuginfogetr.   r
   objectr   r   r   r   npint32r   float32r   arrayr   
capitalizer   r   	enumerater   copyr   r   r   r   r   r   r	   )&rP   rQ   
resid_prevrecord_typesserialsnamesaltlocschainidsicodestempfactorsoccupanciesresidsresnamessegidselementsformalchargeslast_wrapped_serialr?   lineserialresidn_atomsrv   valsAttrrn   	atomtypesvalidated_elementselemwmsgientryresnumsresidx
n_residuessegidx
n_segmentsrR   s&                                         r    rG   zPDBParser._parseatoms   s9
   
 %$\$-(( 5	3A 43 43zz|| ??5)) E'9:: ##D!HNN$4$4555+ ad__FF11!+AtAbDz!:!:% 1 1 104-!4+q0+++	1 NN6****GNN6****T"R%[..00111tBrE{0022333RU 1 1 3 3444RU 1 1 3 3444RU 1 1 3 3444$$T"R%[%6%6%8%89997{f,, #DBK 0 0 #DBK 0 0#j0588!UNE $j0588%*
!   M #5 6 6 6EEE
 MM%(((MM$r"u+"3"3"5"56666 MM%(((MM$r"u+"3"3"5"56666""#3DBK#E#EFFF""#3DBK#E#EFFFd2b5k//112222k5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3 5	3p   	>M = > > > 993vx#8#8999:: 	PLLNOOO 6{{ 	KK 1 2 2 2F ::0%88 	KK : ; ; ;Fg,, 	6*'6*8V,{F3'28,k2:6k2:6"
 		< 		<D$ LLbht5999::;;;;x== 	H ILL28HF#C#C#CDDEEE!#  
2 
2??$$..&--doo.?.?@@@@7t 7 7 7D
 M$'''&--b1111LL"(+=V"L"L"LMMNNNNM G H H H
 } 	PP )- 8 8 - -HAu B;; C<<
 01M!,,!Ullu/252;/?/?M!,,",,+,a((-* ]28M+M+M+MNNOOOO  $ $ $; ; ; ; d#####$ &1118HF333;&  TG^F&///++--&///>KXvv.67TZ0[?] ?];;67F[[
WW%%&&&VF^^$$$VF^^$$$Xh''((( JJ1599	E[[	E!$$C$CF$C$C$C!C!C	E !.vi& C CFIVVJLL((((JLL(6 B B BCCDDDFKK D E E E w
J"%+(.0 0 0
 
s   A:O7:CD'DC.-D.D		DD			DD'O7'D>>DO7AJ0/L0 KLKLAO7AM!!B
O77O;>O;A\1 1$]]c                 p   | j         rt          j        d           t          t	          d t          |          D                       }t                      }t          j        | j	                  5 }d |D             }|D ]}t          |                                          \  }}|D ]Y}		 t          ||         ||	         g          }
|                    |
           6# t          $ r t          j        d           Y Vw xY w	 d d d            n# 1 swxY w Y   t          |          }t          |          S )NrF   c              3   $   K   | ]\  }}||fV  d S r   r$   )r&   r   r0   s      r    rt   z(PDBParser._parsebonds.<locals>.<genexpr>  s*      ==$!Q1v======r*   c              3   6   K   | ]}|d d         dk    |V  d S )NrY   CONECTr$   )r&   r   s     r    rt   z(PDBParser._parsebonds.<locals>.<genexpr>  s4      @@d48x+?+?T+?+?+?+?@@r*   zOPDB file contained CONECT record to TER entry. These are not included in bonds.)ry   rM   rN   rL   dictr   setr   rz   r{   _parse_conectr|   tupleaddKeyErrorr   )rP   r   mappingrS   r?   linesr   atomatomsrk   bonds              r    rH   zPDBParser._parsebonds  s      	!M ' ( ( (   ==)G*<*<=====\$-(( 	(A@@a@@@E ( (+DJJLL99e 
( 
(A	($gdmWQZ%@AA 		$ $ @ @ @ !?@ @ @ @ @@
((	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	( 	(  eU||s6   09D*CDC>	;D=C>	>DDDN)__name__
__module____qualname____doc__r   rT   rG   rH   r$   r*   r    rB   rB      s[        5 5l U^F  0D D DL$ $ $ $ $r*   rB   c                 x    t           dd                   }t           dd                   dz  }	 t           dd                   |z  dk    r"t          d                                         n(# t          $ r t          j        d           |g fcY S w xY w fdt          |          D             }||fS )	aW  parse a CONECT record from pdbs

    Parameters
    ----------
    conect : str
        white space striped CONECT record

    Returns
    -------
    atom_id : int
        atom index of bond
    bonds : set
        atom ids of bonded atoms

    Raises
    ------
    RuntimeError
        Raised if ``conect`` is not a valid CONECT record
    rY   rZ   Nr[   r   z8Bond atoms aren't aligned proberly for CONECT record: {}z4Found CONECT record with single entry, ignoring thisc              3   ^   K   | ]'}t          d |dz  z   d|dz  z                      V  (dS )rZ   r[   r9   N)r;   )r&   r   conects     r    rt   z _parse_conect.<locals>.<genexpr>  sU       ' '!#fR!a%Za!e3455 ' ' ' ' ' 'r*   )r;   r.   rO   r   ZeroDivisionErrorrM   rN   range)r   atom_idn_bond_atoms
bond_atomss   `   r    r   r     s    ( &2,Gvbcc{##q(Lvbcc{l*a//  ,,2F6NN< < < 0    LMMM{
' ' ' '%%' ' 'JJs   A A5 5"BB).r   numpyr   rM   loggingguesser.tablesr   libr   baser   r   core.topologyr	   core.topologyattrsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	getLoggerr   r!   DIGITS_UPPERlowerDIGITS_LOWERr   r   r   r<   r=   r4   r@   rB   r   r$   r*   r    <module>r      s?  0+ +X       # # # # # #       3 3 3 3 3 3 3 3 $ $ $ $ $ $                                   ( 
	:	;	;   6!!##dJJSSuuRyy-I-IJJJKK dJJSSuuRyy-I-IJJJKK   00 0 0:{ { { { {" { { {|	" " " " "r*   