Tutorials adapted from MDtraj

try pytraj online:

http://mybinder.org/badge.svg

Clustering with scipy

Original tutorial is from MDtraj

Note

this example only shows how to populate pytraj to python‘s ecosystem. Users should check Cluster Analysis

In [1]:
from __future__ import print_function
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

%matplotlib inline

import pytraj as pt

import scipy
import scipy.cluster.hierarchy
In [2]:
# load data
traj = pt.iterload('tz2.nc', 'tz2.parm7')
traj
Out[2]:
pytraj.TrajectoryIterator, 101 frames: 
Size: 0.000503 (GB)
<Topology: 223 atoms, 13 residues, 1 mols, non-PBC>
           
In [3]:
# calculate pairwise rmsd with `autoimage=True`
# generally we only need to cluster a subset of atoms.
# cluster for 'CA' atoms

distances = pt.pairwise_rmsd(traj(autoimage=True), mask='@CA')

# use `scipy` to perform clustering
linkage = scipy.cluster.hierarchy.ward(distances)

scipy.cluster.hierarchy.dendrogram(linkage, no_labels=True, count_sort='descendent')
None
In [4]:
# cluster for all atoms but H

distances = pt.pairwise_rmsd(traj(autoimage=True), mask='!@H=')
linkage = scipy.cluster.hierarchy.ward(distances)
scipy.cluster.hierarchy.dendrogram(linkage, no_labels=True, count_sort='descendent')
None

(clustering_mdtraj_adapted.ipynb; clustering_mdtraj_adapted_evaluated.ipynb; clustering_mdtraj_adapted.py)

Principal component analysis with sklearn

Original tutorial is from here

Note

cpptraj (then pytraj) has PCA analysis. Doc is coming soon.

PCA analysis with sklearn (adapted from mdtraj and cpptraj tutorial)

In [1]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

%matplotlib inline
from __future__ import print_function
%config InlineBackend.figure_format = 'retina'  # high resolution
import matplotlib
matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi'] # larger image

import pytraj as pt
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from pytraj.plot import show_config
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/rcsetup.py in validate_dpi(s)
    206     try:
--> 207         return float(s)
    208     except ValueError:

ValueError: could not convert string to float: 'figurefigure'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)
    925             try:
--> 926                 cval = self.validate[key](val)
    927             except ValueError as ve:

/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/rcsetup.py in validate_dpi(s)
    209         raise ValueError('"%s" is not string "figure" or'
--> 210             ' could not convert "%s" to float' % (s, s))
    211 

ValueError: "figurefigure" is not string "figure" or could not convert "figurefigure" to float

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-1-26cbf5b99efa> in <module>()
      6 get_ipython().magic("config InlineBackend.figure_format = 'retina'  # high resolution")
      7 import matplotlib
----> 8 matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi'] # larger image
      9 
     10 import pytraj as pt

/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)
    926                 cval = self.validate[key](val)
    927             except ValueError as ve:
--> 928                 raise ValueError("Key %s: %s" % (key, str(ve)))
    929             dict.__setitem__(self, key, cval)
    930         except KeyError:

ValueError: Key savefig.dpi: "figurefigure" is not string "figure" or could not convert "figurefigure" to float
In [2]:
# we use `load` method to load all data to memory. This is good for small data size.
# use `pytraj.iterload` for out-of-core traj.

traj = pt.load('tz2.nc', 'tz2.parm7')
traj
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-c725c494e36a> in <module>()
      2 # use `pytraj.iterload` for out-of-core traj.
      3 
----> 4 traj = pt.load('tz2.nc', 'tz2.parm7')
      5 traj

NameError: name 'pt' is not defined
In [3]:
pca = PCA(n_components=2)
pca
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-3a38360c2cc9> in <module>()
----> 1 pca = PCA(n_components=2)
      2 pca

NameError: name 'PCA' is not defined
In [4]:
# superpose to 1st frame
pt.superpose(traj, ref=0, mask='!@H=')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-18994d813a5f> in <module>()
      1 # superpose to 1st frame
----> 2 pt.superpose(traj, ref=0, mask='!@H=')

NameError: name 'pt' is not defined
In [5]:
# create average structure

avg = pt.mean_structure(traj)
avg
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-0afa673d8b2b> in <module>()
      1 # create average structure
      2 
----> 3 avg = pt.mean_structure(traj)
      4 avg

NameError: name 'pt' is not defined
In [6]:
# superpose all structures to average frame
pt.superpose(traj, ref=avg, mask='!@H=')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-25077ed5f129> in <module>()
      1 # superpose all structures to average frame
----> 2 pt.superpose(traj, ref=avg, mask='!@H=')

NameError: name 'pt' is not defined
In [7]:
# perform PCA calculation and get transformed coords
# we need to reshape 3D traj.xyz array to 2D to make sklearn happy
# make a new traj by stripping all H atoms
traj_new = traj['!@H=']
xyz_2d = traj_new.xyz.reshape(traj_new.n_frames, traj_new.n_atoms * 3)
print(xyz_2d.shape) # (n_frames, n_dimensions)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-d0184d7149fd> in <module>()
      2 # we need to reshape 3D traj.xyz array to 2D to make sklearn happy
      3 # make a new traj by stripping all H atoms
----> 4 traj_new = traj['!@H=']
      5 xyz_2d = traj_new.xyz.reshape(traj_new.n_frames, traj_new.n_atoms * 3)
      6 print(xyz_2d.shape) # (n_frames, n_dimensions)

NameError: name 'traj' is not defined
In [8]:
reduced_cartesian = pca.fit_transform(xyz_2d)
print(reduced_cartesian.shape) # (n_frames, n_dimensions)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-4cbba4e61bf6> in <module>()
----> 1 reduced_cartesian = pca.fit_transform(xyz_2d)
      2 print(reduced_cartesian.shape) # (n_frames, n_dimensions)

NameError: name 'pca' is not defined
In [9]:
# ignore warning
import warnings
warnings.filterwarnings('ignore')

plt.figure()
plt.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj_new.n_frames), alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
cbar = plt.colorbar()
cbar.set_label('frame #')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-78741aadba8b> in <module>()
      4 
      5 plt.figure()
----> 6 plt.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj_new.n_frames), alpha=0.5)
      7 plt.xlabel('PC1')
      8 plt.ylabel('PC2')

NameError: name 'reduced_cartesian' is not defined
<matplotlib.figure.Figure at 0x1061e9cc0>

Compare to cpptraj data

note: stop here if you do not care (a bit compilicated code)

In [10]:
# cpptraj

# copy from Amber15 manual (page 619)
command = '''
# Step one. Generate average structure.
# RMS-Fit to first frame to remove global translation/rotation.
parm tz2.parm7
trajin tz2.nc
rms first !@H=
average crdset AVG
run
# Step two. RMS-Fit to average structure. Calculate covariance matrix.
# Save the fit coordinates.
rms ref AVG !@H=
matrix covar name MyMatrix !@H=
createcrd CRD1
run
# Step three. Diagonalize matrix.
runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs
# Step four. Project saved fit coordinates along eigenvectors 1 and 2
crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
'''
In [11]:
state = pt.datafiles.load_cpptraj_state
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-bf09cdb6c965> in <module>()
----> 1 state = pt.datafiles.load_cpptraj_state

NameError: name 'pt' is not defined
In [12]:
state = pt.datafiles.load_cpptraj_state(command)
# tell 'run' to perform all calculation
state.run()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-fb6ac06f4aef> in <module>()
----> 1 state = pt.datafiles.load_cpptraj_state(command)
      2 # tell 'run' to perform all calculation
      3 state.run()

NameError: name 'pt' is not defined
In [13]:
# get data
state.data
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-6eac12750354> in <module>()
      1 # get data
----> 2 state.data

NameError: name 'state' is not defined
In [14]:
print([dset.key for dset in state.data])
print(state.data['MyMatrix'].values.shape)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-eba03c85f5cc> in <module>()
----> 1 print([dset.key for dset in state.data])
      2 print(state.data['MyMatrix'].values.shape)

NameError: name 'state' is not defined
In [15]:
# reduced_cartesian corresponds to dataset with names of 'Mode1', 'Mode2'
mode_0, mode_1 = -state.data['Mode1'].values, -state.data['Mode2'].values
# mode_0, mode_1 = state.data['Mode1'].values, state.data['Mode2'].values

# plot: cpptraj
fig = plt.figure()
ax_0 = fig.add_subplot(211)
ax_0.scatter(mode_0, mode_1, marker='o', c=range(traj.n_frames), alpha=0.5)
ax_0.set_xlabel('PC1')
ax_0.set_ylabel('PC2')
ax_0.set_xlim([-60, 80])
ax_0.set_ylim([-40, 40])
ax_0.set_title('cpptraj-flip')
ax_0.set_yticks([-40, -20, 0, 20, 40])

# plot: sklearn
fig = plt.figure()
ax_1 = fig.add_subplot(212)
ax_1.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj.n_frames), alpha=0.5)
ax_1.set_xlabel('PC1')
ax_1.set_ylabel('PC2')
ax_1.set_yticks([-40, -20, 0, 20, 40])
ax_1.set_title('sklearn')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-28a51c3c6770> in <module>()
      1 # reduced_cartesian corresponds to dataset with names of 'Mode1', 'Mode2'
----> 2 mode_0, mode_1 = -state.data['Mode1'].values, -state.data['Mode2'].values
      3 # mode_0, mode_1 = state.data['Mode1'].values, state.data['Mode2'].values
      4 
      5 # plot: cpptraj

NameError: name 'state' is not defined
In [16]:
print('sklearn \n')
print(reduced_cartesian[:, 0], reduced_cartesian[:, 1])

print('\ncpptraj\n') # flip sign, why?
print(mode_0, mode_1)
sklearn 

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-47225f2601de> in <module>()
      1 print('sklearn \n')
----> 2 print(reduced_cartesian[:, 0], reduced_cartesian[:, 1])
      3 
      4 print('\ncpptraj\n') # flip sign, why?
      5 print(mode_0, mode_1)

NameError: name 'reduced_cartesian' is not defined

(pca_mdtraj_adapted.ipynb; pca_mdtraj_adapted_evaluated.ipynb; pca_mdtraj_adapted.py)