from __future__ import print_function
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

%matplotlib inline

import pytraj as pt

import scipy
import scipy.cluster.hierarchy

# load data
traj = pt.iterload('tz2.nc', 'tz2.parm7')
traj

pytraj.TrajectoryIterator, 101 frames: 
Size: 0.000503 (GB)
<Topology: 223 atoms, 13 residues, 1 mols, non-PBC>

# calculate pairwise rmsd with `autoimage=True`
# generally we only need to cluster a subset of atoms.
# cluster for 'CA' atoms

distances = pt.pairwise_rmsd(traj(autoimage=True), mask='@CA')

# use `scipy` to perform clustering
linkage = scipy.cluster.hierarchy.ward(distances)

scipy.cluster.hierarchy.dendrogram(linkage, no_labels=True, count_sort='descendent')
None

# cluster for all atoms but H

distances = pt.pairwise_rmsd(traj(autoimage=True), mask='!@H=')
linkage = scipy.cluster.hierarchy.ward(distances)
scipy.cluster.hierarchy.dendrogram(linkage, no_labels=True, count_sort='descendent')
None

PCA analysis with sklearn (adapted from `mdtraj` and cpptraj tutorial)¶

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

%matplotlib inline
from __future__ import print_function
%config InlineBackend.figure_format = 'retina'  # high resolution
import matplotlib
matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi'] # larger image

import pytraj as pt
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from pytraj.plot import show_config

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/rcsetup.py in validate_dpi(s)
    206     try:
--> 207         return float(s)
    208     except ValueError:

ValueError: could not convert string to float: 'figurefigure'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)
    925             try:
--> 926                 cval = self.validate[key](val)
    927             except ValueError as ve:

/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/rcsetup.py in validate_dpi(s)
    209         raise ValueError('"%s" is not string "figure" or'
--> 210             ' could not convert "%s" to float' % (s, s))
    211 

ValueError: "figurefigure" is not string "figure" or could not convert "figurefigure" to float

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-1-26cbf5b99efa> in <module>()
      6 get_ipython().magic("config InlineBackend.figure_format = 'retina'  # high resolution")
      7 import matplotlib
----> 8 matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi'] # larger image
      9 
     10 import pytraj as pt

/Users/haichit/miniconda3/lib/python3.5/site-packages/matplotlib/__init__.py in __setitem__(self, key, val)
    926                 cval = self.validate[key](val)
    927             except ValueError as ve:
--> 928                 raise ValueError("Key %s: %s" % (key, str(ve)))
    929             dict.__setitem__(self, key, cval)
    930         except KeyError:

ValueError: Key savefig.dpi: "figurefigure" is not string "figure" or could not convert "figurefigure" to float

# we use `load` method to load all data to memory. This is good for small data size.
# use `pytraj.iterload` for out-of-core traj.

traj = pt.load('tz2.nc', 'tz2.parm7')
traj

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-c725c494e36a> in <module>()
      2 # use `pytraj.iterload` for out-of-core traj.
      3 
----> 4 traj = pt.load('tz2.nc', 'tz2.parm7')
      5 traj

NameError: name 'pt' is not defined

pca = PCA(n_components=2)
pca

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-3a38360c2cc9> in <module>()
----> 1 pca = PCA(n_components=2)
      2 pca

NameError: name 'PCA' is not defined

# superpose to 1st frame
pt.superpose(traj, ref=0, mask='!@H=')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-18994d813a5f> in <module>()
      1 # superpose to 1st frame
----> 2 pt.superpose(traj, ref=0, mask='!@H=')

NameError: name 'pt' is not defined

# create average structure

avg = pt.mean_structure(traj)
avg

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-0afa673d8b2b> in <module>()
      1 # create average structure
      2 
----> 3 avg = pt.mean_structure(traj)
      4 avg

NameError: name 'pt' is not defined

# superpose all structures to average frame
pt.superpose(traj, ref=avg, mask='!@H=')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-6-25077ed5f129> in <module>()
      1 # superpose all structures to average frame
----> 2 pt.superpose(traj, ref=avg, mask='!@H=')

NameError: name 'pt' is not defined

# perform PCA calculation and get transformed coords
# we need to reshape 3D traj.xyz array to 2D to make sklearn happy
# make a new traj by stripping all H atoms
traj_new = traj['!@H=']
xyz_2d = traj_new.xyz.reshape(traj_new.n_frames, traj_new.n_atoms * 3)
print(xyz_2d.shape) # (n_frames, n_dimensions)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-d0184d7149fd> in <module>()
      2 # we need to reshape 3D traj.xyz array to 2D to make sklearn happy
      3 # make a new traj by stripping all H atoms
----> 4 traj_new = traj['!@H=']
      5 xyz_2d = traj_new.xyz.reshape(traj_new.n_frames, traj_new.n_atoms * 3)
      6 print(xyz_2d.shape) # (n_frames, n_dimensions)

NameError: name 'traj' is not defined

reduced_cartesian = pca.fit_transform(xyz_2d)
print(reduced_cartesian.shape) # (n_frames, n_dimensions)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-4cbba4e61bf6> in <module>()
----> 1 reduced_cartesian = pca.fit_transform(xyz_2d)
      2 print(reduced_cartesian.shape) # (n_frames, n_dimensions)

NameError: name 'pca' is not defined

# ignore warning
import warnings
warnings.filterwarnings('ignore')

plt.figure()
plt.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj_new.n_frames), alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
cbar = plt.colorbar()
cbar.set_label('frame #')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-78741aadba8b> in <module>()
      4 
      5 plt.figure()
----> 6 plt.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj_new.n_frames), alpha=0.5)
      7 plt.xlabel('PC1')
      8 plt.ylabel('PC2')

NameError: name 'reduced_cartesian' is not defined

<matplotlib.figure.Figure at 0x1061e9cc0>

Compare to cpptraj data¶

note: stop here if you do not care (a bit compilicated code)

# cpptraj

# copy from Amber15 manual (page 619)
command = '''
# Step one. Generate average structure.
# RMS-Fit to first frame to remove global translation/rotation.
parm tz2.parm7
trajin tz2.nc
rms first !@H=
average crdset AVG
run
# Step two. RMS-Fit to average structure. Calculate covariance matrix.
# Save the fit coordinates.
rms ref AVG !@H=
matrix covar name MyMatrix !@H=
createcrd CRD1
run
# Step three. Diagonalize matrix.
runanalysis diagmatrix MyMatrix vecs 2 name MyEvecs
# Step four. Project saved fit coordinates along eigenvectors 1 and 2
crdaction CRD1 projection evecs MyEvecs !@H= out project.dat beg 1 end 2
'''

state = pt.datafiles.load_cpptraj_state

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-bf09cdb6c965> in <module>()
----> 1 state = pt.datafiles.load_cpptraj_state

NameError: name 'pt' is not defined

state = pt.datafiles.load_cpptraj_state(command)
# tell 'run' to perform all calculation
state.run()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-fb6ac06f4aef> in <module>()
----> 1 state = pt.datafiles.load_cpptraj_state(command)
      2 # tell 'run' to perform all calculation
      3 state.run()

NameError: name 'pt' is not defined

# get data
state.data

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-6eac12750354> in <module>()
      1 # get data
----> 2 state.data

NameError: name 'state' is not defined

print([dset.key for dset in state.data])
print(state.data['MyMatrix'].values.shape)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-eba03c85f5cc> in <module>()
----> 1 print([dset.key for dset in state.data])
      2 print(state.data['MyMatrix'].values.shape)

NameError: name 'state' is not defined

# reduced_cartesian corresponds to dataset with names of 'Mode1', 'Mode2'
mode_0, mode_1 = -state.data['Mode1'].values, -state.data['Mode2'].values
# mode_0, mode_1 = state.data['Mode1'].values, state.data['Mode2'].values

# plot: cpptraj
fig = plt.figure()
ax_0 = fig.add_subplot(211)
ax_0.scatter(mode_0, mode_1, marker='o', c=range(traj.n_frames), alpha=0.5)
ax_0.set_xlabel('PC1')
ax_0.set_ylabel('PC2')
ax_0.set_xlim([-60, 80])
ax_0.set_ylim([-40, 40])
ax_0.set_title('cpptraj-flip')
ax_0.set_yticks([-40, -20, 0, 20, 40])

# plot: sklearn
fig = plt.figure()
ax_1 = fig.add_subplot(212)
ax_1.scatter(reduced_cartesian[:, 0], reduced_cartesian[:,1], marker='o', c=range(traj.n_frames), alpha=0.5)
ax_1.set_xlabel('PC1')
ax_1.set_ylabel('PC2')
ax_1.set_yticks([-40, -20, 0, 20, 40])
ax_1.set_title('sklearn')

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-28a51c3c6770> in <module>()
      1 # reduced_cartesian corresponds to dataset with names of 'Mode1', 'Mode2'
----> 2 mode_0, mode_1 = -state.data['Mode1'].values, -state.data['Mode2'].values
      3 # mode_0, mode_1 = state.data['Mode1'].values, state.data['Mode2'].values
      4 
      5 # plot: cpptraj

NameError: name 'state' is not defined

print('sklearn \n')
print(reduced_cartesian[:, 0], reduced_cartesian[:, 1])

print('\ncpptraj\n') # flip sign, why?
print(mode_0, mode_1)

sklearn

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-16-47225f2601de> in <module>()
      1 print('sklearn \n')
----> 2 print(reduced_cartesian[:, 0], reduced_cartesian[:, 1])
      3 
      4 print('\ncpptraj\n') # flip sign, why?
      5 print(mode_0, mode_1)

NameError: name 'reduced_cartesian' is not defined

Tutorials adapted from MDtraj ¶

Clustering with scipy ¶

Principal component analysis with sklearn ¶

PCA analysis with sklearn (adapted from `mdtraj` and cpptraj tutorial)¶

Compare to cpptraj data¶

Tutorials adapted from MDtraj¶

Clustering with scipy¶

Principal component analysis with sklearn¶

PCA analysis with sklearn (adapted from mdtraj and cpptraj tutorial)¶

Compare to cpptraj data¶

Tutorials adapted from MDtraj ¶

Clustering with scipy ¶

Principal component analysis with sklearn ¶

PCA analysis with sklearn (adapted from `mdtraj` and cpptraj tutorial)¶