"""
Class to perform various rotations of factor loading matrices.
:author: Jeremy Biggs (jeremy.m.biggs@gmail.com)
:author: Nitin Madnani (nmadnani@ets.org)
:organization: Educational Testing Service
:date: 2022-09-05
"""
import numpy as np
import scipy as sp
from sklearn.base import BaseEstimator
ORTHOGONAL_ROTATIONS = ["varimax", "oblimax", "quartimax", "equamax", "geomin_ort"]
OBLIQUE_ROTATIONS = ["promax", "oblimin", "quartimin", "geomin_obl"]
POSSIBLE_ROTATIONS = ORTHOGONAL_ROTATIONS + OBLIQUE_ROTATIONS
[docs]
class Rotator(BaseEstimator):
"""
Perform rotations on an unrotated factor loading matrix.
The Rotator class takes an (unrotated) factor loading matrix and
performs one of several rotations.
Parameters
----------
method : str, optional
The factor rotation method. Options include:
(a) varimax (orthogonal rotation)
(b) promax (oblique rotation)
(c) oblimin (oblique rotation)
(d) oblimax (orthogonal rotation)
(e) quartimin (oblique rotation)
(f) quartimax (orthogonal rotation)
(g) equamax (orthogonal rotation)
(h) geomin_obl (oblique rotation)
(i) geomin_ort (orthogonal rotation)
Defaults to 'varimax'.
normalize : bool or None, optional
Whether to perform Kaiser normalization and de-normalization prior
to and following rotation. Used for 'varimax' and 'promax' rotations.
If ``None``, default for 'promax' is ``False``, and default for
'varimax' is ``True``.
Defaults to ``None``.
power : int, optional
The exponent to which to raise the promax loadings (minus 1).
Numbers should generally range from 2 to 4.
Defaults to 4.
kappa : float, optional
The kappa value for the 'equamax' objective. Ignored if the method
is not 'equamax'.
Defaults to 0.
gamma : int, optional
The gamma level for the 'oblimin' objective. Ignored if the method
is not 'oblimin'.
Defaults to 0.
delta : float, optional
The delta level for 'geomin' objectives. Ignored if the method is
not 'geomin_*'.
Defaults to 0.01.
max_iter : int, optional
The maximum number of iterations. Used for 'varimax' and 'oblique'
rotations.
Defaults to 1000.
tol : float, optional
The convergence threshold. Used for 'varimax' and 'oblique' rotations.
Defaults to 1e-5.
Attributes
----------
loadings_ : :obj:`numpy.ndarray`, shape (``n_features``, ``n_factors``)
The loadings matrix.
rotation_ : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The rotation matrix.
phi_ : :obj:`numpy.ndarray` or None
The factor correlations matrix. This only exists if ``method`` is
'oblique'.
Notes
-----
Most of the rotations in this class are ported from R's ``GPARotation``
package.
References
----------
[1] https://cran.r-project.org/web/packages/GPArotation/index.html
Examples
--------
>>> import pandas as pd
>>> from factor_analyzer import FactorAnalyzer, Rotator
>>> df_features = pd.read_csv('test02.csv')
>>> fa = FactorAnalyzer(rotation=None)
>>> fa.fit(df_features)
>>> rotator = Rotator()
>>> rotator.fit_transform(fa.loadings_)
array([[-0.07693215, 0.04499572, 0.76211208],
[ 0.01842035, 0.05757874, 0.01297908],
[ 0.06067925, 0.70692662, -0.03311798],
[ 0.11314343, 0.84525117, -0.03407129],
[ 0.15307233, 0.5553474 , -0.00121802],
[ 0.77450832, 0.1474666 , 0.20118338],
[ 0.7063001 , 0.17229555, -0.30093981],
[ 0.83990851, 0.15058874, -0.06182469],
[ 0.76620579, 0.1045194 , -0.22649615],
[ 0.81372945, 0.20915845, 0.07479506]])
"""
def __init__(
self,
method="varimax",
normalize=True,
power=4,
kappa=0,
gamma=0,
delta=0.01,
max_iter=500,
tol=1e-5,
):
"""Initialize the rotator class."""
self.method = method
self.normalize = normalize
self.power = power
self.kappa = kappa
self.gamma = gamma
self.delta = delta
self.max_iter = max_iter
self.tol = tol
self.loadings_ = None
self.rotation_ = None
self.phi_ = None
def _oblimax_obj(self, loadings): # noqa: D401
"""
The Oblimax function objective.
Parameters
----------
loadings : array-like
The loading matrix
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
gradient = -(
4 * loadings**3 / (np.sum(loadings**4))
- 4 * loadings / (np.sum(loadings**2))
)
criterion = np.log(np.sum(loadings**4)) - 2 * np.log(np.sum(loadings**2))
return {"grad": gradient, "criterion": criterion}
def _quartimax_obj(self, loadings):
"""
Quartimax function objective.
Parameters
----------
loadings : array-like
The loading matrix.
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
gradient = -(loadings**3)
criterion = -np.sum(np.diag(np.dot((loadings**2).T, loadings**2))) / 4
return {"grad": gradient, "criterion": criterion}
def _oblimin_obj(self, loadings): # noqa: D401
"""
The Oblimin function objective.
Parameters
----------
loadings : array-like
The loading matrix
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
X = np.dot(loadings**2, np.eye(loadings.shape[1]) != 1)
if self.gamma != 0:
p = loadings.shape[0]
X = np.diag(np.full(1, p)) - np.dot(np.zeros((p, p)), X)
gradient = loadings * X
criterion = np.sum(loadings**2 * X) / 4
return {"grad": gradient, "criterion": criterion}
def _quartimin_obj(self, loadings): # noqa: D401
"""
The Quartimin function objective.
Parameters
----------
loadings : array-like
The loading matrix.
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
X = np.dot(loadings**2, np.eye(loadings.shape[1]) != 1)
gradient = loadings * X
criterion = np.sum(loadings**2 * X) / 4
return {"grad": gradient, "criterion": criterion}
def _equamax_obj(self, loadings): # noqa: D401
"""
The Equamax function objective.
Parameters
----------
loadings : array-like
The loading matrix.
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
p, k = loadings.shape
N = np.ones(k) - np.eye(k)
M = np.ones(p) - np.eye(p)
loadings_squared = loadings**2
f1 = (
(1 - self.kappa)
* np.sum(np.diag(np.dot(loadings_squared.T, np.dot(loadings_squared, N))))
/ 4
)
f2 = (
self.kappa
* np.sum(np.diag(np.dot(loadings_squared.T, np.dot(M, loadings_squared))))
/ 4
)
gradient = (1 - self.kappa) * loadings * np.dot(
loadings_squared, N
) + self.kappa * loadings * np.dot(M, loadings_squared)
criterion = f1 + f2
return {"grad": gradient, "criterion": criterion}
def _geomin_obj(self, loadings): # noqa: D401
"""
The Geomin function objective.
Parameters
----------
loadings : array-like
The loading matrix.
Returns
-------
gradient_dict : dict
A dictionary containing the following keys:
(1) ``grad`` : :obj:`numpy.ndarray`, containing the gradients.
(2) ``criterion`` : float, containing the criterion for the objective.
"""
p, k = loadings.shape
loadings2 = loadings**2 + self.delta
pro = np.exp(np.log(loadings2).sum(1) / k)
rep = np.repeat(pro, k, axis=0).reshape(p, k)
gradient = (2 / k) * (loadings / loadings2) * rep
criterion = np.sum(pro)
return {"grad": gradient, "criterion": criterion}
def _oblique(self, loadings, method):
"""
Perform oblique rotations, except 'promax'.
A generic function for performing all oblique rotations, except for
promax, which is implemented separately.
Parameters
----------
loadings : array-like
The loading matrix
method : str
The obligue rotation method to use.
Returns
-------
loadings : :obj:`numpy.ndarray`, shape (``n_features``, ``n_factors``)
The loadings matrix
rotation_mtx : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The rotation matrix
phi : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The factor correlations matrix. This only exists if the
rotation is oblique.
"""
if method == "oblimin":
objective = self._oblimin_obj
elif method == "quartimin":
objective = self._quartimin_obj
elif method == "geomin_obl":
objective = self._geomin_obj
# initialize the rotation matrix
_, n_cols = loadings.shape
rotation_matrix = np.eye(n_cols)
# default alpha level
alpha = 1
rotation_matrix_inv = np.linalg.inv(rotation_matrix)
new_loadings = np.dot(loadings, rotation_matrix_inv.T)
obj = objective(new_loadings)
gradient = -np.dot(new_loadings.T, np.dot(obj["grad"], rotation_matrix_inv)).T
criterion = obj["criterion"]
obj_t = objective(new_loadings)
# main iteration loop, up to `max_iter`, calculate the gradient
for _ in range(0, self.max_iter + 1):
gradient_new = gradient - np.dot(
rotation_matrix,
np.diag(np.dot(np.ones(gradient.shape[0]), rotation_matrix * gradient)),
)
s = np.sqrt(np.sum(np.diag(np.dot(gradient_new.T, gradient_new))))
if s < self.tol:
break
alpha = 2 * alpha
# calculate the Hessian of the objective function
for _ in range(0, 11):
X = rotation_matrix - alpha * gradient_new
v = 1 / np.sqrt(np.dot(np.ones(X.shape[0]), X**2))
new_rotation_matrix = np.dot(X, np.diag(v))
new_loadings = np.dot(loadings, np.linalg.inv(new_rotation_matrix).T)
obj_t = objective(new_loadings)
improvement = criterion - obj_t["criterion"]
if improvement > 0.5 * s**2 * alpha:
break
alpha = alpha / 2
rotation_matrix = new_rotation_matrix
criterion = obj_t["criterion"]
gradient = -np.dot(
np.dot(new_loadings.T, obj_t["grad"]),
np.linalg.inv(new_rotation_matrix),
).T
# calculate phi
phi = np.dot(rotation_matrix.T, rotation_matrix)
# convert loadings matrix to data frame
loadings = new_loadings.copy()
return loadings, rotation_matrix, phi
def _orthogonal(self, loadings, method):
"""
Perform orthogonal rotations, except 'varimax'.
A generic function for performing all orthogonal rotations, except for
varimax, which is implemented separately.
Parameters
----------
loadings : :obj:`numpy.ndarray`
The loading matrix
method : str
The orthogonal rotation method to use.
Returns
-------
loadings : :obj:`numpy.ndarray`
The loadings matrix
rotation_mtx : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The rotation matrix
"""
if method == "oblimax":
objective = self._oblimax_obj
elif method == "quartimax":
objective = self._quartimax_obj
elif method == "equamax":
objective = self._equamax_obj
elif method == "geomin_ort":
objective = self._geomin_obj
arr = loadings.copy()
# initialize the rotation matrix
_, n_cols = arr.shape
rotation_matrix = np.eye(n_cols)
# default alpha level
alpha = 1
new_loadings = np.dot(arr, rotation_matrix)
obj = objective(new_loadings)
gradient = np.dot(arr.T, obj["grad"])
criterion = obj["criterion"]
obj_t = objective(new_loadings)
# main iteration loop, up to `max_iter`, calculate the gradient
for _ in range(0, self.max_iter + 1):
M = np.dot(rotation_matrix.T, gradient)
S = (M + M.T) / 2
gradient_new = gradient - np.dot(rotation_matrix, S)
s = np.sqrt(np.sum(np.diag(np.dot(gradient_new.T, gradient_new))))
if s < self.tol:
break
alpha = 2 * alpha
# calculate the Hessian of the objective function
for _ in range(0, 11):
X = rotation_matrix - alpha * gradient_new
U, _, V = np.linalg.svd(X)
new_rotation_matrix = np.dot(U, V)
new_loadings = np.dot(arr, new_rotation_matrix)
obj_t = objective(new_loadings)
if obj_t["criterion"] < (criterion - 0.5 * s**2 * alpha):
break
alpha = alpha / 2
rotation_matrix = new_rotation_matrix
criterion = obj_t["criterion"]
gradient = np.dot(arr.T, obj_t["grad"])
# convert loadings matrix to data frame
loadings = new_loadings.copy()
return loadings, rotation_matrix
def _varimax(self, loadings):
"""
Perform varimax (orthogonal) rotation, with optional Kaiser normalization.
Parameters
----------
loadings : array-like
The loading matrix.
Returns
-------
loadings : :obj:`numpy.ndarray`, shape (``n_features``, ``n_factors``)
The loadings matrix.
rotation_mtx : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The rotation matrix.
"""
X = loadings.copy()
n_rows, n_cols = X.shape
if n_cols < 2:
return X
# normalize the loadings matrix
# using sqrt of the sum of squares (Kaiser)
if self.normalize:
normalized_mtx = np.apply_along_axis(
lambda x: np.sqrt(np.sum(x**2)), 1, X.copy()
)
X = (X.T / normalized_mtx).T
# initialize the rotation matrix
# to N x N identity matrix
rotation_mtx = np.eye(n_cols)
d = 0
for _ in range(self.max_iter):
old_d = d
# take inner product of loading matrix
# and rotation matrix
basis = np.dot(X, rotation_mtx)
# transform data for singular value decomposition using updated formula :
# B <- t(x) %*% (z^3 - z %*% diag(drop(rep(1, p) %*% z^2))/p)
diagonal = np.diag(np.squeeze(np.repeat(1, n_rows).dot(basis**2)))
transformed = X.T.dot(basis**3 - basis.dot(diagonal) / n_rows)
# perform SVD on
# the transformed matrix
U, S, V = np.linalg.svd(transformed)
# take inner product of U and V, and sum of S
rotation_mtx = np.dot(U, V)
d = np.sum(S)
# check convergence
if d < old_d * (1 + self.tol):
break
# take inner product of loading matrix
# and rotation matrix
X = np.dot(X, rotation_mtx)
# de-normalize the data
if self.normalize:
X = X.T * normalized_mtx
else:
X = X.T
# convert loadings matrix to data frame
loadings = X.T.copy()
return loadings, rotation_mtx
def _promax(self, loadings):
"""
Perform promax (oblique) rotation, with optional Kaiser normalization.
Parameters
----------
loadings : array-like
The loading matrix
Returns
-------
loadings : :obj:`numpy.ndarray`, shape (``n_features``, ``n_factors``)
The loadings matrix
rotation_mtx : :obj:`numpy.ndarray`, shape (``n_factors``, ``n_factors``)
The rotation matrix
phi : :obj:`numpy.ndarray` or None, shape (``n_factors``, ``n_factors``)
The factor correlations matrix. This only exists if the rotation
is oblique.
"""
X = loadings.copy()
_, n_cols = X.shape
if n_cols < 2:
return X
if self.normalize:
# pre-normalization is done in R's
# `kaiser()` function when rotate='Promax'.
array = X.copy()
h2 = np.diag(np.dot(array, array.T))
h2 = np.reshape(h2, (h2.shape[0], 1))
weights = array / np.sqrt(h2)
else:
weights = X.copy()
# first get varimax rotation
X, rotation_mtx = self._varimax(weights)
Y = X * np.abs(X) ** (self.power - 1)
# fit linear regression model
coef = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y))
# calculate diagonal of inverse square
try:
diag_inv = np.diag(sp.linalg.inv(np.dot(coef.T, coef)))
except np.linalg.LinAlgError:
diag_inv = np.diag(sp.linalg.pinv(np.dot(coef.T, coef)))
# transform and calculate inner products
coef = np.dot(coef, np.diag(np.sqrt(diag_inv)))
z = np.dot(X, coef)
if self.normalize:
# post-normalization is done in R's
# `kaiser()` function when rotate='Promax'
z = z * np.sqrt(h2)
rotation_mtx = np.dot(rotation_mtx, coef)
coef_inv = np.linalg.inv(coef)
phi = np.dot(coef_inv, coef_inv.T)
# convert loadings matrix to data frame
loadings = z.copy()
return loadings, rotation_mtx, phi
[docs]
def fit(self, X, y=None):
"""
Compute the factor rotation.
Parameters
----------
X : array-like
The factor loading matrix, shape (``n_features``, ``n_factors``)
y : ignored
Returns
-------
self
Example
-------
>>> import pandas as pd
>>> from factor_analyzer import FactorAnalyzer, Rotator
>>> df_features = pd.read_csv('test02.csv')
>>> fa = FactorAnalyzer(rotation=None)
>>> fa.fit(df_features)
>>> rotator = Rotator()
>>> rotator.fit(fa.loadings_)
"""
self.fit_transform(X)
return self