A Code listing: svd.cpp
Note that the code below uses \(i\) and \(j\) as matrix indices rather than \(u\) and \(i\) which is specific to the Movielens models in this project.
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(RcppProgress)]]
#include <RcppArmadillo.h>
#include <progress.hpp>
#include <progress_bar.hpp>
/**
* @brief Simon Funk's Matrix Factorization.
*
* Approximate Y as U*V^T where U and V each have @p nFeatures columns.
*
* @param coo_i User indexes of the rating matrix Y.
* @param coo_j Movie indexes of the rating matrix Y.
* @param coo_x Ratings in the rating matrix Y. Note Y is a sparse matrix, where
* a zero represents no rating given.
* @param nFeatures the number of features to use, i.e. the number of columns
* in U and V.
* @steps Number of epochs. Each epoch refines the U and V estimates by iterating
* through all known ratings once.
* @regCoef Regularization coefficient, prevents overfitting.
* @learningRate learning rate of gradient descent.
*
* @return An @c RCpp::list object containing U and V.
*
* @see https://sifter.org/~simon/journal/20061211.html
* @see https://github.com/ludovikcoba/rrecsys/
*/
// [[Rcpp::export]]
::List funkCpp(
Rcpp::NumericVector coo_i,
Rcpp::NumericVector coo_j,
Rcpp::NumericVector coo_x,
Rcppint nFeatures,
int steps,
double regCoef,
double learningRate
)
{
int nUsers = Rcpp::max(coo_i)+1; // number of users
int nItems = Rcpp::max(coo_j)+1; // number of movies (items)
int nRatings = coo_x.size(); // number of known ratings
// Seed U and V with random values
::mat U(nUsers, nFeatures, arma::fill::randu);
arma::mat V(nItems, nFeatures, arma::fill::randu);
arma*= sqrt(0.5/nFeatures);
U *= sqrt(0.5/nFeatures);
V
// Diagnostics logging
::Rcerr << "nUsers:" << nUsers << ", ";
Rcpp::Rcerr << "nItems:" << nItems << ", ";
Rcpp::Rcerr << "nRatings:" << nRatings << std::endl;
Rcpp
// Progress bar for R console
(steps, true);
Progress p
// Main loop
for (int ss = 0; ss < steps; ss++) {
// Kill program if user has requested it (Ctrl+C in most consoles)
::checkUserInterrupt();
Rcpp
// iterate over known ratings
for (int r = 0; r < nRatings; r++) {
int i = coo_i[r]; // user index
int j = coo_j[r]; // item index
double err = coo_x[r] - arma::dot(U.row(i), V.row(j)); // prediction error
// update features
.row(i) += learningRate * (err*V.row(j) - regCoef*U.row(i));
U.row(j) += learningRate * (err*U.row(i) - regCoef*V.row(j));
V}
// Report progress
.increment();
p}
::Rcerr << std::endl; // add gap between progress bars of multiple runs
Rcpp
// Return list(U,V)
::List ret;
Rcpp["U"] = U;
ret["V"] = V;
retreturn ret;
}