diff --git a/ref/Makefile b/ref/Makefile
index 901f230bb54afb328c675183542edafff56c789c..cc7aaa4467017777414fb7f3914968b935e58323 100644
--- a/ref/Makefile
+++ b/ref/Makefile
@@ -1,13 +1,34 @@
-CC=/usr/bin/cc
-CFLAGS=-O3 -DHAVE_CBRT -DXC_DONT_COMPILE_KXC -DXC_DONT_COMPILE_LXC -std=gnu99 -lm
-COMMON_FILES=common/gga.c common/gga_x_pbe.c common/util.c common/functionals.c
+# GCC
+#CC=/usr/bin/cc
+#CFLAGS=-O3 -DHAVE_CBRT -DXC_DONT_COMPILE_KXC -DXC_DONT_COMPILE_LXC -std=gnu99 -lm -fopenmp
 
-all: gga_x_pbe
+# Intel compiler
+CC=/apps/intel/ComposerXE2019/compilers_and_libraries_2019.5.281/linux/bin/intel64/icc
+#CFLAGS=-xHost -O3 -DHAVE_CBRT -DXC_DONT_COMPILE_KXC -DXC_DONT_COMPILE_LXC -std=gnu99 -lm -fopenmp
+CFLAGS=-xCORE-AVX512 -Ofast -qopt-zmm-usage=high -DHAVE_CBRT -DXC_DONT_COMPILE_KXC -DXC_DONT_COMPILE_LXC -std=gnu99 -lm -fopenmp
 
-gga_x_pbe: gga_x_pbe.c $(COMMON_FILES)
-	$(CC) $^ -o $@ $(CFLAGS)
+LINKER=$(CC)
+BUILD_DIR=build
+Q?=@
+
+VPATH=common libxc genxc
+OBJ =   $(patsubst common/%.c, $(BUILD_DIR)/%.o, $(wildcard common/*.c))
+OBJ +=  $(patsubst libxc/%.c, $(BUILD_DIR)/%.o, $(wildcard libxc/*.c))
+OBJ +=  $(patsubst genxc/%.c, $(BUILD_DIR)/%.o, $(wildcard genxc/*.c))
+
+main: $(BUILD_DIR) main.c $(OBJ)
+	$(info ===>  LINKING AND COMPILING $@)
+	$(Q)${LINKER} -o $@ main.c $(OBJ) $(CFLAGS)
+
+$(BUILD_DIR)/%.o: %.c
+	$(info ===>  COMPILE  $@)
+	$(Q)$(CC) -c $< -o $@ $(CFLAGS)
+
+$(BUILD_DIR):
+	@mkdir -p $(BUILD_DIR)
 
 .PHONY: clean
 
 clean:
 	@rm -f gga_x_pbe
+	@rm -rf $(BUILD_DIR)
diff --git a/ref/common/gga_x_pbeopt.h b/ref/common/gga_x_pbeopt.h
deleted file mode 100644
index df3a25be8364c9a6350bff3ca14bf859f056a019..0000000000000000000000000000000000000000
--- a/ref/common/gga_x_pbeopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#define XC_GGA_X_PBE_OPT           1101 /* Perdew, Burke & Ernzerhof exchange             */
-#define XC_GGA_X_PBE_R_OPT         1102 /* Perdew, Burke & Ernzerhof exchange (revised)   */
-#define XC_GGA_X_PBE_SOL_OPT       1116 /* Perdew, Burke & Ernzerhof exchange (solids)    */
-#define XC_GGA_X_XPBE_OPT          1123 /* xPBE reparametrization by Xu & Goddard         */
-#define XC_GGA_X_PBE_JSJR_OPT      1126 /* JSJR reparametrization by Pedroza, Silva & Capelle */
-#define XC_GGA_X_PBEK1_VDW_OPT     1140 /* PBE reparametrization for vdW                  */
-#define XC_GGA_X_APBE_OPT          1184 /* mu fixed from the semiclassical neutral atom   */
-#define XC_GGA_X_PBE_TCA_OPT       1059 /* PBE revised by Tognetti et al                  */
-#define XC_GGA_X_PBE_MOL_OPT       1049 /* Del Campo, Gazquez, Trickey and Vela (PBE-like) */
-#define XC_GGA_X_LAMBDA_LO_N_OPT   1045 /* lambda_LO(N) version of PBE                    */
-#define XC_GGA_X_LAMBDA_CH_N_OPT   1044 /* lambda_CH(N) version of PBE                    */
-#define XC_GGA_X_LAMBDA_OC2_N_OPT  1040 /* lambda_OC2(N) version of PBE                   */
-#define XC_GGA_X_BCGP_OPT          1038 /* Burke, Cancio, Gould, and Pittalis             */
-#define XC_GGA_X_PBEFE_OPT         1265 /* PBE for formation energies                     */
diff --git a/ref/common/mix_func.c b/ref/common/mix_func.c
deleted file mode 100644
index aeb2f3084fd0b49282814046c81f3b87850180e3..0000000000000000000000000000000000000000
--- a/ref/common/mix_func.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
-  Copyright (C) 2006-2021 M.A.L. Marques
-                2018-2021 Susi Lehtola
-                2019 X. Andrade
-
-  This Source Code Form is subject to the terms of the Mozilla Public
-  License, v. 2.0. If a copy of the MPL was not distributed with this
-  file, You can obtain one at http://mozilla.org/MPL/2.0/.
-*/
-
-
-#include "util.h"
-
-/* initializes the mixing */
-void
-xc_mix_init(xc_func_type *p, int n_funcs, const int *funcs_id, const double *mix_coef)
-{
-  int ii;
-
-  assert(p != NULL);
-  assert(p->func_aux == NULL && p->mix_coef == NULL);
-
-  /* allocate structures needed for mixed functional */
-  p->n_func_aux = n_funcs;
-  p->mix_coef   = (double *) libxc_malloc(n_funcs*sizeof(double));
-  p->func_aux   = (xc_func_type **) libxc_malloc(n_funcs*sizeof(xc_func_type *));
-
-  for(ii=0; ii<n_funcs; ii++){
-    p->mix_coef[ii] = mix_coef[ii];
-    p->func_aux[ii] = (xc_func_type *) libxc_malloc(sizeof(xc_func_type));
-    xc_func_init (p->func_aux[ii], funcs_id[ii], p->nspin);
-  }
-
-  /* initialize variables */
-  p->hyb_number_terms = 0;
-  p->hyb_type  = NULL;
-  p->hyb_coeff = NULL;
-  p->hyb_omega = NULL;
-
-  p->nlc_b     = 0.0;
-  p->nlc_C     = 0.0;
-}
-
-#ifdef HAVE_CUDA
-__global__ static void add_to_mix_gpu(size_t np, double * dst, double coeff, const double *src){
-  size_t ip = blockIdx.x * blockDim.x + threadIdx.x;
-  if(ip < np) dst[ip] += coeff*src[ip];
-}
-#endif
-
-static void add_to_mix(size_t np, double * dst, double coeff, const double *src){
-#ifndef HAVE_CUDA
-  size_t ip;
-  for(ip = 0; ip < np; ip++) dst[ip] += coeff*src[ip];
-#else
-  size_t nblocks = np/CUDA_BLOCK_SIZE;
-  if(np != nblocks*CUDA_BLOCK_SIZE) nblocks++;
-  add_to_mix_gpu<<<nblocks, CUDA_BLOCK_SIZE>>>(np, dst, coeff, src);
-#endif
-}
-
-#define is_mgga(id)   ((id) == XC_FAMILY_MGGA)
-#define is_gga(id)    ((id) == XC_FAMILY_GGA || is_mgga(id))
-#define is_lda(id)    ((id) == XC_FAMILY_LDA ||  is_gga(id))
-#define safe_free(pt) if(pt != NULL) libxc_free(pt)
-#define sum_var(VAR) add_to_mix(np*dim->VAR, VAR, func->mix_coef[ii], x ## VAR);
-
-void
-xc_mix_func(const xc_func_type *func, size_t np,
-            const double *rho, const double *sigma, const double *lapl, const double *tau,
-            double *zk MGGA_OUT_PARAMS_NO_EXC(XC_COMMA double *, ))
-{
-  const xc_func_type *aux;
-  double *xzk MGGA_OUT_PARAMS_NO_EXC(XC_COMMA *, x);
-  int ii;
-
-  const xc_dimensions *dim = &(func->dim);
-
-  /* Sanity check: have we claimed the highest possible derivatives?
-     First, check for the lowest common derivative (also need to make
-     sure the derivatives have been compiled in!)
-  */
-  int have_vxc = XC_FLAGS_I_HAVE_VXC;
-  int have_fxc = XC_FLAGS_I_HAVE_FXC;
-  int have_kxc = XC_FLAGS_I_HAVE_KXC;
-  int have_lxc = XC_FLAGS_I_HAVE_LXC;
-  for(ii=0; ii<func->n_func_aux; ii++){
-    aux = func->func_aux[ii];
-    if(! (aux->info->flags & XC_FLAGS_HAVE_VXC))
-      have_vxc = 0;
-    if(! (aux->info->flags & XC_FLAGS_HAVE_FXC))
-      have_fxc = 0;
-    if(! (aux->info->flags & XC_FLAGS_HAVE_KXC))
-      have_kxc = 0;
-    if(! (aux->info->flags & XC_FLAGS_HAVE_LXC))
-      have_lxc = 0;
-  }
-  /* Then, for the actual checks */
-  assert(have_lxc == (func->info->flags & XC_FLAGS_I_HAVE_LXC));
-  assert(have_kxc == (func->info->flags & XC_FLAGS_I_HAVE_KXC));
-  assert(have_fxc == (func->info->flags & XC_FLAGS_I_HAVE_FXC));
-  assert(have_vxc == (func->info->flags & XC_FLAGS_I_HAVE_VXC));
-
-  /* Sanity check: if component needs the Laplacian, then the mix
-     must require it too */
-  int need_laplacian = 0;
-  for(ii=0; ii<func->n_func_aux; ii++){
-    aux = func->func_aux[ii];
-    if(aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN)
-      need_laplacian = XC_FLAGS_NEEDS_LAPLACIAN;
-  }
-  assert((func->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) == need_laplacian);
-  /* Same for tau */
-  int need_tau = 0;
-  for(ii=0; ii<func->n_func_aux; ii++){
-    aux = func->func_aux[ii];
-    if(aux->info->flags & XC_FLAGS_NEEDS_TAU)
-      need_tau = XC_FLAGS_NEEDS_TAU;
-  }
-  assert((func->info->flags & XC_FLAGS_NEEDS_TAU) == need_tau);
-
-  /* Check compatibility of the individual components */
-  for(ii=0; ii<func->n_func_aux; ii++){
-    aux = func->func_aux[ii];
-    /* Sanity check: if component is GGA or meta-GGA, mix functional
-       must also be GGA or meta-GGA */
-    if(is_gga(aux->info->family))
-      assert(is_gga(func->info->family));
-    if(is_mgga(aux->info->family) && !is_mgga(func->info->family))
-      assert(is_mgga(func->info->family));
-    /* Sanity checks: if mix functional has higher derivatives, these
-       must also exist in the individual components */
-    if(func->info->flags & XC_FLAGS_HAVE_VXC)
-      assert(aux->info->flags & XC_FLAGS_HAVE_VXC);
-    if(func->info->flags & XC_FLAGS_HAVE_FXC)
-      assert(aux->info->flags & XC_FLAGS_HAVE_FXC);
-    if(func->info->flags & XC_FLAGS_HAVE_KXC)
-      assert(aux->info->flags & XC_FLAGS_HAVE_KXC);
-    if(func->info->flags & XC_FLAGS_HAVE_LXC)
-      assert(aux->info->flags & XC_FLAGS_HAVE_LXC);
-  }
-
-  /* prepare buffers that will hold the results from the individual functionals */
-  xzk MGGA_OUT_PARAMS_NO_EXC(=, x) = NULL;
-
-  /* allocate buffers */
-  xc_mgga_vars_allocate_all(func->info->family, np, dim,
-                            zk != NULL, vrho != NULL, v2rho2 != NULL, v3rho3 != NULL, v4rho4 != NULL,
-                            &xzk MGGA_OUT_PARAMS_NO_EXC(XC_COMMA &, x));
-
-  /* Proceed by computing the mix */
-  for(ii=0; ii<func->n_func_aux; ii++){
-    aux = func->func_aux[ii];
-
-    /* Evaluate the functional */
-    switch(aux->info->family){
-    case XC_FAMILY_LDA:
-      xc_lda(aux, np, rho,
-             xzk LDA_OUT_PARAMS_NO_EXC(XC_COMMA, x));
-      break;
-    case XC_FAMILY_GGA:
-      xc_gga(aux, np, rho, sigma,
-             xzk GGA_OUT_PARAMS_NO_EXC(XC_COMMA, x));
-      break;
-    case XC_FAMILY_MGGA:
-      xc_mgga(aux, np, rho, sigma, lapl, tau,
-              xzk MGGA_OUT_PARAMS_NO_EXC(XC_COMMA, x));
-      break;
-    }
-
-    /* Do the mixing */
-    if(zk != NULL) {
-      sum_var(zk);
-    }
-
- #ifndef XC_DONT_COMPILE_VXC
-    if(vrho != NULL) {
-      sum_var(vrho);
-
-      if(is_gga(aux->info->family)) {
-        sum_var(vsigma);
-      }
-
-      if(is_mgga(aux->info->family)) {
-        if(aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) {
-          sum_var(vlapl);
-        }
-        if(aux->info->flags & XC_FLAGS_NEEDS_TAU) {
-          sum_var(vtau);
-        }
-      }
-    }
-
-#ifndef XC_DONT_COMPILE_FXC
-    if(v2rho2 != NULL){
-      sum_var(v2rho2);
-
-      if(is_gga(aux->info->family)) {
-        sum_var(v2rhosigma);
-        sum_var(v2sigma2);
-      }
-
-      if(is_mgga(aux->info->family)) {
-        if(aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) {
-          sum_var(v2rholapl);
-          sum_var(v2sigmalapl);
-          sum_var(v2lapl2);
-        }
-        if(aux->info->flags & XC_FLAGS_NEEDS_TAU) {
-          sum_var(v2rhotau);
-          sum_var(v2sigmatau);
-          sum_var(v2tau2);
-        }
-        if((aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) && (aux->info->flags & XC_FLAGS_NEEDS_TAU)) {
-          sum_var(v2lapltau);
-        }
-      }
-    }
-
-#ifndef XC_DONT_COMPILE_KXC
-    if(v3rho3 != NULL){
-      sum_var(v3rho3);
-
-      if(is_gga(aux->info->family)) {
-        sum_var(v3rho2sigma);
-        sum_var(v3rhosigma2);
-        sum_var(v3sigma3);
-      }
-
-      if(is_mgga(aux->info->family)) {
-        if(aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) {
-          sum_var(v3rho2lapl);
-          sum_var(v3rhosigmalapl);
-          sum_var(v3rholapl2);
-          sum_var(v3sigma2lapl);
-          sum_var(v3sigmalapl2);
-          sum_var(v3lapl3);
-        }
-        if(aux->info->flags & XC_FLAGS_NEEDS_TAU) {
-          sum_var(v3rho2tau);
-          sum_var(v3rhosigmatau);
-          sum_var(v3rhotau2);
-          sum_var(v3sigma2tau);
-          sum_var(v3sigmatau2);
-          sum_var(v3tau3);
-        }
-        if((aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) && (aux->info->flags & XC_FLAGS_NEEDS_TAU)) {
-          sum_var(v3rholapltau);
-          sum_var(v3sigmalapltau);
-          sum_var(v3lapl2tau);
-          sum_var(v3lapltau2);
-        }
-      }
-    }
-
-#ifndef XC_DONT_COMPILE_LXC
-    if(v4rho4 != NULL){
-      sum_var(v4rho4);
-
-      if(is_gga(aux->info->family)) {
-        sum_var(v4rho3sigma);
-        sum_var(v4rho2sigma2);
-        sum_var(v4rhosigma3);
-        sum_var(v4sigma4);
-      }
-      if(is_mgga(aux->info->family)) {
-        if(aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) {
-          sum_var(v4rho3lapl);
-          sum_var(v4rho2sigmalapl);
-          sum_var(v4rho2lapl2);
-          sum_var(v4rhosigma2lapl);
-          sum_var(v4rhosigmalapl2);
-          sum_var(v4rholapl3);
-          sum_var(v4sigma3lapl);
-          sum_var(v4sigma2lapl2);
-          sum_var(v4sigmalapl3);
-          sum_var(v4lapl4);
-        }
-        if(aux->info->flags & XC_FLAGS_NEEDS_TAU) {
-          sum_var(v4rho3tau);
-          sum_var(v4rho2sigmatau);
-          sum_var(v4rho2tau2);
-          sum_var(v4rhosigma2tau);
-          sum_var(v4rhosigmatau2);
-          sum_var(v4rhotau3);
-          sum_var(v4sigma3tau);
-          sum_var(v4sigma2tau2);
-          sum_var(v4sigmatau3);
-          sum_var(v4tau4);
-        }
-        if((aux->info->flags & XC_FLAGS_NEEDS_LAPLACIAN) && (aux->info->flags & XC_FLAGS_NEEDS_TAU)) {
-          sum_var(v4rho2lapltau);
-          sum_var(v4rhosigmalapltau);
-          sum_var(v4rholapl2tau);
-          sum_var(v4rholapltau2);
-          sum_var(v4sigma2lapltau);
-          sum_var(v4sigmalapl2tau);
-          sum_var(v4sigmalapltau2);
-          sum_var(v4lapl3tau);
-          sum_var(v4lapl2tau2);
-          sum_var(v4lapltau3);
-        }
-      }
-    }
-#endif
-#endif
-#endif
-#endif
-  } /* end functional loop */
-
-  /* deallocate internal buffers */
-  xc_mgga_vars_free_all(xzk MGGA_OUT_PARAMS_NO_EXC(XC_COMMA, x));
-}
diff --git a/ref/common/registered_funcs.h b/ref/common/registered_funcs.h
index 48552caae0624d741d47afeab614ffe4724ef549..b278b111ea8e539725b1d145a0c41cb4adf11fb2 100644
--- a/ref/common/registered_funcs.h
+++ b/ref/common/registered_funcs.h
@@ -13,14 +13,14 @@ xc_functional_key_t xc_functional_keys[] = {
 {"gga_x_pbe_jsjr", 126},
 {"gga_x_pbek1_vdw", 140},
 {"gga_x_pbefe", 265},
-{"gga_x_pbe_mol_opt", 1049},
-{"gga_x_pbe_tca_opt", 1059},
-{"gga_x_pbe_opt", 1101},
-{"gga_x_pbe_r_opt", 1102},
-{"gga_x_pbe_sol_opt", 1116},
-{"gga_x_pbe_jsjr_opt", 1126},
-{"gga_x_pbek1_vdw_opt", 1140},
-{"gga_x_pbefe_opt", 1265},
+{"gga_x_pbe_mol_genxc", 1049},
+{"gga_x_pbe_tca_genxc", 1059},
+{"gga_x_pbe_genxc", 1101},
+{"gga_x_pbe_r_genxc", 1102},
+{"gga_x_pbe_sol_genxc", 1116},
+{"gga_x_pbe_jsjr_genxc", 1126},
+{"gga_x_pbek1_vdw_genxc", 1140},
+{"gga_x_pbefe_genxc", 1265},
 {"", -1},
 };
 
@@ -35,15 +35,15 @@ extern xc_func_info_type xc_func_info_gga_x_pbefe;
 extern xc_func_info_type xc_func_info_gga_x_bcgp;
 
 /*
-extern xc_func_info_type xc_func_info_gga_x_pbe_mol_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbe_tca_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbe_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbe_r_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbe_sol_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbe_jsjr_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbek1_vdw_opt;
-extern xc_func_info_type xc_func_info_gga_x_pbefe_opt;
-extern xc_func_info_type xc_func_info_gga_x_bcgp_opt;
+extern xc_func_info_type xc_func_info_gga_x_pbe_mol_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbe_tca_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbe_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbe_r_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbe_sol_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbe_jsjr_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbek1_vdw_genxc;
+extern xc_func_info_type xc_func_info_gga_x_pbefe_genxc;
+extern xc_func_info_type xc_func_info_gga_x_bcgp_genxc;
 */
 
 const xc_func_info_type *xc_lda_known_funct[] = {};
@@ -59,15 +59,15 @@ const xc_func_info_type *xc_gga_known_funct[] = {
   &xc_func_info_gga_x_pbefe,
   &xc_func_info_gga_x_bcgp,
 /*
-  &xc_func_info_gga_x_pbe_mol_opt,
-  &xc_func_info_gga_x_pbe_tca_opt,
-  &xc_func_info_gga_x_pbe_opt,
-  &xc_func_info_gga_x_pbe_r_opt,
-  &xc_func_info_gga_x_pbe_sol_opt,
-  &xc_func_info_gga_x_pbe_jsjr_opt,
-  &xc_func_info_gga_x_pbek1_vdw_opt,
-  &xc_func_info_gga_x_pbefe_opt,
-  &xc_func_info_gga_x_bcgp_opt,
+  &xc_func_info_gga_x_pbe_mol_genxc,
+  &xc_func_info_gga_x_pbe_tca_genxc,
+  &xc_func_info_gga_x_pbe_genxc,
+  &xc_func_info_gga_x_pbe_r_genxc,
+  &xc_func_info_gga_x_pbe_sol_genxc,
+  &xc_func_info_gga_x_pbe_jsjr_genxc,
+  &xc_func_info_gga_x_pbek1_vdw_genxc,
+  &xc_func_info_gga_x_pbefe_genxc,
+  &xc_func_info_gga_x_bcgp_genxc,
 */
 };
 
diff --git a/ref/common/gga_x_pbeopt.c b/ref/genxc/gga_x_pbe_genxc.c
similarity index 79%
rename from ref/common/gga_x_pbeopt.c
rename to ref/genxc/gga_x_pbe_genxc.c
index ef52c37f5cec93f2846833998a3e531f95412f35..e32f683ed909cd7b0fbee7400d8f139ae3835325 100644
--- a/ref/common/gga_x_pbeopt.c
+++ b/ref/genxc/gga_x_pbe_genxc.c
@@ -6,9 +6,12 @@
  file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
 
-#include "util.h"
-#include "gga_x_pbeopt.h"
+
 #include <omp.h>
+//---
+#include "../common/util.h"
+#include "gga_x_pbe_genxc.h"
+
 
 typedef struct{
   double kappa, mu;
@@ -58,18 +61,18 @@ static const double pbe_bcgp_values[PBE_N_PAR] =
 static const double pbe_fe_values[PBE_N_PAR] =
   {0.437, 0.346};
 
-//#include "work_gga.c"
-//#define work_gga_x_pbe work_gga
-#include "work_gga_x_pbe.c"
-//#include "maple2c/gga_exc/gga_x_pbe.c"
-#include "../maple2c/gga_x_pbeopt.c"
+//#include "../libxc/work_gga.h"
+//#define work_gga_genxc work_gga
+//#include "../libxc/gga_x_pbe.h"
+#include "gga_x_pbe_genxc_funcs.h"
+#include "work_gga_genxc.h"
 
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_opt = {
-  XC_GGA_X_PBE_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_genxc = {
+  XC_GGA_X_PBE_GENXC,
   XC_EXCHANGE,
   "Perdew, Burke & Ernzerhof",
   XC_FAMILY_GGA,
@@ -78,14 +81,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_r_opt = {
-  XC_GGA_X_PBE_R_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_r_genxc = {
+  XC_GGA_X_PBE_R_GENXC,
   XC_EXCHANGE,
   "Revised PBE from Zhang & Yang",
   XC_FAMILY_GGA,
@@ -94,14 +97,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_r_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_r_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_sol_opt = {
-  XC_GGA_X_PBE_SOL_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_sol_genxc = {
+  XC_GGA_X_PBE_SOL_GENXC,
   XC_EXCHANGE,
   "Perdew, Burke & Ernzerhof SOL",
   XC_FAMILY_GGA,
@@ -110,14 +113,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_sol_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_sol_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_xpbe_opt = {
-  XC_GGA_X_XPBE_OPT,
+const xc_func_info_type xc_func_info_gga_x_xpbe_genxc = {
+  XC_GGA_X_XPBE_GENXC,
   XC_EXCHANGE,
   "Extended PBE by Xu & Goddard III",
   XC_FAMILY_GGA,
@@ -126,14 +129,14 @@ const xc_func_info_type xc_func_info_gga_x_xpbe_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_xpbe_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_jsjr_opt = {
-  XC_GGA_X_PBE_JSJR_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_jsjr_genxc = {
+  XC_GGA_X_PBE_JSJR_GENXC,
   XC_EXCHANGE,
   "Reparametrized PBE by Pedroza, Silva & Capelle",
   XC_FAMILY_GGA,
@@ -142,14 +145,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_jsjr_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_jsjr_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbek1_vdw_opt = {
-  XC_GGA_X_PBEK1_VDW_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbek1_vdw_genxc = {
+  XC_GGA_X_PBEK1_VDW_GENXC,
   XC_EXCHANGE,
   "Reparametrized PBE for vdW",
   XC_FAMILY_GGA,
@@ -158,14 +161,14 @@ const xc_func_info_type xc_func_info_gga_x_pbek1_vdw_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_k1_vdw_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_apbe_opt = {
-  XC_GGA_X_APBE_OPT,
+const xc_func_info_type xc_func_info_gga_x_apbe_genxc = {
+  XC_GGA_X_APBE_GENXC,
   XC_EXCHANGE,
   "mu fixed from the semiclassical neutral atom",
   XC_FAMILY_GGA,
@@ -174,14 +177,14 @@ const xc_func_info_type xc_func_info_gga_x_apbe_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_apbe_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_tca_opt = {
-  XC_GGA_X_PBE_TCA_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_tca_genxc = {
+  XC_GGA_X_PBE_TCA_GENXC,
   XC_EXCHANGE,
   "PBE revised by Tognetti et al",
   XC_FAMILY_GGA,
@@ -190,14 +193,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_tca_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_tca_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbe_mol_opt = {
-  XC_GGA_X_PBE_MOL_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbe_mol_genxc = {
+  XC_GGA_X_PBE_MOL_GENXC,
   XC_EXCHANGE,
   "Reparametrized PBE by del Campo, Gazquez, Trickey & Vela",
   XC_FAMILY_GGA,
@@ -206,14 +209,14 @@ const xc_func_info_type xc_func_info_gga_x_pbe_mol_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_mol_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_bcgp_opt = {
-  XC_GGA_X_BCGP_OPT,
+const xc_func_info_type xc_func_info_gga_x_bcgp_genxc = {
+  XC_GGA_X_BCGP_GENXC,
   XC_EXCHANGE,
   "Burke, Cancio, Gould, and Pittalis",
   XC_FAMILY_GGA,
@@ -222,14 +225,14 @@ const xc_func_info_type xc_func_info_gga_x_bcgp_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_bcgp_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_pbefe_opt = {
-  XC_GGA_X_PBEFE_OPT,
+const xc_func_info_type xc_func_info_gga_x_pbefe_genxc = {
+  XC_GGA_X_PBEFE_GENXC,
   XC_EXCHANGE,
   "PBE for formation energies",
   XC_FAMILY_GGA,
@@ -238,7 +241,7 @@ const xc_func_info_type xc_func_info_gga_x_pbefe_opt = {
   1e-15,
   {PBE_N_PAR, pbe_names, pbe_desc, pbe_fe_values, set_ext_params_cpy},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #define PBEL_N_PAR 3
@@ -277,8 +280,8 @@ pbe_lambda_set_ext_params(xc_func_type *p, const double *ext_params)
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_lambda_lo_n_opt = {
-  XC_GGA_X_LAMBDA_LO_N_OPT,
+const xc_func_info_type xc_func_info_gga_x_lambda_lo_n_genxc = {
+  XC_GGA_X_LAMBDA_LO_N_GENXC,
   XC_EXCHANGE,
   "lambda_LO(N) version of PBE",
   XC_FAMILY_GGA,
@@ -287,14 +290,14 @@ const xc_func_info_type xc_func_info_gga_x_lambda_lo_n_opt = {
   1e-15,
   {PBEL_N_PAR, pbe_lambda_names, pbe_lambda_desc, pbe_lambda_lo_n_values, pbe_lambda_set_ext_params},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_lambda_ch_n_opt = {
-  XC_GGA_X_LAMBDA_CH_N_OPT,
+const xc_func_info_type xc_func_info_gga_x_lambda_ch_n_genxc = {
+  XC_GGA_X_LAMBDA_CH_N_GENXC,
   XC_EXCHANGE,
   "lambda_CH(N) version of PBE",
   XC_FAMILY_GGA,
@@ -303,14 +306,14 @@ const xc_func_info_type xc_func_info_gga_x_lambda_ch_n_opt = {
   1e-15,
   {PBEL_N_PAR, pbe_lambda_names, pbe_lambda_desc, pbe_lambda_ch_n_values, pbe_lambda_set_ext_params},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 #ifdef __cplusplus
 extern "C"
 #endif
-const xc_func_info_type xc_func_info_gga_x_lambda_oc2_n_opt = {
-  XC_GGA_X_LAMBDA_OC2_N_OPT,
+const xc_func_info_type xc_func_info_gga_x_lambda_oc2_n_genxc = {
+  XC_GGA_X_LAMBDA_OC2_N_GENXC,
   XC_EXCHANGE,
   "lambda_OC2(N) version of PBE",
   XC_FAMILY_GGA,
@@ -319,7 +322,7 @@ const xc_func_info_type xc_func_info_gga_x_lambda_oc2_n_opt = {
   1e-15,
   {PBEL_N_PAR, pbe_lambda_names, pbe_lambda_desc, pbe_lambda_oc2_n_values, pbe_lambda_set_ext_params},
   gga_x_pbe_init, NULL,
-  NULL, work_gga_x_pbe, NULL
+  NULL, work_gga_genxc, NULL
 };
 
 
diff --git a/ref/genxc/gga_x_pbe_genxc.h b/ref/genxc/gga_x_pbe_genxc.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9404263b81b685cd411be01506be925b0d8564c
--- /dev/null
+++ b/ref/genxc/gga_x_pbe_genxc.h
@@ -0,0 +1,14 @@
+#define XC_GGA_X_PBE_GENXC          1101 /* Perdew, Burke & Ernzerhof exchange             */
+#define XC_GGA_X_PBE_R_GENXC        1102 /* Perdew, Burke & Ernzerhof exchange (revised)   */
+#define XC_GGA_X_PBE_SOL_GENXC      1116 /* Perdew, Burke & Ernzerhof exchange (solids)    */
+#define XC_GGA_X_XPBE_GENXC         1123 /* xPBE reparametrization by Xu & Goddard         */
+#define XC_GGA_X_PBE_JSJR_GENXC     1126 /* JSJR reparametrization by Pedroza, Silva & Capelle */
+#define XC_GGA_X_PBEK1_VDW_GENXC    1140 /* PBE reparametrization for vdW                  */
+#define XC_GGA_X_APBE_GENXC         1184 /* mu fixed from the semiclassical neutral atom   */
+#define XC_GGA_X_PBE_TCA_GENXC      1059 /* PBE revised by Tognetti et al                  */
+#define XC_GGA_X_PBE_MOL_GENXC      1049 /* Del Campo, Gazquez, Trickey and Vela (PBE-like) */
+#define XC_GGA_X_LAMBDA_LO_N_GENXC  1045 /* lambda_LO(N) version of PBE                    */
+#define XC_GGA_X_LAMBDA_CH_N_GENXC  1044 /* lambda_CH(N) version of PBE                    */
+#define XC_GGA_X_LAMBDA_OC2_N_GENXC 1040 /* lambda_OC2(N) version of PBE                   */
+#define XC_GGA_X_BCGP_GENXC         1038 /* Burke, Cancio, Gould, and Pittalis             */
+#define XC_GGA_X_PBEFE_GENXC        1265 /* PBE for formation energies                     */
diff --git a/ref/maple2c/gga_x_pbeopt.c b/ref/genxc/gga_x_pbe_genxc_funcs.h
similarity index 100%
rename from ref/maple2c/gga_x_pbeopt.c
rename to ref/genxc/gga_x_pbe_genxc_funcs.h
diff --git a/ref/common/work_gga_x_pbe.c b/ref/genxc/work_gga_genxc.h
similarity index 99%
rename from ref/common/work_gga_x_pbe.c
rename to ref/genxc/work_gga_genxc.h
index 4d419714d97cda3742dbe0ef29c1a584c4a0e94f..fa62e7fa41d536631fca41ede7ac75f94f5141d8 100644
--- a/ref/common/work_gga_x_pbe.c
+++ b/ref/genxc/work_gga_genxc.h
@@ -24,7 +24,7 @@
  * @param[in,out] func_type: pointer to functional structure
  */
 static void
-work_gga_x_pbe(const XC(func_type) *p, size_t np,
+work_gga_genxc(const XC(func_type) *p, size_t np,
 #ifdef USE_INTERNAL_COUNTERS
          const double *irho, const double *isigma, double *izk,
          double *ivrho, double *ivsigma, double *iv2rho2, double *iv2rhosigma, double *iv2sigma2,
diff --git a/ref/gga_x_pbe b/ref/gga_x_pbe
deleted file mode 100755
index 2492791fdf0e80f1e34578a42375834d95b426d5..0000000000000000000000000000000000000000
Binary files a/ref/gga_x_pbe and /dev/null differ
diff --git a/ref/gga_x_pbe.c b/ref/gga_x_pbe.c
deleted file mode 100644
index 3bfda673faaa24c4794c5c9b416e72b43471ee51..0000000000000000000000000000000000000000
--- a/ref/gga_x_pbe.c
+++ /dev/null
@@ -1,118 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include "common/xc.h"
-#include "common/gga_x_pbe.h"
-
-double getTimeStamp() {
-    struct timespec ts;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
-}
-
-void cache_flush(const void *p, unsigned int allocation_size) {
-  const size_t cache_line = 64;
-  const char *cp = (const char *)p;
-
-  if(p == NULL || allocation_size <= 0) {
-    return;
-  }
-
-  for(int i = 0; i < allocation_size; i += cache_line) {
-    asm volatile("clflush (%0)\n\t" : : "r"(&cp[i]) : "memory");
-  }
-
-  asm volatile("sfence\n\t" : : : "memory");
-}
-
-int main() {
-  xc_func_type gga;
-  int i;
-  int npoints = 10000000;
-  //int npoints = 100;
-  double *rho, *sigma;
-  double *zk, zkp, *vrho, vrhop[2], *vsigma, vsigmap[3];
-  double *v2rho2, *v2rhosigma, *v2sigma2;
-  double *v3rho3, *v3rho2sigma, *v3rhosigma2, *v3sigma3;
-  double *v4rho4, *v4rho3sigma, *v4rho2sigma2, *v4rhosigma3, *v4sigma4;
-  double S, E;
-
-  rho          = (double*) malloc( 2*npoints*sizeof(double));
-  sigma        = (double*) malloc( 3*npoints*sizeof(double));
-  zk           = (double*) malloc( 1*npoints*sizeof(double));
-  vrho         = (double*) malloc( 2*npoints*sizeof(double));
-  vsigma       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rho2       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rhosigma   = (double*) malloc( 6*npoints*sizeof(double));
-  v2sigma2     = (double*) malloc( 6*npoints*sizeof(double));
-  #ifdef KXC
-  v3rho3       = (double*) malloc( 4*npoints*sizeof(double));
-  v3rho2sigma  = (double*) malloc( 9*npoints*sizeof(double));
-  v3rhosigma2  = (double*) malloc(12*npoints*sizeof(double));
-  v3sigma3     = (double*) malloc(10*npoints*sizeof(double));
-  #else
-  v3rho3       = (double*) NULL;
-  v3rho2sigma  = (double*) NULL;
-  v3rhosigma2  = (double*) NULL;
-  v3sigma3     = (double*) NULL;
-  #endif
-  #ifdef LXC
-  v4rho4       = (double*) malloc( 5*npoints*sizeof(double));
-  v4rho3sigma  = (double*) malloc(12*npoints*sizeof(double));
-  v4rho2sigma2 = (double*) malloc(18*npoints*sizeof(double));
-  v4rhosigma3  = (double*) malloc(20*npoints*sizeof(double));
-  v4sigma4     = (double*) malloc(15*npoints*sizeof(double));
-  #else
-  v4rho4       = (double*) NULL;
-  v4rho3sigma  = (double*) NULL;
-  v4rho2sigma2 = (double*) NULL;
-  v4rhosigma3  = (double*) NULL;
-  v4sigma4     = (double*) NULL;
-  #endif
-
-  if(xc_func_init(&gga, XC_GGA_X_BCGP, XC_POLARIZED) < 0) {
-    fprintf(stderr, "XC_GGA_X_BCGP not found!\n");
-    return -1;
-  }
-
-  for(i=0; i<npoints; i++){
-    rho[2*i + 0]   = 0.048 + i/(double)(npoints);
-    rho[2*i + 1]   = 0.025;
-    sigma[3*i + 0] = 0.0046;
-    sigma[3*i + 1] = 0.0044;
-    sigma[3*i + 2] = 0.0041;
-  }
-
-  cache_flush(rho, 2 * npoints * sizeof(double));
-  cache_flush(sigma, 3 * npoints * sizeof(double));
-  cache_flush(zk, 1 * npoints * sizeof(double));
-  cache_flush(vrho, 2 * npoints * sizeof(double));
-  cache_flush(vsigma, 3 * npoints * sizeof(double));
-  cache_flush(v2rho2, 3 * npoints * sizeof(double));
-  cache_flush(v2rhosigma, 6 * npoints * sizeof(double));
-  cache_flush(v2sigma2, 6 * npoints * sizeof(double));
-  #ifdef KXC
-  cache_flush(v3rho3, 4 * npoints * sizeof(double));
-  cache_flush(v3rho2sigma, 9 * npoints * sizeof(double));
-  cache_flush(v3rhosigma2, 12 * npoints * sizeof(double));
-  cache_flush(v3sigma3, 10 * npoints * sizeof(double));
-  #endif
-  #ifdef LXC
-  cache_flush(v4rho4, 5 * npoints * sizeof(double));
-  cache_flush(v4rho3sigma, 12 * npoints * sizeof(double));
-  cache_flush(v4rho2sigma2, 18 * npoints * sizeof(double));
-  cache_flush(v4rhosigma3, 20 * npoints * sizeof(double));
-  cache_flush(v4sigma4, 15 * npoints * sizeof(double));
-  #endif
-
-  S = getTimeStamp();
-  xc_gga(&gga, npoints, rho, sigma, zk, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4);
-  E = getTimeStamp();
-  fprintf(stderr, "GGA_X_PBE: %.4fs\n", E-S);
-  /*for(i=0; i<npoints; i++){
-    fprintf(stderr, "%16.10lf\t%16.10lf\t%16.10lf\n", rho[2*i + 0], vrho[2*i + 0], v2rho2[3*i + 0]);
-  }*/
-
-  xc_func_end(&gga);
-  return 0;
-}
diff --git a/ref/gga_x_pbe_both.c b/ref/gga_x_pbe_both.c
deleted file mode 100644
index b92e430aabb0be2bdf2a5aa3e12a08190d332306..0000000000000000000000000000000000000000
--- a/ref/gga_x_pbe_both.c
+++ /dev/null
@@ -1,161 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <xc.h>
-#include <time.h>
-
-double getTimeStamp() {
-    struct timespec ts;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
-}
-
-void cache_flush(const void *p, unsigned int allocation_size) {
-  const size_t cache_line = 64;
-  const char *cp = (const char *)p;
-
-  if(p == NULL || allocation_size <= 0) {
-    return;
-  }
-
-  for(int i = 0; i < allocation_size; i += cache_line) {
-    asm volatile("clflush (%0)\n\t" : : "r"(&cp[i]) : "memory");
-  }
-
-  asm volatile("sfence\n\t" : : : "memory");
-}
-
-int main() {
-  xc_func_type gga, ggaopt;
-  int i;
-  int npoints = 10000000;
-  //int npoints = 100;
-  double *rho, *sigma;
-  double *zk, zkp, *vrho, vrhop[2], *vsigma, vsigmap[3];
-  double *v2rho2, *v2rhosigma, *v2sigma2;
-  double *v3rho3, *v3rho2sigma, *v3rhosigma2, *v3sigma3;
-  double *v4rho4, *v4rho3sigma, *v4rho2sigma2, *v4rhosigma3, *v4sigma4;
-  double S, E;
-
-  rho          = (double*) malloc( 2*npoints*sizeof(double));
-  sigma        = (double*) malloc( 3*npoints*sizeof(double));
-  zk           = (double*) malloc( 1*npoints*sizeof(double));
-  vrho         = (double*) malloc( 2*npoints*sizeof(double));
-  vsigma       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rho2       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rhosigma   = (double*) malloc( 6*npoints*sizeof(double));
-  v2sigma2     = (double*) malloc( 6*npoints*sizeof(double));
-  #ifdef KXC
-  v3rho3       = (double*) malloc( 4*npoints*sizeof(double));
-  v3rho2sigma  = (double*) malloc( 9*npoints*sizeof(double));
-  v3rhosigma2  = (double*) malloc(12*npoints*sizeof(double));
-  v3sigma3     = (double*) malloc(10*npoints*sizeof(double));
-  #else
-  v3rho3       = (double*) NULL;
-  v3rho2sigma  = (double*) NULL;
-  v3rhosigma2  = (double*) NULL;
-  v3sigma3     = (double*) NULL;
-  #endif
-  #ifdef LXC
-  v4rho4       = (double*) malloc( 5*npoints*sizeof(double));
-  v4rho3sigma  = (double*) malloc(12*npoints*sizeof(double));
-  v4rho2sigma2 = (double*) malloc(18*npoints*sizeof(double));
-  v4rhosigma3  = (double*) malloc(20*npoints*sizeof(double));
-  v4sigma4     = (double*) malloc(15*npoints*sizeof(double));
-  #else
-  v4rho4       = (double*) NULL;
-  v4rho3sigma  = (double*) NULL;
-  v4rho2sigma2 = (double*) NULL;
-  v4rhosigma3  = (double*) NULL;
-  v4sigma4     = (double*) NULL;
-  #endif
-
-  if(xc_func_init(&gga, XC_GGA_X_BCGP, XC_POLARIZED) < 0) {
-    fprintf(stderr, "XC_GGA_X_BCGP not found!\n");
-    return -1;
-  }
-
-  if(xc_func_init(&ggaopt, XC_GGA_X_BCGP_OPT, XC_POLARIZED) < 0) {
-    fprintf(stderr, "XC_GGA_X_BCGP_OPT not found!\n");
-    return -1;
-  }
-
-  for(i=0; i<npoints; i++){
-    rho[2*i + 0]   = 0.048 + i/(double)(npoints);
-    rho[2*i + 1]   = 0.025;
-    sigma[3*i + 0] = 0.0046;
-    sigma[3*i + 1] = 0.0044;
-    sigma[3*i + 2] = 0.0041;
-  }
-
-  cache_flush(rho, 2 * npoints * sizeof(double));
-  cache_flush(sigma, 3 * npoints * sizeof(double));
-  cache_flush(zk, 1 * npoints * sizeof(double));
-  cache_flush(vrho, 2 * npoints * sizeof(double));
-  cache_flush(vsigma, 3 * npoints * sizeof(double));
-  cache_flush(v2rho2, 3 * npoints * sizeof(double));
-  cache_flush(v2rhosigma, 6 * npoints * sizeof(double));
-  cache_flush(v2sigma2, 6 * npoints * sizeof(double));
-  #ifdef KXC
-  cache_flush(v3rho3, 4 * npoints * sizeof(double));
-  cache_flush(v3rho2sigma, 9 * npoints * sizeof(double));
-  cache_flush(v3rhosigma2, 12 * npoints * sizeof(double));
-  cache_flush(v3sigma3, 10 * npoints * sizeof(double));
-  #endif
-  #ifdef LXC
-  cache_flush(v4rho4, 5 * npoints * sizeof(double));
-  cache_flush(v4rho3sigma, 12 * npoints * sizeof(double));
-  cache_flush(v4rho2sigma2, 18 * npoints * sizeof(double));
-  cache_flush(v4rhosigma3, 20 * npoints * sizeof(double));
-  cache_flush(v4sigma4, 15 * npoints * sizeof(double));
-  #endif
-
-  S = getTimeStamp();
-  xc_gga(&ggaopt, npoints, rho, sigma, zk, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4);
-  E = getTimeStamp();
-  fprintf(stderr, "GGA_X_PBE_OPT: %.4fs\n", E-S);
-  /*for(i=0; i<npoints; i++){
-    fprintf(stderr, "%16.10lf\t%16.10lf\t%16.10lf\n", rho[2*i + 0], vrho[2*i + 0], v2rho2[3*i + 0]);
-  }*/
-
-  for(i=0; i<npoints; i++){
-    rho[2*i + 0]   = 0.048 + i/(double)(npoints);
-    rho[2*i + 1]   = 0.025;
-    sigma[3*i + 0] = 0.0046;
-    sigma[3*i + 1] = 0.0044;
-    sigma[3*i + 2] = 0.0041;
-  }
-
-  cache_flush(rho, 2 * npoints * sizeof(double));
-  cache_flush(sigma, 3 * npoints * sizeof(double));
-  cache_flush(zk, 1 * npoints * sizeof(double));
-  cache_flush(vrho, 2 * npoints * sizeof(double));
-  cache_flush(vsigma, 3 * npoints * sizeof(double));
-  cache_flush(v2rho2, 3 * npoints * sizeof(double));
-  cache_flush(v2rhosigma, 6 * npoints * sizeof(double));
-  cache_flush(v2sigma2, 6 * npoints * sizeof(double));
-  #ifdef KXC
-  cache_flush(v3rho3, 4 * npoints * sizeof(double));
-  cache_flush(v3rho2sigma, 9 * npoints * sizeof(double));
-  cache_flush(v3rhosigma2, 12 * npoints * sizeof(double));
-  cache_flush(v3sigma3, 10 * npoints * sizeof(double));
-  #endif
-  #ifdef LXC
-  cache_flush(v4rho4, 5 * npoints * sizeof(double));
-  cache_flush(v4rho3sigma, 12 * npoints * sizeof(double));
-  cache_flush(v4rho2sigma2, 18 * npoints * sizeof(double));
-  cache_flush(v4rhosigma3, 20 * npoints * sizeof(double));
-  cache_flush(v4sigma4, 15 * npoints * sizeof(double));
-  #endif
-
-  S = getTimeStamp();
-  xc_gga(&gga, npoints, rho, sigma, zk, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4);
-  E = getTimeStamp();
-  fprintf(stderr, "GGA_X_PBE: %.4fs\n", E-S);
-  /*for(i=0; i<npoints; i++){
-    fprintf(stderr, "%16.10lf\t%16.10lf\t%16.10lf\n", rho[2*i + 0], vrho[2*i + 0], v2rho2[3*i + 0]);
-  }*/
-
-  xc_func_end(&gga);
-  xc_func_end(&ggaopt);
-  return 0;
-}
diff --git a/ref/gga_x_pbeopt.c b/ref/gga_x_pbeopt.c
deleted file mode 100644
index ec3f5090785db31006bd7186e9c9ea1ea80c5b08..0000000000000000000000000000000000000000
--- a/ref/gga_x_pbeopt.c
+++ /dev/null
@@ -1,119 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <xc.h>
-#include <time.h>
-#include <omp.h>
-
-double getTimeStamp() {
-    struct timespec ts;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
-}
-
-void cache_flush(const void *p, unsigned int allocation_size) {
-  const size_t cache_line = 64;
-  const char *cp = (const char *)p;
-
-  if(p == NULL || allocation_size <= 0) {
-    return;
-  }
-
-  for(int i = 0; i < allocation_size; i += cache_line) {
-    asm volatile("clflush (%0)\n\t" : : "r"(&cp[i]) : "memory");
-  }
-
-  asm volatile("sfence\n\t" : : : "memory");
-}
-
-int main() {
-  xc_func_type ggaopt;
-  int i;
-  int npoints = 10000000;
-  //int npoints = 100;
-  double *rho, *sigma;
-  double *zk, zkp, *vrho, vrhop[2], *vsigma, vsigmap[3];
-  double *v2rho2, *v2rhosigma, *v2sigma2;
-  double *v3rho3, *v3rho2sigma, *v3rhosigma2, *v3sigma3;
-  double *v4rho4, *v4rho3sigma, *v4rho2sigma2, *v4rhosigma3, *v4sigma4;
-  double S, E;
-
-  rho          = (double*) malloc( 2*npoints*sizeof(double));
-  sigma        = (double*) malloc( 3*npoints*sizeof(double));
-  zk           = (double*) malloc( 1*npoints*sizeof(double));
-  vrho         = (double*) malloc( 2*npoints*sizeof(double));
-  vsigma       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rho2       = (double*) malloc( 3*npoints*sizeof(double));
-  v2rhosigma   = (double*) malloc( 6*npoints*sizeof(double));
-  v2sigma2     = (double*) malloc( 6*npoints*sizeof(double));
-  #ifdef KXC
-  v3rho3       = (double*) malloc( 4*npoints*sizeof(double));
-  v3rho2sigma  = (double*) malloc( 9*npoints*sizeof(double));
-  v3rhosigma2  = (double*) malloc(12*npoints*sizeof(double));
-  v3sigma3     = (double*) malloc(10*npoints*sizeof(double));
-  #else
-  v3rho3       = (double*) NULL;
-  v3rho2sigma  = (double*) NULL;
-  v3rhosigma2  = (double*) NULL;
-  v3sigma3     = (double*) NULL;
-  #endif
-  #ifdef LXC
-  v4rho4       = (double*) malloc( 5*npoints*sizeof(double));
-  v4rho3sigma  = (double*) malloc(12*npoints*sizeof(double));
-  v4rho2sigma2 = (double*) malloc(18*npoints*sizeof(double));
-  v4rhosigma3  = (double*) malloc(20*npoints*sizeof(double));
-  v4sigma4     = (double*) malloc(15*npoints*sizeof(double));
-  #else
-  v4rho4       = (double*) NULL;
-  v4rho3sigma  = (double*) NULL;
-  v4rho2sigma2 = (double*) NULL;
-  v4rhosigma3  = (double*) NULL;
-  v4sigma4     = (double*) NULL;
-  #endif
-
-  if(xc_func_init(&ggaopt, XC_GGA_X_BCGP_OPT, XC_POLARIZED) < 0) {
-    fprintf(stderr, "XC_GGA_X_BCGP_OPT not found!\n");
-    return -1;
-  }
-
-  #pragma omp parallel for
-  for(i=0; i<npoints; i++){
-    rho[2*i + 0]   = 0.048 + i/(double)(npoints);
-    rho[2*i + 1]   = 0.025;
-    sigma[3*i + 0] = 0.0046;
-    sigma[3*i + 1] = 0.0044;
-    sigma[3*i + 2] = 0.0041;
-  }
-
-  cache_flush(rho, 2 * npoints * sizeof(double));
-  cache_flush(sigma, 3 * npoints * sizeof(double));
-  cache_flush(zk, 1 * npoints * sizeof(double));
-  cache_flush(vrho, 2 * npoints * sizeof(double));
-  cache_flush(vsigma, 3 * npoints * sizeof(double));
-  cache_flush(v2rho2, 3 * npoints * sizeof(double));
-  cache_flush(v2rhosigma, 6 * npoints * sizeof(double));
-  cache_flush(v2sigma2, 6 * npoints * sizeof(double));
-  #ifdef KXC
-  cache_flush(v3rho3, 4 * npoints * sizeof(double));
-  cache_flush(v3rho2sigma, 9 * npoints * sizeof(double));
-  cache_flush(v3rhosigma2, 12 * npoints * sizeof(double));
-  cache_flush(v3sigma3, 10 * npoints * sizeof(double));
-  #endif
-  #ifdef LXC
-  cache_flush(v4rho4, 5 * npoints * sizeof(double));
-  cache_flush(v4rho3sigma, 12 * npoints * sizeof(double));
-  cache_flush(v4rho2sigma2, 18 * npoints * sizeof(double));
-  cache_flush(v4rhosigma3, 20 * npoints * sizeof(double));
-  cache_flush(v4sigma4, 15 * npoints * sizeof(double));
-  #endif
-
-  S = getTimeStamp();
-  xc_gga(&ggaopt, npoints, rho, sigma, zk, vrho, vsigma, v2rho2, v2rhosigma, v2sigma2, v3rho3, v3rho2sigma, v3rhosigma2, v3sigma3, v4rho4, v4rho3sigma, v4rho2sigma2, v4rhosigma3, v4sigma4);
-  E = getTimeStamp();
-  fprintf(stderr, "GGA_X_PBE_OPT: %.4fs\n", E-S);
-  /*for(i=0; i<npoints; i++){
-    fprintf(stderr, "%16.10lf\t%16.10lf\t%16.10lf\n", rho[2*i + 0], vrho[2*i + 0], v2rho2[3*i + 0]);
-  }*/
-
-  xc_func_end(&ggaopt);
-  return 0;
-}
diff --git a/ref/common/gga_x_pbe.c b/ref/libxc/gga_x_pbe.c
similarity index 99%
rename from ref/common/gga_x_pbe.c
rename to ref/libxc/gga_x_pbe.c
index 605036f04f9a207232f6319035900a01b400883f..2bc3cac53bae7e0c2a025ac551b8fb69139b07ea 100644
--- a/ref/common/gga_x_pbe.c
+++ b/ref/libxc/gga_x_pbe.c
@@ -6,7 +6,7 @@
  file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
 
-#include "util.h"
+#include "../common/util.h"
 #include "gga_x_pbe.h"
 
 typedef struct{
@@ -57,9 +57,8 @@ static const double pbe_bcgp_values[PBE_N_PAR] =
 static const double pbe_fe_values[PBE_N_PAR] =
   {0.437, 0.346};
 
-#include "../maple2c/gga_x_pbe.c"
-#include "work_gga.c"
-
+#include "gga_x_pbe_funcs.h"
+#include "work_gga.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/ref/common/gga_x_pbe.h b/ref/libxc/gga_x_pbe.h
similarity index 100%
rename from ref/common/gga_x_pbe.h
rename to ref/libxc/gga_x_pbe.h
diff --git a/ref/maple2c/gga_x_pbe.c b/ref/libxc/gga_x_pbe_funcs.h
similarity index 100%
rename from ref/maple2c/gga_x_pbe.c
rename to ref/libxc/gga_x_pbe_funcs.h
diff --git a/ref/common/work_gga.c b/ref/libxc/work_gga.h
similarity index 99%
rename from ref/common/work_gga.c
rename to ref/libxc/work_gga.h
index 9b47b93f8791c07cd2782da62d1fca0cdca47825..7e326d0e574e1df8d2a6a9882da8fc2099028023 100644
--- a/ref/common/work_gga.c
+++ b/ref/libxc/work_gga.h
@@ -17,7 +17,7 @@
 #include <fenv.h>
 #endif
 
-#include "xc.h"
+#include "../common/xc.h"
 
 /* hack to avoid compiler warnings */
 #define NOARG
diff --git a/ref/main.c b/ref/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..19fdae35510339def50311767553d168ca20f322
--- /dev/null
+++ b/ref/main.c
@@ -0,0 +1,154 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <float.h>
+#include "common/xc.h"
+#include "libxc/gga_x_pbe.h"
+#include "genxc/gga_x_pbe_genxc.h"
+
+// Precision for correctness checking
+#define EPSILON     0.00000001
+
+int nearlyEqual(double a, double b) {
+    //double abs_a = abs(a);
+    //double abs_b = abs(b);
+    double diff = abs(a - b);
+
+    //if(a == b) { return 1; }
+    //if(a == 0 || b == 0 || abs_a + abs_b < MIN_NORMAL) { return diff < (EPSILON * MIN_NORMAL); }
+    //return (diff / min((abs_a + abs_b), DBL_MAX)) < EPSILON;
+    return diff < EPSILON;
+}
+
+double getTimeStamp() {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec * 1.e-9;
+}
+
+int main() {
+  xc_func_type gga_libxc, gga_genxc;
+  int i;
+  int npoints = 10000000;
+  //int npoints = 100;
+  double *rho_libxc, *sigma_libxc;
+  double *rho_genxc, *sigma_genxc;
+  double *zk_libxc, *vrho_libxc, *vsigma_libxc;
+  double *zk_genxc, *vrho_genxc, *vsigma_genxc;
+  double *v2rho2_libxc, *v2rhosigma_libxc, *v2sigma2_libxc;
+  double *v2rho2_genxc, *v2rhosigma_genxc, *v2sigma2_genxc;
+  double *v3rho3_libxc, *v3rho2sigma_libxc, *v3rhosigma2_libxc, *v3sigma3_libxc;
+  double *v3rho3_genxc, *v3rho2sigma_genxc, *v3rhosigma2_genxc, *v3sigma3_genxc;
+  double *v4rho4_libxc, *v4rho3sigma_libxc, *v4rho2sigma2_libxc, *v4rhosigma3_libxc, *v4sigma4_libxc;
+  double *v4rho4_genxc, *v4rho3sigma_genxc, *v4rho2sigma2_genxc, *v4rhosigma3_genxc, *v4sigma4_genxc;
+  double S, E;
+
+  rho_libxc          = (double*) malloc( 2*npoints*sizeof(double));
+  sigma_libxc        = (double*) malloc( 3*npoints*sizeof(double));
+  zk_libxc           = (double*) malloc( 1*npoints*sizeof(double));
+  vrho_libxc         = (double*) malloc( 2*npoints*sizeof(double));
+  vsigma_libxc       = (double*) malloc( 3*npoints*sizeof(double));
+  v2rho2_libxc       = (double*) malloc( 3*npoints*sizeof(double));
+  v2rhosigma_libxc   = (double*) malloc( 6*npoints*sizeof(double));
+  v2sigma2_libxc     = (double*) malloc( 6*npoints*sizeof(double));
+  rho_genxc          = (double*) malloc( 2*npoints*sizeof(double));
+  sigma_genxc        = (double*) malloc( 3*npoints*sizeof(double));
+  zk_genxc           = (double*) malloc( 1*npoints*sizeof(double));
+  vrho_genxc         = (double*) malloc( 2*npoints*sizeof(double));
+  vsigma_genxc       = (double*) malloc( 3*npoints*sizeof(double));
+  v2rho2_genxc       = (double*) malloc( 3*npoints*sizeof(double));
+  v2rhosigma_genxc   = (double*) malloc( 6*npoints*sizeof(double));
+  v2sigma2_genxc     = (double*) malloc( 6*npoints*sizeof(double));
+  #ifdef KXC
+  v3rho3_libxc       = (double*) malloc( 4*npoints*sizeof(double));
+  v3rho2sigma_libxc  = (double*) malloc( 9*npoints*sizeof(double));
+  v3rhosigma2_libxc  = (double*) malloc(12*npoints*sizeof(double));
+  v3sigma3_libxc     = (double*) malloc(10*npoints*sizeof(double));
+  v3rho3_genxc       = (double*) malloc( 4*npoints*sizeof(double));
+  v3rho2sigma_genxc  = (double*) malloc( 9*npoints*sizeof(double));
+  v3rhosigma2_genxc  = (double*) malloc(12*npoints*sizeof(double));
+  v3sigma3_genxc     = (double*) malloc(10*npoints*sizeof(double));
+  #else
+  v3rho3_libxc       = (double*) NULL;
+  v3rho2sigma_libxc  = (double*) NULL;
+  v3rhosigma2_libxc  = (double*) NULL;
+  v3sigma3_libxc     = (double*) NULL;
+  v3rho3_genxc       = (double*) NULL;
+  v3rho2sigma_genxc  = (double*) NULL;
+  v3rhosigma2_genxc  = (double*) NULL;
+  v3sigma3_genxc     = (double*) NULL;
+  #endif
+  #ifdef LXC
+  v4rho4_libxc       = (double*) malloc( 5*npoints*sizeof(double));
+  v4rho3sigma_libxc  = (double*) malloc(12*npoints*sizeof(double));
+  v4rho2sigma2_libxc = (double*) malloc(18*npoints*sizeof(double));
+  v4rhosigma3_libxc  = (double*) malloc(20*npoints*sizeof(double));
+  v4sigma4_libxc     = (double*) malloc(15*npoints*sizeof(double));
+  v4rho4_genxc       = (double*) malloc( 5*npoints*sizeof(double));
+  v4rho3sigma_genxc  = (double*) malloc(12*npoints*sizeof(double));
+  v4rho2sigma2_genxc = (double*) malloc(18*npoints*sizeof(double));
+  v4rhosigma3_genxc  = (double*) malloc(20*npoints*sizeof(double));
+  v4sigma4_genxc     = (double*) malloc(15*npoints*sizeof(double));
+  #else
+  v4rho4_genxc       = (double*) NULL;
+  v4rho3sigma_genxc  = (double*) NULL;
+  v4rho2sigma2_genxc = (double*) NULL;
+  v4rhosigma3_genxc  = (double*) NULL;
+  v4sigma4_genxc     = (double*) NULL;
+  #endif
+
+  if(xc_func_init(&gga_libxc, XC_GGA_X_BCGP, XC_POLARIZED) < 0) {
+    fprintf(stderr, "XC_GGA_X_BCGP not found!\n");
+    return -1;
+  }
+
+  for(i=0; i<npoints; i++){
+    rho_libxc[2*i + 0]   = 0.048 + i/(double)(npoints);
+    rho_libxc[2*i + 1]   = 0.025;
+    sigma_libxc[3*i + 0] = 0.0046;
+    sigma_libxc[3*i + 1] = 0.0044;
+    sigma_libxc[3*i + 2] = 0.0041;
+  }
+
+  S = getTimeStamp();
+  xc_gga(
+    &gga_libxc, npoints, rho_libxc, sigma_libxc, zk_libxc, vrho_libxc, vsigma_libxc, v2rho2_libxc, v2rhosigma_libxc, v2sigma2_libxc,
+    v3rho3_libxc, v3rho2sigma_libxc, v3rhosigma2_libxc, v3sigma3_libxc, v4rho4_libxc, v4rho3sigma_libxc, v4rho2sigma2_libxc,
+    v4rhosigma3_libxc, v4sigma4_libxc
+  );
+  E = getTimeStamp();
+  fprintf(stderr, "GGA_X_PBE: %.4fs\n", E-S);
+
+  if(xc_func_init(&gga_genxc, XC_GGA_X_BCGP_GENXC, XC_POLARIZED) < 0) {
+    fprintf(stderr, "XC_GGA_X_BCGP_GENXC not found!\n");
+    return -1;
+  }
+
+  for(i=0; i<npoints; i++){
+    rho_genxc[2*i + 0]   = 0.048 + i/(double)(npoints);
+    rho_genxc[2*i + 1]   = 0.025;
+    sigma_genxc[3*i + 0] = 0.0046;
+    sigma_genxc[3*i + 1] = 0.0044;
+    sigma_genxc[3*i + 2] = 0.0041;
+  }
+
+  S = getTimeStamp();
+  xc_gga(
+    &gga_genxc, npoints, rho_genxc, sigma_genxc, zk_genxc, vrho_genxc, vsigma_genxc, v2rho2_genxc, v2rhosigma_genxc, v2sigma2_genxc,
+    v3rho3_genxc, v3rho2sigma_genxc, v3rhosigma2_genxc, v3sigma3_genxc, v4rho4_genxc, v4rho3sigma_genxc, v4rho2sigma2_genxc,
+    v4rhosigma3_genxc, v4sigma4_genxc
+  );
+  E = getTimeStamp();
+  fprintf(stderr, "GGA_X_PBE_GENXC: %.4fs\n", E-S);
+
+  #define CHECK_CORRECTNESS(arr1,arr2,i);  if(!nearlyEqual(arr1[i], arr2[i])) { fprintf(stderr, "Results differ!\n"); return -1; }
+  for(i = 0; i < npoints; i++) {
+    CHECK_CORRECTNESS(rho_libxc, rho_genxc, 2 * i + 0);
+    CHECK_CORRECTNESS(vrho_libxc, vrho_genxc, 2 * i + 0);
+    CHECK_CORRECTNESS(v2rho2_libxc, v2rho2_genxc, 3 * i + 0);
+  }
+
+  xc_func_end(&gga_libxc);
+  xc_func_end(&gga_genxc);
+  return 0;
+}