diff --git a/rbc/externals/__init__.py b/rbc/externals/__init__.py
index 13a8edad..98f126b8 100644
--- a/rbc/externals/__init__.py
+++ b/rbc/externals/__init__.py
@@ -4,11 +4,15 @@
 
 
 def gen_codegen(fn_name):
-    def codegen(context, builder, sig, args):
-        # Need to retrieve the function name again
-        fndesc = funcdesc.ExternalFunctionDescriptor(fn_name, sig.return_type, sig.args)
-        func = context.declare_external_function(builder.module, fndesc)
-        return builder.call(func, args)
+    if fn_name.startswith('llvm.'):
+        def codegen(context, builder, sig, args):
+            func = builder.module.declare_intrinsic(fn_name, [a.type for a in args])
+            return builder.call(func, args)
+    else:
+        def codegen(context, builder, sig, args):
+            fndesc = funcdesc.ExternalFunctionDescriptor(fn_name, sig.return_type, sig.args)
+            func = context.declare_external_function(builder.module, fndesc)
+            return builder.call(func, args)
 
     return codegen
 
diff --git a/rbc/heavydb/__init__.py b/rbc/heavydb/__init__.py
index e432731f..8a728865 100644
--- a/rbc/heavydb/__init__.py
+++ b/rbc/heavydb/__init__.py
@@ -15,6 +15,7 @@
 from .day_time_interval import *  # noqa: F401, F403
 from .year_month_time_interval import *  # noqa: F401, F403
 from .remoteheavydb import *  # noqa: F401, F403
+from .heavydb_compiler import *  # noqa: F401, F403
 
 from . import mathimpl as math  # noqa: F401
 from . import npyimpl as np  # noqa: F401
diff --git a/rbc/heavydb/buffer.py b/rbc/heavydb/buffer.py
index 4a472655..1359d1c1 100644
--- a/rbc/heavydb/buffer.py
+++ b/rbc/heavydb/buffer.py
@@ -30,7 +30,7 @@
 import numpy as np
 from rbc import typesystem
 from rbc.targetinfo import TargetInfo
-from numba.core import datamodel, cgutils, extending, types, imputils
+from numba.core import datamodel, cgutils, types, imputils, extending
 
 int8_t = ir.IntType(8)
 int32_t = ir.IntType(32)
diff --git a/rbc/heavydb/heavydb_compiler.py b/rbc/heavydb/heavydb_compiler.py
new file mode 100644
index 00000000..5865a51d
--- /dev/null
+++ b/rbc/heavydb/heavydb_compiler.py
@@ -0,0 +1,398 @@
+import warnings
+from contextlib import contextmanager
+from functools import partial
+
+import llvmlite.binding as llvm
+from numba import _dynfunc, njit
+from numba.core import (base, callconv, codegen, compiler_lock, cpu,
+                        descriptors, dispatcher, imputils, options, typing,
+                        utils)
+from numba.core.target_extension import (Generic, dispatcher_registry,
+                                         jit_registry, target_registry)
+from numba.cuda.target import CUDATypingContext
+from numba.np import ufunc_db
+
+from rbc.targetinfo import TargetInfo
+
+
+class HeavyDB_CPU(Generic):
+    """Mark the target as HeavyDB CPU
+    """
+
+
+class HeavyDB_GPU(Generic):
+    """Mark the target as HeavyDB GPU
+    """
+
+
+target_registry['heavydb_cpu'] = HeavyDB_CPU
+target_registry['heavydb_gpu'] = HeavyDB_GPU
+
+
+def custom_jit(*args, target=None, **kwargs):
+    assert 'target' not in kwargs  # `target` is a named parameter, never in kwargs
+    assert '_target' not in kwargs  # `_target` is supplied below from `target`
+    return njit(*args, _target=target, **kwargs)  # forward `target` to njit as `_target`
+
+
+jit_registry[target_registry['heavydb_cpu']] = partial(custom_jit, target='heavydb_cpu')
+jit_registry[target_registry['heavydb_gpu']] = partial(custom_jit, target='heavydb_gpu')
+
+
+# heavydb_cpu_registry = imputils.Registry(name='heavydb_cpu_registry')
+# heavydb_gpu_registry = imputils.Registry(name='heavydb_gpu_registry')
+
+
+class _NestedContext(object):
+    _typing_context = None  # None: no override installed
+    _target_context = None  # None: no override installed
+
+    @contextmanager
+    def nested(self, typing_context, target_context):
+        old_nested = self._typing_context, self._target_context  # saved for restore
+        try:
+            self._typing_context = typing_context
+            self._target_context = target_context
+            yield
+        finally:
+            self._typing_context, self._target_context = old_nested  # also on error
+
+
+_options_mixin = options.include_default_options(
+    "no_rewrites",
+    "no_cpython_wrapper",
+    "no_cfunc_wrapper",
+    "fastmath",
+    "inline",
+    "boundscheck",
+    "nopython",
+    # Add "target_backend" as an accepted option for the CPU in @jit(...)
+    "target_backend",
+)
+
+
+class HeavyDBTargetOptions(_options_mixin, options.TargetOptions):
+    def finalize(self, flags, options):  # `options` is not read in this body
+        flags.enable_pyobject = False
+        flags.enable_looplift = False
+        flags.nrt = False
+        flags.debuginfo = False
+        flags.boundscheck = False
+        flags.enable_pyobject_looplift = False
+        flags.no_rewrites = True
+        flags.auto_parallel = cpu.ParallelOptions(False)
+        flags.inherit_if_not_set("fastmath")
+        flags.inherit_if_not_set("error_model", default="python")
+        # Add "target_backend" as an option that inherits from the caller
+        flags.inherit_if_not_set("target_backend")
+
+
+class HeavyDBTarget(descriptors.TargetDescriptor):
+    options = HeavyDBTargetOptions
+    _nested = _NestedContext()  # class attribute, shared by all HeavyDBTarget instances
+
+    @utils.cached_property
+    def _toplevel_target_context(self):
+        # Lazily-initialized top-level target context, for all threads
+        return JITRemoteTargetContext(self.typing_context, self._target_name)
+
+    @utils.cached_property
+    def _toplevel_typing_context(self):
+        # Lazily-initialized top-level typing context, for all threads
+        return {'heavydb_cpu': JITRemoteCPUTypingContext,
+                'heavydb_gpu': JITRemoteGPUTypingContext}[self._target_name]()
+
+    @property
+    def target_context(self):
+        """
+        The target context for CPU/GPU targets.
+        """
+        nested = self._nested._target_context
+        if nested is not None:  # an installed override wins over the top-level one
+            return nested
+        else:
+            return self._toplevel_target_context
+
+    @property
+    def typing_context(self):
+        """
+        The typing context for CPU/GPU targets.
+        """
+        nested = self._nested._typing_context
+        if nested is not None:  # an installed override wins over the top-level one
+            return nested
+        else:
+            return self._toplevel_typing_context
+
+    def nested_context(self, typing_context, target_context):
+        """
+        A context manager temporarily replacing the contexts with the
+        given ones (NOTE(review): `_nested` is shared, not visibly per-thread).
+        """
+        return self._nested.nested(typing_context, target_context)
+
+
+# Create a target instance
+heavydb_cpu_target = HeavyDBTarget("heavydb_cpu")
+heavydb_gpu_target = HeavyDBTarget("heavydb_gpu")
+
+
+# Declare a dispatcher for the CPU/GPU targets
+class HeavyDBCPUDispatcher(dispatcher.Dispatcher):
+    targetdescr = heavydb_cpu_target
+
+
+class HeavyDBGPUDispatcher(dispatcher.Dispatcher):
+    targetdescr = heavydb_gpu_target
+
+
+# Register a dispatcher for the target, a lot of the code uses this
+# internally to work out what to do RE compilation
+dispatcher_registry[target_registry["heavydb_cpu"]] = HeavyDBCPUDispatcher
+dispatcher_registry[target_registry["heavydb_gpu"]] = HeavyDBGPUDispatcher
+
+
+class JITRemoteCodeLibrary(codegen.JITCodeLibrary):
+    """JITRemoteCodeLibrary was introduced to prevent numba from calling functions
+    that check if the module is final. See xnd-project/rbc issue #87.
+    """
+
+    def get_pointer_to_function(self, name):
+        """We can return any random number here! This is just to prevent numba from
+        trying to check if the symbol given by "name" is defined in the module.
+        In cases where RBC is calling an external function (i.e. allocate_varlen_buffer)
+        the symbol will not be defined in the module, resulting in an error.
+        """
+        return 0
+
+    def _finalize_specific(self):
+        """Same as codegen.JITCodeLibrary._finalize_specific but without
+        calling _ensure_finalize at the end
+        """
+        self._codegen._scan_and_fix_unresolved_refs(self._final_module)
+
+
+class JITRemoteCodegen(codegen.JITCPUCodegen):
+    """Codegen whose CPU name, features and reloc model come from TargetInfo.
+    """
+    _library_class = JITRemoteCodeLibrary
+
+    def _get_host_cpu_name(self):
+        target_info = TargetInfo()
+        return target_info.device_name
+
+    def _get_host_cpu_features(self):
+        """Return TargetInfo().device_features minus the `remove_features` entries
+        for the (server, client) LLVM major-version pair; '' for GPU or no server LLVM.
+        """
+        target_info = TargetInfo()
+        features = target_info.device_features
+        server_llvm_version = target_info.llvm_version
+        if server_llvm_version is None or target_info.is_gpu:
+            return ''
+        client_llvm_version = llvm.llvm_version_info
+
+        # See https://github.com/xnd-project/rbc/issues/45
+        remove_features = {
+            (12, 12): [], (11, 11): [], (10, 10): [], (9, 9): [], (8, 8): [],
+            (11, 8): ['tsxldtrk', 'amx-tile', 'amx-bf16', 'serialize', 'amx-int8',
+                      'avx512vp2intersect', 'cx8', 'enqcmd', 'avx512bf16'],
+            (11, 10): ['tsxldtrk', 'amx-tile', 'amx-bf16', 'serialize', 'amx-int8'],
+            (9, 11): ['sse2', 'cx16', 'sahf', 'tbm', 'avx512ifma', 'sha',
+                      'gfni', 'fma4', 'vpclmulqdq', 'prfchw', 'bmi2', 'cldemote',
+                      'fsgsbase', 'ptwrite', 'xsavec', 'popcnt', 'mpx',
+                      'avx512bitalg', 'movdiri', 'xsaves', 'avx512er',
+                      'avx512vnni', 'avx512vpopcntdq', 'pconfig', 'clwb',
+                      'avx512f', 'clzero', 'pku', 'mmx', 'lwp', 'rdpid', 'xop',
+                      'rdseed', 'waitpkg', 'movdir64b', 'sse4a', 'avx512bw',
+                      'clflushopt', 'xsave', 'avx512vbmi2', '64bit', 'avx512vl',
+                      'invpcid', 'avx512cd', 'avx', 'vaes', 'cx8', 'fma', 'rtm',
+                      'bmi', 'enqcmd', 'rdrnd', 'mwaitx', 'sse4.1', 'sse4.2', 'avx2',
+                      'fxsr', 'wbnoinvd', 'sse', 'lzcnt', 'pclmul', 'prefetchwt1',
+                      'f16c', 'ssse3', 'sgx', 'shstk', 'cmov', 'avx512vbmi',
+                      'avx512bf16', 'movbe', 'xsaveopt', 'avx512dq', 'adx',
+                      'avx512pf', 'sse3'],
+            (9, 8): ['cx8', 'enqcmd', 'avx512bf16'],
+        }.get((server_llvm_version[0], client_llvm_version[0]), [])
+        if remove_features is None:  # NOTE(review): unreachable, .get() defaults to []
+            warnings.warn(
+                f'{type(self).__name__}._get_host_cpu_features: `remove_features` dictionary'
+                ' requires an update: detected different LLVM versions in server '
+                f'{server_llvm_version} and client {client_llvm_version}.'
+                f' CPU features: {features}.')
+        else:
+            features += ','  # sentinel so every feature is followed by a comma
+            for f in remove_features:
+                features = features.replace('+' + f + ',', '').replace('-' + f + ',', '')
+            features = features.rstrip(',')  # str is immutable: keep the stripped result
+        return features
+
+    def _customize_tm_options(self, options):
+        """Apply the base target-machine options, then override `reloc`."""
+        super()._customize_tm_options(options)
+        # fix reloc_model as the base method sets it using local target
+        is_x86 = TargetInfo().arch.startswith('x86')
+        options['reloc'] = 'static' if is_x86 else 'default'
+
+    def set_env(self, env_name, env):
+        # No-op: both arguments are ignored.
+        return None
+
+
+class JITRemoteCPUTypingContext(typing.Context):
+    """JITRemote Typing Context
+    """
+
+
+class JITRemoteGPUTypingContext(CUDATypingContext):
+    """JITRemote Typing Context
+    """
+
+
+class JITRemoteTargetContext(base.BaseContext):
+    # Whether dynamic globals (CPU runtime addresses) is allowed
+    allow_dynamic_globals = True  # should this be False?
+
+    def __init__(self, typing_context, target):
+        if target not in ('heavydb_cpu', 'heavydb_gpu'):
+            raise ValueError(f'Target "{target}" not supported')
+        super().__init__(typing_context, target)
+
+    @compiler_lock.global_compiler_lock
+    def init(self):
+        target_info = TargetInfo()
+        self.address_size = target_info.bits
+        self.is32bit = (self.address_size == 32)
+        self._internal_codegen = JITRemoteCodegen("numba.exec")
+        self._target_data = llvm.create_target_data(target_info.datalayout)
+
+    def refresh(self):
+        # if self.target_name == 'heavydb_cpu':
+        #     registry = heavydb_cpu_registry
+        # else:
+        #     registry = heavydb_gpu_registry
+
+        # try:
+        #     loader = self._registries[registry]
+        # except KeyError:
+        #     loader = imputils.RegistryLoader(registry)
+        #     self._registries[registry] = loader
+
+        # self.install_registry(registry)
+        # Also refresh typing context, since @overload declarations can
+        # affect it.
+        # self.typing_context.refresh()
+        super().refresh()
+
+    def load_additional_registries(self):
+        # Add implementations that work via import
+        from numba.cpython import (builtins, charseq, enumimpl,  # noqa: F401
+                                   hashing, heapq, iterators, listobj, numbers,
+                                   rangeobj, setobj, slicing, tupleobj,
+                                   unicode)
+
+        self.install_registry(imputils.builtin_registry)
+
+        # uncomment as needed!
+        # from numba.core import optional
+        # from numba.np import linalg, polynomial
+        # from numba.typed import typeddict, dictimpl
+        # from numba.typed import typedlist, listobject
+        # from numba.experimental import jitclass, function_type
+        # from numba.np import npdatetime
+        # from numba.np import arraymath, arrayobj  # noqa: F401
+
+        # from rbc.heavydb import mathimpl
+
+        # Add target specific implementations
+        from numba.cpython import mathimpl
+        from numba.cuda import mathimpl as cuda_mathimpl
+        from numba.np import npyimpl
+
+        if self.target_name == 'heavydb_cpu':
+            self.install_registry(npyimpl.registry)
+            self.install_registry(mathimpl.registry)
+        else:
+            self.install_registry(cuda_mathimpl.registry)
+        # from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl
+        # from numba.misc import cffiimpl
+        # from numba.experimental.jitclass.base import ClassBuilder as \
+        #     jitclassimpl
+        # self.install_registry(cmathimpl.registry)
+        # self.install_registry(cffiimpl.registry)
+        # self.install_registry(mathimpl.registry)
+        # self.install_registry(printimpl.registry)
+        # self.install_registry(randomimpl.registry)
+        # self.install_registry(jitclassimpl.class_impl_registry)
+
+    def codegen(self):
+        return self._internal_codegen
+
+    @utils.cached_property
+    def call_conv(self):
+        return callconv.CPUCallConv(self)
+
+    @property
+    def target_data(self):
+        return self._target_data
+
+    def create_cpython_wrapper(self,
+                               library,
+                               fndesc,
+                               env,
+                               call_helper,
+                               release_gil=False):
+        # There's no cpython wrapper on HeavyDB
+        pass
+
+    def create_cfunc_wrapper(self,
+                             library,
+                             fndesc,
+                             env,
+                             call_helper,
+                             release_gil=False):
+        # There's no cfunc wrapper on HeavyDB
+        pass
+
+    def get_executable(self, library, fndesc, env):
+        """
+        Build a `_dynfunc` function object for `fndesc` and return it.
+
+        Returns
+        -------
+        cfunc
+            the object made by `_dynfunc.make_function` from the pointer
+            reported by `library.get_pointer_to_function`; `library` is
+            passed along as a keepalive.
+
+        Only `cfunc` is returned; `fnptr` and `env` are not part of the result.
+        """
+        # although we don't use this function, it seems to be required
+        # by some parts of codegen in Numba.
+
+        # Code generation
+        fnptr = library.get_pointer_to_function(
+            fndesc.llvm_cpython_wrapper_name
+        )
+
+        # Note: we avoid reusing the original docstring to avoid encoding
+        # issues on Python 2, see issue #1908
+        doc = "compiled wrapper for %r" % (fndesc.qualname,)
+        cfunc = _dynfunc.make_function(
+            fndesc.lookup_module(),
+            fndesc.qualname.rsplit(".", 1)[-1],
+            doc,
+            fnptr,
+            env,
+            # objects to keepalive with the function
+            (library,),
+        )
+        library.codegen.set_env(self.get_env_name(fndesc), env)
+        return cfunc
+
+    def post_lowering(self, mod, library):
+        pass
+
+    # Overrides
+    def get_ufunc_info(self, ufunc_key):
+        return ufunc_db.get_ufunc_info(ufunc_key)
diff --git a/rbc/heavydb/mathimpl.py b/rbc/heavydb/mathimpl.py
index b508e8b5..ce6f4377 100644
--- a/rbc/heavydb/mathimpl.py
+++ b/rbc/heavydb/mathimpl.py
@@ -1,9 +1,18 @@
+import warnings
 import math
-from rbc.externals import gen_codegen, dispatch_codegen
-from numba.core.typing.templates import infer_global
-from numba.core.imputils import lower_builtin
-from numba.core.typing.templates import ConcreteTemplate, signature
-from numba.types import float32, float64, int32, int64, uint64, intp
+from rbc.externals import gen_codegen
+from numba.core.typing.templates import ConcreteTemplate, signature, infer_global
+from numba.core.types import float32, float64, int32, int64, uint64, intp
+from numba.core.intrinsics import INTR_TO_CMATH
+from numba.core.extending import lower_builtin as lower_cpu
+from numba.cuda.mathimpl import lower as lower_gpu  # noqa: F401
+# from .heavydb_compiler import heavydb_cpu_registry, heavydb_gpu_registry
+
+# lower_cpu = heavydb_cpu_registry.lower
+# lower_gpu = heavydb_gpu_registry.lower
+
+# registry = Registry()
+# infer_global = registry.register_global
 
 
 # Adding missing cases in Numba
@@ -75,22 +84,71 @@ class Math_converter(ConcreteTemplate):
 binarys = []
 binarys += [("copysign", "copysignf", math.copysign)]
 binarys += [("atan2", "atan2f", math.atan2)]
-binarys += [("pow", "powf", math.pow)]
 binarys += [("fmod", "fmodf", math.fmod)]
 binarys += [("hypot", "hypotf", math.hypot)]
 binarys += [("remainder", "remainderf", math.remainder)]
 
 
+rbc_INTR_TO_CMATH = {
+    "powf": "llvm.pow.f32",
+    "pow": "llvm.pow.f64",
+
+    "sinf": "llvm.sin.f32",
+    "sin": "llvm.sin.f64",
+
+    "cosf": "llvm.cos.f32",
+    "cos": "llvm.cos.f64",
+
+    "sqrtf": "llvm.sqrt.f32",
+    "sqrt": "llvm.sqrt.f64",
+
+    "expf": "llvm.exp.f32",
+    "exp": "llvm.exp.f64",
+
+    "logf": "llvm.log.f32",
+    "log": "llvm.log.f64",
+
+    "log10f": "llvm.log10.f32",
+    "log10": "llvm.log10.f64",
+
+    "fabsf": "llvm.fabs.f32",
+    "fabs": "llvm.fabs.f64",
+
+    "floorf": "llvm.floor.f32",
+    "floor": "llvm.floor.f64",
+
+    "ceilf": "llvm.ceil.f32",
+    "ceil": "llvm.ceil.f64",
+
+    "truncf": "llvm.trunc.f32",
+    "trunc": "llvm.trunc.f64",
+}
+
+
+if len(rbc_INTR_TO_CMATH) != len(INTR_TO_CMATH):
+    warnings.warn("List of intrinsics is outdated! Please update!")
+
+
 def impl_unary(fname, key, typ):
-    cpu = gen_codegen(fname)
-    gpu = gen_codegen(f"__nv_{fname}")
-    lower_builtin(key, typ)(dispatch_codegen(cpu, gpu))
+    if fname in rbc_INTR_TO_CMATH.keys():
+        # use llvm intrinsics when possible
+        cpu = gen_codegen(rbc_INTR_TO_CMATH.get(fname))
+    else:
+        cpu = gen_codegen(fname)
+    # gpu = gen_codegen(f"__nv_{fname}")
+    lower_cpu(key, typ)(cpu)
+    # lower_gpu(key, typ)(gpu)
 
 
 def impl_binary(fname, key, typ):
-    cpu = gen_codegen(fname)
-    gpu = gen_codegen(f"__nv_{fname}")
-    lower_builtin(key, typ, typ)(dispatch_codegen(cpu, gpu))
+    if fname in rbc_INTR_TO_CMATH.keys():
+        # use llvm intrinsics when possible
+        cpu = gen_codegen(rbc_INTR_TO_CMATH.get(fname))
+    else:
+        cpu = gen_codegen(fname)
+    # gpu = gen_codegen(f"__nv_{fname}")
+    lower_cpu(key, typ, typ)(cpu)
+    # lower_gpu(key, typ, typ)(gpu)
 
 
 for fname64, fname32, key in unarys:
@@ -105,17 +163,42 @@ def impl_binary(fname, key, typ):
 
 # manual mapping
 def impl_ldexp():
+    # cpu
     ldexp_cpu = gen_codegen('ldexp')
-    ldexp_gpu = gen_codegen('__nv_ldexp')
-
     ldexpf_cpu = gen_codegen('ldexpf')
-    ldexpf_gpu = gen_codegen('__nv_ldexpf')
-
-    lower_builtin(math.ldexp, float64, int32)(dispatch_codegen(ldexp_cpu, ldexp_gpu))
-    lower_builtin(math.ldexp, float32, int32)(dispatch_codegen(ldexpf_cpu, ldexpf_gpu))
+    lower_cpu(math.ldexp, float64, int32)(ldexp_cpu)
+    lower_cpu(math.ldexp, float32, int32)(ldexpf_cpu)
+
+    # gpu
+    # ldexp_gpu = gen_codegen('__nv_ldexp')
+    # ldexpf_gpu = gen_codegen('__nv_ldexpf')
+    # lower_gpu(math.ldexp, float64, int32)(ldexp_gpu)
+    # lower_gpu(math.ldexp, float32, int32)(ldexpf_gpu)
+
+
+def impl_pow():
+    # cpu
+    pow_cpu = gen_codegen('pow')
+    powf_cpu = gen_codegen('powf')
+    lower_cpu(math.pow, float64, float64)(pow_cpu)
+    lower_cpu(math.pow, float32, float32)(powf_cpu)
+    lower_cpu(math.pow, float64, int32)(pow_cpu)
+    lower_cpu(math.pow, float32, int32)(powf_cpu)
+
+    # gpu
+    # pow_gpu = gen_codegen('__nv_pow')
+    # powf_gpu = gen_codegen('__nv_powf')
+    # powi_gpu = gen_codegen('__nv_powi')
+    # powif_gpu = gen_codegen('__nv_powif')
+    # lower_gpu(math.pow, float64, float64)(pow_gpu)
+    # lower_gpu(math.pow, float32, float32)(powf_gpu)
+    # lower_gpu(math.pow, float64, int32)(powi_gpu)
+    # lower_gpu(math.pow, float32, int32)(powif_gpu)
 
 
 impl_ldexp()
+impl_pow()
+
 
 # CPU only:
 # math.gcd
diff --git a/rbc/heavydb/remoteheavydb.py b/rbc/heavydb/remoteheavydb.py
index eb52cc94..5b272d86 100644
--- a/rbc/heavydb/remoteheavydb.py
+++ b/rbc/heavydb/remoteheavydb.py
@@ -259,7 +259,7 @@ def is_sizer(t):
 
 
 def get_sizer_enum(t):
-    """Return sizer enum value as defined by the heavydb server.
+    """Return sizer enum value as defined by the HeavyDB server.
     """
     sizer = t.annotation()['sizer']
     sizer = output_buffer_sizer_map.get(sizer or None, sizer)
@@ -1102,7 +1102,7 @@ def retrieve_targets(self):
                 target_info.add_library('stdlib')
                 target_info.add_library('heavydb')
             elif target_info.is_gpu:
-                if self.version < (6, 2):
+                if self.version < (6, 4):
                     # BC note: older heavydb versions do not define
                     # has_libdevice and assume that libdevice exists
                     self.has_cuda_libdevice = True
diff --git a/rbc/irtools.py b/rbc/irtools.py
index 220b4a23..4701b17c 100644
--- a/rbc/irtools.py
+++ b/rbc/irtools.py
@@ -3,7 +3,6 @@
 
 import re
 import warnings
-from contextlib import contextmanager
 from collections import defaultdict
 from llvmlite import ir
 import llvmlite.binding as llvm
@@ -11,12 +10,33 @@
 from .errors import UnsupportedError
 from . import libfuncs
 from rbc.externals import stdio
-from numba.core import codegen, cpu, compiler_lock, \
+from numba.core import cpu, \
     registry, typing, compiler, sigutils, cgutils, \
-    extending, imputils
+    extending, target_extension, retarget, dispatcher
+from numba import njit
 from numba.core import errors as nb_errors
 
 
+class Retarget(retarget.BasicRetarget):
+
+    def __init__(self, target_name):
+        self.target_name = target_name
+        super().__init__()
+
+    @property
+    def output_target(self):
+        return self.target_name
+
+    def compile_retarget(self, cpu_disp):
+        kernel = njit(_target=self.target_name)(cpu_disp.py_func)
+        return kernel
+
+
+def switch_target(target_name):
+    tc = dispatcher.TargetConfigurationStack
+    return tc.switch_target(Retarget(target_name))
+
+
 int32_t = ir.IntType(32)
 int1_t = ir.IntType(1)
 
@@ -64,132 +84,6 @@ def get_called_functions(library, funcname=None):
 
 # ---------------------------------------------------------------------------
 
-class JITRemoteCodeLibrary(codegen.JITCodeLibrary):
-    """JITRemoteCodeLibrary was introduce to prevent numba from calling functions
-    that checks if the module is final. See xnd-project/rbc issue #87.
-    """
-
-    def get_pointer_to_function(self, name):
-        """We can return any random number here! This is just to prevent numba from
-        trying to check if the symbol given by "name" is defined in the module.
-        In cases were RBC is calling an external function (i.e. allocate_varlen_buffer)
-        the symbol will not be defined in the module, resulting in an error.
-        """
-        return 0
-
-    def _finalize_specific(self):
-        """Same as codegen.JITCodeLibrary._finalize_specific but without
-        calling _ensure_finalize at the end
-        """
-        self._codegen._scan_and_fix_unresolved_refs(self._final_module)
-
-
-class JITRemoteCodegen(codegen.JITCPUCodegen):
-    _library_class = JITRemoteCodeLibrary
-
-    def _get_host_cpu_name(self):
-        target_info = TargetInfo()
-        return target_info.device_name
-
-    def _get_host_cpu_features(self):
-        target_info = TargetInfo()
-        features = target_info.device_features
-        server_llvm_version = target_info.llvm_version
-        if server_llvm_version is None or target_info.is_gpu:
-            return ''
-        client_llvm_version = llvm.llvm_version_info
-
-        # See https://github.com/xnd-project/rbc/issues/45
-        remove_features = {
-            (12, 12): [], (11, 11): [], (10, 10): [], (9, 9): [], (8, 8): [],
-            (11, 8): ['tsxldtrk', 'amx-tile', 'amx-bf16', 'serialize', 'amx-int8',
-                      'avx512vp2intersect', 'tsxldtrk', 'amx-tile', 'amx-bf16',
-                      'serialize', 'amx-int8', 'avx512vp2intersect', 'tsxldtrk',
-                      'amx-tile', 'amx-bf16', 'serialize', 'amx-int8',
-                      'avx512vp2intersect', 'cx8', 'enqcmd', 'avx512bf16'],
-            (11, 10): ['tsxldtrk', 'amx-tile', 'amx-bf16', 'serialize', 'amx-int8'],
-            (9, 11): ['sse2', 'cx16', 'sahf', 'tbm', 'avx512ifma', 'sha',
-                      'gfni', 'fma4', 'vpclmulqdq', 'prfchw', 'bmi2', 'cldemote',
-                      'fsgsbase', 'ptwrite', 'xsavec', 'popcnt', 'mpx',
-                      'avx512bitalg', 'movdiri', 'xsaves', 'avx512er',
-                      'avx512vnni', 'avx512vpopcntdq', 'pconfig', 'clwb',
-                      'avx512f', 'clzero', 'pku', 'mmx', 'lwp', 'rdpid', 'xop',
-                      'rdseed', 'waitpkg', 'movdir64b', 'sse4a', 'avx512bw',
-                      'clflushopt', 'xsave', 'avx512vbmi2', '64bit', 'avx512vl',
-                      'invpcid', 'avx512cd', 'avx', 'vaes', 'cx8', 'fma', 'rtm',
-                      'bmi', 'enqcmd', 'rdrnd', 'mwaitx', 'sse4.1', 'sse4.2', 'avx2',
-                      'fxsr', 'wbnoinvd', 'sse', 'lzcnt', 'pclmul', 'prefetchwt1',
-                      'f16c', 'ssse3', 'sgx', 'shstk', 'cmov', 'avx512vbmi',
-                      'avx512bf16', 'movbe', 'xsaveopt', 'avx512dq', 'adx',
-                      'avx512pf', 'sse3'],
-            (9, 8): ['cx8', 'enqcmd', 'avx512bf16'],
-        }.get((server_llvm_version[0], client_llvm_version[0]), None)
-        if remove_features is None:
-            warnings.warn(
-                f'{type(self).__name__}._get_host_cpu_features: `remove_features` dictionary'
-                ' requires an update: detected different LLVM versions in server '
-                f'{server_llvm_version} and client {client_llvm_version}.'
-                f' CPU features: {features}.')
-        else:
-            features += ','
-            for f in remove_features:
-                features = features.replace('+' + f + ',', '').replace('-' + f + ',', '')
-            features.rstrip(',')
-        return features
-
-    def _customize_tm_options(self, options):
-        super()._customize_tm_options(options)
-        # fix reloc_model as the base method sets it using local target
-        target_info = TargetInfo()
-        if target_info.arch.startswith('x86'):
-            reloc_model = 'static'
-        else:
-            reloc_model = 'default'
-        options['reloc'] = reloc_model
-
-    def set_env(self, env_name, env):
-        return None
-
-
-class JITRemoteTypingContext(typing.Context):
-    def load_additional_registries(self):
-        self.install_registry(typing.templates.builtin_registry)
-        super().load_additional_registries()
-
-
-class JITRemoteTargetContext(cpu.CPUContext):
-
-    @compiler_lock.global_compiler_lock
-    def init(self):
-        target_info = TargetInfo()
-        self.address_size = target_info.bits
-        self.is32bit = (self.address_size == 32)
-        self._internal_codegen = JITRemoteCodegen("numba.exec")
-
-    def load_additional_registries(self):
-        self.install_registry(imputils.builtin_registry)
-        super().load_additional_registries()
-
-    def get_executable(self, library, fndesc, env):
-        return None
-
-    def post_lowering(self, mod, library):
-        pass
-
-
-# ---------------------------------------------------------------------------
-# Code generation methods
-
-
-@contextmanager
-def replace_numba_internals_hack():
-    # Hackish solution to prevent numba from calling _ensure_finalize. See issue #87
-    _internal_codegen_bkp = registry.cpu_target.target_context._internal_codegen
-    registry.cpu_target.target_context._internal_codegen = JITRemoteCodegen("numba.exec")
-    yield
-    registry.cpu_target.target_context._internal_codegen = _internal_codegen_bkp
-
-
 def make_wrapper(fname, atypes, rtype, cres, target: TargetInfo, verbose=False):
     """Make wrapper function to numba compile result.
 
@@ -268,7 +162,7 @@ def make_wrapper(fname, atypes, rtype, cres, target: TargetInfo, verbose=False):
 
 
 def compile_instance(func, sig,
-                     target: TargetInfo,
+                     target_info: TargetInfo,
                      typing_context,
                      target_context,
                      pipeline_class,
@@ -312,7 +206,7 @@ def compile_instance(func, sig,
     result = get_called_functions(cres.library, cres.fndesc.llvm_func_name)
 
     for f in result['declarations']:
-        if target.supports(f):
+        if target_info.supports(f):
             continue
         warnings.warn(f'Skipping {fname} that uses undefined function `{f}`')
         return
@@ -320,18 +214,18 @@ def compile_instance(func, sig,
     nvvmlib = libfuncs.Library.get('nvvm')
     llvmlib = libfuncs.Library.get('llvm')
     for f in result['intrinsics']:
-        if target.is_gpu:
+        if target_info.is_gpu:
             if f in nvvmlib:
                 continue
 
-        if target.is_cpu:
+        if target_info.is_cpu:
             if f in llvmlib:
                 continue
 
         warnings.warn(f'Skipping {fname} that uses unsupported intrinsic `{f}`')
         return
 
-    make_wrapper(fname, args, return_type, cres, target, verbose=debug)
+    make_wrapper(fname, args, return_type, cres, target_info, verbose=debug)
 
     main_module = main_library._final_module
     for lib in result['libraries']:
@@ -376,83 +270,104 @@ def compile_to_LLVM(functions_and_signatures,
       LLVM module instance. To get the IR string, use `str(module)`.
 
     """
-    target_desc = registry.cpu_target
-
-    typing_context = JITRemoteTypingContext()
-    target_context = JITRemoteTargetContext(typing_context)
+    # Avoid a circular import error:
+    # * remotejit imports irtools
+    # * irtools imports heavydb
+    # * heavydb imports remotejit
+    from rbc.heavydb import (JITRemoteCPUTypingContext,
+                             JITRemoteGPUTypingContext,
+                             JITRemoteTargetContext,
+                             heavydb_cpu_target,
+                             heavydb_gpu_target)
+
+    device = target_info.name
+    software = target_info.software[0]
+
+    if software == 'HeavyDB':
+        target_name = f'heavydb_{device}'
+        target_desc = heavydb_cpu_target if device == 'cpu' else heavydb_gpu_target
+        typing_context = JITRemoteCPUTypingContext() if device == 'cpu' else \
+            JITRemoteGPUTypingContext()
+        target_context = JITRemoteTargetContext(typing_context, target_name)
+    else:
+        target_name = 'cpu'
+        target_desc = registry.cpu_target
+        typing_context = typing.Context()
+        target_context = cpu.CPUContext(typing_context, target_name)
 
     # Bring over Array overloads (a hack):
     target_context._defns = target_desc.target_context._defns
 
-    with replace_numba_internals_hack():
-        codegen = target_context.codegen()
-        main_library = codegen.create_library('rbc.irtools.compile_to_IR')
-        main_module = main_library._final_module
-
-        if user_defined_llvm_ir is not None:
-            if isinstance(user_defined_llvm_ir, str):
-                user_defined_llvm_ir = llvm.parse_assembly(user_defined_llvm_ir)
-            assert isinstance(user_defined_llvm_ir, llvm.ModuleRef)
-            main_module.link_in(user_defined_llvm_ir, preserve=True)
-
-        succesful_fids = []
-        function_names = []
-        for func, signatures in functions_and_signatures:
-            for fid, sig in signatures.items():
-                fname = compile_instance(func, sig, target_info, typing_context,
-                                         target_context, pipeline_class,
-                                         main_library,
-                                         debug=debug)
-                if fname is not None:
-                    succesful_fids.append(fid)
-                    function_names.append(fname)
-
-        add_metadata_flag(main_library,
-                          pass_column_arguments_by_value=0,
-                          manage_memory_buffer=1)
-        main_library._optimize_final_module()
+    codegen = target_context.codegen()
+    main_library = codegen.create_library(f'rbc.irtools.compile_to_IR_{software}_{device}')
+    main_module = main_library._final_module
 
-        # Remove unused defined functions and declarations
-        used_symbols = defaultdict(set)
-        for fname in function_names:
-            for k, v in get_called_functions(main_library, fname).items():
-                used_symbols[k].update(v)
+    if user_defined_llvm_ir is not None:
+        if isinstance(user_defined_llvm_ir, str):
+            user_defined_llvm_ir = llvm.parse_assembly(user_defined_llvm_ir)
+        assert isinstance(user_defined_llvm_ir, llvm.ModuleRef)
+        main_module.link_in(user_defined_llvm_ir, preserve=True)
+
+    succesful_fids = []
+    function_names = []
+    for func, signatures in functions_and_signatures:
+        for fid, sig in signatures.items():
+            with switch_target(target_name):
+                with target_extension.target_override(target_name):
+                    fname = compile_instance(func, sig, target_info, typing_context,
+                                             target_context, pipeline_class,
+                                             main_library,
+                                             debug=debug)
+                    if fname is not None:
+                        succesful_fids.append(fid)
+                        function_names.append(fname)
+
+    add_metadata_flag(main_library,
+                      pass_column_arguments_by_value=0,
+                      manage_memory_buffer=1)
+    main_library._optimize_final_module()
+
+    # Remove unused defined functions and declarations
+    used_symbols = defaultdict(set)
+    for fname in function_names:
+        for k, v in get_called_functions(main_library, fname).items():
+            used_symbols[k].update(v)
+
+    all_symbols = get_called_functions(main_library)
+
+    unused_symbols = defaultdict(set)
+    for k, lst in all_symbols.items():
+        if k == 'libraries':
+            continue
+        for fn in lst:
+            if fn not in used_symbols[k]:
+                unused_symbols[k].add(fn)
+
+    changed = False
+    for f in main_module.functions:
+        fn = f.name
+        if fn.startswith('llvm.'):
+            if f.name in unused_symbols['intrinsics']:
+                f.linkage = llvm.Linkage.external
+                changed = True
+        elif f.is_declaration:
+            if f.name in unused_symbols['declarations']:
+                f.linkage = llvm.Linkage.external
+                changed = True
+        else:
+            if f.name in unused_symbols['defined']:
+                f.linkage = llvm.Linkage.private
+                changed = True
 
-        all_symbols = get_called_functions(main_library)
+    # TODO: determine unused global_variables and struct_types
 
-        unused_symbols = defaultdict(set)
-        for k, lst in all_symbols.items():
-            if k == 'libraries':
-                continue
-            for fn in lst:
-                if fn not in used_symbols[k]:
-                    unused_symbols[k].add(fn)
-
-        changed = False
-        for f in main_module.functions:
-            fn = f.name
-            if fn.startswith('llvm.'):
-                if f.name in unused_symbols['intrinsics']:
-                    f.linkage = llvm.Linkage.external
-                    changed = True
-            elif f.is_declaration:
-                if f.name in unused_symbols['declarations']:
-                    f.linkage = llvm.Linkage.external
-                    changed = True
-            else:
-                if f.name in unused_symbols['defined']:
-                    f.linkage = llvm.Linkage.private
-                    changed = True
-
-        # TODO: determine unused global_variables and struct_types
-
-        if changed:
-            main_library._optimize_final_module()
-
-        main_module.verify()
-        main_library._finalized = True
-        main_module.triple = target_info.triple
-        main_module.data_layout = target_info.datalayout
+    if changed:
+        main_library._optimize_final_module()
+
+    main_module.verify()
+    main_library._finalized = True
+    main_module.triple = target_info.triple
+    main_module.data_layout = target_info.datalayout
 
     return main_module, succesful_fids
 
diff --git a/rbc/remotejit.py b/rbc/remotejit.py
index ee76aec5..4bf08816 100644
--- a/rbc/remotejit.py
+++ b/rbc/remotejit.py
@@ -949,6 +949,7 @@ def targets(self) -> dict:
         target_info = TargetInfo.host()
         target_info.set('has_numba', True)
         target_info.set('has_cpython', True)
+        target_info.set('software', 'remotejit')
         return dict(cpu=target_info.tojson())
 
     @dispatchermethod
diff --git a/rbc/tests/__init__.py b/rbc/tests/__init__.py
index 8ce7ffbd..595c3e30 100644
--- a/rbc/tests/__init__.py
+++ b/rbc/tests/__init__.py
@@ -144,6 +144,30 @@ def values(self):
         }
 
 
+class _mathTestTable(_DefaultTestTable):
+
+    @classmethod
+    def suffix(cls):
+        return "math"
+
+    @property
+    def sqltypes(self):
+        return ('BOOLEAN', 'BOOLEAN', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'INT',
+                'INT')
+
+    @property
+    def values(self):
+        return {
+            'a': [False, False, True, False, False],
+            'b': [False, True, False, True, False],
+            'x': [0.123 + 1/10.0, 0.123 + 2/10.0, 0.123 + 3/10.0, 0.123 + 4/10.0, 0.123 + 5/10.0],
+            'y': [1/6.0, 2/6.0, 3/6.0, 4/6.0, 5/6.0],
+            'z': [2.23, 3.23, 4.23, 5.23, 6.23],
+            'i': [1, 2, 3, 4, 5],
+            'j': [10, 20, 30, 40, 50],
+        }
+
+
 class _arraynullTestTable(_arrayTestTable):
 
     @classmethod
@@ -217,7 +241,7 @@ def values(self):
 
 
 def heavydb_fixture(caller_globals, minimal_version=(0, 0),
-                    suffices=['', '10', 'null', 'array', 'arraynull', 'text', 'timestamp'],
+                    suffices=['', '10', 'null', 'array', 'arraynull', 'text', 'timestamp', 'math'],
                     load_test_data=True, debug=False):
     """Usage from a rbc/tests/test_xyz.py file:
 
@@ -251,6 +275,8 @@ def heavydb():
                                        i1, b with row size 5, contains null
                                        values.
 
+    f'{heavydb.table_name}math' - contains scalar values for math operations.
+
     f'{heavydb.table_name}text' - contains text t4, t2, t1, s, n
                                   where 't' prefix is for text encoding dict
                                   and 'n' is for text encoding none.
@@ -366,7 +392,8 @@ def require_version(version, message=None, label=None):
     # MULTIPOLYGON, See
     # https://docs.heavy.ai/sql/data-definition-ddl/datatypes-and-fixed-encoding
     for cls in (_DefaultTestTable, _10TestTable, _nullTestTable, _arrayTestTable,
-                _arraynullTestTable, _TextTestTable, _TimestampTestTable):
+                _arraynullTestTable, _TextTestTable, _TimestampTestTable,
+                _mathTestTable):
         suffix = cls.suffix()
         if suffix in suffices:
             obj = cls()
diff --git a/rbc/tests/heavydb/test_array_math.py b/rbc/tests/heavydb/test_array_math.py
index a730d76d..1f2f3a9f 100644
--- a/rbc/tests/heavydb/test_array_math.py
+++ b/rbc/tests/heavydb/test_array_math.py
@@ -114,6 +114,7 @@ def test_heavydb_array_binary_math(heavydb, method, signature, columns):
     heavydb.reset()
 
     s = f'def np_{method}(a, b): return array_api.{method}(a, b)'
+    print(s)
     exec(s, globals())
 
     heavydb(signature)(eval('np_{}'.format(method)))
diff --git a/rbc/tests/heavydb/test_column_basic.py b/rbc/tests/heavydb/test_column_basic.py
index ddb292ff..b9ef7f2d 100644
--- a/rbc/tests/heavydb/test_column_basic.py
+++ b/rbc/tests/heavydb/test_column_basic.py
@@ -3,6 +3,8 @@
 from collections import defaultdict
 import pytest
 import numpy as np
+import math
+from numba import njit
 
 
 rbc_heavydb = pytest.importorskip('rbc.heavydb')
@@ -822,6 +824,26 @@ def convolve(x, kernel, m, y):
     assert list(result) == expected
 
 
+def test_issue343(heavydb):
+    # Before generating LLVM code, the irtools entry point needs
+    # to switch the target context from CPU to GPU, so that functions
+    # are bound to the correct target. In the case below, math.exp
+    # is bound to '@llvm.exp.f64' on CPU and '@__nv_exp' on GPU.
+    if not (heavydb.has_cuda and heavydb.has_libdevice):
+        pytest.skip('test requires heavydb build with GPU support and libdevice')
+
+    @njit
+    def bar(x):
+        return math.exp(x)
+
+    @heavydb('double(double)', devices=['cpu', 'gpu'])
+    def foo(x):
+        return math.exp(x) + bar(x)
+
+    assert '__nv_exp' in str(foo)
+    assert 'llvm.exp.f64' in str(foo)
+
+
 def test_column_dtype(heavydb):
     from numba import types
     table = heavydb.table_name
diff --git a/rbc/tests/heavydb/test_heavydb.py b/rbc/tests/heavydb/test_heavydb.py
index eb48dfca..34b6b753 100644
--- a/rbc/tests/heavydb/test_heavydb.py
+++ b/rbc/tests/heavydb/test_heavydb.py
@@ -370,30 +370,6 @@ def test_casting(heavydb):
     float    | FAIL | FAIL | FAIL | FAIL | OK   | OK   |
     double   | FAIL | FAIL | FAIL | FAIL | FAIL | OK   |
 
-    OmnisciDB version 5.8
-    ----------------------
-             | Functions applied to <itype value>
-    itype    | i8   | i16  | i32  | i64  | f32  | f64  |
-    ---------+------+------+------+------+------+------+
-    tinyint  | OK   | OK   | OK   | OK   | OK   | OK   |
-    smallint | FAIL | OK   | OK   | OK   | OK   | OK   |
-    int      | FAIL | FAIL | OK   | OK   | OK   | OK   |
-    bigint   | FAIL | FAIL | FAIL | OK   | FAIL | OK   |
-    float    | FAIL | FAIL | FAIL | FAIL | OK   | OK   |
-    double   | FAIL | FAIL | FAIL | FAIL | FAIL | OK   |
-
-    OmnisciDB version 5.7 and older
-    -------------------------------
-             | Functions applied to <itype value>
-    itype    | i8   | i16  | i32  | i64  | f32  | f64  |
-    ---------+------+------+------+------+------+------+
-    tinyint  | OK   | OK   | OK   | OK   | FAIL | FAIL |
-    smallint | FAIL | OK   | OK   | OK   | FAIL | FAIL |
-    int      | FAIL | FAIL | OK   | OK   | FAIL | FAIL |
-    bigint   | FAIL | FAIL | FAIL | OK   | FAIL | FAIL |
-    float    | FAIL | FAIL | FAIL | FAIL | OK   | OK   |
-    double   | FAIL | FAIL | FAIL | FAIL | FAIL | OK   |
-
     test_binding is superior test with respect to successful UDF
     executions but it does not check exception messages.
     """
@@ -841,7 +817,7 @@ def test_reconnect(heavydb):
 
 
 def test_non_admin_user(heavydb):
-    heavydb.require_version((5, 9), 'Requires omniscidb 5.9 or newer')
+    heavydb.require_version((5, 9), 'Requires HeavyDB 5.9 or newer')
 
     user = 'rbc_test_non_admin_user'
     password = 'Xy2kq_3lM'
diff --git a/rbc/tests/heavydb/test_math.py b/rbc/tests/heavydb/test_math.py
index 019c1abb..faa07527 100644
--- a/rbc/tests/heavydb/test_math.py
+++ b/rbc/tests/heavydb/test_math.py
@@ -5,55 +5,22 @@
 
 import rbc.heavydb as rbc_heavydb
 from rbc.stdlib import array_api
+from rbc.tests import heavydb_fixture
 
 available_version, reason = rbc_heavydb.is_available()
 pytestmark = pytest.mark.skipif(not available_version, reason=reason)
 
 
-@pytest.fixture(scope='module')
-def nb_version():
-    from rbc.utils import get_version
-    return get_version('numba')
-
-
 @pytest.fixture(scope='module')
 def heavydb():
-    # TODO: use heavydb_fixture from rbc/tests/__init__.py
-    config = rbc_heavydb.get_client_config(debug=not True)
-    m = rbc_heavydb.RemoteHeavyDB(**config)
-    table_name = 'rbc_test_heavydb_math'
-
-    m.sql_execute(f'DROP TABLE IF EXISTS {table_name}')
-
-    m.sql_execute(
-        f'CREATE TABLE IF NOT EXISTS {table_name}'
-        ' (a BOOLEAN, b BOOLEAN, x DOUBLE, y DOUBLE, z DOUBLE, i INT, '
-        'j INT, t INT[], td DOUBLE[], te INT[]);')
-
-    for _i in range(1, 6):
-        a = str((_i % 3) == 0).lower()
-        b = str((_i % 2) == 0).lower()
-        x = 0.123 + _i/10.0
-        y = _i/6.0
-        z = _i + 1.23
-        i = _i
-        j = i * 10
-        t = 'ARRAY[%s]' % (', '.join(str(j + i) for i in range(-i, i+1)))
-        td = 'ARRAY[%s]' % (', '.join(str(j + i/1.0) for i in range(-i, i+1)))
-        te = 'Array[]'
-        m.sql_execute(
-            f'insert into {table_name} values (\'{a}\', \'{b}\', {x}, {y},'
-            f' {z}, {i}, {j}, {t}, {td}, {te})')
-
-    m.table_name = table_name
-    yield m
-
-    m.sql_execute(f'DROP TABLE IF EXISTS {table_name}')
+    for o in heavydb_fixture(globals(), debug=False,
+                             suffices=['math']):
+        yield o
 
 
 math_functions = [
     # Number-theoretic and representation functions
-    ('ceil', 'int64(double)'),
+    ('ceil', 'double(double)'),
     ('comb', 'int64(int64, int64)'),
     ('copysign', 'double(double, double)'),
     ('fabs', 'double(double)'),
@@ -72,7 +39,7 @@ def heavydb():
     ('modf', 'double(double, double)'),
     ('perm', 'int(int, int)'),
     ('prod', 'int64(int64[])'),
-    ('remainder', 'double(double, double)'),
+    # ('remainder', 'double(double, double)'),
     ('trunc', 'double(double)'),
     # Power and logarithmic functions
     ('exp', 'double(double)'),
@@ -83,7 +50,7 @@ def heavydb():
     ('log10', 'double(double)'),
     ('pow', 'double(double, double)'),
     ('sqrt', 'double(double)'),
-    # # Trigonometric functions
+    # Trigonometric functions
     ('acos', 'double(double)'),
     ('asin', 'double(double)'),
     ('atan', 'double(double)'),
@@ -95,14 +62,14 @@ def heavydb():
     ('tan', 'double(double)'),
     ('degrees', 'double(double)'),
     ('radians', 'double(double)'),
-    # # Hyperbolic functions
+    # Hyperbolic functions
     ('acosh', 'double(double)'),
     ('asinh', 'double(double)'),
     ('atanh', 'double(double)'),
     ('cosh', 'double(double)'),
     ('sinh', 'double(double)'),
     ('tanh', 'double(double)'),
-    # # Special functions
+    # Special functions
     ('erf', 'double(double)'),
     ('erfc', 'double(double)'),
     ('gamma', 'double(double)'),
@@ -122,7 +89,7 @@ def heavydb():
 @pytest.mark.parametrize("device", devices)
 @pytest.mark.parametrize("fn_name, signature", math_functions,
                          ids=["math." + item[0] for item in math_functions])
-def test_math_function(heavydb, device, nb_version, fn_name, signature):
+def test_math_function(heavydb, device, fn_name, signature):
     heavydb.reset()
 
     if not heavydb.has_cuda and device == 'gpu':
@@ -188,7 +155,7 @@ def test_math_function(heavydb, device, nb_version, fn_name, signature):
     if fn_name in ['ldexp']:
         xs = 'x, i'
 
-    query = f'select {xs}, {fprefix}{fn_name}({xs}) from {heavydb.table_name}'
+    query = f'select {xs}, {fprefix}{fn_name}({xs}) from {heavydb.table_name}math'
     descr, result = heavydb.sql_execute(query)
     for args in list(result):
         result = args[-1]
@@ -265,9 +232,9 @@ def test_math_function(heavydb, device, nb_version, fn_name, signature):
     ('ldexp', 'double(double, int)', np.ldexp),
     ('frexp0', 'double(double)', lambda x: np.frexp(x)[0]),
     # Rounding functions:
-    ('around', 'double(double)', lambda x: np.around(x)),
-    ('round2',  # round and round_ are not good names
-     'double(double)', lambda x: np.round_(x)),  # force arity to 1
+    # ('around', 'double(double)', lambda x: np.around(x)),
+    # ('round2',  # round and round_ are not good names
+    #  'double(double)', lambda x: np.round_(x)),  # force arity to 1
     ('floor', 'double(double)', np.floor),
     ('ceil', 'double(double)', np.ceil),
     ('trunc', 'double(double)', np.trunc),
@@ -321,7 +288,7 @@ def test_math_function(heavydb, device, nb_version, fn_name, signature):
 @pytest.mark.parametrize("device", devices)
 @pytest.mark.parametrize("fn_name, signature, np_func", numpy_functions,
                          ids=["np." + item[0] for item in numpy_functions])
-def test_numpy_function(heavydb, device, nb_version, fn_name, signature, np_func):
+def test_numpy_function(heavydb, device, fn_name, signature, np_func):
     heavydb.reset()
 
     if not heavydb.has_cuda and device == 'gpu':
@@ -381,7 +348,7 @@ def test_numpy_function(heavydb, device, nb_version, fn_name, signature, np_func
     else:
         raise NotImplementedError(kind)
 
-    query = f'select {xs}, {fn_name}({xs}) from {heavydb.table_name}'
+    query = f'select {xs}, {fn_name}({xs}) from {heavydb.table_name}math'
     descr, result = heavydb.sql_execute(query)
     for args in list(result):
         result = args[-1]
diff --git a/rbc/tests/test_externals_libdevice.py b/rbc/tests/test_externals_libdevice.py
index 223c6047..71b78459 100644
--- a/rbc/tests/test_externals_libdevice.py
+++ b/rbc/tests/test_externals_libdevice.py
@@ -15,6 +15,9 @@
     funcs.append((fname, str(retty), argtys, has_ptr_arg))
 
 
+fns = {}
+
+
 @pytest.fixture(scope="module")
 def heavydb():
 
@@ -50,6 +53,7 @@ def fn(a, b, c):
 
         fn.__name__ = f"{heavydb.table_name}_{fname[5:]}"
         fn = heavydb(f"{retty}({', '.join(argtypes)})", devices=["gpu"])(fn)
+        fns[fname] = fn
 
     for fname, retty, argtys, has_ptr_arg in funcs:
         if has_ptr_arg:
@@ -84,4 +88,7 @@ def test_externals_libdevice(heavydb, fname, retty, argtys, has_ptr_arg):
         cols = ", ".join(tuple(map(lambda x: cols_dict[x], argtys)))
         query = f"SELECT {func_name}({cols}) FROM {table}"
 
-    _, _ = heavydb.sql_execute(query)
+    _, result = heavydb.sql_execute(query)
+
+    assert fname in str(fns[fname])
+    # TODO: check results
diff --git a/utils/client_ssh_tunnel.conf b/utils/client_ssh_tunnel.conf
index 08a05d08..07e4dd50 100644
--- a/utils/client_ssh_tunnel.conf
+++ b/utils/client_ssh_tunnel.conf
@@ -3,10 +3,10 @@
 # ssh tunneling.
 #
 # Usage:
-#   1. Run omnscidb server with ssh port forwarding::
+#   1. Run heavydb server with ssh port forwarding::
 #
 #      ssh -L 6274:127.0.0.1:16274  <remote host name or IP>
-#      bin/heavydb --enable-runtime-udf --enable-table-functions -p 16274 --http-port 16278 --calcite-port 16279
+#      bin/heavydb --enable-dev-table-functions --enable-runtime-udf --enable-table-functions -p 16274 --http-port 16278 --calcite-port 16279
 #
 #   2. Relate the heavydb server to client:
 #