From d28e14f74b9f521519d271a420aa09dd80df3baf Mon Sep 17 00:00:00 2001 From: sezruby Date: Fri, 26 Feb 2021 11:59:40 -0800 Subject: [PATCH 1/3] Use indexes subdirectory for custom index system path --- .../scala/com/microsoft/hyperspace/index/PathResolver.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala index fa3538ad8..f92f0c56c 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala @@ -63,9 +63,8 @@ private[hyperspace] class PathResolver(conf: SQLConf, hadoopConf: Configuration) * @return Hyperspace index system path. */ def systemPath: Path = { - val defaultIndexesPath = - new Path(conf.getConfString("spark.sql.warehouse.dir"), "indexes") - new Path(conf.getConfString(IndexConstants.INDEX_SYSTEM_PATH, defaultIndexesPath.toString)) + val defaultIndexesPath = conf.getConfString("spark.sql.warehouse.dir") + new Path(conf.getConfString(IndexConstants.INDEX_SYSTEM_PATH, defaultIndexesPath), "indexes") } } From fec013afb254c80d3b5e4be7585dac757b5bf43e Mon Sep 17 00:00:00 2001 From: sezruby Date: Fri, 5 Mar 2021 09:52:00 -0800 Subject: [PATCH 2/3] Add INDEX_DIR_NAME config --- .../microsoft/hyperspace/index/IndexConstants.scala | 6 ++++-- .../microsoft/hyperspace/index/PathResolver.scala | 12 ++++++++++-- .../microsoft/hyperspace/index/HyperspaceSuite.scala | 1 + .../index/IndexCollectionManagerTest.scala | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala index e4e930358..9d8f4c40d 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala @@ -19,12 +19,14 @@ package com.microsoft.hyperspace.index import org.apache.spark.sql.internal.SQLConf object IndexConstants { - val INDEXES_DIR = "indexes" - // Config used for setting the system path, which is considered as a "root" path for Hyperspace; // e.g, indexes are created under the system path. val INDEX_SYSTEM_PATH = "spark.hyperspace.system.path" + // Config used for subdirectory name under the system path. + val INDEX_DIR_NAME = "spark.hyperspace.system.indexDirName" + val INDEX_DIR_NAME_DEFAULT = "indexes" + // Config used to set the number of buckets for the index. val INDEX_NUM_BUCKETS_LEGACY = "spark.hyperspace.index.num.buckets" val INDEX_NUM_BUCKETS = "spark.hyperspace.index.numBuckets" diff --git a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala index f92f0c56c..703f8d4fd 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala @@ -63,8 +63,16 @@ private[hyperspace] class PathResolver(conf: SQLConf, hadoopConf: Configuration) * @return Hyperspace index system path. */ def systemPath: Path = { - val defaultIndexesPath = conf.getConfString("spark.sql.warehouse.dir") - new Path(conf.getConfString(IndexConstants.INDEX_SYSTEM_PATH, defaultIndexesPath), "indexes") + val indexDirName = + conf.getConfString(IndexConstants.INDEX_DIR_NAME, IndexConstants.INDEX_DIR_NAME_DEFAULT) + val indexSystemPath = conf.getConfString( + IndexConstants.INDEX_SYSTEM_PATH, + conf.getConfString("spark.sql.warehouse.dir")) + if (indexDirName.isEmpty) { + new Path(indexSystemPath) + } else { + new Path(indexSystemPath, indexDirName) + } } } diff --git a/src/test/scala/com/microsoft/hyperspace/index/HyperspaceSuite.scala b/src/test/scala/com/microsoft/hyperspace/index/HyperspaceSuite.scala index b6a374845..87537681d 100644 --- a/src/test/scala/com/microsoft/hyperspace/index/HyperspaceSuite.scala +++ b/src/test/scala/com/microsoft/hyperspace/index/HyperspaceSuite.scala @@ -34,6 +34,7 @@ trait HyperspaceSuite extends SparkFunSuite with SparkInvolvedSuite { super.beforeAll() FileUtils.delete(systemPath) spark.conf.set(IndexConstants.INDEX_SYSTEM_PATH, systemPath.toUri.toString) + spark.conf.set(IndexConstants.INDEX_DIR_NAME, "") clearCache() } diff --git a/src/test/scala/com/microsoft/hyperspace/index/IndexCollectionManagerTest.scala b/src/test/scala/com/microsoft/hyperspace/index/IndexCollectionManagerTest.scala index 13dc15a88..2c91926a6 100644 --- a/src/test/scala/com/microsoft/hyperspace/index/IndexCollectionManagerTest.scala +++ b/src/test/scala/com/microsoft/hyperspace/index/IndexCollectionManagerTest.scala @@ -71,6 +71,7 @@ class IndexCollectionManagerTest extends SparkFunSuite with SparkInvolvedSuite { override def beforeAll(): Unit = { super.beforeAll() spark.conf.set(IndexConstants.INDEX_SYSTEM_PATH, indexSystemPath) + spark.conf.set(IndexConstants.INDEX_DIR_NAME, "") when(mockFileSystemFactory.create(any[Path], any[Configuration])).thenReturn(mockFileSystem) indexCollectionManager = new IndexCollectionManager( From 4537281683053d08827269442683558c9a8a425b Mon Sep 17 00:00:00 2001 From: sezruby Date: Mon, 12 Apr 2021 11:02:53 -0700 Subject: [PATCH 3/3] Change systemPath => indexLocationDir --- .../hyperspace/index/IndexCollectionManager.scala | 2 +- .../com/microsoft/hyperspace/index/IndexConstants.scala | 2 +- .../com/microsoft/hyperspace/index/PathResolver.scala | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexCollectionManager.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexCollectionManager.scala index c896e1317..4271c35ad 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/IndexCollectionManager.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/IndexCollectionManager.scala @@ -152,7 +152,7 @@ class IndexCollectionManager( private def indexLogManagers: Seq[IndexLogManager] = { val hadoopConf = spark.sessionState.newHadoopConf() - val rootPath = PathResolver(conf, hadoopConf).systemPath + val rootPath = PathResolver(conf, hadoopConf).indexLocationDir val fs = fileSystemFactory.create(rootPath, hadoopConf) val indexPaths: Seq[Path] = if (fs.exists(rootPath)) { fs.listStatus(rootPath).map(_.getPath) diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala index 86e231e31..b50315091 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/IndexConstants.scala @@ -25,7 +25,7 @@ object IndexConstants { // Config used for subdirectory name under the system path. val INDEX_DIR_NAME = "spark.hyperspace.system.indexDirName" - val INDEX_DIR_NAME_DEFAULT = "indexes" + val INDEX_DIR_NAME_DEFAULT = "hyperspace" // Config used to set the number of buckets for the index. val INDEX_NUM_BUCKETS_LEGACY = "spark.hyperspace.index.num.buckets" diff --git a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala index 703f8d4fd..fff3db9ba 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/PathResolver.scala @@ -37,7 +37,7 @@ private[hyperspace] class PathResolver(conf: SQLConf, hadoopConf: Configuration) * @return resolved index path */ def getIndexPath(name: String): Path = { - val root = systemPath + val root = indexLocationDir val fs = root.getFileSystem(hadoopConf) if (fs.exists(root)) { // Note that fs.exists() is case-sensitive in some platforms and case-insensitive @@ -58,11 +58,11 @@ private[hyperspace] class PathResolver(conf: SQLConf, hadoopConf: Configuration) } /** - * Get the Hyperspace index system path. + * Get the Hyperspace index location dir path. * - * @return Hyperspace index system path. + * @return Hyperspace index location dir path. */ - def systemPath: Path = { + def indexLocationDir: Path = { val indexDirName = conf.getConfString(IndexConstants.INDEX_DIR_NAME, IndexConstants.INDEX_DIR_NAME_DEFAULT) val indexSystemPath = conf.getConfString(