From 96dc0a64952c3b4a5b3fedc6a03b788dc94720b2 Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Fri, 21 Jul 2023 12:49:30 -0400 Subject: [PATCH 1/6] Add record count operation --- .../java/iceberg_cli/IcebergApplication.java | 3 ++ .../java/iceberg_cli/IcebergConnector.java | 30 +++++++++++++++ .../java/iceberg_cli/MetastoreConnector.java | 10 +++++ .../src/main/java/iceberg_cli/cli/Parser.java | 5 +++ .../java/iceberg_cli/utils/PrintUtils.java | 21 ++++++++-- .../iceberg_cli/utils/output/CsvOutput.java | 32 ++-------------- .../iceberg_cli/utils/output/JsonOutput.java | 16 ++++++-- .../java/iceberg_cli/utils/output/Output.java | 32 ++++++++++------ .../test/java/iceberg_cli/FunctionalTest.java | 38 ++++++++++++++----- .../iceberg_cli/TestUserInputConsole.java | 35 ++++++++++++++--- 10 files changed, 161 insertions(+), 61 deletions(-) diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergApplication.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergApplication.java index 08b9025..0b936d9 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergApplication.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergApplication.java @@ -121,6 +121,9 @@ else if (namespace != null) case "metadata": output = printUtils.printTableMetadata(); break; + case "recordcount": + output = printUtils.printRecordCount(); + break; case "tasks": output = printUtils.printTasks(); break; diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java index 165a3b1..5b02dfd 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java @@ -334,9 +334,12 @@ public Map>> getPlanFiles() { List> taskMapList = new ArrayList>(); Map taskMap = new HashMap(); DataFile file = scanTask.file(); + Long recordCount = file.recordCount(); + this.fileRecordCount += recordCount; taskMap.put("content", file.content().toString()); taskMap.put("file_path", file.path().toString()); taskMap.put("file_format", file.format().toString()); + taskMap.put("record_count", Long.toString(recordCount)); taskMap.put("start", Long.toString(scanTask.start())); taskMap.put("length", Long.toString(scanTask.length())); taskMap.put("spec", scanTask.spec().toString()); @@ -365,9 +368,12 @@ public Map>> getPlanTasks() { for (FileScanTask fileTask : scanTask.files()) { Map taskMap = new HashMap(); DataFile file = fileTask.file(); + Long recordCount = fileTask.estimatedRowsCount(); + this.taskRecordCount += recordCount; taskMap.put("content", file.content().toString()); taskMap.put("file_path", file.path().toString()); taskMap.put("file_format", file.format().toString()); + taskMap.put("record_count", Long.toString(recordCount)); taskMap.put("start", Long.toString(fileTask.start())); taskMap.put("length", Long.toString(fileTask.length())); taskMap.put("spec", fileTask.spec().toString()); @@ -455,6 +461,30 @@ public String getUUID() { TableMetadata metadata = ((HasTableOperations) iceberg_table).operations().current(); return metadata.uuid(); } + + public Long getFileRecordCount() { + if (iceberg_table == null) + loadTable(); + + Iterable scanTasks = m_scan.planFiles(); + for (FileScanTask scanTask : scanTasks) { + fileRecordCount += scanTask.file().recordCount(); + } + + return fileRecordCount; + } + + public Long getTaskRecordCount() { + if (iceberg_table == null) + loadTable(); + + Iterable scanTasks = 
m_scan.planTasks(); + for (CombinedScanTask scanTask : scanTasks) { + taskRecordCount += scanTask.files().stream().mapToLong(f -> f.estimatedRowsCount()).sum(); + } + + return taskRecordCount; + } public Snapshot getCurrentSnapshot() { if (iceberg_table == null) diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/MetastoreConnector.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/MetastoreConnector.java index 4824ac3..0901b90 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/MetastoreConnector.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/MetastoreConnector.java @@ -35,6 +35,8 @@ public abstract class MetastoreConnector { protected Long m_snapshotId = null; + protected Long fileRecordCount = 0L; + protected Long taskRecordCount = 0L; public MetastoreConnector(CustomCatalog catalog, String namespace, String tableName, Credentials creds) { } @@ -97,6 +99,14 @@ public void setSnapshotId(Long snapshotId) { this.m_snapshotId = snapshotId; } + public Long getFileRecordCount() { + return fileRecordCount; + } + + public Long getTaskRecordCount() { + return taskRecordCount; + } + @SuppressWarnings("serial") class TableNotFoundException extends RuntimeException { public TableNotFoundException(String message) { diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/Parser.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/Parser.java index 477c6f9..4e1cfa7 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/Parser.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/Parser.java @@ -119,6 +119,11 @@ private void initializeCommands() { metadata.addArgument("identifier", "Table identifier", true); m_commands.put("metadata", metadata); + Command recordcount = new Command("recordcount", "Get total number of records in a table"); + recordcount.addOption("--help", "Show this help message and exit"); + recordcount.addArgument("identifier", "Table identifier", true); + m_commands.put("recordcount", recordcount); + Command read = new Command("read", "Read from a table"); read.addOption("--help", "Show this help message and exit"); read.addArgument("identifier", "Table identifier", true); diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/PrintUtils.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/PrintUtils.java index cdcf90e..8f9e042 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/PrintUtils.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/PrintUtils.java @@ -57,19 +57,30 @@ public String printTableMetadata() throws Exception { return output.tableMetadata(snapshot, targetSchema, tableLocation, dataLocation, type); } + /** + * Get total record count from MetastoreConnector and return to the user is the given format + * @throws Exception + */ + public String printRecordCount() throws Exception { + Long totalEstimatedRecords = metaConn.getFileRecordCount(); + + return output.tableRecordCount(totalEstimatedRecords); + } + /** * Get table details from MetastoreConnector and output to the user is the given format * @throws Exception */ public String printTableDetails() throws Exception { Map>> planFileTasks = metaConn.getPlanFiles(); + Long totalEstimatedRecords = metaConn.getFileRecordCount(); Snapshot snapshot = metaConn.getCurrentSnapshot(); Schema targetSchema = metaConn.getTableSchema(); String tableLocation = metaConn.getTableLocation(); String dataLocation = metaConn.getTableDataLocation(); String type = metaConn.getTableType(); - return 
output.tableDetails(planFileTasks, snapshot, targetSchema, tableLocation, dataLocation, type); + return output.tableDetails(planFileTasks, snapshot, targetSchema, tableLocation, dataLocation, type, totalEstimatedRecords); } /** @@ -142,7 +153,9 @@ public String printUUID() throws Exception { public String printFiles() throws Exception { String outputString = null; - String planFiles = output.tableFiles(metaConn.getPlanFiles()); + Map>> taskMapList = metaConn.getPlanFiles(); + Long totalEstimatedRecords = metaConn.getFileRecordCount(); + String planFiles = output.tableFiles(taskMapList, totalEstimatedRecords); Long snapshotId = metaConn.getCurrentSnapshotId(); switch (format) { case "json": @@ -165,7 +178,9 @@ public String printFiles() throws Exception { public String printTasks() throws Exception { String outputString = null; - String planFiles = output.tableFiles(metaConn.getPlanTasks()); + Map>> taskMapList = metaConn.getPlanTasks(); + Long totalEstimatedRecords = metaConn.getTaskRecordCount(); + String planFiles = output.tableFiles(taskMapList, totalEstimatedRecords); Long snapshotId = metaConn.getCurrentSnapshotId(); switch (format) { case "json": diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/CsvOutput.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/CsvOutput.java index 3135aaa..4e33f81 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/CsvOutput.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/CsvOutput.java @@ -14,6 +14,10 @@ import iceberg_cli.utils.DataConversion; public class CsvOutput extends Output { + + public CsvOutput() { + this.delimiter = ','; + } @Override public String listTables(List tables) throws Exception { @@ -47,34 +51,6 @@ public String namespaceDetails(Map details) throws Exception { return builder.toString(); } - @Override - public String tableFiles(Map>> planFileTasks) throws Exception { - StringBuilder builder = new StringBuilder(); - - // Add data files - char delim = ','; - if (planFileTasks != null) { - builder.append(String.format("TOTAL TASKS : %d\n", planFileTasks.size())); - for (Map.Entry>> entry : planFileTasks.entrySet()) { - builder.append(String.format("TOTAL FILES IN TASK %d : %d\n", entry.getKey(),entry.getValue().size())); - for (Map task : entry.getValue()) { - String taskInfo = String.format("%s%c%s%c%s%c%s%c%s%c%s%c%s", - task.get("content"), delim, - task.get("file_path"), delim, - task.get("file_format"), delim, - task.get("start"), delim, - task.get("length"), delim, - task.get("spec"), delim, - task.get("residual") - ); - builder.append(String.format("%s\n", taskInfo)); - } - } - } - - return builder.toString(); - } - @Override public String allSnapshots(java.lang.Iterable snapshots) throws Exception { return DataConversion.snapshotsAsCsv(snapshots); diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/JsonOutput.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/JsonOutput.java index c611312..7ab6301 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/JsonOutput.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/JsonOutput.java @@ -33,13 +33,21 @@ public String tableMetadata(Snapshot snapshot, Schema schema, String tableLocati return tableMetadata.toString(); } + + @Override + public String tableRecordCount(Long totalEstimatedRecords) throws Exception { + JSONObject recordCount = new JSONObject(); + recordCount.put("record_count", totalEstimatedRecords); + + 
return recordCount.toString(); + } @Override public String tableDetails(Map>> planFileTasks, Snapshot snapshot, - Schema schema, String tableLocation, String dataLocation, String type) throws Exception { + Schema schema, String tableLocation, String dataLocation, String type, Long totalEstimatedRecords) throws Exception { JSONObject tableDetails = new JSONObject(); - JSONObject dataFiles = new JSONObject(tableFiles(planFileTasks)); + JSONObject dataFiles = new JSONObject(tableFiles(planFileTasks, totalEstimatedRecords)); JSONObject schemaObject = new JSONObject(tableSchema(schema)); JSONObject currentSnapshot = new JSONObject(currentSnapshot(snapshot)); tableDetails.put("files", dataFiles); @@ -95,12 +103,13 @@ public String namespaceDetails(Map details) throws Exception { } @Override - public String tableFiles(Map>> planFileTasks) throws Exception { + public String tableFiles(Map>> planFileTasks, Long totalEstimatedRecords) throws Exception { if (planFileTasks == null) { return null; } JSONObject planFilesJsonObject = new JSONObject(); + planFilesJsonObject.put("total_estimated_records", totalEstimatedRecords); for (Map.Entry>> entry : planFileTasks.entrySet()) { JSONArray tasksArray = new JSONArray(); for (Map task : entry.getValue()) { @@ -108,6 +117,7 @@ public String tableFiles(Map>> planFileTasks) taskobj.put("content", task.get("content")); taskobj.put("file_path", task.get("file_path")); taskobj.put("file_format", task.get("file_format")); + taskobj.put("record_count", task.get("record_count")); taskobj.put("start", task.get("start")); taskobj.put("length", task.get("length")); taskobj.put("spec", task.get("spec")); diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/Output.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/Output.java index 47a9b62..cf17e8c 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/Output.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/utils/output/Output.java @@ -13,6 +13,7 @@ import org.apache.iceberg.Snapshot; public class Output { + protected char delimiter = ' '; public String tableMetadata(Snapshot snapshot, Schema schema, String tableLocation, String dataLocation, String type) throws Exception { StringBuilder builder = new StringBuilder(); @@ -30,10 +31,18 @@ public String tableMetadata(Snapshot snapshot, Schema schema, String tableLocati return builder.toString(); } + public String tableRecordCount(Long totalEstimatedRecords) throws Exception { + StringBuilder builder = new StringBuilder(); + builder.append("TOTAL RECORDS\n"); + builder.append(totalEstimatedRecords); + + return builder.toString(); + } + public String tableDetails(Map>> planFileTasks, Snapshot snapshot, - Schema schema, String tableLocation, String dataLocation, String type) throws Exception { + Schema schema, String tableLocation, String dataLocation, String type, Long totalEstimatedRecords) throws Exception { StringBuilder builder = new StringBuilder(); - builder.append(tableFiles(planFileTasks)); + builder.append(tableFiles(planFileTasks, totalEstimatedRecords)); builder.append("SNAPSHOT\n"); builder.append(currentSnapshot(snapshot)); builder.append("\nSCHEMA\n"); @@ -94,23 +103,24 @@ public String namespaceDetails(Map details) throws Exception { return builder.toString(); } - public String tableFiles(Map>> planFileTasks) throws Exception { + public String tableFiles(Map>> planFileTasks, Long totalEstimatedRecords) throws Exception { StringBuilder builder = new StringBuilder(); // Add data files - char 
delim = ' '; if (planFileTasks != null) { + builder.append(String.format("TOTAL ESTIMATED RECORDS : %d\n", totalEstimatedRecords)); builder.append(String.format("TOTAL TASKS : %d\n", planFileTasks.size())); for (Map.Entry>> entry : planFileTasks.entrySet()) { builder.append(String.format("TOTAL FILES IN TASK %d : %d\n", entry.getKey(),entry.getValue().size())); for (Map task : entry.getValue()) { - String taskInfo = String.format("%s%c%s%c%s%c%s%c%s%c%s%c%s", - task.get("content"), delim, - task.get("file_path"), delim, - task.get("file_format"), delim, - task.get("start"), delim, - task.get("length"), delim, - task.get("spec"), delim, + String taskInfo = String.format("%s%c%s%c%s%c%s%c%s%c%s%c%s%c%s", + task.get("content"), this.delimiter, + task.get("file_path"), this.delimiter, + task.get("file_format"), this.delimiter, + task.get("record_count"), this.delimiter, + task.get("start"), this.delimiter, + task.get("length"), this.delimiter, + task.get("spec"), this.delimiter, task.get("residual") ); builder.append(String.format("%s\n", taskInfo)); diff --git a/tools/java-iceberg-cli/src/test/java/iceberg_cli/FunctionalTest.java b/tools/java-iceberg-cli/src/test/java/iceberg_cli/FunctionalTest.java index 4c9309e..5e5596b 100644 --- a/tools/java-iceberg-cli/src/test/java/iceberg_cli/FunctionalTest.java +++ b/tools/java-iceberg-cli/src/test/java/iceberg_cli/FunctionalTest.java @@ -43,7 +43,7 @@ class FunctionalTest { static MetastoreConnector metaConn; static String namespace; static String tableName; - static Integer total_tests = 7; + static Integer total_tests = 9; static Integer passed_tests = 0; static ArrayList failed_tests = new ArrayList(); @@ -171,9 +171,27 @@ void readtable() throws ServletException { throw new ServletException("Error: " + t.getMessage(), t); } } - + @Test @Order(6) + @DisplayName("Test the functionality of recordcount table") + void recordcount() throws ServletException { + try { + Long expected = 1L; + + System.out.println("Running test 6..."); + Long actual = metaConn.getFileRecordCount(); + Assertions.assertEquals(expected, actual); + System.out.println("Test 6 completed"); + passed_tests += 1; + } catch (Throwable t) { + failed_tests.add("recordcount"); + throw new ServletException("Error: " + t.getMessage(), t); + } + } + + @Test + @Order(7) @DisplayName("Test the functionality of rewrite files") void rewritefiles() throws ServletException { try { @@ -181,7 +199,7 @@ void rewritefiles() throws ServletException { MetastoreConnector metaConnDup = new IcebergConnector(catalog, namespace, (tableName + "Dup"), creds); Schema schema = SchemaParser.fromJson("{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"ID\",\"required\":true,\"type\":\"int\"},{\"id\":2,\"name\":\"Name\",\"required\":true,\"type\":\"string\"},{\"id\":3,\"name\":\"Price\",\"required\":true,\"type\":\"double\"},{\"id\":4,\"name\":\"Purchase_date\",\"required\":true,\"type\":\"timestamp\"}]}"); - System.out.println("Running test 8..."); + System.out.println("Running test 7..."); String record = "{\"records\":[{\"ID\":1,\"Name\":\"Testing\",\"Price\": 1000,\"Purchase_date\":\"2022-11-09T12:13:54.480\"}]}"; String dataFiles = metaConn.writeTable(record, null); boolean status = metaConn.commitTable(dataFiles); @@ -196,7 +214,7 @@ void rewritefiles() throws ServletException { // Clean up rewritten table status = metaConnDup.dropTable(); Assertions.assertEquals(true, status); - System.out.println("Test 8 completed"); + System.out.println("Test 7 completed"); passed_tests += 1; } catch 
(Throwable t) { failed_tests.add("rewritefiles"); @@ -205,14 +223,14 @@ void rewritefiles() throws ServletException { } @Test - @Order(7) + @Order(8) @DisplayName("Test the functionality of drop table") void droptable() throws ServletException { try { - System.out.println("Running test 6..."); + System.out.println("Running test 8..."); boolean status = metaConn.dropTable(); Assertions.assertEquals(true, status); - System.out.println("Test 6 completed"); + System.out.println("Test 8 completed"); passed_tests += 1; } catch (Throwable t) { failed_tests.add("droptable"); @@ -221,16 +239,16 @@ void droptable() throws ServletException { } @Test - @Order(8) + @Order(9) @DisplayName("Test the functionality of drop namespace") void dropnamespace() throws ServletException { try { Namespace nmspc = Namespace.of(namespace); - System.out.println("Running test 7..."); + System.out.println("Running test 9..."); boolean status = metaConn.dropNamespace(nmspc); Assertions.assertEquals(true, status); - System.out.println("Test 7 completed"); + System.out.println("Test 9 completed"); passed_tests += 1; } catch (Throwable t) { failed_tests.add("dropnamespace"); diff --git a/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java b/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java index 99ac151..76fc660 100644 --- a/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java +++ b/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java @@ -20,7 +20,7 @@ class TestUserInputConsole { static String warehouse; static String namespace; static String tablename; - static Integer total_tests = 6; + static Integer total_tests = 7; static Integer passed_tests = 0; static ArrayList failed_tests = new ArrayList(); @@ -132,6 +132,29 @@ void listtables() throws ServletException { @Test @Order(5) + @DisplayName("Test the functionality of recordcount action") + void recordcount() throws ServletException { + String[] args = new String[4]; + args[0] = "-u"; + args[1] = uri; + args[2] = "recordcount"; + args[3] = namespace + "." + tablename; + + try { + System.out.println("Running test 6..."); + String out = new IcebergApplication().processRequest(args); + Assertions.assertEquals("TOTAL RECORDS\n" + 1, out); + System.out.println("Test 5 completed"); + passed_tests += 1; + } catch (Throwable t) { + failed_tests.add("listtables"); + throw new ServletException("Error: " + t.getMessage(), t); + } + + } + + @Test + @Order(6) @DisplayName("Test the functionality of drop action") void droptable() throws ServletException { String[] args = new String[4]; @@ -141,10 +164,10 @@ void droptable() throws ServletException { args[3] = namespace + "." + tablename; try { - System.out.println("Running test 5..."); + System.out.println("Running test 6..."); String out = new IcebergApplication().processRequest(args); Assertions.assertEquals("Operation successful? 
true", out); - System.out.println("Test 5 completed"); + System.out.println("Test 6 completed"); passed_tests += 1; } catch (Throwable t) { failed_tests.add("droptable"); @@ -154,7 +177,7 @@ void droptable() throws ServletException { } @Test - @Order(6) + @Order(7) @DisplayName("Test the functionality of dropnamespace action") void dropnamespace() throws ServletException { String[] args = new String[4]; @@ -164,10 +187,10 @@ void dropnamespace() throws ServletException { args[3] = namespace; try { - System.out.println("Running test 6..."); + System.out.println("Running test 7..."); String out = new IcebergApplication().processRequest(args); Assertions.assertEquals("Operation successful? true", out); - System.out.println("Test 6 completed"); + System.out.println("Test 7 completed"); passed_tests += 1; } catch (Throwable t) { failed_tests.add("dropnamespace"); From 1d3b30e6089fc0890c30dd787b58abb316121c05 Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Fri, 21 Jul 2023 12:56:57 -0400 Subject: [PATCH 2/6] Reload table scan after operations that commit to a table --- .../java/iceberg_cli/IcebergConnector.java | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java index 5b02dfd..9701112 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/IcebergConnector.java @@ -130,6 +130,15 @@ private void initCatalog(CustomCatalog catalog) throws IOException { m_catalog.initialize("hive", properties); } + private void loadScan() { + // Use snapshot passed by the user. + // By default, use the latest snapshot. + m_scan = iceberg_table.newScan(); + if (m_snapshotId != null) { + m_scan = m_scan.useSnapshot(m_snapshotId); + } + } + public void setTableIdentifier(String namespace, String tableName) { m_tableIdentifier = TableIdentifier.of(namespace, tableName); } @@ -150,13 +159,7 @@ public Table loadTable(TableIdentifier identifier) { public void loadTable() { iceberg_table = loadTable(m_tableIdentifier); - - // Use snapshot passed by the user. - // By default, use the latest snapshot. 
- m_scan = iceberg_table.newScan(); - if (m_snapshotId != null) { - m_scan = m_scan.useSnapshot(m_snapshotId); - } + loadScan(); } public boolean createTable(Schema schema, PartitionSpec spec, boolean overwrite) { @@ -279,6 +282,9 @@ public boolean alterTable(String newSchema) throws Exception { // all good - commit changes updateSchema.commit(); + // Reload table scan + loadScan(); + return true; } @@ -287,7 +293,11 @@ public boolean dropTable() { loadTable(); System.out.println("Dropping the table " + m_tableIdentifier); - if (m_catalog.dropTable(m_tableIdentifier)) { + boolean status = m_catalog.dropTable(m_tableIdentifier); + // Reload table scan + loadScan(); + + if (status) { System.out.println("Table dropped successfully"); return true; } @@ -764,6 +774,9 @@ public boolean commitTable(String dataFiles) throws Exception { io.close(); System.out.println("Txn Complete!"); + // Reload table scan after a commit + loadScan(); + return true; } @@ -795,6 +808,9 @@ public boolean rewriteFiles(String dataFiles) throws Exception { transaction.commitTransaction(); io.close(); System.out.println("Txn Complete!"); + + // Reload table scan after a commit + loadScan(); return true; } From 35585f490e0e2aa24702ed0f7c3ea3842985a570 Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Fri, 21 Jul 2023 13:12:52 -0400 Subject: [PATCH 3/6] Fix info message --- .../src/test/java/iceberg_cli/TestUserInputConsole.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java b/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java index 76fc660..5426de9 100644 --- a/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java +++ b/tools/java-iceberg-cli/src/test/java/iceberg_cli/TestUserInputConsole.java @@ -141,7 +141,7 @@ void recordcount() throws ServletException { args[3] = namespace + "." 
+ tablename; try { - System.out.println("Running test 6..."); + System.out.println("Running test 5..."); String out = new IcebergApplication().processRequest(args); Assertions.assertEquals("TOTAL RECORDS\n" + 1, out); System.out.println("Test 5 completed"); From c098cb53fdfeca35e21765291be5beb1263815cb Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Tue, 25 Jul 2023 11:58:59 -0400 Subject: [PATCH 4/6] Add short option for table format --- .../src/main/java/iceberg_cli/cli/OptionsParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/OptionsParser.java b/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/OptionsParser.java index f0326b3..deda61c 100644 --- a/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/OptionsParser.java +++ b/tools/java-iceberg-cli/src/main/java/iceberg_cli/cli/OptionsParser.java @@ -60,7 +60,7 @@ public void parseOptions(Map commands, String[] args) throws Pa options.addOption(Option.builder("w").longOpt("warehouse").argName("value").hasArg().desc("Table location").build()); options.addOption(Option.builder("o").longOpt("output").argName("console|csv|json").hasArg().desc("Show output in this format").build()); options.addOption(Option.builder().longOpt("catalog").argName("value").hasArg().desc("Read properties for this catalog from the config file").build()); - options.addOption(Option.builder().longOpt("format").argName("iceberg|hive").hasArg().desc("The format of the table we want to display").build()); + options.addOption(Option.builder("m").longOpt("format").argName("iceberg|hive").hasArg().desc("The format of the table we want to display").build()); options.addOption(Option.builder().longOpt("snapshot").argName("snapshot ID").hasArg().desc("Snapshot ID to use").build()); CommandLineParser parser = new DefaultParser(); From dd9303b0e72e8405d839e59bea7d37cf34b856c0 Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Tue, 25 Jul 2023 12:04:22 -0400 Subject: [PATCH 5/6] signed-off s-akhtar-baig Signed-off-by: s-akhtar-baig From ca0371b304146dd6d4e8cac9f5971855eb1a6cd0 Mon Sep 17 00:00:00 2001 From: s-akhtar-baig Date: Tue, 25 Jul 2023 13:31:29 -0400 Subject: [PATCH 6/6] Update supported operations Signed-off-by: s-akhtar-baig --- README.md | 12 +++++++----- docs/sample_cli_commands.md | 7 +++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 318344a..e9ef109 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,13 @@ java -jar /home/java-iceberg-cli/target/ --help ### CLI Pass in the URI to the Hive Metastore using -u or --uri options and optionally specify a storage path when creating a new namespace using -w or --warehouse options. By default, the default FS value specified in Hive's configuration file will be used as the warehouse path. -Set credentials for the object store using environment variables. Please note that you would need to specify AWS_ENDPOINT if using a non-AWS object store. +Set credentials for the object store using environment variables. Please note that you would need to specify ENDPOINT if using a non-AWS object store. 
``` export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= export AWS_REGION= # specify endpoint to a non-AWS object store, if applicable -export AWS_ENDPOINT= +export ENDPOINT= ``` Credentials can also be passed to the CLI as: @@ -86,7 +86,7 @@ export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= export AWS_REGION= # specify endpoint to a non-AWS object store, if applicable -export AWS_ENDPOINT= +export ENDPOINT= export URI= export WAREHOUSE= ``` @@ -105,7 +105,7 @@ usage: java -jar [options] command [args] -c,--credential Supported credentials : AWS --catalog Read properties for this catalog from the config file - --format The format of the table we want to + -m,--format The format of the table we want to display -h,--help Show this help message -o,--output Show output in this format @@ -117,12 +117,13 @@ Commands: drop Drop a table or a namespace schema Fetch schema of a table metadata Get table metadata + recordcount Get total number of records in a table read Read from a table commit Commit file(s) to a table - rewrite Rewrite file(s) in a table list List tables or namespaces type Fetch table type uuid Fetch uuid of a table + rewrite Rewrite (replace) file(s) in a table spec Fetch partition spec of a table rename Rename a table a table create Create a table or a namespace @@ -132,6 +133,7 @@ Commands: write Write to a table snapshot Fetch latest or all snapshot(s) of a table tasks List scan tasks of a table + alter Alter a table ``` Each subcommand provides a help message of its own. ``` diff --git a/docs/sample_cli_commands.md b/docs/sample_cli_commands.md index 6702f5f..4e5a030 100644 --- a/docs/sample_cli_commands.md +++ b/docs/sample_cli_commands.md @@ -182,11 +182,18 @@ table { ``` +* Get record count of a table. Table *test_table* in namespace *test* in this example. +``` +% java -jar -u recordcount test.test_table + +``` + ### Details * Get details of a table. Table *test_table* in namespace *test* in this example. ``` % java -jar -u describe test.test_table +TOTAL ESTIMATED RECORDS : 1 TOTAL TASKS: 1 TOTAL FILES IN TASK 0 : 1 DATA PARQUET 0 2000 [] true
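
For reference, the counts shown in these examples come straight from Iceberg's scan planning, which is what the connector changes in this series use. Below is a minimal, self-contained sketch of that logic; the class and method names are illustrative only (not part of the CLI), and it assumes an already-loaded `org.apache.iceberg.Table`. It sums exact per-file counts via `planFiles()` and `DataFile.recordCount()`, and the per-task estimate via `planTasks()` and `estimatedRowsCount()`.

```java
import java.io.IOException;

import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.io.CloseableIterable;

// Illustrative helper, not part of the CLI source tree.
public class RecordCountSketch {

    // Exact count: sum DataFile.recordCount() over every data file in the scan.
    public static long fileRecordCount(Table table, Long snapshotId) throws IOException {
        TableScan scan = table.newScan();
        if (snapshotId != null) {
            scan = scan.useSnapshot(snapshotId);
        }
        long total = 0L;
        try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
            for (FileScanTask task : tasks) {
                total += task.file().recordCount();
            }
        }
        return total;
    }

    // Estimate at the combined-task level, as reported by the "tasks" output.
    public static long taskRecordCount(Table table, Long snapshotId) throws IOException {
        TableScan scan = table.newScan();
        if (snapshotId != null) {
            scan = scan.useSnapshot(snapshotId);
        }
        long total = 0L;
        try (CloseableIterable<CombinedScanTask> tasks = scan.planTasks()) {
            for (CombinedScanTask task : tasks) {
                total += task.files().stream()
                              .mapToLong(FileScanTask::estimatedRowsCount)
                              .sum();
            }
        }
        return total;
    }
}
```

In the patch itself, the `recordcount` and `describe` actions report the per-file sum, while the `tasks` action reports the per-task estimate.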