From 096fdafd206905ff88c9e2ef810d1a72a7cd1acc Mon Sep 17 00:00:00 2001 From: Michael Cizmar Date: Mon, 19 Jan 2026 22:02:36 -0600 Subject: [PATCH 1/3] Initial draft of appsearch output connector. Still needs to be updated. --- connectors/appsearch/build.xml | 100 ++++ .../output/appsearch/AppSearchAction.java | 92 ++++ .../output/appsearch/AppSearchConfig.java | 215 ++++++++ .../output/appsearch/AppSearchConnection.java | 289 +++++++++++ .../output/appsearch/AppSearchConnector.java | 414 +++++++++++++++ .../output/appsearch/AppSearchDelete.java | 122 +++++ .../output/appsearch/AppSearchIndex.java | 285 +++++++++++ .../output/appsearch/AppSearchParam.java | 92 ++++ .../agents/output/appsearch/Messages.java | 119 +++++ .../agents/output/appsearch/ResponseBody.java | 36 ++ .../agents/output/appsearch/Utils.java | 78 +++ .../output/appsearch/common_en_US.properties | 32 ++ .../output/appsearch/common_es_ES.properties | 32 ++ .../output/appsearch/common_fr_FR.properties | 32 ++ .../output/appsearch/common_ja_JP.properties | 32 ++ .../output/appsearch/common_zh_CN.properties | 32 ++ .../output/appsearch/editConfiguration.js | 162 ++++++ .../editConfiguration_Parameters.html | 76 +++ .../appsearch/editConfiguration_Server.html | 74 +++ .../output/appsearch/viewConfiguration.html | 85 ++++ .../output/appsearch/AppSearchIndexTest.java | 35 ++ .../appsearch/tests/APISanityHSQLDBIT.java | 326 ++++++++++++ .../output/appsearch/tests/BaseHSQLDB.java | 33 ++ .../output/appsearch/tests/BaseITHSQLDB.java | 74 +++ .../appsearch/tests/BasePostgresql.java | 33 ++ .../output/appsearch/tests/BaseUIHSQLDB.java | 41 ++ .../appsearch/tests/NavigationHSQLDBUI.java | 157 ++++++ .../output/appsearch/tests/UtilsTest.java | 63 +++ connectors/appsearch/pom.xml | 478 ++++++++++++++++++ 29 files changed, 3639 insertions(+) create mode 100644 connectors/appsearch/build.xml create mode 100644 
connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchAction.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConfig.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnection.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchParam.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Messages.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/ResponseBody.java create mode 100644 connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java create mode 100644 connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_en_US.properties create mode 100644 connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_es_ES.properties create mode 100644 connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_fr_FR.properties create mode 100644 connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_ja_JP.properties create mode 100644 
connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_zh_CN.properties create mode 100644 connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration.js create mode 100644 connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Parameters.html create mode 100644 connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Server.html create mode 100644 connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/viewConfiguration.html create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndexTest.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/APISanityHSQLDBIT.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseHSQLDB.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseITHSQLDB.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BasePostgresql.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseUIHSQLDB.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/NavigationHSQLDBUI.java create mode 100644 connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/UtilsTest.java create mode 100644 connectors/appsearch/pom.xml diff --git a/connectors/appsearch/build.xml b/connectors/appsearch/build.xml new file mode 100644 index 0000000000..10c7d03791 --- /dev/null +++ 
b/connectors/appsearch/build.xml @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchAction.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchAction.java new file mode 100644 index 0000000000..b95315332e --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchAction.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.HttpClient; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.SocketTimeoutException; +import java.util.Locale; + +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.crawler.system.Logging; + +public class AppSearchAction extends AppSearchConnection { + + public enum CommandEnum { + _optimize, _refresh, _stats, _forcemerge, _none; + } + + public AppSearchAction(HttpClient client, AppSearchConfig config) + throws ManifoldCFException { + super(config, client); + } + + public void executeGET(CommandEnum cmd, boolean checkConnection) + throws ManifoldCFException, ServiceInterruption { + String command = cmd.toString(); + if (cmd.equals(CommandEnum._none)) { + command = ""; + } + + StringBuffer url = getApiUrl(command); + HttpGet method = new HttpGet(url.toString()); + call(method); + String error = checkJson(jsonException); + if (getResult() == Result.OK && error == null) { + return; + } + setResult("JSONERROR", Result.ERROR, error); + Logging.connectors.warn("AppSearch: Commit failed: " + getResponse()); + } + + public void executePOST(CommandEnum cmd, boolean checkConnection) throws ManifoldCFException, ServiceInterruption { + String command = cmd.toString(); + if (cmd.equals(CommandEnum._none)) { + command = ""; + } + + StringBuffer url = getApiUrl(command); + HttpPost method = new HttpPost(url.toString()); + call(method); + String error = checkJson(jsonException); + if (getResult() == Result.OK && error == null) { + return; + } + setResult("JSONERROR", Result.ERROR, error); + Logging.connectors.warn("AppSearch: Commit failed: " + getResponse()); + } + + @Override + protected void handleIOException(IOException e) throws 
ManifoldCFException, ServiceInterruption { + if (e instanceof InterruptedIOException && !(e instanceof SocketTimeoutException)) { + throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED); + } + setResult(e.getClass().getSimpleName().toUpperCase(Locale.ROOT), Result.ERROR, e.getMessage()); + long currentTime = System.currentTimeMillis(); + + throw new ServiceInterruption("IO exception: " + e.getMessage(), e, + currentTime + 60000L, + currentTime + 1L * 60L * 60000L, + 1, false); + } + +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConfig.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConfig.java new file mode 100644 index 0000000000..33c6a95921 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConfig.java @@ -0,0 +1,215 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import org.apache.manifoldcf.core.interfaces.IDFactory; +import org.apache.manifoldcf.connectorcommon.interfaces.IKeystoreManager; +import org.apache.manifoldcf.connectorcommon.interfaces.KeystoreManagerFactory; +import org.apache.manifoldcf.core.interfaces.ConfigParams; +import org.apache.manifoldcf.core.interfaces.IPostParameters; +import org.apache.manifoldcf.core.interfaces.IThreadContext; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; + +import java.util.Locale; +import java.io.IOException; + +public class AppSearchConfig extends AppSearchParam { + + /** + * + */ + private static final long serialVersionUID = -2071296573398352538L; + + /** + * Parameters used for the configuration + */ + final private static ParameterEnum[] CONFIGURATIONLIST + = { + // ParameterEnum.SERVERVERSION, + ParameterEnum.SERVERLOCATION, + ParameterEnum.SERVERKEYSTORE, + ParameterEnum.ENGINENAME, + ParameterEnum.SECRETAPIKEY, + ParameterEnum.CONTENTATTRIBUTENAME, + ParameterEnum.URIATTRIBUTENAME, + ParameterEnum.CREATEDDATEATTRIBUTENAME, + ParameterEnum.MODIFIEDDATEATTRIBUTENAME, + ParameterEnum.INDEXINGDATEATTRIBUTENAME, + ParameterEnum.MIMETYPEATTRIBUTENAME, + ParameterEnum.EXTRACTORPATTERN, + ParameterEnum.REPLACEMENTSTRING + }; + + /** + * Build a set of AppSearchParameters by reading ConfigParams. If the value returned by ConfigParams.getParameter + * is null, the default value is set. 
+ * + * @param params + */ + public AppSearchConfig(ConfigParams params) { + super(CONFIGURATIONLIST); + for (ParameterEnum param : CONFIGURATIONLIST) { + final boolean isPassword = param.name().endsWith("PASSWORD"); + // Nothing special is needed for keystores here; the keystore in string format is the value + // of the field + //final boolean isKeystore = param.name().endsWith("KEYSTORE"); + String value; + if (isPassword) { + put(param, params.getObfuscatedParameter(param.name()), param.defaultValue); + } else { + put(param, params.getParameter(param.name()), param.defaultValue); + } + } + } + + private void put(ParameterEnum param, String value, String defaultValue) { + if (value == null) { + put(param, defaultValue); + } else { + put(param, value); + } + } + + /** + * @return a unique identifier for one index on one AppSearch instance. + */ + public String getUniqueIndexIdentifier() { + StringBuffer sb = new StringBuffer(); + sb.append(getServerLocation()); + if (sb.charAt(sb.length() - 1) != '/') { + sb.append('/'); + } + sb.append(getEngineName()); + return sb.toString(); + } + + public final static String contextToConfig(IThreadContext threadContext, IPostParameters variableContext, ConfigParams parameters) throws ManifoldCFException { + String rval = null; + for (ParameterEnum param : CONFIGURATIONLIST) { + final String paramName = param.name().toLowerCase(Locale.ROOT); + final boolean isPassword = param.name().endsWith("PASSWORD"); + final boolean isKeystore = param.name().endsWith("KEYSTORE"); + if (isKeystore) { + String keystoreValue = variableContext.getParameter(paramName); //parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE); + IKeystoreManager mgr; + if (keystoreValue != null && keystoreValue.length() > 0) { + mgr = KeystoreManagerFactory.make("", keystoreValue); + } else { + mgr = KeystoreManagerFactory.make(""); + } + + // All the functionality needed to gather keystore-related variables must go here. 
+ // Specifically, we need to also handle add/delete signals from the UI as well as + // preserving the original keystore and making sure it gets gathered if no other signal + // is present. + String configOp = variableContext.getParameter(paramName + "_op"); + if (configOp != null) { + if (configOp.equals("Delete")) { + String alias = variableContext.getParameter(paramName + "_alias"); + mgr.remove(alias); + } else if (configOp.equals("Add")) { + String alias = IDFactory.make(threadContext); + byte[] certificateValue = variableContext.getBinaryBytes(paramName + "_certificate"); + java.io.InputStream is = new java.io.ByteArrayInputStream(certificateValue); + String certError = null; + try { + mgr.importCertificate(alias, is); + } catch (Throwable e) { + certError = e.getMessage(); + } finally { + try { + is.close(); + } catch (IOException e) { + // Don't report anything + } + } + + if (certError != null) { + // Redirect to error page + rval = "Illegal certificate: " + certError; + } + } + } + parameters.setParameter(param.name(), mgr.getString()); + } else { + String p = variableContext.getParameter(paramName); + if (p != null) { + if (isPassword) { + parameters.setObfuscatedParameter(param.name(), variableContext.mapKeyToPassword(p)); + } else { + parameters.setParameter(param.name(), p); + } + } + } + } + + return rval; + } + + final public String getServerLocation() { + return get(ParameterEnum.SERVERLOCATION); + } + + final public IKeystoreManager getSSLKeystore() + throws ManifoldCFException { + final String packedKeystore = get(ParameterEnum.SERVERKEYSTORE); + if (packedKeystore == null || packedKeystore.length() == 0) { + return null; + } + return KeystoreManagerFactory.make("", packedKeystore); + } + + final public String getEngineName() { + return get(ParameterEnum.ENGINENAME); + } + + final public String getSecretApiKey() { + return get(ParameterEnum.SECRETAPIKEY); + } + + final public String getContentAttributeName() { + return 
get(ParameterEnum.CONTENTATTRIBUTENAME); + } + + final public String getUriAttributeName() { + return get(ParameterEnum.URIATTRIBUTENAME); + } + + final public String getCreatedDateAttributeName() { + return get(ParameterEnum.CREATEDDATEATTRIBUTENAME); + } + + final public String getModifiedDateAttributeName() { + return get(ParameterEnum.MODIFIEDDATEATTRIBUTENAME); + } + + final public String getIndexingDateAttributeName() { + return get(ParameterEnum.INDEXINGDATEATTRIBUTENAME); + } + + final public String getMimeTypeAttributeName() { + return get(ParameterEnum.MIMETYPEATTRIBUTENAME); + } + + final public String getExtractorPattern() { + return get(ParameterEnum.EXTRACTORPATTERN); + } + + final public String getReplacementString() { + return get(ParameterEnum.REPLACEMENTSTRING); + } +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnection.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnection.java new file mode 100644 index 0000000000..1639da9e93 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnection.java @@ -0,0 +1,289 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import com.google.gson.Gson; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.Locale; + +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.HttpResponse; +import org.apache.http.impl.auth.BasicScheme; +import org.apache.http.impl.client.BasicAuthCache; +import org.apache.http.client.AuthCache; +import org.apache.http.HttpException; +import org.apache.http.HttpHeaders; +import org.apache.http.HttpHost; +import org.apache.http.util.EntityUtils; + +import 
org.apache.manifoldcf.agents.interfaces.IOutputHistoryActivity; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.core.util.URLEncoder; + +public class AppSearchConnection { + + public enum Result { + OK, ERROR, UNKNOWN; + } + + protected final AppSearchConfig config; + + private final HttpClient client; + private final String serverLocation; + private final String engineName; + + private String resultDescription = ""; + private String callUrlSnippet = null; + private String response = null; + private String resultCode = null; + private Result result = Result.UNKNOWN; + + protected final static String jsonException = "\"errors\""; + + protected AppSearchConnection(AppSearchConfig config, HttpClient client) { + this.config = config; + this.client = client; + serverLocation = config.getServerLocation(); + engineName = config.getEngineName(); + } + + protected StringBuffer getApiUrl(String command) throws ManifoldCFException { + StringBuffer url = new StringBuffer(serverLocation); + if (!serverLocation.endsWith("/")) { + url.append('/'); + } + + url.append("api/as/v1/engines/").append(URLEncoder.encode(engineName)).append("/"); + url.append(command); + callUrlSnippet = url.toString(); + + return url; + } + + protected static class CallThread extends Thread { + + protected final HttpClient client; + protected final HttpRequestBase method; + protected int resultCode = -1; + protected String response = null; + protected Throwable exception = null; + + public CallThread(HttpClient client, HttpRequestBase method) { + this.client = client; + this.method = method; + setDaemon(true); + } + + @Override + public void run() { + HttpHost target = new HttpHost(method.getURI().getHost(), method.getURI().getPort(), method.getURI().getScheme()); + // Create AuthCache instance + AuthCache authCache = new BasicAuthCache(); + // Generate BASIC scheme object and add it to 
the local + // auth cache + BasicScheme basicAuth = new BasicScheme(); + authCache.put(target, basicAuth); + + // Add AuthCache to the execution context + HttpClientContext localContext = HttpClientContext.create(); + localContext.setAuthCache(authCache); + + try { + try { + HttpResponse resp = client.execute(method, localContext); + resultCode = resp.getStatusLine().getStatusCode(); + response = EntityUtils.toString(resp.getEntity(), "UTF-8"); + } finally { + method.abort(); + } + } catch (java.net.SocketTimeoutException e) { + exception = e; + } catch (InterruptedIOException e) { + // Just exit + } catch (Throwable e) { + exception = e; + } + } + + public void finishUp() + throws HttpException, IOException, InterruptedException { + join(); + Throwable t = exception; + if (t != null) { + if (t instanceof HttpException) { + throw (HttpException) t; + } else if (t instanceof IOException) { + throw (IOException) t; + } else if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } else if (t instanceof Error) { + throw (Error) t; + } else { + throw new RuntimeException("Unexpected exception thrown: " + t.getMessage(), t); + } + } + } + + public int getResultCode() { + return resultCode; + } + + public String getResponse() { + return response; + } + + public Throwable getException() { + return exception; + } + } + + /** + * Call AppSearch. + * + * @param method + * @return false if there was a "rejection". 
+ * @throws org.apache.manifoldcf.core.interfaces.ManifoldCFException + * @throws org.apache.manifoldcf.agents.interfaces.ServiceInterruption + */ + protected boolean call(HttpRequestBase method) + throws ManifoldCFException, ServiceInterruption { + method.setHeader(HttpHeaders.AUTHORIZATION, "Bearer " + this.config.getSecretApiKey()); + method.setHeader(HttpHeaders.CONTENT_TYPE, "application/json"); + CallThread ct = new CallThread(client, method); + try { + ct.start(); + try { + ct.finishUp(); + response = ct.getResponse(); + return handleResultCode(ct.getResultCode(), response); + } catch (InterruptedException e) { + ct.interrupt(); + throw new ManifoldCFException("Interrupted: " + e.getMessage(), e, ManifoldCFException.INTERRUPTED); + } + } catch (HttpException e) { + handleHttpException(e); + return false; + } catch (IOException e) { + handleIOException(e); + return false; + } + } + + protected boolean handleResultCode(int code, String response) + throws ManifoldCFException, ServiceInterruption { + if (code == 200 || code == 201) { + setResult("OK", Result.OK, null); + return true; + } else if (code == 404) { + setResult(IOutputHistoryActivity.HTTP_ERROR, Result.ERROR, "Page not found: " + response); + throw new ManifoldCFException("Server/page not found"); + } else if (code >= 400 && code < 500) { + setResult(IOutputHistoryActivity.HTTP_ERROR, Result.ERROR, "HTTP code = " + code + ", Response = " + response); + return false; + } else if (code >= 500 && code < 600) { + setResult(IOutputHistoryActivity.HTTP_ERROR, Result.ERROR, "Server exception: " + response); + long currentTime = System.currentTimeMillis(); + throw new ServiceInterruption("Server exception: " + response, + new ManifoldCFException(response), + currentTime + 300000L, + currentTime + 20L * 60000L, + -1, + false); + } + setResult(IOutputHistoryActivity.HTTP_ERROR, Result.UNKNOWN, "HTTP code = " + code + ", Response = " + response); + throw new ManifoldCFException("Unexpected HTTP result code: " + 
code + ": " + response); + } + + protected void handleHttpException(HttpException e) + throws ManifoldCFException, ServiceInterruption { + setResult(e.getClass().getSimpleName().toUpperCase(Locale.ROOT), Result.ERROR, e.getMessage()); + throw new ManifoldCFException(e); + } + + protected void handleIOException(IOException e) + throws ManifoldCFException, ServiceInterruption { + if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException)) { + throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED); + } + setResult(e.getClass().getSimpleName().toUpperCase(Locale.ROOT), Result.ERROR, e.getMessage()); + long currentTime = System.currentTimeMillis(); + // All IO exceptions are treated as service interruptions, retried for an hour + throw new ServiceInterruption("IO exception: " + e.getMessage(), e, + currentTime + 60000L, + currentTime + 1L * 60L * 60000L, + -1, + true); + } + + protected String checkJson(String jsonQuery) throws ManifoldCFException { + if (response != null) { + Gson gson = new Gson(); + ResponseBody res = null; + + // check if response is an array or object + if (response.startsWith("[")) { + ResponseBody[] resArray = gson.fromJson(response, ResponseBody[].class); + if (resArray.length > 0) { + res = resArray[0]; + } + } else if (response.startsWith("{")) { + res = gson.fromJson(response, ResponseBody.class); + } + + if (res != null) { + if (res.getErrors() != null && res.getErrors().length > 0) { + return res.getErrors()[0]; + } else { + return null; + } + } + + String[] tokens = response.replaceAll("\\[", "").replaceAll("\\]", "").replaceAll("\\{", "").replaceAll("\\}", "").split(","); + for (String token : tokens) { + if (token.contains(jsonQuery)) { + return token.substring(token.indexOf(":") + 1); + } + } + } + + return null; + } + + protected void setResult(String resultCode, Result res, String desc) { + if (res != null) { + result = res; + } + if (desc != null) { + if (desc.length() > 
0) { + resultDescription = desc; + } + } + setResultCode(resultCode); + } + + public String getResultDescription() { + return resultDescription; + } + + protected String getResponse() { + return response; + } + + public Result getResult() { + return result; + } + + public String getCallUrlSnippet() { + return callUrlSnippet; + } + + public String getResultCode() { + return resultCode; + } + + public void setResultCode(String resultCode) { + this.resultCode = resultCode; + } +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java new file mode 100644 index 0000000000..7bafb46876 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java @@ -0,0 +1,414 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Iterator; +import java.util.HashMap; + +import org.apache.http.conn.HttpClientConnectionManager; +import org.apache.http.client.HttpClient; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.protocol.HttpRequestExecutor; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.impl.client.BasicCredentialsProvider; +import org.apache.http.config.SocketConfig; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.auth.Credentials; +import org.apache.http.auth.AuthScope; + +import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity; +import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity; +import org.apache.manifoldcf.agents.interfaces.RepositoryDocument; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.agents.output.BaseOutputConnector; +import com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchAction.CommandEnum; +import com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnection.Result; +import org.apache.manifoldcf.core.interfaces.Specification; +import org.apache.manifoldcf.core.interfaces.ConfigParams; +import org.apache.manifoldcf.core.interfaces.IHTTPOutput; +import org.apache.manifoldcf.core.interfaces.IPostParameters; +import org.apache.manifoldcf.core.interfaces.IThreadContext; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.core.interfaces.VersionContext; 
+import org.apache.manifoldcf.connectorcommon.interfaces.IKeystoreManager; +import org.apache.manifoldcf.crawler.system.Logging; + +/** + * This is the "output connector" for appsearch. + */ +public class AppSearchConnector extends BaseOutputConnector { + + private final static String APPSEARCH_INDEXATION_ACTIVITY = "Indexation"; + private final static String APPSEARCH_DELETION_ACTIVITY = "Deletion"; + + private final static String[] APPSEARCH_ACTIVITIES = {APPSEARCH_INDEXATION_ACTIVITY, APPSEARCH_DELETION_ACTIVITY}; + + private final static String APPSEARCH_TAB_SERVER = "AppSearchConnector.Server"; + private final static String APPSEARCH_TAB_PARAMETERS = "AppSearchConnector.Parameters"; + + /** + * Forward to the javascript to check the configuration parameters + */ + private static final String EDIT_CONFIG_HEADER_FORWARD = "editConfiguration.js"; + + /** + * Forward to the HTML template to edit the configuration parameters + */ + private static final String EDIT_CONFIG_FORWARD_SERVER = "editConfiguration_Server.html"; + private static final String EDIT_CONFIG_FORWARD_PARAMETERS = "editConfiguration_Parameters.html"; + + /** + * Forward to the HTML template to view the configuration parameters + */ + private static final String VIEW_CONFIG_FORWARD = "viewConfiguration.html"; + + /** + * Connection expiration interval + */ + private static final long EXPIRATION_INTERVAL = 60000L; + + private HttpClientConnectionManager connectionManager = null; + private HttpClient client = null; + private long expirationTime = -1L; + + public AppSearchConnector() { + } + + @Override + public void connect(ConfigParams configParams) { + super.connect(configParams); + } + + protected HttpClient getSession() + throws ManifoldCFException { + if (client == null) { + int socketTimeout = 900000; + int connectionTimeout = 60000; + + // Load configuration from parameters + final AppSearchConfig config = new AppSearchConfig(params); + final IKeystoreManager keystoreManager = 
config.getSSLKeystore(); + + final Credentials credentials = null; + + // Set up ingest ssl if indicated + SSLConnectionSocketFactory myFactory = SSLConnectionSocketFactory.getSocketFactory(); + + // Set up connection manager + PoolingHttpClientConnectionManager poolingConnectionManager = new PoolingHttpClientConnectionManager(RegistryBuilder.create() + .register("http", PlainConnectionSocketFactory.getSocketFactory()) + .register("https", myFactory) + .build()); + poolingConnectionManager.setDefaultMaxPerRoute(1); + poolingConnectionManager.setValidateAfterInactivity(2000); + poolingConnectionManager.setDefaultSocketConfig(SocketConfig.custom() + .setTcpNoDelay(true) + .setSoTimeout(socketTimeout) + .build()); + connectionManager = poolingConnectionManager; + + CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); + if (credentials != null) { + credentialsProvider.setCredentials(AuthScope.ANY, credentials); + } + + RequestConfig.Builder requestBuilder = RequestConfig.custom() + .setCircularRedirectsAllowed(true) + .setSocketTimeout(socketTimeout) + .setExpectContinueEnabled(true) + .setConnectTimeout(connectionTimeout) + .setConnectionRequestTimeout(socketTimeout); + + client = HttpClients.custom() + .setConnectionManager(connectionManager) + .setMaxConnTotal(1) + .disableAutomaticRetries() + .setDefaultRequestConfig(requestBuilder.build()) + .setDefaultCredentialsProvider(credentialsProvider) + .setRequestExecutor(new HttpRequestExecutor(socketTimeout)) + .build(); + + } + expirationTime = System.currentTimeMillis() + EXPIRATION_INTERVAL; + return client; + } + + protected void closeSession() { + if (connectionManager != null) { + connectionManager.shutdown(); + connectionManager = null; + } + client = null; + expirationTime = -1L; + } + + /** + * This method is called to assess whether to count this connector instance should actually be counted as being + * connected. + * + * @return true if the connector instance is actually connected. 
+ */ + @Override + public boolean isConnected() { + return connectionManager != null; + } + + @Override + public void disconnect() + throws ManifoldCFException { + super.disconnect(); + closeSession(); + } + + @Override + public void poll() + throws ManifoldCFException { + super.poll(); + if (connectionManager != null) { + if (System.currentTimeMillis() > expirationTime) { + closeSession(); + } + } + } + + @Override + public String[] getActivitiesList() { + return APPSEARCH_ACTIVITIES; + } + + /** + * Read the content of a resource, replace the variable ${PARAMNAME} with the value and copy it to the out. + * + * @param resName + * @param out + * @throws ManifoldCFException + */ + private static void outputResource(String resName, IHTTPOutput out, + Locale locale, AppSearchParam params, + String tabName, Integer sequenceNumber, Integer currentSequenceNumber) throws ManifoldCFException { + Map paramMap = null; + if (params != null) { + paramMap = params.buildMap(out); + if (tabName != null) { + paramMap.put("TabName", tabName); + } + if (currentSequenceNumber != null) { + paramMap.put("SelectedNum", currentSequenceNumber.toString()); + } + } else { + paramMap = new HashMap<>(); + } + if (sequenceNumber != null) { + paramMap.put("SeqNum", sequenceNumber.toString()); + } + + Messages.outputResourceWithVelocity(out, locale, resName, paramMap); + } + + @Override + public void outputConfigurationHeader(IThreadContext threadContext, + IHTTPOutput out, Locale locale, ConfigParams parameters, + List tabsArray) throws ManifoldCFException, IOException { + super.outputConfigurationHeader(threadContext, out, locale, parameters, + tabsArray); + tabsArray.add(Messages.getString(locale, APPSEARCH_TAB_SERVER)); + tabsArray.add(Messages.getString(locale, APPSEARCH_TAB_PARAMETERS)); + outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null, null, null); + } + + @Override + public void outputConfigurationBody(IThreadContext threadContext, + IHTTPOutput out, Locale locale, 
ConfigParams parameters, String tabName) + throws ManifoldCFException, IOException { + try { + super.outputConfigurationBody(threadContext, out, locale, parameters, + tabName); + AppSearchConfig config = this.getConfigParameters(parameters); + outputResource(EDIT_CONFIG_FORWARD_SERVER, out, locale, config, tabName, null, null); + outputResource(EDIT_CONFIG_FORWARD_PARAMETERS, out, locale, config, tabName, null, null); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * Build a Set of AppSearch parameters. If configParams is null, getConfiguration() is used. + * + * @param configParams + */ + final private AppSearchConfig getConfigParameters( + ConfigParams configParams) { + if (configParams == null) { + configParams = getConfiguration(); + } + return new AppSearchConfig(configParams); + } + + @Override + public VersionContext getPipelineDescription(Specification os) + throws ManifoldCFException { + return new VersionContext("", params, os); + } + + @Override + public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, + Locale locale, ConfigParams parameters) throws ManifoldCFException, + IOException { + outputResource(VIEW_CONFIG_FORWARD, out, locale, + getConfigParameters(parameters), null, null, null); + } + + @Override + public String processConfigurationPost(IThreadContext threadContext, + IPostParameters variableContext, ConfigParams parameters) + throws ManifoldCFException { + return AppSearchConfig.contextToConfig(threadContext, variableContext, parameters); + } + + /** + * Convert an unqualified ACL to qualified form. + * + * @param acl is the initial, unqualified ACL. + * @param authorityNameString is the name of the governing authority for this document's acls, or null if none. + * @param activities is the activities object, so we can report what's happening. + * @return the modified ACL. 
+ * @throws org.apache.manifoldcf.core.interfaces.ManifoldCFException + */ + protected static String[] convertACL(String[] acl, String authorityNameString, IOutputAddActivity activities) + throws ManifoldCFException { + if (acl != null) { + String[] rval = new String[acl.length]; + int i = 0; + while (i < rval.length) { + rval[i] = activities.qualifyAccessToken(authorityNameString, acl[i]); + i++; + } + return rval; + } + return new String[0]; + } + + /** + * Add (or replace) a document in the output data store using the connector. This method presumes that the connector + * object has been configured, and it is thus able to communicate with the output data store should that be necessary. + * + * @param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output + * data store will use to process and serve the document. This URI is constructed by the repository connector which + * fetches the document, and is thus universal across all output connectors. + * @param pipelineDescription includes the description string that was constructed for this document by the + * getOutputDescription() method. + * @param document is the document data to be processed (handed to the output data store). + * @param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in + * with the repository document. May be null. + * @param activities is the handle to an object that the implementer of a pipeline connector may use to perform + * operations, such as logging processing activity, or sending a modified document to the next stage in the pipeline. + * @return the document status (accepted or permanently rejected). + * @throws org.apache.manifoldcf.core.interfaces.ManifoldCFException + * @throws IOException only if there's a stream error reading the document data. 
+ * @throws org.apache.manifoldcf.agents.interfaces.ServiceInterruption + */ + @Override + public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, + RepositoryDocument document, String authorityNameString, + IOutputAddActivity activities) throws ManifoldCFException, + ServiceInterruption, IOException { + HttpClient addDocClient = getSession(); + AppSearchConfig config = getConfigParameters(null); + + InputStream inputStream = document.getBinaryStream(); + + String[] acls = null; + String[] denyAcls = null; + String[] shareAcls = null; + String[] shareDenyAcls = null; + String[] parentAcls = null; + String[] parentDenyAcls = null; + Iterator a = document.securityTypesIterator(); + while (a.hasNext()) { + String securityType = a.next(); + String[] convertedAcls = convertACL(document.getSecurityACL(securityType), authorityNameString, activities); + String[] convertedDenyAcls = convertACL(document.getSecurityDenyACL(securityType), authorityNameString, activities); + if (securityType.equals(RepositoryDocument.SECURITY_TYPE_DOCUMENT)) { + acls = convertedAcls; + denyAcls = convertedDenyAcls; + } else if (securityType.equals(RepositoryDocument.SECURITY_TYPE_SHARE)) { + shareAcls = convertedAcls; + shareDenyAcls = convertedDenyAcls; + } else if (securityType.equals(RepositoryDocument.SECURITY_TYPE_PARENT)) { + parentAcls = convertedAcls; + parentDenyAcls = convertedDenyAcls; + } else { + activities.recordActivity(null, APPSEARCH_INDEXATION_ACTIVITY, document.getBinaryLength(), documentURI, activities.UNKNOWN_SECURITY, "Rejected document that has security info which AppSearch does not recognize: '" + securityType + "'"); + return DOCUMENTSTATUS_REJECTED; + } + } + + long startTime = System.currentTimeMillis(); + AppSearchIndex appsearch = new AppSearchIndex(addDocClient, config); + try { + appsearch.execute(documentURI, document, inputStream, acls, denyAcls, shareAcls, shareDenyAcls, parentAcls, parentDenyAcls); + if 
(appsearch.getResult() != Result.OK) { + return DOCUMENTSTATUS_REJECTED; + } + return DOCUMENTSTATUS_ACCEPTED; + } finally { + activities.recordActivity(startTime, APPSEARCH_INDEXATION_ACTIVITY, document.getBinaryLength(), documentURI, appsearch.getResultCode(), appsearch.getResultDescription()); + } + } + + @Override + public void removeDocument(String documentURI, String outputDescription, + IOutputRemoveActivity activities) throws ManifoldCFException, + ServiceInterruption { + HttpClient removeDocClient = getSession(); + long startTime = System.currentTimeMillis(); + AppSearchDelete od = new AppSearchDelete(removeDocClient, getConfigParameters(null)); + try { + od.execute(documentURI); + } finally { + activities.recordActivity(startTime, APPSEARCH_DELETION_ACTIVITY, null, documentURI, od.getResultCode(), od.getResultDescription()); + } + } + + @Override + public String check() throws ManifoldCFException { + HttpClient checkStatusClient = getSession(); + AppSearchAction oss = new AppSearchAction(checkStatusClient, getConfigParameters(null)); + try { + oss.executeGET(CommandEnum._none, true); + String resultName = oss.getResult().name(); + Logging.connectors.warn("Check Result: " + oss.getResult().toString()); + if (resultName.equals("OK")) { + return super.check(); + } + return resultName + " " + oss.getResultDescription(); + } catch (ServiceInterruption e) { + return "Transient exception: " + e.getMessage(); + } + } + +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java new file mode 100644 index 0000000000..ffd3703cdb --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java @@ -0,0 +1,122 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import com.google.gson.Gson; +import 
com.google.gson.JsonArray; +import com.google.gson.JsonObject; + +import org.apache.http.Consts; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.crawler.system.Logging; + +public class AppSearchDelete extends AppSearchConnection { + + public AppSearchDelete(HttpClient client, AppSearchConfig config) { + super(config, client); + } + + class CustomHttpDelete extends HttpPost { + + public CustomHttpDelete(String url) { + super(url); + } + + @Override + public String getMethod() { + return "DELETE"; + } + } + + public void execute(String documentURI) + throws ManifoldCFException, ServiceInterruption { + String newURI = documentURI; + + if (config.getExtractorPattern() != null && !config.getExtractorPattern().isEmpty() + && config.getReplacementString() != null && !config.getReplacementString().isEmpty()) { + newURI = Utils.processExpression(config.getExtractorPattern(), config.getReplacementString(), documentURI); + } + + String idField = Utils.getValidId(newURI); + StringBuffer url = getApiUrl("documents"); + CustomHttpDelete method = new CustomHttpDelete(url.toString()); + + Gson gson = new Gson(); + JsonArray documents = new JsonArray(); + documents.add(idField); + + int amountOfChunks = getAmountDocumentChunks(idField); + if (amountOfChunks > 1) { + for (int i = 1; i < amountOfChunks; i++) { + documents.add(idField + "#chunk/" + (i + 1)); + } + } + + String body = gson.toJson(documents); + method.setEntity(new StringEntity(body, Consts.UTF_8)); + + call(method); + String error = checkJson(jsonException); + if (getResult() == Result.OK && error == null) { + setResult("OK", Result.OK, null); + return; + } + + setResult("JSONERROR", Result.ERROR, error); + try { + Logging.initializeLoggers(); + 
Logging.connectors.warn("AppSearch: Delete failed: " + getResponse()); + } catch (Exception ex) { + System.out.println(ex.getMessage()); + } + } + + private int getAmountDocumentChunks(String documentId) { + try { + StringBuffer url = getApiUrl("search"); + HttpPost method = new HttpPost(url.toString()); + + Gson gson = new Gson(); + String searchQuery = "{\"query\":\"" + documentId + "\",\"result_fields\":{\"title\":{\"raw\": {}}},\"page\":{\"size\":100}}"; + method.setEntity(new StringEntity(searchQuery, Consts.UTF_8)); + + call(method); + + String error = checkJson(jsonException); + if (getResult() == Result.OK && error == null) { + JsonObject response = gson.fromJson(getResponse(), JsonObject.class); + + if (response != null) { + JsonArray results = response.get("results").getAsJsonArray(); + if (results != null && results.size() > 1) { + return results.size(); + } + } + + return 1; + } + setResult("JSONERROR", Result.ERROR, error); + Logging.connectors.warn("AppSearch: Commit failed: " + getResponse()); + + } catch (ManifoldCFException ex) { + System.out.println(ex.getMessage()); + return 0; + } catch (ServiceInterruption ex) { + System.out.println(ex.getMessage()); + return 0; + } + return 0; + } + + @Override + protected boolean handleResultCode(int code, String response) + throws ManifoldCFException, ServiceInterruption { + if (code == 404) { + setResult("OK", Result.OK, null); + return true; + } + return super.handleResultCode(code, response); + } +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java new file mode 100644 index 0000000000..34b658b877 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java @@ -0,0 +1,285 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import 
com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.Iterator; + +import java.text.SimpleDateFormat; +import java.util.Locale; +import java.util.TimeZone; + +import java.util.Date; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.http.client.HttpClient; +import org.apache.http.Consts; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.manifoldcf.agents.interfaces.RepositoryDocument; +import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.core.util.URLEncoder; +import org.apache.manifoldcf.crawler.system.Logging; + +public class AppSearchIndex extends AppSearchConnection { + + /** + * The allow attribute name + */ + protected static final String allowAttributeName = "allow_token_"; + /** + * The deny attribute name + */ + protected static final String denyAttributeName = "deny_token_"; + /** + * The no-security token + */ + protected static final String noSecurityToken = "__nosecurity__"; + + protected static final boolean useNullValue = false; + + private static final int APPSEARCH_MAX_FILESIZE = 100000; + + private static final SimpleDateFormat DATE_FORMATTER; + + static { + String ISO_FORMAT = "yyyy-MM-dd'T'HH:mm:ss"; + TimeZone UTC = TimeZone.getTimeZone("UTC"); + DATE_FORMATTER = new SimpleDateFormat(ISO_FORMAT, Locale.ROOT); + DATE_FORMATTER.setTimeZone(UTC); + } + + protected static String formatAsString(final Date dateValue) { + return DATE_FORMATTER.format(dateValue); + } + + public AppSearchIndex(HttpClient client, AppSearchConfig config) { + super(config, client); + } + + private String generateDocJson(String documentURI, RepositoryDocument document, String documentContent, + 
String contentAttributeName, Integer groupKey, int chunkNumber) { + Gson gson = new Gson(); + JsonArray documents = new JsonArray(); + JsonObject doc = new JsonObject(); + + Iterator i = document.getFields(); + while (i.hasNext()) { + String fieldName = i.next(); + String fieldNameClean = Utils.cleanFieldName(fieldName); + Date[] dateFieldValues = document.getFieldAsDates(fieldName); + if (dateFieldValues != null) { + + // Check if it is single or multi value + if (dateFieldValues.length > 1) { + doc.addProperty(fieldNameClean, gson.toJson(dateFieldValues)); + } else if (dateFieldValues.length == 1) { + doc.addProperty(fieldNameClean, dateFieldValues[0].toString()); + } + } else { + String[] fieldValues; + try { + fieldValues = document.getFieldAsStrings(fieldName); + + // Check if it is single or multi value + if (fieldValues.length > 1) { + doc.addProperty(fieldNameClean, gson.toJson(fieldValues)); + } else if (fieldValues.length == 1) { + doc.addProperty(fieldNameClean, fieldValues[0]); + } + } catch (IOException ex) { + Logger.getLogger(AppSearchIndex.class.getName()).log(Level.SEVERE, null, ex); + } + } + } + + // Standard document fields + final Date createdDate = document.getCreatedDate(); + if (createdDate != null && config.getCreatedDateAttributeName() != null + && config.getCreatedDateAttributeName().length() > 0) { + doc.addProperty(config.getCreatedDateAttributeName(), formatAsString(createdDate)); + } + + final Date modifiedDate = document.getModifiedDate(); + if (modifiedDate != null && config.getModifiedDateAttributeName() != null + && config.getModifiedDateAttributeName().length() > 0) { + doc.addProperty(config.getModifiedDateAttributeName(), formatAsString(modifiedDate)); + } + + final Date indexingDate = document.getIndexingDate(); + if (indexingDate != null && config.getIndexingDateAttributeName() != null + && config.getIndexingDateAttributeName().length() > 0) { + doc.addProperty(config.getIndexingDateAttributeName(), 
formatAsString(indexingDate)); + } + + final String mimeType = document.getMimeType(); + if (mimeType != null && config.getMimeTypeAttributeName() != null + && config.getMimeTypeAttributeName().length() > 0) { + doc.addProperty(config.getMimeTypeAttributeName(), gson.toJson(mimeType)); + } + + if (config.getExtractorPattern() != null && !config.getExtractorPattern().isEmpty() + && config.getReplacementString() != null && !config.getReplacementString().isEmpty()) { + String originalUrl = documentURI; + doc.addProperty("original_url", originalUrl); + documentURI = Utils.processExpression(config.getExtractorPattern(), config.getReplacementString(), documentURI); + } + + if (documentURI != null && config.getUriAttributeName() != null && config.getUriAttributeName().length() > 0) { + doc.addProperty(config.getUriAttributeName(), documentURI); + } + + // Content field + if (contentAttributeName != null && documentContent != null) { + doc.addProperty(contentAttributeName, documentContent); + } + + if (groupKey != null) { + doc.addProperty("groupkey", groupKey); + } + + if (chunkNumber == 1) { + doc.addProperty("isparent", true); + } + + String idDocument = Utils.getValidId(documentURI); + + if (chunkNumber > 1) { + doc.addProperty("parentid", idDocument); + idDocument += "#chunk/" + chunkNumber; + } + + doc.addProperty("id", idDocument); + documents.add(doc); + + return gson.toJson(doc); + } + + private boolean pushDocument(String reqBody) { + try { + StringBuffer url = getApiUrl("documents"); + HttpPost post = new HttpPost(url.toString()); + Logging.connectors.debug("HttPutUri: " + url.toString()); + + post.setEntity(new StringEntity(reqBody, Consts.UTF_8)); + + call(post); + + String error = checkJson(jsonException); + if (getResult() == Result.OK && error == null) { + setResult("OK", Result.OK, null); + return true; + } + + if (error != null) { + setResult("JSONERROR", Result.ERROR, error); + Logging.connectors.warn("AppSearch: Index failed: " + getResponse()); + 
return false; + } + + setResult("JSONERROR", Result.ERROR, error); + Logging.connectors.warn("AppSearch: Index failed: " + getResponse()); + return true; + + } catch (Exception ex) { + setResult("JSONERROR", Result.ERROR, ex.getMessage()); + Logging.connectors.warn("AppSearch: Index failed: " + ex.getMessage()); + return false; + } + } + + /** + * Do the indexing. + * + * @param documentURI + * @param document + * @param inputStream + * @param acls + * @param denyAcls + * @param shareAcls + * @param shareDenyAcls + * @param parentAcls + * @param parentDenyAcls + * @return false to indicate that the document was rejected. + * @throws org.apache.manifoldcf.core.interfaces.ManifoldCFException + * @throws org.apache.manifoldcf.agents.interfaces.ServiceInterruption + */ + public boolean execute(String documentURI, RepositoryDocument document, InputStream inputStream, String[] acls, + String[] denyAcls, String[] shareAcls, String[] shareDenyAcls, String[] parentAcls, String[] parentDenyAcls) + throws ManifoldCFException, ServiceInterruption { + Integer groupKey = null; + + String documentContent = getDocumentContent(inputStream); + String initialDocument = generateDocJson(documentURI, document, documentContent, config.getContentAttributeName(), + null, 0); + + // push if the document is small + if (initialDocument.getBytes().length < APPSEARCH_MAX_FILESIZE) { + return pushDocument(initialDocument); + } + + // Prepare to split the document and assign a group key + groupKey = documentURI.hashCode(); + + // Generate a doc without content to calculate the size + String documentWithoutContent = generateDocJson(documentURI, document, null, null, null, 0); + int documentSize = documentWithoutContent.getBytes().length; + int FREE_BYTES_RECOMMENDED = 10000; + int bytesChunks = (APPSEARCH_MAX_FILESIZE - documentSize) - FREE_BYTES_RECOMMENDED; + Logging.connectors.debug("Document size: " + documentSize + ", will generate chunks of " + bytesChunks + " bytes"); + + String[] 
contentChunk = Utils.chunkSplit(documentContent, bytesChunks); + + Logging.connectors.debug("Number of chunks generated: " + contentChunk.length); + for (int i = 0; i < contentChunk.length; i++) { + String documentChunk = generateDocJson(documentURI, document, contentChunk[i], config.getContentAttributeName(), + groupKey, i); + Logging.connectors.debug("Chunk " + i + " size: " + documentChunk.getBytes().length); + pushDocument(documentChunk); + } + + return true; + } + + protected String getDocumentContent(InputStream inputStream) { + StringBuilder sb = new StringBuilder(); + char[] buffer = new char[65536]; + try (Reader r = new InputStreamReader(inputStream, Consts.UTF_8)) { + while (true) { + try { + int amt = r.read(buffer, 0, buffer.length); + if (amt == -1) { + break; + } + for (int j = 0; j < amt; j++) { + final char x = buffer[j]; + if (x == '\n' || x == '\r' || x == '\t' || x == '\b' || x == '\f') { + sb.append(""); + } else if (x < 32) { + sb.append("\\u").append(String.format(Locale.ROOT, "%04x", (int) x)); + } else { + if (x == '\"' || x == '\\' || x == '/') { + sb.append('\\'); + } + sb.append(x); + } + } + } catch (IOException ex) { + Logger.getLogger(AppSearchIndex.class.getName()).log(Level.SEVERE, null, ex); + break; + } + } + } catch (IOException ex) { + Logger.getLogger(AppSearchIndex.class.getName()).log(Level.SEVERE, null, ex); + } + + return sb.toString(); + } + +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchParam.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchParam.java new file mode 100644 index 0000000000..6564a577fc --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchParam.java @@ -0,0 +1,92 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import java.util.HashMap; +import java.util.Map; +import java.util.ArrayList; +import 
java.util.List; + +import org.apache.manifoldcf.core.interfaces.IHTTPOutput; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.connectorcommon.interfaces.IKeystoreManager; +import org.apache.manifoldcf.connectorcommon.interfaces.KeystoreManagerFactory; + +import com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchParam.ParameterEnum; + +/** + * Parameters data for the appsearch output connector. + */ +public class AppSearchParam extends HashMap { + + /** + * Parameters constants + */ + public enum ParameterEnum { + SERVERLOCATION("http://localhost:3002"), + ENGINENAME("myengine"), + SERVERKEYSTORE(""), + SECRETAPIKEY(""), + USEMAPPERATTACHMENTS("false"), + CONTENTATTRIBUTENAME("content"), + URIATTRIBUTENAME("url"), + CREATEDDATEATTRIBUTENAME("created"), + MODIFIEDDATEATTRIBUTENAME("last_modified"), + INDEXINGDATEATTRIBUTENAME("indexed"), + MIMETYPEATTRIBUTENAME("mime_type"), + EXTRACTORPATTERN(""), + REPLACEMENTSTRING(""), + FIELDLIST(""); + + final protected String defaultValue; + + private ParameterEnum(String defaultValue) { + this.defaultValue = defaultValue; + } + } + + private static final long serialVersionUID = -1593234685772720029L; + + protected AppSearchParam(ParameterEnum[] params) { + super(params.length); + } + + final public Map buildMap(IHTTPOutput out) throws ManifoldCFException { + Map rval = new HashMap<>(); + for (Map.Entry entry : this.entrySet()) { + final String key = entry.getKey().name(); + final boolean isPassword = key.endsWith("PASSWORD"); + final boolean isKeystore = key.endsWith("KEYSTORE"); + if (isPassword) { + // Do not put passwords in plain text in forms + rval.put(key, out.mapPasswordToKey(entry.getValue())); + } else if (isKeystore) { + String keystore = entry.getValue(); + IKeystoreManager localKeystore; + if (keystore == null || keystore.length() == 0) { + localKeystore = KeystoreManagerFactory.make(""); + } else { + localKeystore = KeystoreManagerFactory.make("", keystore); + 
} + + List> certificates = new ArrayList<>(); + + String[] contents = localKeystore.getContents(); + for (String alias : contents) { + String description = localKeystore.getDescription(alias); + if (description.length() > 128) { + description = description.substring(0, 125) + "..."; + } + Map certificate = new HashMap<>(); + certificate.put("ALIAS", alias); + certificate.put("DESCRIPTION", description); + certificates.add(certificate); + } + rval.put(key, keystore); + rval.put(key + "_LIST", certificates); + } else { + rval.put(key, entry.getValue()); + } + } + return rval; + } + +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Messages.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Messages.java new file mode 100644 index 0000000000..741db769dd --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Messages.java @@ -0,0 +1,119 @@ +/* $Id: Messages.java 1295926 2012-03-01 21:56:27Z kwright $ */ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import java.util.Locale; +import java.util.Map; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.core.interfaces.IHTTPOutput; + +public class Messages extends org.apache.manifoldcf.ui.i18n.Messages { + + public static final String DEFAULT_BUNDLE_NAME = "com.mcplusa.manifoldcf.agents.output.appsearch.common"; + public static final String DEFAULT_PATH_NAME = "com.mcplusa.manifoldcf.crawler.connectors.appsearch"; + + /** + * Constructor - do no instantiate + */ + protected Messages() { + } + + public static String getString(Locale locale, String messageKey) { + return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getAttributeString(Locale locale, String messageKey) { + return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getBodyString(Locale locale, String messageKey) { + return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getAttributeJavascriptString(Locale locale, String messageKey) { + return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getBodyJavascriptString(Locale locale, String messageKey) { + return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null); + } + + public static String getString(Locale locale, String messageKey, Object[] args) { + return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getAttributeString(Locale locale, String messageKey, Object[] args) { + return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getBodyString(Locale locale, String messageKey, Object[] args) { + return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getAttributeJavascriptString(Locale locale, String messageKey, 
Object[] args) { + return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args) { + return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args); + } + + // More general methods which allow bundlenames and class loaders to be specified. + public static String getString(String bundleName, Locale locale, String messageKey, Object[] args) { + return getString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args) { + return getAttributeString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args) { + return getBodyString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args) { + return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args); + } + + public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args) { + return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args); + } + + // Resource output + public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey, + Map substitutionParameters, boolean mapToUpperCase) + throws ManifoldCFException { + outputResource(output, Messages.class, DEFAULT_PATH_NAME, locale, resourceKey, + substitutionParameters, mapToUpperCase); + } + + public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey, + Map substitutionParameters, boolean mapToUpperCase) + throws ManifoldCFException { + outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME, DEFAULT_PATH_NAME, 
locale, resourceKey, + substitutionParameters, mapToUpperCase); + } + + public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey, + Map contextObjects) + throws ManifoldCFException { + outputResourceWithVelocity(output, Messages.class, DEFAULT_BUNDLE_NAME, DEFAULT_PATH_NAME, locale, resourceKey, + contextObjects); + } + +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/ResponseBody.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/ResponseBody.java new file mode 100644 index 0000000000..001c7e1a59 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/ResponseBody.java @@ -0,0 +1,36 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +public class ResponseBody { + + private String id; + private String[] errors; + + public ResponseBody() { + } + + public ResponseBody(String id, String[] errors) { + this.id = id; + this.errors = errors; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String[] getErrors() { + return errors; + } + + public void setErrors(String[] errors) { + this.errors = errors; + } + + @Override + public String toString() { + return "ResponseBody{" + "id=" + id + ", errors=" + errors + '}'; + } +} diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java new file mode 100644 index 0000000000..1e9b32d927 --- /dev/null +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java @@ -0,0 +1,78 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import 
/**
 * Stateless string helpers used by the AppSearch output connector: field-name
 * normalisation, byte-budgeted text chunking, document-id derivation and
 * regex-based value rewriting.
 */
public final class Utils {

    /** Maximum length, in characters, of an id produced by {@link #getValidId(String)}. */
    public static final int APPSEARCH_MAX_ID_LENGTH = 780;

    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern NON_WORD_CHARACTER = Pattern.compile("\\W");
    // Canonical "$(n)" placeholder: no leading zeros and at most nine digits, so it always fits an int.
    private static final Pattern GROUP_PLACEHOLDER = Pattern.compile("\\$\\((0|[1-9][0-9]{0,8})\\)");

    private Utils() {
        // static utility class, never instantiated
    }

    /**
     * Normalises a field name: every whitespace run becomes a single
     * underscore, every remaining non-word character is removed, and the
     * result is lower-cased.
     *
     * @param fieldName the raw field name; must not be {@code null}
     * @return the normalised field name
     */
    public static String cleanFieldName(String fieldName) {
        String underscored = WHITESPACE_RUN.matcher(fieldName).replaceAll("_");
        String wordCharsOnly = NON_WORD_CHARACTER.matcher(underscored).replaceAll("");
        // Locale.ROOT keeps the mapping stable; a Turkish default locale would turn 'I' into a dotless i.
        return wordCharsOnly.toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Splits {@code original} into consecutive chunks whose UTF-8 encoding is at
     * most {@code length} bytes each, never cutting through a code point. Each
     * chunk is trimmed before it is stored, so whitespace at a chunk boundary is
     * dropped and a whitespace-only chunk becomes an empty string.
     *
     * <p>A single code point that needs more than {@code length} bytes is
     * emitted as a chunk of its own so that the split always makes progress.
     *
     * @param original the text to split; must not be {@code null}
     * @param length the byte budget per chunk; must be positive
     * @return the chunks in order; an empty array for empty input, never {@code null}
     * @throws IllegalArgumentException if {@code length} is not positive
     */
    public static String[] chunkSplit(String original, int length) {
        if (length <= 0) {
            throw new IllegalArgumentException("length must be positive, got " + length);
        }

        List<String> chunks = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        int currentBytes = 0;

        int offset = 0;
        while (offset < original.length()) {
            int codePoint = original.codePointAt(offset);
            int codePointBytes = utf8Length(codePoint);

            // Flush before the budget would be exceeded, but never flush an empty chunk.
            if (currentBytes > 0 && currentBytes + codePointBytes > length) {
                chunks.add(current.toString().trim());
                current.setLength(0);
                currentBytes = 0;
            }

            current.appendCodePoint(codePoint);
            currentBytes += codePointBytes;
            offset += Character.charCount(codePoint);
        }

        if (currentBytes > 0) {
            chunks.add(current.toString().trim());
        }

        return chunks.toArray(new String[0]);
    }

    /** Returns the number of bytes the given code point occupies in UTF-8. */
    private static int utf8Length(int codePoint) {
        if (codePoint < 0x80) {
            return 1;
        }
        if (codePoint < 0x800) {
            return 2;
        }
        if (codePoint < 0x10000) {
            return 3;
        }
        return 4;
    }

    /**
     * Derives a document id from a URL by URL-encoding it and cutting the
     * result to at most {@link #APPSEARCH_MAX_ID_LENGTH} characters.
     *
     * @param url the document URL
     * @return the encoded, possibly truncated id
     */
    public static String getValidId(String url) {
        String encodedUrl = URLEncoder.encode(url);
        if (encodedUrl.length() <= APPSEARCH_MAX_ID_LENGTH) {
            return encodedUrl;
        }
        // NOTE(review): truncation can end inside a %XX escape and maps long URLs that
        // share a prefix to the same id. Left unchanged so existing ids stay stable.
        return encodedUrl.substring(0, APPSEARCH_MAX_ID_LENGTH);
    }

    /**
     * Rewrites a value using a regular expression. When {@code expression}
     * matches somewhere in {@code originalValue}, every {@code $(n)} placeholder
     * in {@code replacementstring} is replaced by capture group {@code n} of
     * that first match ({@code $(0)} is the whole match). A group that did not
     * participate in the match contributes an empty string. Placeholders whose
     * index exceeds the pattern's group count are left as they are.
     *
     * <p>Substitution is done in a single pass, so captured text that happens
     * to look like a placeholder is never expanded again.
     *
     * @param expression the regular expression to search for
     * @param replacementstring the template containing {@code $(n)} placeholders
     * @param originalValue the value to inspect
     * @return the filled-in template, or {@code originalValue} when there is no match
     * @throws java.util.regex.PatternSyntaxException if {@code expression} is not a valid pattern
     */
    public static String processExpression(String expression, String replacementstring, String originalValue) {
        final Matcher source = Pattern.compile(expression).matcher(originalValue);
        if (!source.find()) {
            return originalValue;
        }

        final int groups = source.groupCount();
        final Matcher placeholder = GROUP_PLACEHOLDER.matcher(replacementstring);
        final StringBuffer rewritten = new StringBuffer();

        while (placeholder.find()) {
            int index = Integer.parseInt(placeholder.group(1));
            String substitute;
            if (index > groups) {
                // Unknown group: keep the placeholder text untouched.
                substitute = placeholder.group();
            } else {
                String captured = source.group(index);
                substitute = (captured == null) ? "" : captured;
            }
            // quoteReplacement stops '$' and '\' in the captured text from being interpreted.
            placeholder.appendReplacement(rewritten, Matcher.quoteReplacement(substitute));
        }
        placeholder.appendTail(rewritten);

        return rewritten.toString();
    }
}
a/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_en_US.properties b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_en_US.properties new file mode 100644 index 0000000000..414b1a4354 --- /dev/null +++ b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_en_US.properties @@ -0,0 +1,32 @@ +AppSearchConnector.Server=Server +AppSearchConnector.Parameters=Parameters + +AppSearchConnector.ServerLocation=Server location +AppSearchConnector.SecretApiKey=Private API Key: +AppSearchConnector.URLColon=(URL): +AppSearchConnector.SSLCertificateListColon=SSL certificate list: +AppSearchConnector.NoCertificatesPresent=No certificates present +AppSearchConnector.DeleteCert=Delete certificate +AppSearchConnector.Delete=Delete +AppSearchConnector.AddCert=Add certificate +AppSearchConnector.Add=Add +AppSearchConnector.Certificate=Certificate +AppSearchConnector.EngineNameColon=Engine name: +AppSearchConnector.ContentAttributeNameColon=Content field name: +AppSearchConnector.CreatedDateAttributeNameColon=Created date field name: +AppSearchConnector.ModifiedDateAttributeNameColon=Modified date field name: +AppSearchConnector.IndexingDateAttributeNameColon=Indexing date field name: +AppSearchConnector.MimeTypeAttributeNameColon=Mime type field name: +AppSearchConnector.ExtractorPatternColon=Extractor Pattern: +AppSearchConnector.ReplacementStringColon=Replacement String: + +AppSearchConnector.PleaseSupplyValidAppSearchLocation=Please supply a valid Appsearch server location +AppSearchConnector.ChooseACertificateFile=Please choose a certificate file +AppSearchConnector.PleaseSupplyValidEngineName=Please supply a valid engine name, it can only contain lowercase letters, numbers, and hyphens. 
+AppSearchConnector.PleaseSupplyValidFieldName=Please supply a valid field name, fields can only contain lowercase letters, numbers, and underscores. + +AppSearchConnector.AppSearch=AppSearch + +AppSearchConnector.MaxFileSizeBytesColon=Max file size (bytes): +AppSearchConnector.AllowedMIMETypesColon=Allowed MIME Types: +AppSearchConnector.AllowedFileExtensionsColon=Allowed file extensions\: \ No newline at end of file diff --git a/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_es_ES.properties b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_es_ES.properties new file mode 100644 index 0000000000..b0df91d5ca --- /dev/null +++ b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_es_ES.properties @@ -0,0 +1,32 @@ +AppSearchConnector.Server=Server +AppSearchConnector.Parameters=Par\u00e1metros + +AppSearchConnector.ServerLocation=Ubicaci\u00f3n del servidor +AppSearchConnector.SecretApiKey=Clave privada de API: +AppSearchConnector.URLColon=(URL): +AppSearchConnector.SSLCertificateListColon=SSL certificate list: +AppSearchConnector.NoCertificatesPresent=No certificates present +AppSearchConnector.DeleteCert=Delete certificate +AppSearchConnector.Delete=Delete +AppSearchConnector.AddCert=Add certificate +AppSearchConnector.Add=Add +AppSearchConnector.Certificate=Certificate +AppSearchConnector.EngineNameColon=Nombre del \u00cdndice: +AppSearchConnector.ContentAttributeNameColon=Content field name: +AppSearchConnector.CreatedDateAttributeNameColon=Created date field name: +AppSearchConnector.ModifiedDateAttributeNameColon=Modified date field name: +AppSearchConnector.IndexingDateAttributeNameColon=Indexing date field name: +AppSearchConnector.MimeTypeAttributeNameColon=Mime type field name: +AppSearchConnector.ExtractorPatternColon=Extractor Pattern: 
+AppSearchConnector.ReplacementStringColon=Replacement String: + +AppSearchConnector.PleaseSupplyValidAppSearchLocation=Por favor suministre una ubicaci\u00f3n de servidor v\u00e1lida AppSearch +AppSearchConnector.ChooseACertificateFile=Please choose a certificate file +AppSearchConnector.PleaseSupplyValidEngineName=Por favor proporcione un nombre de \u00cdndice v\u00e1lido, solo puede contener min\u00fasculas, n\u00fameros y guion. +AppSearchConnector.PleaseSupplyValidFieldName=Por favor proporcione un nombre de campo v\u00e1lido, este solo puede contener min\u00fasculas, n\u00fameros y guion bajo. + +AppSearchConnector.AppSearch=Appsearch + +AppSearchConnector.MaxFileSizeBytesColon=Tama\u00f1o m\u00e1ximo de archivo (bytes): +AppSearchConnector.AllowedMIMETypesColon=Tipos de MIME permitidos: +AppSearchConnector.AllowedFileExtensionsColon=Extensiones de archivo permitidos: diff --git a/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_fr_FR.properties b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_fr_FR.properties new file mode 100644 index 0000000000..414b1a4354 --- /dev/null +++ b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_fr_FR.properties @@ -0,0 +1,32 @@ +AppSearchConnector.Server=Server +AppSearchConnector.Parameters=Parameters + +AppSearchConnector.ServerLocation=Server location +AppSearchConnector.SecretApiKey=Private API Key: +AppSearchConnector.URLColon=(URL): +AppSearchConnector.SSLCertificateListColon=SSL certificate list: +AppSearchConnector.NoCertificatesPresent=No certificates present +AppSearchConnector.DeleteCert=Delete certificate +AppSearchConnector.Delete=Delete +AppSearchConnector.AddCert=Add certificate +AppSearchConnector.Add=Add +AppSearchConnector.Certificate=Certificate +AppSearchConnector.EngineNameColon=Engine name: 
+AppSearchConnector.ContentAttributeNameColon=Content field name: +AppSearchConnector.CreatedDateAttributeNameColon=Created date field name: +AppSearchConnector.ModifiedDateAttributeNameColon=Modified date field name: +AppSearchConnector.IndexingDateAttributeNameColon=Indexing date field name: +AppSearchConnector.MimeTypeAttributeNameColon=Mime type field name: +AppSearchConnector.ExtractorPatternColon=Extractor Pattern: +AppSearchConnector.ReplacementStringColon=Replacement String: + +AppSearchConnector.PleaseSupplyValidAppSearchLocation=Please supply a valid Appsearch server location +AppSearchConnector.ChooseACertificateFile=Please choose a certificate file +AppSearchConnector.PleaseSupplyValidEngineName=Please supply a valid engine name, it can only contain lowercase letters, numbers, and hyphens. +AppSearchConnector.PleaseSupplyValidFieldName=Please supply a valid field name, fields can only contain lowercase letters, numbers, and underscores. + +AppSearchConnector.AppSearch=AppSearch + +AppSearchConnector.MaxFileSizeBytesColon=Max file size (bytes): +AppSearchConnector.AllowedMIMETypesColon=Allowed MIME Types: +AppSearchConnector.AllowedFileExtensionsColon=Allowed file extensions\: \ No newline at end of file diff --git a/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_ja_JP.properties b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_ja_JP.properties new file mode 100644 index 0000000000..414b1a4354 --- /dev/null +++ b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_ja_JP.properties @@ -0,0 +1,32 @@ +AppSearchConnector.Server=Server +AppSearchConnector.Parameters=Parameters + +AppSearchConnector.ServerLocation=Server location +AppSearchConnector.SecretApiKey=Private API Key: +AppSearchConnector.URLColon=(URL): +AppSearchConnector.SSLCertificateListColon=SSL certificate list: 
+AppSearchConnector.NoCertificatesPresent=No certificates present +AppSearchConnector.DeleteCert=Delete certificate +AppSearchConnector.Delete=Delete +AppSearchConnector.AddCert=Add certificate +AppSearchConnector.Add=Add +AppSearchConnector.Certificate=Certificate +AppSearchConnector.EngineNameColon=Engine name: +AppSearchConnector.ContentAttributeNameColon=Content field name: +AppSearchConnector.CreatedDateAttributeNameColon=Created date field name: +AppSearchConnector.ModifiedDateAttributeNameColon=Modified date field name: +AppSearchConnector.IndexingDateAttributeNameColon=Indexing date field name: +AppSearchConnector.MimeTypeAttributeNameColon=Mime type field name: +AppSearchConnector.ExtractorPatternColon=Extractor Pattern: +AppSearchConnector.ReplacementStringColon=Replacement String: + +AppSearchConnector.PleaseSupplyValidAppSearchLocation=Please supply a valid Appsearch server location +AppSearchConnector.ChooseACertificateFile=Please choose a certificate file +AppSearchConnector.PleaseSupplyValidEngineName=Please supply a valid engine name, it can only contain lowercase letters, numbers, and hyphens. +AppSearchConnector.PleaseSupplyValidFieldName=Please supply a valid field name, fields can only contain lowercase letters, numbers, and underscores. 
+ +AppSearchConnector.AppSearch=AppSearch + +AppSearchConnector.MaxFileSizeBytesColon=Max file size (bytes): +AppSearchConnector.AllowedMIMETypesColon=Allowed MIME Types: +AppSearchConnector.AllowedFileExtensionsColon=Allowed file extensions\: \ No newline at end of file diff --git a/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_zh_CN.properties b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_zh_CN.properties new file mode 100644 index 0000000000..414b1a4354 --- /dev/null +++ b/connectors/appsearch/connector/src/main/native2ascii/com/mcplusa/manifoldcf/agents/output/appsearch/common_zh_CN.properties @@ -0,0 +1,32 @@ +AppSearchConnector.Server=Server +AppSearchConnector.Parameters=Parameters + +AppSearchConnector.ServerLocation=Server location +AppSearchConnector.SecretApiKey=Private API Key: +AppSearchConnector.URLColon=(URL): +AppSearchConnector.SSLCertificateListColon=SSL certificate list: +AppSearchConnector.NoCertificatesPresent=No certificates present +AppSearchConnector.DeleteCert=Delete certificate +AppSearchConnector.Delete=Delete +AppSearchConnector.AddCert=Add certificate +AppSearchConnector.Add=Add +AppSearchConnector.Certificate=Certificate +AppSearchConnector.EngineNameColon=Engine name: +AppSearchConnector.ContentAttributeNameColon=Content field name: +AppSearchConnector.CreatedDateAttributeNameColon=Created date field name: +AppSearchConnector.ModifiedDateAttributeNameColon=Modified date field name: +AppSearchConnector.IndexingDateAttributeNameColon=Indexing date field name: +AppSearchConnector.MimeTypeAttributeNameColon=Mime type field name: +AppSearchConnector.ExtractorPatternColon=Extractor Pattern: +AppSearchConnector.ReplacementStringColon=Replacement String: + +AppSearchConnector.PleaseSupplyValidAppSearchLocation=Please supply a valid Appsearch server location +AppSearchConnector.ChooseACertificateFile=Please choose a 
certificate file +AppSearchConnector.PleaseSupplyValidEngineName=Please supply a valid engine name, it can only contain lowercase letters, numbers, and hyphens. +AppSearchConnector.PleaseSupplyValidFieldName=Please supply a valid field name, fields can only contain lowercase letters, numbers, and underscores. + +AppSearchConnector.AppSearch=AppSearch + +AppSearchConnector.MaxFileSizeBytesColon=Max file size (bytes): +AppSearchConnector.AllowedMIMETypesColon=Allowed MIME Types: +AppSearchConnector.AllowedFileExtensionsColon=Allowed file extensions\: \ No newline at end of file diff --git a/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration.js b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration.js new file mode 100644 index 0000000000..5bde4ee8c7 --- /dev/null +++ b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration.js @@ -0,0 +1,162 @@ + + + diff --git a/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Parameters.html b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Parameters.html new file mode 100644 index 0000000000..5a89456650 --- /dev/null +++ b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Parameters.html @@ -0,0 +1,76 @@ + + +#if($TabName == $ResourceBundle.getString('AppSearchConnector.Parameters')) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.EngineNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ContentAttributeNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.CreatedDateAttributeNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ModifiedDateAttributeNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.IndexingDateAttributeNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.MimeTypeAttributeNameColon'))
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ExtractorPatternColon')) + +
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ReplacementStringColon')) + +
+ +#else + + + + + + + + + + +#end diff --git a/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Server.html b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Server.html new file mode 100644 index 0000000000..f07a8a6fef --- /dev/null +++ b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/editConfiguration_Server.html @@ -0,0 +1,74 @@ + + + + +#if($TabName == $ResourceBundle.getString('AppSearchConnector.Server')) + + + + + + + + + + + + + + + + + + +
+ $Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ServerLocation')) + $Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.URLColon')) + +
+ $Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.SecretApiKey')) + +

$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.SSLCertificateListColon')) + + + + #if($SERVERKEYSTORE_LIST.size() == 0) + + #else + #foreach($certificate in $SERVERKEYSTORE_LIST) + + + + + #end + #end +
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.NoCertificatesPresent'))
+ + + $Encoder.bodyEscape($certificate.get('DESCRIPTION')) +
+   + $Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.Certificate'))  + +
+ +#else + + + +#end diff --git a/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/viewConfiguration.html b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/viewConfiguration.html new file mode 100644 index 0000000000..08ffd8c49f --- /dev/null +++ b/connectors/appsearch/connector/src/main/resources/com/mcplusa/manifoldcf/agents/output/appsearch/viewConfiguration.html @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ServerLocation')) + $Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.URLColon'))$Encoder.bodyEscape($SERVERLOCATION)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.SecretApiKey'))private-*****

$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.SSLCertificateListColon')) + + #if($SERVERKEYSTORE_LIST.size() == 0) + + #else + #foreach($certificate in $SERVERKEYSTORE_LIST) + + + + #end + #end +
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.NoCertificatesPresent'))
+ $Encoder.bodyEscape($certificate.get('DESCRIPTION')) +
+

$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.EngineNameColon'))$Encoder.bodyEscape($ENGINENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ContentAttributeNameColon'))$Encoder.bodyEscape($CONTENTATTRIBUTENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.CreatedDateAttributeNameColon'))$Encoder.bodyEscape($CREATEDDATEATTRIBUTENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ModifiedDateAttributeNameColon'))$Encoder.bodyEscape($MODIFIEDDATEATTRIBUTENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.IndexingDateAttributeNameColon'))$Encoder.bodyEscape($INDEXINGDATEATTRIBUTENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.MimeTypeAttributeNameColon'))$Encoder.bodyEscape($MIMETYPEATTRIBUTENAME)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ExtractorPatternColon'))$Encoder.bodyEscape($EXTRACTORPATTERN)
$Encoder.bodyEscape($ResourceBundle.getString('AppSearchConnector.ReplacementStringColon'))$Encoder.bodyEscape($REPLACEMENTSTRING)
\ No newline at end of file diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndexTest.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndexTest.java new file mode 100644 index 0000000000..8fae2728ab --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndexTest.java @@ -0,0 +1,35 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch; + +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.http.client.HttpClient; +import org.apache.manifoldcf.core.interfaces.ConfigParams; +import org.junit.Test; + +public class AppSearchIndexTest { + + @Test + public void testGetDocumentContent() throws FileNotFoundException { + ConfigParams params = new ConfigParams(); + AppSearchConfig config = new AppSearchConfig(params); + HttpClient client = null; + AppSearchIndex index = new AppSearchIndex(client, config); + + ClassLoader classLoader = getClass().getClassLoader(); + File file = new File(classLoader.getResource("document-content.txt").getFile()); + String content = index.getDocumentContent(new FileInputStream(file)); + + Pattern pattern = Pattern.compile("\\n\\r\\t\\b\\f"); + Matcher matcher = pattern.matcher(content); + + if(matcher.find()) { + fail("Content should not contains escape sequences"); + } + } +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/APISanityHSQLDBIT.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/APISanityHSQLDBIT.java new file mode 100644 index 0000000000..0b8067a716 --- /dev/null +++ 
b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/APISanityHSQLDBIT.java @@ -0,0 +1,326 @@ +/* $Id: APISanityHSQLDBIT.java 1627345 2014-09-24 15:18:02Z kwright $ */ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +import org.apache.manifoldcf.core.interfaces.Configuration; +import org.apache.manifoldcf.core.interfaces.ConfigurationNode; +import org.apache.manifoldcf.core.interfaces.ManifoldCFException; +import org.apache.manifoldcf.crawler.system.ManifoldCF; +import org.junit.Test; + +public class APISanityHSQLDBIT extends BaseITHSQLDB { + + @Test + public void sanityCheck() + throws Exception { + try { + + int i; + + // Create a basic file system connection, and save it. 
+ ConfigurationNode connectionObject; + ConfigurationNode child; + Configuration requestObject; + Configuration result; + + connectionObject = new ConfigurationNode("repositoryconnection"); + + child = new ConfigurationNode("name"); + child.setValue("Test Connection"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("class_name"); + child.setValue("org.apache.manifoldcf.crawler.tests.TestingRepositoryConnector"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("description"); + child.setValue("Test Connection"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("max_connections"); + child.setValue("10"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("configuration"); + + //Testing Repository Connector parameters + // MHL + /* + ConfigurationNode cmisBindingNode = new ConfigurationNode("_PARAMETER_"); + cmisBindingNode.setAttribute("name", CmisConfig.BINDING_PARAM); + cmisBindingNode.setValue(CmisConfig.BINDING_DEFAULT_VALUE); + child.addChild(child.getChildCount(), cmisBindingNode); + */ + connectionObject.addChild(connectionObject.getChildCount(), child); + + requestObject = new Configuration(); + requestObject.addChild(0, connectionObject); + + result = performAPIPutOperationViaNodes("repositoryconnections/Test%20Connection", 201, requestObject); + + i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } + } + + // Create a basic null output connection, and save it. 
+ connectionObject = new ConfigurationNode("outputconnection"); + + child = new ConfigurationNode("name"); + child.setValue("AppSearch"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("class_name"); + child.setValue("com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("description"); + child.setValue("AppSearch Connection"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("max_connections"); + child.setValue("100"); + connectionObject.addChild(connectionObject.getChildCount(), child); + + child = new ConfigurationNode("configuration"); + + //AppSearch Output Connector parameters + //serverlocation + ConfigurationNode serverLocation = new ConfigurationNode("_PARAMETER_"); + serverLocation.setAttribute("name", "serverlocation"); + serverLocation.setValue("http://localhost:9200"); + child.addChild(child.getChildCount(), serverLocation); + + //indexname + ConfigurationNode engineName = new ConfigurationNode("_PARAMETER_"); + engineName.setAttribute("name", "enginename"); + engineName.setValue("index"); + child.addChild(child.getChildCount(), engineName); + + connectionObject.addChild(connectionObject.getChildCount(), child); + + requestObject = new Configuration(); + requestObject.addChild(0, connectionObject); + + result = performAPIPutOperationViaNodes("outputconnections/AppSearch", 201, requestObject); + + i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } + } + + // Create a job. 
+ ConfigurationNode jobObject = new ConfigurationNode("job"); + + child = new ConfigurationNode("description"); + child.setValue("Test Job"); + jobObject.addChild(jobObject.getChildCount(), child); + + child = new ConfigurationNode("repository_connection"); + child.setValue("Test Connection"); + jobObject.addChild(jobObject.getChildCount(), child); + + // Revamped way of adding output connection + child = new ConfigurationNode("pipelinestage"); + ConfigurationNode pipelineChild = new ConfigurationNode("stage_id"); + pipelineChild.setValue("0"); + child.addChild(child.getChildCount(), pipelineChild); + pipelineChild = new ConfigurationNode("stage_isoutput"); + pipelineChild.setValue("true"); + child.addChild(child.getChildCount(), pipelineChild); + pipelineChild = new ConfigurationNode("stage_connectionname"); + pipelineChild.setValue("AppSearch"); + child.addChild(child.getChildCount(), pipelineChild); + jobObject.addChild(jobObject.getChildCount(), child); + + child = new ConfigurationNode("run_mode"); + child.setValue("scan once"); + jobObject.addChild(jobObject.getChildCount(), child); + + child = new ConfigurationNode("start_mode"); + child.setValue("manual"); + jobObject.addChild(jobObject.getChildCount(), child); + + child = new ConfigurationNode("hopcount_mode"); + child.setValue("accurate"); + jobObject.addChild(jobObject.getChildCount(), child); + + child = new ConfigurationNode("document_specification"); + + jobObject.addChild(jobObject.getChildCount(), child); + + requestObject = new Configuration(); + requestObject.addChild(0, jobObject); + + result = performAPIPostOperationViaNodes("jobs", 201, requestObject); + + String jobIDString = null; + i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } else if (resultNode.getType().equals("job_id")) { + jobIDString = resultNode.getValue(); + } + } + if (jobIDString == 
null) { + throw new Exception("Missing job_id from return!"); + } + + // Now, start the job, and wait until it completes. + startJob(jobIDString); + waitJobInactive(jobIDString, 120000L); + + // Check to be sure we actually processed the right number of documents. + // The test data area has 3 documents and one directory, and we have to count the root directory too. + long count; + count = getJobDocumentsProcessed(jobIDString); + if (count != 3) { + throw new ManifoldCFException("Wrong number of documents processed - expected 3, saw " + new Long(count).toString()); + } + + // Now, delete the job. + deleteJob(jobIDString); + + waitJobDeleted(jobIDString, 120000L); + + // Cleanup is automatic by the base class, so we can feel free to leave jobs and connections lying around. + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + protected void startJob(String jobIDString) + throws Exception { + Configuration requestObject = new Configuration(); + + Configuration result = performAPIPutOperationViaNodes("start/" + jobIDString, 201, requestObject); + int i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } + } + } + + protected void deleteJob(String jobIDString) + throws Exception { + Configuration result = performAPIDeleteOperationViaNodes("jobs/" + jobIDString, 200); + int i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } + } + + } + + protected String getJobStatus(String jobIDString) + throws Exception { + Configuration result = performAPIGetOperationViaNodes("jobstatuses/" + jobIDString, 200); + String status = null; + int i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if 
(resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } else if (resultNode.getType().equals("jobstatus")) { + int j = 0; + while (j < resultNode.getChildCount()) { + ConfigurationNode childNode = resultNode.findChild(j++); + if (childNode.getType().equals("status")) { + status = childNode.getValue(); + } + } + } + } + return status; + } + + protected long getJobDocumentsProcessed(String jobIDString) + throws Exception { + Configuration result = performAPIGetOperationViaNodes("jobstatuses/" + jobIDString, 200); + String documentsProcessed = null; + int i = 0; + while (i < result.getChildCount()) { + ConfigurationNode resultNode = result.findChild(i++); + if (resultNode.getType().equals("error")) { + throw new Exception(resultNode.getValue()); + } else if (resultNode.getType().equals("jobstatus")) { + int j = 0; + while (j < resultNode.getChildCount()) { + ConfigurationNode childNode = resultNode.findChild(j++); + if (childNode.getType().equals("documents_processed")) { + documentsProcessed = childNode.getValue(); + } + } + } + } + if (documentsProcessed == null) { + throw new Exception("Expected a documents_processed field, didn't find it"); + } + return new Long(documentsProcessed).longValue(); + } + + protected void waitJobInactive(String jobIDString, long maxTime) + throws Exception { + long startTime = System.currentTimeMillis(); + while (System.currentTimeMillis() < startTime + maxTime) { + String status = getJobStatus(jobIDString); + if (status == null) { + throw new Exception("No such job: '" + jobIDString + "'"); + } + if (status.equals("not yet run")) { + throw new Exception("Job was never started."); + } + if (status.equals("done")) { + return; + } + if (status.equals("error")) { + throw new Exception("Job reports error."); + } + ManifoldCF.sleep(1000L); + continue; + } + throw new ManifoldCFException("ManifoldCF did not terminate in the allotted time of " + new Long(maxTime).toString() + " milliseconds"); + } + + 
protected void waitJobDeleted(String jobIDString, long maxTime) + throws Exception { + long startTime = System.currentTimeMillis(); + while (System.currentTimeMillis() < startTime + maxTime) { + String status = getJobStatus(jobIDString); + if (status == null) { + return; + } + ManifoldCF.sleep(1000L); + } + throw new ManifoldCFException("ManifoldCF did not delete in the allotted time of " + new Long(maxTime).toString() + " milliseconds"); + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseHSQLDB.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseHSQLDB.java new file mode 100644 index 0000000000..bcad0529fb --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseHSQLDB.java @@ -0,0 +1,33 @@ +/* $Id: BaseHSQLDB.java 1302522 2012-03-19 16:06:44Z kwright $ */ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +/** + * This is a testing base class that is responsible for setting up/tearing down the agents framework. 
+ */ +public class BaseHSQLDB extends org.apache.manifoldcf.crawler.tests.ConnectorBaseHSQLDB { + + protected String[] getOutputNames() { + return new String[]{"App Search"}; + } + + protected String[] getOutputClasses() { + return new String[]{"com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"}; + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseITHSQLDB.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseITHSQLDB.java new file mode 100644 index 0000000000..94605391bd --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseITHSQLDB.java @@ -0,0 +1,74 @@ +/* $Id: BaseITHSQLDB.java 1627345 2014-09-24 15:18:02Z kwright $ */ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +import org.elasticsearch.node.Node; +import org.junit.After; +import org.junit.Before; + +import org.eclipse.jetty.server.handler.ContextHandlerCollection; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.webapp.WebAppContext; + +import static org.elasticsearch.node.NodeBuilder.*; + +/** + * Base integration test class for the AppSearch output connector: registers the testing repository connector (under the name "CMIS") and starts a local Elasticsearch node before each test, closing it afterwards + * + * @author Piergiorgio Lucidi + * + * + */ +public class BaseITHSQLDB extends org.apache.manifoldcf.crawler.tests.BaseITHSQLDB { + + protected Node node = null; + + protected String[] getConnectorNames() { + return new String[]{"CMIS"}; + } + + protected String[] getConnectorClasses() { + return new String[]{"org.apache.manifoldcf.crawler.tests.TestingRepositoryConnector"}; + } + + protected String[] getOutputNames() { + return new String[]{"AppSearch"}; + } + + protected String[] getOutputClasses() { + return new String[]{"com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"}; + } + + @Before + public void setupElasticSearch() + throws Exception { + //Initialize ElasticSearch server + //the default port is 9200 + System.out.println("ElasticSearch is starting..."); + node = nodeBuilder().local(true).node(); + System.out.println("ElasticSearch is started on port 9200"); + } + + @After + public void cleanUpElasticSearch() { + if (node != null) { + node.close(); + } + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BasePostgresql.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BasePostgresql.java new file mode 100644 index 0000000000..cbf5754a72 --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BasePostgresql.java @@ -0,0 +1,33 @@ +/* $Id: BasePostgresql.java 1302522 2012-03-19 16:06:44Z kwright $ */
+/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +/** + * This is a testing base class that is responsible for setting up/tearing down the agents framework. + */ +public class BasePostgresql extends org.apache.manifoldcf.crawler.tests.ConnectorBasePostgresql { + + protected String[] getOutputNames() { + return new String[]{"App Search"}; + } + + protected String[] getOutputClasses() { + return new String[]{"com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"}; + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseUIHSQLDB.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseUIHSQLDB.java new file mode 100644 index 0000000000..88fbf8e499 --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/BaseUIHSQLDB.java @@ -0,0 +1,41 @@ +/* $Id: BaseUIHSQLDB.java 1627345 2014-09-24 15:18:02Z kwright $ */ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +/** + * Tests that run the "agents daemon" should be derived from this + */ +public class BaseUIHSQLDB extends org.apache.manifoldcf.crawler.tests.ConnectorBaseUIHSQLDB { + + protected String[] getConnectorNames() { + return new String[]{"Test Connector"}; + } + + protected String[] getConnectorClasses() { + return new String[]{"org.apache.manifoldcf.crawler.tests.TestingRepositoryConnector"}; + } + + protected String[] getOutputNames() { + return new String[]{"AppSearch"}; + } + + protected String[] getOutputClasses() { + return new String[]{"com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"}; + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/NavigationHSQLDBUI.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/NavigationHSQLDBUI.java new file mode 100644 index 0000000000..147e3067d8 --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/NavigationHSQLDBUI.java @@ -0,0 +1,157 @@ +/* $Id: NavigationHSQLDBUI.java 1856509 2019-03-28 23:26:45Z kishore $ */ +/** + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +import java.util.Locale; + +import org.apache.manifoldcf.core.tests.SeleniumTester; +import org.junit.Test; + +/** + * Basic UI navigation tests + */ +public class NavigationHSQLDBUI extends BaseUIHSQLDB { + + @Test + public void createConnectionsAndJob() + throws Exception { + testerInstance.start(SeleniumTester.BrowserType.CHROME, "en-US", "http://localhost:8346/mcf-crawler-ui/index.jsp"); + + //Login + testerInstance.waitForElementWithName("loginform"); + testerInstance.setValue("userID", "admin"); + testerInstance.setValue("password", "admin"); + testerInstance.clickButton("Login"); + testerInstance.verifyHeader("Welcome to Apache ManifoldCF™"); + testerInstance.navigateTo("List output connections"); + testerInstance.clickButton("Add a new output connection"); + + // Fill in a name + testerInstance.waitForElementWithName("connname"); + testerInstance.setValue("connname", "MyOutputConnection"); + + //Go to the Type tab + testerInstance.clickTab("Type"); + + // Select a type + testerInstance.waitForElementWithName("classname"); + testerInstance.selectValue("classname", "com.mcplusa.manifoldcf.agents.output.appsearch.AppSearchConnector"); +
testerInstance.clickButton("Continue"); + + // Visit the Throttling tab + testerInstance.clickTab("Throttling"); + + // Server tab + testerInstance.clickTab("Server"); + testerInstance.setValue("serverlocation", "http://localhost:9200/"); + + // Parameters tab + testerInstance.clickTab("Parameters"); + testerInstance.setValue("enginename", "index"); + + // Go back to the Name tab + testerInstance.clickTab("Name"); + + // Now save the connection. + testerInstance.clickButton("Save"); + testerInstance.verifyThereIsNoError(); + + // Define a repository connection via the UI + testerInstance.navigateTo("List repository connections"); + testerInstance.clickButton("Add new connection"); + + testerInstance.waitForElementWithName("connname"); + testerInstance.setValue("connname", "MyRepositoryConnection"); + + // Select a type + testerInstance.clickTab("Type"); + testerInstance.selectValue("classname", "org.apache.manifoldcf.crawler.tests.TestingRepositoryConnector"); + testerInstance.clickButton("Continue"); + + // Visit the Throttling tab + testerInstance.clickTab("Throttling"); + + // Go back to the Name tab + testerInstance.clickTab("Name"); + + // Save + testerInstance.clickButton("Save"); + testerInstance.verifyThereIsNoError(); + + // Create a job + testerInstance.navigateTo("List jobs"); + //Add a job + testerInstance.clickButton("Add a new job"); + testerInstance.waitForElementWithName("description"); + //Fill in a name + testerInstance.setValue("description", "MyJob"); + testerInstance.clickTab("Connection"); + + // Select the connections + testerInstance.selectValue("output_connectionname", "MyOutputConnection"); + testerInstance.selectValue("output_precedent", "-1"); + testerInstance.clickButton("Add output", true); + testerInstance.waitForElementWithName("connectionname"); + testerInstance.selectValue("connectionname", "MyRepositoryConnection"); + + testerInstance.clickButton("Continue"); + + // Visit all the tabs. 
Scheduling tab first + testerInstance.clickTab("Scheduling"); + testerInstance.selectValue("dayofweek", "0"); + testerInstance.selectValue("hourofday", "1"); + testerInstance.selectValue("minutesofhour", "30"); + testerInstance.selectValue("monthofyear", "11"); + testerInstance.selectValue("dayofmonth", "none"); + testerInstance.setValue("duration", "120"); + testerInstance.clickButton("Add Scheduled Time", true); + testerInstance.waitForElementWithName("editjob"); + + // MHL + // Save the job + testerInstance.clickButton("Save"); + testerInstance.verifyThereIsNoError(); + + testerInstance.waitForPresenceById("job"); + String jobID = testerInstance.getAttributeValueById("job", "jobid"); + + //Navigate to List Jobs + testerInstance.navigateTo("List jobs"); + testerInstance.waitForElementWithName("listjobs"); + + //Delete the job + testerInstance.clickButtonByTitle("Delete job " + jobID); + testerInstance.acceptAlert(); + testerInstance.verifyThereIsNoError(); + + //Wait for the job to go away + testerInstance.waitForJobDeleteEN(jobID, 120); + + // Delete the repository connection + testerInstance.navigateTo("List repository connections"); + testerInstance.clickButtonByTitle("Delete MyRepositoryConnection"); + testerInstance.acceptAlert(); + + // Delete the output connection + testerInstance.navigateTo("List output connections"); + testerInstance.clickButtonByTitle("Delete MyOutputConnection"); + testerInstance.acceptAlert(); + + } + +} diff --git a/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/UtilsTest.java b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/UtilsTest.java new file mode 100644 index 0000000000..613b2e0ef3 --- /dev/null +++ b/connectors/appsearch/connector/src/test/java/com/mcplusa/manifoldcf/agents/output/appsearch/tests/UtilsTest.java @@ -0,0 +1,63 @@ +package com.mcplusa.manifoldcf.agents.output.appsearch.tests; + +import org.junit.Test; +import 
static org.junit.Assert.*; + +import com.mcplusa.manifoldcf.agents.output.appsearch.Utils; + +public class UtilsTest { + + public UtilsTest() { + } + + /** + * Test of cleanFieldName method, of class Utils. + */ + @Test + public void testCleanFieldName() { + assertEquals("twitterog", Utils.cleanFieldName("twitter:og")); + assertEquals("twitterimage", Utils.cleanFieldName("Twitter:Image")); + assertEquals("twitter_description", Utils.cleanFieldName("twitter description")); + assertEquals("fieldwithnum123", Utils.cleanFieldName("field-with-num-123")); + assertEquals("fields", Utils.cleanFieldName("")); + } + + @Test + public void testChunkSplit() { + String value = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; + String[] str = Utils.chunkSplit(value, 200); + + assertEquals(3, str.length); + assertEquals(200, str[0].length()); + assertEquals(200, str[1].length()); + assertEquals(44, str[2].length()); + } + + @Test + public void testGetValidId() { + String url = "http://localhost:8080/mcplusa/myportal/agents/portal/quoteenroll/digs%20-%20quoting%20%20enrollment%20(individual)/!ut/p/a1/rZHLTsMwEEV_hS6yjDx5OWZpdRFImzYCAYk3lZM6D5TYSWoqPh8HFu2GQhHejEeae-aOLmIoQ0zyY1tz3SrJu7lneLfdBtTxI1iRhzsMFEfrpZ_6AFFoBnIzAN88Cj_pXxBDrJR60A3KeS2kvimV1KZaMKhJ886C8U1pIeSkOtNM3Pz5QewO3IJG9WIGDGW7RzkB7hZFIWxyyx3bL8LAJo6L7QoELitMPAH7r4WXLefmpvBkOoqfiTHth6vYTRxIAT1eufMy8D74Z2DqXg2Mf5Fz-zqOjJq05nzeNcr-FpchuVOyTGpjkOvGbmWlUHYmQtmZCGWfoqF_6omHq83G5gUBL-iOa0oXiw9FOxLu/dl5/d5/L0lJS2FZcHBpbW1LYVlwcGltbVlwcGchIS9vSHd3QUFBSXdpRUFJSkRBQ1VZaUVJVTVCZ09DbFFBQUlBQVNvU0FyUnFBQURBQWF0QXdMTzlRQUFFQUJ3WWVBR0tTQUFDa0k1Z21HU3dTaXJTQUFDZ0s5ZzBIUS80SmlHcGhxRWFoR29ScUVhbEdwaC9aNl9PTzVBMTRHMEs4Ukg2MEE2R0xDNFA0MDBHNy9hZ2VudCBjb250ZW50JTBwb3J0YWwlMHF1b3RlZW5yb2xsJTBkaWdzIC0gcXVvdGluZyAgZW5yb2xsbWVudCAoaW5kaXZpZHVhbCkvZjQ0YmEyOWUtODQwOC00YjFlLTg4MzktMTFlMjI4NDgxYTVhL2RpZ3MgLSBxdW90aW5nICBlbnJvbGxtZW50IChpbmRpdmlkdWFsKQ"; + String id = Utils.getValidId(url); + + assertEquals(780, id.length()); + assertEquals( + 
"http%3A%2F%2Flocalhost%3A8080%2Fmcplusa%2Fmyportal%2Fagents%2Fportal%2Fquoteenroll%2Fdigs%2520-%2520quoting%2520%2520enrollment%2520%28individual%29%2F%21ut%2Fp%2Fa1%2FrZHLTsMwEEV_hS6yjDx5OWZpdRFImzYCAYk3lZM6D5TYSWoqPh8HFu2GQhHejEeae-aOLmIoQ0zyY1tz3SrJu7lneLfdBtTxI1iRhzsMFEfrpZ_6AFFoBnIzAN88Cj_pXxBDrJR60A3KeS2kvimV1KZaMKhJ886C8U1pIeSkOtNM3Pz5QewO3IJG9WIGDGW7RzkB7hZFIWxyyx3bL8LAJo6L7QoELitMPAH7r4WXLefmpvBkOoqfiTHth6vYTRxIAT1eufMy8D74Z2DqXg2Mf5Fz-zqOjJq05nzeNcr-FpchuVOyTGpjkOvGbmWlUHYmQtmZCGWfoqF_6omHq83G5gUBL-iOa0oXiw9FOxLu%2Fdl5%2Fd5%2FL0lJS2FZcHBpbW1LYVlwcGltbVlwcGchIS9vSHd3QUFBSXdpRUFJSkRBQ1VZaUVJVTVCZ09DbFFBQUlBQVNvU0FyUnFBQURBQWF0QXdMTzlRQUFFQUJ3WWVBR0tTQUFDa0k1Z21HU3dTaXJTQUFDZ0s5ZzBIUS80SmlHcGhxRWFoR29ScUVhbEdwaC9aNl9PTzVBMTRHMEs4Ukg2MEE2R0xDNFA0MDBHNy9hZ2VudCBjb2", + id); + } + + @Test + public void testProcessExpression() { + String url = "http://localhost:8080/mcplusa/myportal/agents/portal/quoteenroll/digs%20-%20quoting%20%20enrollment%20(individual)/!ut/p/a1/rZHLTsMwEEV_hS"; + + // Single group + String pattern = "http://localhost:8080/(.*)/!ut.*"; + String replacementstring = "http://my-custom-domain.com/$(1)"; + String data = Utils.processExpression(pattern, replacementstring, url); + assertEquals("http://my-custom-domain.com/mcplusa/myportal/agents/portal/quoteenroll/digs%20-%20quoting%20%20enrollment%20(individual)", data); + + // Multiple groups + String patternGroups = "http://(.*):8080/(.*)/!ut.*"; + String replacementstringGroups = "http://$(1).com/$(1)/$(2)"; + String dataGroups = Utils.processExpression(patternGroups, replacementstringGroups, url); + assertEquals("http://localhost.com/localhost/mcplusa/myportal/agents/portal/quoteenroll/digs%20-%20quoting%20%20enrollment%20(individual)", dataGroups); + } +} diff --git a/connectors/appsearch/pom.xml b/connectors/appsearch/pom.xml new file mode 100644 index 0000000000..6a9a1d562d --- /dev/null +++ b/connectors/appsearch/pom.xml @@ -0,0 +1,478 @@ + + + + + + org.apache.manifoldcf + mcf-connectors 
+ 2.14 + ./local-maven-repo/org/apache/manifoldcf/mcf-connectors/2.14/mcf-connectors-2.14.pom + + 4.0.0 + + + + local-maven-repo + file://${project.basedir}/local-maven-repo + + + sonatype + https://oss.sonatype.org/content/repositories/releases + + + + + UTF-8 + UTF-8 + + + mcf-appsearch-connector + ManifoldCF - Connectors - AppSearch + + + integration-test + ${basedir}/connector/src/main/java + ${basedir}/connector/src/test/java + + + ${basedir}/connector/src/main/native2ascii + + **/*.properties + + + + ${basedir}/connector/src/main/resources + + **/*.html + **/*.js + + + + + + ${basedir}/connector/src/test/resources + + + + + + + org.codehaus.mojo + native2ascii-maven-plugin + 1.0-beta-1 + + target/classes + + + + native2ascii-utf8 + + native2ascii + + + UTF8 + + **/*.properties + + + + + + + + maven-dependency-plugin + + + copy-war + generate-resources + + copy + + + target/dependency + + + ${project.groupId} + mcf-api-service + ${project.version} + war + false + mcf-api-service.war + + + ${project.groupId} + mcf-authority-service + ${project.version} + war + false + mcf-authority-service.war + + + ${project.groupId} + mcf-crawler-ui + ${project.version} + war + false + mcf-crawler-ui.war + + + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + **/*Postgresql*.java + **/*MySQL*.java + + 1 + false + target/test-output + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 2.18.1 + + ${skipITs} + + ../dependency/mcf-crawler-ui.war + ../dependency/mcf-authority-service.war + ../dependency/mcf-api-service.war + + + **/*Postgresql*.java + **/*MySQL*.java + + 1 + false + target/test-output + + + + integration-test + + integration-test + + + + verify + + verify + + + + + + + + + + + + ${project.groupId} + mcf-core + ${project.version} + + + ${project.groupId} + mcf-connector-common + ${project.version} + + + ${project.groupId} + mcf-agents + ${project.version} + + + ${project.groupId} + mcf-pull-agent + ${project.version} + + + 
${project.groupId} + mcf-ui-core + ${project.version} + + + com.google.code.gson + gson + 2.8.6 + + + org.apache.httpcomponents + httpclient + ${httpcomponent.httpclient.version} + + + commons-logging + commons-logging + ${commons-logging.version} + + + commons-codec + commons-codec + ${commons-codec.version} + + + log4j + log4j + 1.2.17 + + + + + + junit + junit + ${junit.version} + test + + + ${project.groupId} + mcf-core + ${project.version} + test-jar + test + + + ${project.groupId} + mcf-agents + ${project.version} + test-jar + test + + + ${project.groupId} + mcf-pull-agent + ${project.version} + test-jar + test + + + org.postgresql + postgresql + ${postgresql.version} + test + + + org.hsqldb + hsqldb + ${hsqldb.version} + test + + + mysql + mysql-connector-java + ${mysql.version} + test + + + ${project.groupId} + mcf-api-service + ${project.version} + war + test + + + ${project.groupId} + mcf-authority-service + ${project.version} + war + test + + + ${project.groupId} + mcf-crawler-ui + ${project.version} + war + test + + + + org.eclipse.jetty + jetty-server + ${jetty.version} + test + + + org.eclipse.jetty + jetty-util + ${jetty.version} + test + + + org.eclipse.jetty + jetty-webapp + ${jetty.version} + test + + + org.eclipse.jetty + jetty-servlet + ${jetty.version} + test + + + org.eclipse.jetty + jetty-http + ${jetty.version} + test + + + org.eclipse.jetty + jetty-io + ${jetty.version} + test + + + org.eclipse.jetty + jetty-security + ${jetty.version} + test + + + org.eclipse.jetty + jetty-continuation + ${jetty.version} + test + + + org.eclipse.jetty + jetty-xml + ${jetty.version} + test + + + + org.mortbay.jetty + jsp-api-2.1-glassfish + ${glassfish.version} + test + + + org.mortbay.jetty + jsp-2.1-glassfish + ${glassfish.version} + test + + + + org.slf4j + slf4j-api + ${slf4j.version} + test + + + org.slf4j + slf4j-simple + ${slf4j.version} + test + + + + + org.elasticsearch + elasticsearch + 1.0.1 + test + + + org.apache.lucene + lucene-core + 4.6.1 + 
test + + + org.apache.lucene + lucene-analyzers-common + 4.6.1 + test + + + org.apache.lucene + lucene-codecs + 4.6.1 + test + + + org.apache.lucene + lucene-queries + 4.6.1 + test + + + org.apache.lucene + lucene-memory + 4.6.1 + test + + + org.apache.lucene + lucene-highlighter + 4.6.1 + test + + + org.apache.lucene + lucene-queryparser + 4.6.1 + test + + + org.apache.lucene + lucene-misc + 4.6.1 + test + + + org.apache.lucene + lucene-join + 4.6.1 + test + + + org.apache.lucene + lucene-grouping + 4.6.1 + test + + + + + org.elasticsearch + elasticsearch-mapper-attachments + 2.0.0.RC1 + test + + + log4j + log4j + + + + + + From a589b1d2fa0de88a00e96a80b92e20182c414b82 Mon Sep 17 00:00:00 2001 From: Michael Cizmar Date: Mon, 19 Jan 2026 22:25:18 -0600 Subject: [PATCH 2/3] changes from initial code review --- connectors/appsearch/build.xml | 4 ++-- .../output/appsearch/AppSearchConnector.java | 10 ++------ .../output/appsearch/AppSearchDelete.java | 17 +++++++++++-- .../output/appsearch/AppSearchIndex.java | 24 +++++++++++++------ .../agents/output/appsearch/Utils.java | 22 ++++++++--------- 5 files changed, 47 insertions(+), 30 deletions(-) diff --git a/connectors/appsearch/build.xml b/connectors/appsearch/build.xml index 10c7d03791..38f7ea5e9d 100644 --- a/connectors/appsearch/build.xml +++ b/connectors/appsearch/build.xml @@ -92,8 +92,8 @@ - - + + diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java index 7bafb46876..ccef476ea5 100644 --- a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchConnector.java @@ -37,8 +37,6 @@ import org.apache.http.conn.socket.ConnectionSocketFactory; import 
org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; -import org.apache.http.auth.Credentials; -import org.apache.http.auth.AuthScope; import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity; import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity; @@ -106,14 +104,13 @@ public void connect(ConfigParams configParams) { protected HttpClient getSession() throws ManifoldCFException { if (client == null) { - int socketTimeout = 900000; - int connectionTimeout = 60000; + int socketTimeout = 900000; // TODO: Make this configurable + int connectionTimeout = 60000; // TODO: Make this configurable // Load configuration from parameters final AppSearchConfig config = new AppSearchConfig(params); final IKeystoreManager keystoreManager = config.getSSLKeystore(); - final Credentials credentials = null; // Set up ingest ssl if indicated SSLConnectionSocketFactory myFactory = SSLConnectionSocketFactory.getSocketFactory(); @@ -132,9 +129,6 @@ protected HttpClient getSession() connectionManager = poolingConnectionManager; CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); - if (credentials != null) { - credentialsProvider.setCredentials(AuthScope.ANY, credentials); - } RequestConfig.Builder requestBuilder = RequestConfig.custom() .setCircularRedirectsAllowed(true) diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java index ffd3703cdb..ea4f5b4720 100644 --- a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchDelete.java @@ -79,8 +79,21 @@ private int getAmountDocumentChunks(String documentId) { HttpPost method = new 
HttpPost(url.toString()); Gson gson = new Gson(); - String searchQuery = "{\"query\":\"" + documentId + "\",\"result_fields\":{\"title\":{\"raw\": {}}},\"page\":{\"size\":100}}"; - method.setEntity(new StringEntity(searchQuery, Consts.UTF_8)); + JsonObject searchQuery = new JsonObject(); + searchQuery.addProperty("query", documentId); + + JsonObject resultFields = new JsonObject(); + JsonObject title = new JsonObject(); + JsonObject raw = new JsonObject(); + title.add("raw", raw); + resultFields.add("title", title); + searchQuery.add("result_fields", resultFields); + + JsonObject page = new JsonObject(); + page.addProperty("size", 100); + searchQuery.add("page", page); + + method.setEntity(new StringEntity(gson.toJson(searchQuery), Consts.UTF_8)); call(method); diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java index 34b658b877..7a519be021 100644 --- a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java +++ b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/AppSearchIndex.java @@ -80,7 +80,7 @@ private String generateDocJson(String documentURI, RepositoryDocument document, if (dateFieldValues.length > 1) { doc.addProperty(fieldNameClean, gson.toJson(dateFieldValues)); } else if (dateFieldValues.length == 1) { - doc.addProperty(fieldNameClean, dateFieldValues[0].toString()); + doc.addProperty(fieldNameClean, formatAsString(dateFieldValues[0])); } } else { String[] fieldValues; @@ -183,9 +183,10 @@ private boolean pushDocument(String reqBody) { return false; } - setResult("JSONERROR", Result.ERROR, error); + // If result is not OK but no specific error, still fail + setResult("JSONERROR", Result.ERROR, "Unknown error: " + getResponse()); Logging.connectors.warn("AppSearch: Index 
failed: " + getResponse()); - return true; + return false; } catch (Exception ex) { setResult("JSONERROR", Result.ERROR, ex.getMessage()); @@ -236,12 +237,21 @@ public boolean execute(String documentURI, RepositoryDocument document, InputStr String[] contentChunk = Utils.chunkSplit(documentContent, bytesChunks); + if (contentChunk == null) { + Logging.connectors.warn("AppSearch: Failed to split document into chunks for URI: " + documentURI); + setResult("CHUNKSPLITERROR", Result.ERROR, "Failed to split document content into chunks"); + return false; + } + Logging.connectors.debug("Number of chunks generated: " + contentChunk.length); for (int i = 0; i < contentChunk.length; i++) { String documentChunk = generateDocJson(documentURI, document, contentChunk[i], config.getContentAttributeName(), - groupKey, i); - Logging.connectors.debug("Chunk " + i + " size: " + documentChunk.getBytes().length); - pushDocument(documentChunk); + groupKey, i + 1); + Logging.connectors.debug("Chunk " + (i + 1) + " size: " + documentChunk.getBytes().length); + if (!pushDocument(documentChunk)) { + Logging.connectors.warn("AppSearch: Failed to push chunk " + (i + 1) + " for URI: " + documentURI); + return false; + } } return true; @@ -272,7 +282,7 @@ protected String getDocumentContent(InputStream inputStream) { } } catch (IOException ex) { Logger.getLogger(AppSearchIndex.class.getName()).log(Level.SEVERE, null, ex); - break; + break; // TODO: Revisit this with a better solution [This plus while = true] } } } catch (IOException ex) { diff --git a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java index 1e9b32d927..433ffde924 100644 --- a/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java +++ 
b/connectors/appsearch/connector/src/main/java/com/mcplusa/manifoldcf/agents/output/appsearch/Utils.java @@ -28,21 +28,21 @@ public static String cleanFieldName(String fieldName) { public static String[] chunkSplit(String original, int length) { List chunks = new ArrayList<>(); - ByteArrayInputStream bis = new ByteArrayInputStream(original.getBytes()); - int n = 0; - byte[] buffer = new byte[length]; - - try { + try (ByteArrayInputStream bis = new ByteArrayInputStream(original.getBytes())) { + byte[] buffer = new byte[length]; + int n; + while ((n = bis.read(buffer)) > 0) { - String result = ""; - for (byte b : buffer) { - result += (char) b; + StringBuilder result = new StringBuilder(n); + // Only process the first n bytes that were actually read + for (int i = 0; i < n; i++) { + result.append((char) buffer[i]); } - Arrays.fill(buffer, (byte) 0); - chunks.add(result.trim()); + chunks.add(result.toString().trim()); } } catch (IOException e) { - return null; + // return empty array if error - TODO: Revisit this with a better solution + return new String[0]; } return chunks.toArray(new String[0]); From 72255a83ba189eea37d6ef9e24685b5691b8e73a Mon Sep 17 00:00:00 2001 From: Michael Cizmar Date: Mon, 19 Jan 2026 22:38:44 -0600 Subject: [PATCH 3/3] Some changes to make the pom work when embedded in the main project. --- connectors/appsearch/pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/connectors/appsearch/pom.xml b/connectors/appsearch/pom.xml index 6a9a1d562d..c4d8f9e27d 100644 --- a/connectors/appsearch/pom.xml +++ b/connectors/appsearch/pom.xml @@ -20,16 +20,11 @@ org.apache.manifoldcf mcf-connectors - 2.14 - ./local-maven-repo/org/apache/manifoldcf/mcf-connectors/2.14/mcf-connectors-2.14.pom + 2.30-SNAPSHOT 4.0.0 - - local-maven-repo - file://${project.basedir}/local-maven-repo - sonatype https://oss.sonatype.org/content/repositories/releases