diff --git a/oak-blob-cloud-azure/pom.xml b/oak-blob-cloud-azure/pom.xml index fe757a448a6..fcea9579087 100644 --- a/oak-blob-cloud-azure/pom.xml +++ b/oak-blob-cloud-azure/pom.xml @@ -40,10 +40,9 @@ com.fasterxml.jackson.annotation;resolution:=optional, com.fasterxml.jackson.databind*;resolution:=optional, - com.fasterxml.jackson.dataformat.xml;resolution:=optional, + com.fasterxml.jackson.dataformat.xml*;resolution:=optional, com.fasterxml.jackson.datatype*;resolution:=optional, - com.azure.identity.broker.implementation;resolution:=optional, - com.azure.xml;resolution:=optional, + com.azure.identity.broker*;resolution:=optional, com.microsoft.aad.msal4jextensions*;resolution:=optional, com.sun.net.httpserver;resolution:=optional, sun.misc;resolution:=optional, @@ -64,10 +63,14 @@ sun.io azure-storage, + azure-storage-blob, + azure-storage-common, + azure-storage-internal-avro, azure-keyvault-core, azure-core, azure-identity, azure-json, + azure-xml, guava, jsr305, reactive-streams, @@ -167,19 +170,46 @@ com.microsoft.azure azure-keyvault-core + + + + com.azure + azure-storage-blob + 12.34.0 + + + com.azure + azure-storage-common + 12.33.0 + + + com.azure + azure-storage-internal-avro + 12.19.0 + + + com.azure azure-identity + 1.18.3 com.azure azure-core + 1.58.0 com.azure azure-json + 1.5.1 + + + com.azure + azure-xml + 1.2.1 org.reactivestreams @@ -188,6 +218,7 @@ com.microsoft.azure msal4j + 1.23.1 io.projectreactor @@ -200,6 +231,7 @@ com.azure azure-core-http-netty + 1.16.4 io.netty @@ -328,6 +360,12 @@ mockito-core test + + com.tngtech.archunit + archunit-junit4 + 1.3.0 + test + org.testcontainers testcontainers diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AbstractAzureDataStoreService.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AbstractAzureDataStoreService.java deleted file mode 100644 index d15f7505c63..00000000000 --- 
a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AbstractAzureDataStoreService.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; - -import java.util.Dictionary; -import java.util.Hashtable; -import java.util.Map; -import java.util.Properties; - -import org.apache.jackrabbit.oak.spi.blob.data.DataStore; -import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; -import org.apache.jackrabbit.oak.plugins.blob.AbstractSharedCachingDataStore; -import org.apache.jackrabbit.oak.plugins.blob.datastore.AbstractDataStoreService; -import org.osgi.framework.Constants; -import org.osgi.framework.ServiceRegistration; -import org.osgi.service.component.ComponentContext; - -public abstract class AbstractAzureDataStoreService extends AbstractDataStoreService { - private static final String DESCRIPTION = "oak.datastore.description"; - - private ServiceRegistration delegateReg; - - @Override - protected DataStore createDataStore(ComponentContext context, Map config) { - Properties properties = new Properties(); - properties.putAll(config); - - AzureDataStore dataStore = new 
AzureDataStore(); - dataStore.setStatisticsProvider(getStatisticsProvider()); - dataStore.setProperties(properties); - - Dictionary props = new Hashtable(); - props.put(Constants.SERVICE_PID, dataStore.getClass().getName()); - props.put(DESCRIPTION, getDescription()); - - delegateReg = context.getBundleContext().registerService(new String[] { - AbstractSharedCachingDataStore.class.getName(), - AbstractSharedCachingDataStore.class.getName() - }, dataStore , props); - - return dataStore; - } - - protected void deactivate() throws DataStoreException { - if (delegateReg != null) { - delegateReg.unregister(); - } - super.deactivate(); - } - - @Override - protected String[] getDescription() { - return new String[] {"type=AzureBlob"}; - } -} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreService.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreService.java deleted file mode 100644 index 3ad2e5e46e8..00000000000 --- a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreService.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; - -import org.apache.jackrabbit.oak.stats.StatisticsProvider; -import org.osgi.service.component.annotations.Component; -import org.osgi.service.component.annotations.ConfigurationPolicy; -import org.osgi.service.component.annotations.Reference; - -@Component(configurationPolicy = ConfigurationPolicy.REQUIRE, name = AzureDataStoreService.NAME) -public class AzureDataStoreService extends AbstractAzureDataStoreService { - - @Reference - private StatisticsProvider statisticsProvider; - - public static final String NAME = "org.apache.jackrabbit.oak.plugins.blob.datastore.AzureDataStore"; - - protected StatisticsProvider getStatisticsProvider(){ - return statisticsProvider; - } - - protected void setStatisticsProvider(StatisticsProvider statisticsProvider) { - this.statisticsProvider = statisticsProvider; - } -} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapper.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapper.java new file mode 100644 index 00000000000..5ce215a58c1 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapper.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12.AzureDataStoreV12; +import org.apache.jackrabbit.oak.commons.PropertiesUtil; +import org.apache.jackrabbit.oak.plugins.blob.AbstractSharedCachingDataStore; +import org.apache.jackrabbit.oak.plugins.blob.datastore.AbstractDataStoreService; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.*; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStore; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.apache.jackrabbit.oak.stats.StatisticsProvider; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.osgi.framework.Constants; +import org.osgi.framework.ServiceRegistration; +import org.osgi.service.component.ComponentContext; +import org.osgi.service.component.annotations.Component; +import org.osgi.service.component.annotations.ConfigurationPolicy; +import org.osgi.service.component.annotations.Deactivate; +import org.osgi.service.component.annotations.Reference; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.InputStream; +import java.net.URI; +import java.util.*; + + +/** + * OSGi component that selects between Azure SDK v8 ({@link AzureDataStore}) and v12 + * ({@link AzureDataStoreV12}) at activation time based on configuration, then registers the + * chosen 
implementation under the legacy v8 PID so consumers bound to that PID keep working. + * + *

Replaces the old dual-service architecture (AzureDataStoreService + AzureDataStoreServiceV12 + * + AzureSDKConditionGate) that caused deadlocks during OSGi service swap on FT toggle. + */ +@Component( + name = AzureDataStoreWrapper.NAME, + configurationPid = AzureDataStoreWrapper.NAME, + configurationPolicy = ConfigurationPolicy.REQUIRE +) +public class AzureDataStoreWrapper extends AbstractDataStoreService { + + private static final Logger log = LoggerFactory.getLogger(AzureDataStoreWrapper.class); + + public static final String NAME = "org.apache.jackrabbit.oak.plugins.blob.datastore.AzureDataStore"; + + // Same name for now; kept as separate constants so they can diverge if the sources need different keys later. + static final String ENV_VAR_V12_ENABLED = "blobstoreAzureV12Enabled"; + static final String OSGI_CONFIG_V12_ENABLED = "blobstoreAzureV12Enabled"; + static final String JVM_PROPERTY_V12_ENABLED = "blob.azure.v12.enabled"; + // Package-private so DelegatingDataStore (inner class) and same-package tests can reach it without reflection. + AbstractSharedCachingDataStore activeImpl; + @Reference + private StatisticsProvider statisticsProvider; + private ServiceRegistration delegateReg; + + static ServiceRegistration registerService(ComponentContext context, AbstractSharedCachingDataStore service) { + Dictionary delegateProps = new Hashtable<>(); + // Use the v8 PID so consumers bound to "org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzureDataStore" + // still receive this service without needing a config change. + delegateProps.put(Constants.SERVICE_PID, AzureDataStore.class.getName()); + delegateProps.put("oak.datastore.description", new String[]{"type=AzureBlob"}); + return context.getBundleContext().registerService( + AbstractSharedCachingDataStore.class.getName(), service, delegateProps); + } + + /** + * Priority: JVM property (test/local override) > env var (fleet-wide container config) > OSGi config (normal production path). 
+ * Higher-authority sources win so operators can override without touching OSGi config. + */ + static boolean getUseV12Value(Map config) { + if (System.getProperty(JVM_PROPERTY_V12_ENABLED) != null) { + boolean useV12 = Boolean.getBoolean(JVM_PROPERTY_V12_ENABLED); + log.info("Azure SDK v12 flag: JVM property {}={}", JVM_PROPERTY_V12_ENABLED, useV12); + return useV12; + } + String envVar = System.getenv(ENV_VAR_V12_ENABLED); + if (envVar != null) { + boolean useV12 = Boolean.parseBoolean(envVar); + log.info("Azure SDK v12 flag: environment variable {}={}", ENV_VAR_V12_ENABLED, useV12); + return useV12; + } + if (config.containsKey(OSGI_CONFIG_V12_ENABLED)) { + boolean useV12 = PropertiesUtil.toBoolean(config.get(OSGI_CONFIG_V12_ENABLED), false); + log.info("Azure SDK v12 flag: OSGi config {}={}", OSGI_CONFIG_V12_ENABLED, useV12); + return useV12; + } + log.info("Azure SDK v12 flag: not configured, using default (false)"); + return false; + } + + static AbstractSharedCachingDataStore createV8Store(Properties props) { + AzureDataStore v8 = new AzureDataStore(); + v8.setProperties(props); + return v8; + } + + static AbstractSharedCachingDataStore createV12Store(Properties props) { + AzureDataStoreV12 v12 = new AzureDataStoreV12(); + v12.setProperties(props); + return v12; + } + + private static Properties toProperties(Map config) { + Properties p = new Properties(); + p.putAll(config); + return p; + } + + // -- Helpers --------------------------------------------------------- + + @Override + protected DataStore createDataStore(ComponentContext context, Map config) { + boolean useV12 = getUseV12Value(config); + if (useV12) { + log.info("Starting blob store using Azure SDK v12"); + activeImpl = createV12Store(toProperties(config)); + } else { + log.info("Starting blob store using Azure SDK v8"); + activeImpl = createV8Store(toProperties(config)); + } + activeImpl.setStatisticsProvider(getStatisticsProvider()); + // Registers activeImpl separately as 
AbstractSharedCachingDataStore so consumers + // bound to that type (e.g. oak-repository-service) get the concrete store directly, + // not just the DataStore view the base class exposes. + delegateReg = registerService(context, activeImpl); + + return new DelegatingDataStore(); + } + + @Override + @Deactivate + protected void deactivate() throws DataStoreException { + if (delegateReg != null) { + // Must unregister before super.deactivate() closes the store; otherwise a + // consumer that unbinds late could receive an already-closed DataStore. + delegateReg.unregister(); + delegateReg = null; + } + super.deactivate(); + } + + @Override + protected @NotNull StatisticsProvider getStatisticsProvider() { + return statisticsProvider; + } + + @Override + protected void setStatisticsProvider(StatisticsProvider statisticsProvider) { + this.statisticsProvider = statisticsProvider; + } + + @Override + protected String[] getDescription() { + return new String[]{"type=AzureBlob"}; + } + + // -- Inner DelegatingDataStore (returned from createDataStore) ------- + + /** + * Thin DataStore proxy handed to the base class (AbstractDataStoreService). + * + *

createDataStore must return a DataStore, but we also need to register activeImpl + * separately as AbstractSharedCachingDataStore for consumers that bind to that richer type. + * Returning activeImpl directly would hand ownership to the base class and prevent the + * separate registration. This delegate keeps the two registrations independent. + */ + class DelegatingDataStore implements DataStore, ConfigurableDataRecordAccessProvider { + + @Override + public void init(String homeDir) throws DataStoreException { + activeImpl.init(homeDir); + } + + @Override + public DataRecord addRecord(InputStream stream) throws DataStoreException { + return activeImpl.addRecord(stream); + } + + @Override + public DataRecord getRecord(DataIdentifier identifier) throws DataStoreException { + return activeImpl.getRecord(identifier); + } + + @Override + @Nullable + public DataRecord getRecordIfStored(DataIdentifier identifier) throws DataStoreException { + return activeImpl.getRecordIfStored(identifier); + } + + @Override + @Nullable + public DataRecord getRecordFromReference(String reference) throws DataStoreException { + return activeImpl.getRecordFromReference(reference); + } + + @Override + public Iterator getAllIdentifiers() throws DataStoreException { + return activeImpl.getAllIdentifiers(); + } + + @Override + public void updateModifiedDateOnAccess(long before) { + activeImpl.updateModifiedDateOnAccess(before); + } + + @Override + public int deleteAllOlderThan(long min) throws DataStoreException { + return activeImpl.deleteAllOlderThan(min); + } + + @Override + public void clearInUse() { + activeImpl.clearInUse(); + } + + @Override + public int getMinRecordLength() { + return activeImpl.getMinRecordLength(); + } + + @Override + public void close() throws DataStoreException { + activeImpl.close(); + } + + // Safe: both AzureDataStore (v8) and AzureDataStoreV12 implement ConfigurableDataRecordAccessProvider. 
+ private ConfigurableDataRecordAccessProvider provider() { + return (ConfigurableDataRecordAccessProvider) activeImpl; + } + + @Override + public void setDirectUploadURIExpirySeconds(int seconds) { + provider().setDirectUploadURIExpirySeconds(seconds); + } + + @Override + public void setDirectDownloadURIExpirySeconds(int seconds) { + provider().setDirectDownloadURIExpirySeconds(seconds); + } + + @Override + public void setDirectDownloadURICacheSize(int maxSize) { + provider().setDirectDownloadURICacheSize(maxSize); + } + + @Override + public void setBinaryTransferAccelerationEnabled(boolean enabled) { + provider().setBinaryTransferAccelerationEnabled(enabled); + } + + @Override + @Nullable + public DataRecordUpload initiateDataRecordUpload(long maxUploadSizeInBytes, int maxNumberOfURIs) + throws IllegalArgumentException, DataRecordUploadException { + return provider().initiateDataRecordUpload(maxUploadSizeInBytes, maxNumberOfURIs); + } + + @Override + @Nullable + public DataRecordUpload initiateDataRecordUpload(long maxUploadSizeInBytes, int maxNumberOfURIs, + @NotNull DataRecordUploadOptions options) + throws IllegalArgumentException, DataRecordUploadException { + return provider().initiateDataRecordUpload(maxUploadSizeInBytes, maxNumberOfURIs, options); + } + + @Override + @NotNull + public DataRecord completeDataRecordUpload(@NotNull String uploadToken) + throws IllegalArgumentException, DataRecordUploadException, DataStoreException { + return provider().completeDataRecordUpload(uploadToken); + } + + @Override + @Nullable + public URI getDownloadURI(@NotNull DataIdentifier identifier, + @NotNull DataRecordDownloadOptions downloadOptions) { + return provider().getDownloadURI(identifier, downloadOptions); + } + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobContainerProviderV12.java 
b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobContainerProviderV12.java new file mode 100644 index 00000000000..252bb4d6678 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobContainerProviderV12.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.core.http.HttpClient; +import com.azure.core.http.netty.NettyAsyncHttpClientBuilder; +import com.azure.identity.ClientSecretCredential; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobContainerClientBuilder; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.models.UserDelegationKey; +import com.azure.storage.blob.sas.BlobSasPermission; +import com.azure.storage.blob.sas.BlobServiceSasSignatureValues; +import com.azure.storage.blob.specialized.BlockBlobClient; +import com.azure.storage.common.policy.RequestRetryOptions; +import org.apache.commons.lang3.StringUtils; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URISyntaxException; +import java.security.InvalidKeyException; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.util.Properties; + +class AzureBlobContainerProviderV12 { + private static final Logger log = LoggerFactory.getLogger(AzureBlobContainerProviderV12.class); + private static final String DEFAULT_ENDPOINT_SUFFIX = "core.windows.net"; + private final String azureConnectionString; + private final String accountName; + private final String containerName; + private final String blobEndpoint; + private final String sasToken; + private final String accountKey; + private final String tenantId; + private final String clientId; + private final String clientSecret; + // Cached credential — token cache is per-instance, recreating on every SAS call would + // force a new OAuth round-trip each time. 
+ private final ClientSecretCredential clientSecretCredential; + // Cached service client for user-delegation SAS generation — avoids allocating a new Netty + // event loop and connection pool on every SAS call. + private volatile BlobServiceClient cachedBlobServiceClient; + + private AzureBlobContainerProviderV12(Builder builder) { + this.azureConnectionString = builder.azureConnectionString; + this.accountName = builder.accountName; + this.containerName = builder.containerName; + this.blobEndpoint = builder.blobEndpoint; + this.sasToken = builder.sasToken; + this.accountKey = builder.accountKey; + this.tenantId = builder.tenantId; + this.clientId = builder.clientId; + this.clientSecret = builder.clientSecret; + this.clientSecretCredential = StringUtils.isNoneBlank(builder.clientId, builder.clientSecret, builder.tenantId) + ? new ClientSecretCredentialBuilder() + .clientId(builder.clientId) + .clientSecret(builder.clientSecret) + .tenantId(builder.tenantId) + .build() + : null; + } + + /** + * Constructs the Azure Storage endpoint URL. + * If a custom blobEndpoint is configured, it will be used. + * Otherwise, constructs the default endpoint using the account name. 
+ * + * @param accountName the storage account name + * @param customBlobEndpoint optional custom blob endpoint (can be null or empty) + * @return the endpoint URL to use + */ + @NotNull + private static String getEndpointUrl(String accountName, String customBlobEndpoint) { + if (StringUtils.isNotBlank(customBlobEndpoint)) { + // Use custom endpoint (e.g., for private endpoints) + // Ensure it starts with https:// if not already present + if (!customBlobEndpoint.startsWith("http://") && !customBlobEndpoint.startsWith("https://")) { + return "https://" + customBlobEndpoint; + } + return customBlobEndpoint; + } + // Default public endpoint + return String.format("https://%s.blob.%s", accountName, DEFAULT_ENDPOINT_SUFFIX); + } + + public String getContainerName() { + return containerName; + } + + public String getAzureConnectionString() { + return azureConnectionString; + } + + @NotNull + public BlobContainerClient getBlobContainer() throws DataStoreException { + return this.getBlobContainer(null, new Properties()); + } + + @NotNull + public BlobContainerClient getBlobContainer(@Nullable RequestRetryOptions retryOptions, Properties properties) throws DataStoreException { + // connection string will be given preference over service principals / sas / account key + if (StringUtils.isNotBlank(azureConnectionString)) { + log.debug("connecting to azure blob storage via azureConnectionString"); + return UtilsV12.getBlobContainerFromConnectionString(getAzureConnectionString(), containerName, retryOptions, properties); + } else if (authenticateViaServicePrincipal()) { + log.debug("connecting to azure blob storage via service principal credentials"); + return getBlobContainerFromServicePrincipals(accountName, retryOptions, properties); + } else if (StringUtils.isNotBlank(sasToken)) { + log.debug("connecting to azure blob storage via sas token"); + final String connectionStringWithSasToken = UtilsV12.getConnectionStringForSas(sasToken, blobEndpoint, accountName); + return 
UtilsV12.getBlobContainer(connectionStringWithSasToken, containerName, retryOptions, properties); + } + log.debug("connecting to azure blob storage via access key"); + final String connectionStringWithAccountKey = UtilsV12.getConnectionString(accountName, accountKey, blobEndpoint); + return UtilsV12.getBlobContainer(connectionStringWithAccountKey, containerName, retryOptions, properties); + } + + @NotNull + public String generateSharedAccessSignature(RequestRetryOptions retryOptions, + String key, + BlobSasPermission blobSasPermissions, + int expirySeconds, + Properties properties) throws DataStoreException, URISyntaxException, InvalidKeyException { + return generateSharedAccessSignature(retryOptions, key, blobSasPermissions, expirySeconds, properties, null); + } + + /** + * Generates a shared access signature (SAS) for the specified blob with optional headers. + * This is the Azure SDK 12 equivalent of the V8 method that accepted {@code SharedAccessBlobHeaders}. + * + * @param retryOptions retry options for the request + * @param key the blob key + * @param blobSasPermissions the permissions for the SAS + * @param expirySeconds the number of seconds until the SAS expires + * @param properties additional properties + * @param optionalHeaders optional headers to include in the SAS (can be null) + * @return the SAS query string + * @throws DataStoreException if an error occurs + * @throws URISyntaxException if the URI is invalid + * @throws InvalidKeyException if the key is invalid + */ + @NotNull + public String generateSharedAccessSignature(RequestRetryOptions retryOptions, + String key, + BlobSasPermission blobSasPermissions, + int expirySeconds, + Properties properties, + @Nullable BlobSasHeadersV12 optionalHeaders) throws DataStoreException, URISyntaxException, InvalidKeyException { + + OffsetDateTime expiry = OffsetDateTime.now().plusSeconds(expirySeconds); + BlobServiceSasSignatureValues serviceSasSignatureValues = new BlobServiceSasSignatureValues(expiry, 
blobSasPermissions); + + // Apply headers if provided + if (optionalHeaders != null) { + optionalHeaders.applyTo(serviceSasSignatureValues); + } + + BlockBlobClient blob = getBlobContainer(retryOptions, properties).getBlobClient(key).getBlockBlobClient(); + + if (authenticateViaServicePrincipal()) { + return generateUserDelegationKeySignedSas(blob, serviceSasSignatureValues, expiry, properties); + } + return generateSas(blob, serviceSasSignatureValues); + } + + @NotNull + public String generateUserDelegationKeySignedSas(BlockBlobClient blobClient, + BlobServiceSasSignatureValues serviceSasSignatureValues, + OffsetDateTime expiryTime, + Properties properties) { + + BlobServiceClient blobServiceClient = getOrCreateBlobServiceClient(properties); + OffsetDateTime startTime = OffsetDateTime.now(ZoneOffset.UTC); + UserDelegationKey userDelegationKey = blobServiceClient.getUserDelegationKey(startTime, expiryTime); + return blobClient.generateUserDelegationSas(serviceSasSignatureValues, userDelegationKey); + } + + private boolean authenticateViaServicePrincipal() { + return StringUtils.isBlank(azureConnectionString) && + StringUtils.isNoneBlank(accountName, tenantId, clientId, clientSecret); + } + + private BlobServiceClient getOrCreateBlobServiceClient(Properties properties) { + if (cachedBlobServiceClient == null) { + synchronized (this) { + if (cachedBlobServiceClient == null) { + cachedBlobServiceClient = new BlobServiceClientBuilder() + .endpoint(getEndpointUrl(accountName, blobEndpoint)) + .credential(getClientSecretCredential()) + .addPolicy(new AzureHttpRequestLoggingPolicyV12()) + .httpClient(new NettyAsyncHttpClientBuilder() + .proxy(UtilsV12.computeProxyOptions(properties)) + .build()) + .buildClient(); + } + } + } + return cachedBlobServiceClient; + } + + private ClientSecretCredential getClientSecretCredential() { + return clientSecretCredential; + } + + @NotNull + private BlobContainerClient getBlobContainerFromServicePrincipals(String accountName, 
RequestRetryOptions retryOptions, Properties properties) { + ClientSecretCredential clientSecretCredential = getClientSecretCredential(); + AzureHttpRequestLoggingPolicyV12 loggingPolicy = new AzureHttpRequestLoggingPolicyV12(); + + String endpoint = getEndpointUrl(accountName, blobEndpoint); + HttpClient httpClient = new NettyAsyncHttpClientBuilder() + .proxy(UtilsV12.computeProxyOptions(properties)) + .build(); + BlobContainerClientBuilder builder = new BlobContainerClientBuilder() + .endpoint(endpoint) + .containerName(containerName) + .credential(clientSecretCredential) + .addPolicy(loggingPolicy) + .httpClient(httpClient); + if (retryOptions != null) { + builder.retryOptions(retryOptions); + } + return builder.buildClient(); + } + + @NotNull + private String generateSas(BlockBlobClient blob, + BlobServiceSasSignatureValues blobServiceSasSignatureValues) { + return blob.generateSas(blobServiceSasSignatureValues, null); + } + + public static class Builder { + private final String containerName; + private String azureConnectionString; + private String accountName; + private String blobEndpoint; + private String sasToken; + private String accountKey; + private String tenantId; + private String clientId; + private String clientSecret; + + private Builder(String containerName) { + this.containerName = containerName; + } + + public static Builder builder(String containerName) { + return new Builder(containerName); + } + + public Builder withAzureConnectionString(String azureConnectionString) { + this.azureConnectionString = azureConnectionString; + return this; + } + + public Builder withAccountName(String accountName) { + this.accountName = accountName; + return this; + } + + public Builder withBlobEndpoint(String blobEndpoint) { + this.blobEndpoint = blobEndpoint; + return this; + } + + public Builder withSasToken(String sasToken) { + this.sasToken = sasToken; + return this; + } + + public Builder withAccountKey(String accountKey) { + this.accountKey = accountKey; 
+ return this; + } + + public Builder withTenantId(String tenantId) { + this.tenantId = tenantId; + return this; + } + + public Builder withClientId(String clientId) { + this.clientId = clientId; + return this; + } + + public Builder withClientSecret(String clientSecret) { + this.clientSecret = clientSecret; + return this; + } + + public Builder initializeWithProperties(Properties properties) { + withAzureConnectionString(properties.getProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, "")); + withAccountName(properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "")); + withBlobEndpoint(properties.getProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, "")); + withSasToken(properties.getProperty(AzureConstantsV12.AZURE_SAS, "")); + withAccountKey(properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_KEY, "")); + withTenantId(properties.getProperty(AzureConstantsV12.AZURE_TENANT_ID, "")); + withClientId(properties.getProperty(AzureConstantsV12.AZURE_CLIENT_ID, "")); + withClientSecret(properties.getProperty(AzureConstantsV12.AZURE_CLIENT_SECRET, "")); + return this; + } + + public AzureBlobContainerProviderV12 build() { + return new AzureBlobContainerProviderV12(this); + } + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12.java new file mode 100644 index 00000000000..265fa1c3122 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12.java @@ -0,0 +1,1172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.core.http.rest.Response; +import com.azure.core.util.Context; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.models.*; +import com.azure.storage.blob.options.BlobUploadFromFileOptions; +import com.azure.storage.blob.options.BlockBlobCommitBlockListOptions; +import com.azure.storage.blob.sas.BlobSasPermission; +import com.azure.storage.blob.specialized.BlobOutputStream; +import com.azure.storage.blob.specialized.BlockBlobClient; +import com.azure.storage.common.policy.RequestRetryOptions; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.jackrabbit.oak.cache.api.Cache; +import org.apache.jackrabbit.oak.cache.api.CacheBuilder; +import org.apache.jackrabbit.oak.commons.PropertiesUtil; +import org.apache.jackrabbit.oak.commons.conditions.Validate; +import org.apache.jackrabbit.oak.commons.time.Stopwatch; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.*; +import org.apache.jackrabbit.oak.spi.blob.AbstractDataRecord; +import org.apache.jackrabbit.oak.spi.blob.AbstractSharedBackend; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import 
org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.apache.jackrabbit.util.Base64; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.security.InvalidKeyException; +import java.time.Duration; +import java.time.Instant; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +import static org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12.AzureConstantsV12.*; +import static org.apache.jackrabbit.oak.commons.StringUtils.emptyToNull; + + +/** + * Azure Blob Storage backend using the Azure SDK v12 (com.azure). Implements direct-upload (block-blob + * staging + commit) and presigned GET URI generation. Counterpart to AzureBlobStoreBackend (v8, legacy SDK). + * Selected at runtime by AzureDataStoreWrapper based on the blobstoreAzureV12Enabled flag. 
+ */
+class AzureBlobStoreBackendV12 extends AbstractSharedBackend {
+
+    private static final Logger LOG = LoggerFactory.getLogger(AzureBlobStoreBackendV12.class);
+    private static final Logger LOG_STREAMS_DOWNLOAD = LoggerFactory.getLogger("oak.datastore.download.streams");
+    private static final Logger LOG_STREAMS_UPLOAD = LoggerFactory.getLogger("oak.datastore.upload.streams");
+
+    // Container client, created lazily by getAzureContainer(). The type argument is required:
+    // with a raw AtomicReference, get() yields Object and getAzureContainer() does not compile.
+    private final AtomicReference<BlobContainerClient> azureContainerReference = new AtomicReference<>();
+
+    // Backend configuration; set through setProperties() or loaded from the default config file in init().
+    private Properties properties;
+    private AzureBlobContainerProviderV12 azureBlobContainerProvider;
+    private int concurrentRequestCount = AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT;
+    private RequestRetryOptions retryOptions;
+    // Stays null unless the request-timeout property is configured.
+    private Integer requestTimeout;
+    private int httpDownloadURIExpirySeconds = 0; // disabled by default
+    private int httpUploadURIExpirySeconds = 0; // disabled by default
+    private String uploadDomainOverride = null;
+    private String downloadDomainOverride = null;
+    private boolean presignedDownloadURIVerifyExists = true;
+    // Presigned download URIs keyed by the cache key built in createHttpDownloadURI();
+    // null while the cache is disabled (see setHttpDownloadURICacheSize()).
+    private Cache<String, URI> httpDownloadURICache;
+    // HMAC-SHA1 key used to sign and verify upload tokens. Written once on cold start, then cached in memory.
+    private byte[] secret;
+
+    /**
+     * Builds the blob key for a data identifier by inserting {@code UtilsV12.DASH} after the
+     * first four characters of the identifier string. Object is stored with this key in ADS.
+     *
+     * @param identifier the data identifier; its string form must be at least four characters long
+     * @return the blob key
+     */
+    private static String getKeyName(DataIdentifier identifier) {
+        String key = identifier.toString();
+        return key.substring(0, 4) + UtilsV12.DASH + key.substring(4);
+    }
+
+    /**
+     * Get data identifier from key.
+     */
+    private static String getIdentifierName(String key) {
+        // Metadata blobs and names without the separator are not data records.
+        if (key.startsWith(AZURE_BLOB_META_KEY_PREFIX) || !key.contains(UtilsV12.DASH)) {
+            return null;
+        }
+        // A key written by getKeyName() has four characters, the separator, then the rest.
+        // A shorter name that merely contains the separator (e.g. a foreign blob "a-b") cannot be
+        // mapped back, and would make substring() throw and abort a whole container listing.
+        // Assumes UtilsV12.DASH is a single character, as substring(5) below already does.
+        if (key.length() < 5) {
+            return null;
+        }
+        return key.substring(0, 4) + key.substring(5);
+    }
+
+    /**
+     * Prepends the metadata key prefix to a metadata record name.
+     */
+    private static String addMetaKeyPrefix(final String key) {
+        return AZURE_BLOB_META_KEY_PREFIX + key;
+    }
+
+    /**
+     * Removes the metadata key prefix from a blob name; names without the prefix are returned unchanged.
+     */
+    private static String stripMetaKeyPrefix(String name) {
+        if (name.startsWith(AZURE_BLOB_META_KEY_PREFIX)) {
+            return name.substring(AZURE_BLOB_META_KEY_PREFIX.length());
+        }
+        return name;
+    }
+
+    /**
+     * Sets the blob's metadata to a single entry holding the current time in epoch milliseconds.
+     */
+    private static void updateLastModifiedMetadata(BlockBlobClient blockBlobClient) {
+        blockBlobClient.setMetadata(Map.of(AZURE_BLOB_LAST_MODIFIED_KEY, String.valueOf(System.currentTimeMillis())));
+    }
+
+    /**
+     * Fetches the blob's properties and returns its last-modified time in epoch milliseconds.
+     */
+    private static long getLastModified(BlockBlobClient blockBlobClient) {
+        return getLastModified(blockBlobClient.getProperties());
+    }
+
+    /**
+     * Returns the last-modified time in epoch milliseconds: the custom metadata value when the
+     * key is present, otherwise the server-side last-modified timestamp.
+     */
+    private static long getLastModified(BlobProperties props) {
+        // Typed map: with a raw Map, get() yields Object and Long.parseLong() does not compile.
+        Map<String, String> metadata = props.getMetadata();
+        if (metadata == null || !metadata.containsKey(AZURE_BLOB_LAST_MODIFIED_KEY)) {
+            return props.getLastModified().toInstant().toEpochMilli();
+        }
+        return Long.parseLong(metadata.get(AZURE_BLOB_LAST_MODIFIED_KEY));
+    }
+
+    // Use BlobItem.getProperties() from the list response — no extra getProperties() HTTP call.
+    // The custom lastModified metadata key is not available on BlobItem, so we fall back to the
+    // Azure server LastModified (same fallback as getLastModified() for blobs without the key).
+    private static long getLastModifiedFromBlobItem(BlobItem blobItem) {
+        return blobItem.getProperties().getLastModified().toInstant().toEpochMilli();
+    }
+
+    /**
+     * Supplies the backend configuration. If never called, init() loads the default config file.
+     */
+    public void setProperties(final Properties properties) {
+        this.properties = properties;
+    }
+
+    // Lazy: retryOptions and azureBlobContainerProvider aren't set until initContainerConnection() runs.
+    protected BlobContainerClient getAzureContainer() throws DataStoreException {
+        if (azureContainerReference.get() == null) {
+            // compareAndSet keeps the first client published if two threads get here together.
+            azureContainerReference.compareAndSet(null, azureBlobContainerProvider.getBlobContainer(retryOptions, properties));
+        }
+        return azureContainerReference.get();
+    }
+
+    // Swaps Thread Class Context Loader to this bundle's classloader so Azure SDK's ServiceLoader-based SPI discovery works in OSGi.
+    // RuntimeExceptions (including BlobStorageException) propagate as-is; other checked exceptions are wrapped.
+    // The type parameter is declared on the method; without <T> the return type T does not resolve.
+    private <T> T withBundleContextClassLoader(AzureSDKCall<T> call) throws DataStoreException {
+        ClassLoader saved = Thread.currentThread().getContextClassLoader();
+        try {
+            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
+            return call.execute();
+        } catch (DataStoreException | RuntimeException e) {
+            throw e;
+        } catch (Exception e) {
+            throw new DataStoreException(e);
+        } finally {
+            // Always restore the caller's loader, including on failure.
+            Thread.currentThread().setContextClassLoader(saved);
+        }
+    }
+
+    // Variant of withBundleContextClassLoader() for calls that produce no result.
+    private void withBundleContextClassLoaderVoid(AzureSDKCallVoid call) throws DataStoreException {
+        withBundleContextClassLoader(() -> {
+            call.execute();
+            return null;
+        });
+    }
+
+    // Not idempotent — calling twice reinitializes the container connection and re-reads the reference key.
+    // OSGi activation calls this exactly once; tests that need a fresh state must construct a new instance.
+    @Override
+    public void init() throws DataStoreException {
+        Stopwatch stopwatch = Stopwatch.createStarted();
+        withBundleContextClassLoaderVoid(() -> {
+            LOG.debug("Started backend initialization");
+            loadPropertiesIfAbsent();
+            initAzureDSConfig();
+            initContainerConnection();
+            initPresignedURIConfig();
+            initReferenceKey();
+            LOG.debug("Backend initialized.
duration={}", stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }); + } + + private void loadPropertiesIfAbsent() throws DataStoreException { + if (properties == null) { + try { + properties = UtilsV12.readConfig(UtilsV12.DEFAULT_CONFIG_FILE); + } catch (IOException e) { + throw new DataStoreException("Unable to initialize Azure Data Store from " + UtilsV12.DEFAULT_CONFIG_FILE, e); + } + } + } + + private void initContainerConnection() throws DataStoreException { + boolean createBlobContainer = PropertiesUtil.toBoolean( + emptyToNull(properties.getProperty(AzureConstantsV12.AZURE_CREATE_CONTAINER)), true); + + concurrentRequestCount = PropertiesUtil.toInteger( + properties.getProperty(AzureConstantsV12.AZURE_BLOB_CONCURRENT_REQUESTS_PER_OPERATION), + AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT); + if (concurrentRequestCount < AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT) { + LOG.warn("Invalid setting [{}] for concurrentRequestsPerOperation (too low); resetting to {}", + concurrentRequestCount, + AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT); + concurrentRequestCount = AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT; + } else if (concurrentRequestCount > AZURE_BLOB_MAX_CONCURRENT_REQUEST_COUNT) { + LOG.warn("Invalid setting [{}] for concurrentRequestsPerOperation (too high); resetting to {}", + concurrentRequestCount, + AZURE_BLOB_MAX_CONCURRENT_REQUEST_COUNT); + concurrentRequestCount = AZURE_BLOB_MAX_CONCURRENT_REQUEST_COUNT; + } + LOG.info("Using concurrentRequestsPerOperation={}", concurrentRequestCount); + + if (properties.getProperty(AzureConstantsV12.AZURE_BLOB_REQUEST_TIMEOUT) != null) { + requestTimeout = PropertiesUtil.toInteger(properties.getProperty(AzureConstantsV12.AZURE_BLOB_REQUEST_TIMEOUT), AZURE_BLOB_DEFAULT_REQUEST_TIMEOUT); + } + + retryOptions = UtilsV12.getRetryOptions(properties.getProperty(AzureConstantsV12.AZURE_BLOB_MAX_REQUEST_RETRY), requestTimeout, computeSecondaryLocationEndpoint()); + + presignedDownloadURIVerifyExists = PropertiesUtil.toBoolean( 
+ emptyToNull(properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_DOWNLOAD_URI_VERIFY_EXISTS)), true); + + BlobContainerClient azureContainer = getAzureContainer(); + + try { + if (createBlobContainer && !azureContainer.exists()) { + azureContainer.create(); + LOG.info("New container created. containerName={}", getContainerName()); + } else { + LOG.info("Reusing existing container. containerName={}", getContainerName()); + } + } catch (BlobStorageException e) { + LOG.error("Error setting up Azure Blob store backend: {}", e.getMessage()); + throw new DataStoreException(e); + } + } + + private void initPresignedURIConfig() { + String putExpiry = properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_UPLOAD_URI_EXPIRY_SECONDS); + if (putExpiry != null) { + this.setHttpUploadURIExpirySeconds(Integer.parseInt(putExpiry)); + } + String getExpiry = properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_DOWNLOAD_URI_EXPIRY_SECONDS); + if (getExpiry != null) { + this.setHttpDownloadURIExpirySeconds(Integer.parseInt(getExpiry)); + String cacheMaxSize = properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_DOWNLOAD_URI_CACHE_MAX_SIZE); + if (cacheMaxSize != null) { + this.setHttpDownloadURICacheSize(Integer.parseInt(cacheMaxSize)); + } else { + this.setHttpDownloadURICacheSize(0); + } + } + uploadDomainOverride = properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_UPLOAD_URI_DOMAIN_OVERRIDE, null); + downloadDomainOverride = properties.getProperty(AzureConstantsV12.PRESIGNED_HTTP_DOWNLOAD_URI_DOMAIN_OVERRIDE, null); + } + + private void initReferenceKey() throws DataStoreException { + // Set to false to defer key creation until the first upload — useful in read-only or cold-standby nodes + // that should never write to blob storage during startup. 
+ boolean createRefSecretOnInit = PropertiesUtil.toBoolean( + emptyToNull(properties.getProperty(AzureConstantsV12.AZURE_REF_ON_INIT)), true); + if (createRefSecretOnInit) { + getOrCreateReferenceKey(); + } + } + + private void initAzureDSConfig() { + azureBlobContainerProvider = AzureBlobContainerProviderV12.Builder + .builder(properties.getProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME)) + .initializeWithProperties(properties) + .build(); + } + + @Override + public InputStream read(DataIdentifier identifier) throws DataStoreException { + Objects.requireNonNull(identifier, "identifier must not be null"); + + String key = getKeyName(identifier); + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + BlockBlobClient blob = getAzureContainer().getBlobClient(key).getBlockBlobClient(); + if (!blob.exists()) { + throw new DataStoreException("Trying to read missing blob. identifier=" + key); + } + InputStream is = blob.openInputStream(); + LOG.debug("Got input stream for blob. identifier={} duration={}", key, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + if (LOG_STREAMS_DOWNLOAD.isDebugEnabled()) { + // Log message, with exception, so we can get a trace to see where the call came from + LOG_STREAMS_DOWNLOAD.debug("Binary downloaded from Azure Blob Storage - identifier={}", key, new Exception()); + } + return is; + }); + } catch (BlobStorageException e) { + LOG.error("Error reading blob. identifier={}", key); + throw new DataStoreException("Cannot read blob. identifier=" + key, e); + } + } + + private void uploadBlob(BlockBlobClient client, File file, long len, Stopwatch stopwatch, String key) throws IOException { + // Azure SDK rejects 0 and values > AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE. + // For large files the SDK will split into multiple blocks of blockSize bytes each. 
+        long blockSize = Math.max(1, Math.min(len, AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE));
+        ParallelTransferOptions parallelTransferOptions = new ParallelTransferOptions()
+                .setBlockSizeLong(blockSize)
+                .setMaxConcurrency(concurrentRequestCount)
+                .setMaxSingleUploadSizeLong(AZURE_BLOB_MAX_SINGLE_PUT_UPLOAD_SIZE);
+        BlobUploadFromFileOptions options = new BlobUploadFromFileOptions(file.getPath());
+        options.setParallelTransferOptions(parallelTransferOptions);
+        // Stamp the custom last-modified key as part of the upload itself.
+        options.setMetadata(Map.of(AZURE_BLOB_LAST_MODIFIED_KEY, String.valueOf(System.currentTimeMillis())));
+        try {
+            BlobClient blobClient = client.getContainerClient().getBlobClient(key);
+            Response<BlockBlobItem> blockBlob = blobClient.uploadFromFileWithResponse(options, null, null);
+            LOG.debug("Upload status is {} for blob {}", blockBlob.getStatusCode(), key);
+        } catch (UncheckedIOException ex) {
+            // A stray '}' after the placeholder used to be printed literally; the exception
+            // itself travels on as the cause of the IOException below.
+            LOG.debug("Failed to upload from file: {}", ex.getMessage());
+            throw new IOException("Failed to upload blob: " + key, ex);
+        } catch (IllegalArgumentException ex) {
+            // Azure SDK validation failure (e.g. invalid options) — surface as checked IOException
+            // so write()'s catch block can wrap it as DataStoreException.
+            throw new IOException("Invalid upload parameters for blob: " + key, ex);
+        }
+        LOG.debug("Blob created.
identifier={} length={} duration={}", key, len, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + if (LOG_STREAMS_UPLOAD.isDebugEnabled()) { + // Log message, with exception, so we can get a trace to see where the call came from + LOG_STREAMS_UPLOAD.debug("Binary uploaded to Azure Blob Storage - identifier={}", key, new Exception()); + } + } + + @Override + public DataRecord getRecord(DataIdentifier identifier) throws DataStoreException { + Objects.requireNonNull(identifier, "identifier must not be null"); + + String key = getKeyName(identifier); + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + BlockBlobClient blob = getAzureContainer().getBlobClient(key).getBlockBlobClient(); + BlobProperties props = blob.getProperties(); + AzureBlobStoreDataRecord record = new AzureBlobStoreDataRecord( + this, + azureBlobContainerProvider, + new DataIdentifier(getIdentifierName(blob.getBlobName())), + getLastModified(props), + props.getBlobSize()); + LOG.debug("Data record read for blob. identifier={} duration={} record={}", + key, stopwatch.elapsed(TimeUnit.MILLISECONDS), record); + return record; + }); + } catch (BlobStorageException e) { + if (e.getStatusCode() == 404) { + LOG.debug("Unable to get record for blob; blob does not exist. identifier={}", key); + } else { + LOG.info("Error getting data record for blob. identifier={}", key, e); + } + throw new DataStoreException("Cannot retrieve blob. 
identifier=" + key, e); + } + } + + @Override + public Iterator getAllIdentifiers() throws DataStoreException { + return withBundleContextClassLoader(() -> + getAzureContainer().listBlobs().stream() + .map(blobItem -> getIdentifierName(blobItem.getName())) + .filter(Objects::nonNull) + .map(DataIdentifier::new) + .collect(Collectors.toList()) + .iterator()); + } + + @Override + public Iterator getAllRecords() throws DataStoreException { + return withBundleContextClassLoader(() -> + getAzureContainer().listBlobs().stream() + .map(blobItem -> { + String identifierName = getIdentifierName(blobItem.getName()); + if (identifierName == null) { + return null; + } + return (DataRecord) new AzureBlobStoreDataRecord( + this, + azureBlobContainerProvider, + new DataIdentifier(identifierName), + getLastModifiedFromBlobItem(blobItem), + blobItem.getProperties().getContentLength()); + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()) + .iterator()); + } + + @Override + public boolean exists(DataIdentifier identifier) throws DataStoreException { + Stopwatch stopwatch = Stopwatch.createStarted(); + String key = getKeyName(identifier); + return withBundleContextClassLoader(() -> { + boolean exists = getAzureContainer().getBlobClient(key).getBlockBlobClient().exists(); + LOG.debug("Blob exists={} identifier={} duration={}", exists, key, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + return exists; + }); + } + + @Override + public void close() { + //Nothing to close + } + + @Override + public void deleteRecord(DataIdentifier identifier) throws DataStoreException { + Objects.requireNonNull(identifier, "identifier must not be null"); + + String key = getKeyName(identifier); + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + withBundleContextClassLoaderVoid(() -> { + boolean result = getAzureContainer().getBlobClient(key).getBlockBlobClient().deleteIfExists(); + LOG.debug("Blob {}. identifier={} duration={}", + result ? 
"deleted" : "delete requested, but it does not exist (perhaps already deleted)", + key, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }); + } catch (BlobStorageException e) { + LOG.info("Error deleting blob. identifier={}", key, e); + throw new DataStoreException(e); + } + } + + @Override + public void addMetadataRecord(InputStream input, String name) throws DataStoreException { + Objects.requireNonNull(input, "input must not be null"); + Validate.checkArgument(StringUtils.isNotEmpty(name), "name should not be empty"); + Stopwatch stopwatch = Stopwatch.createStarted(); + withBundleContextClassLoaderVoid(() -> { + addMetadataRecordImpl(input, name, -1); + LOG.debug("Metadata record added. metadataName={} duration={}", name, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }); + } + + @Override + public void addMetadataRecord(File inputFile, String name) throws DataStoreException { + Objects.requireNonNull(inputFile, "input must not be null"); + Validate.checkArgument(StringUtils.isNotEmpty(name), "name should not be empty"); + Stopwatch stopwatch = Stopwatch.createStarted(); + withBundleContextClassLoaderVoid(() -> { + try (InputStream input = new FileInputStream(inputFile)) { + addMetadataRecordImpl(input, name, inputFile.length()); + } + LOG.debug("Metadata record added. 
metadataName={} duration={}", name, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }); + } + + private BlockBlobClient getMetaBlobClient(String name) throws DataStoreException { + return getAzureContainer().getBlobClient(AzureConstantsV12.AZURE_BlOB_META_DIR_NAME + "/" + name).getBlockBlobClient(); + } + + private void addMetadataRecordImpl(final InputStream input, String name, long recordLength) throws DataStoreException { + try { + BlockBlobClient blockBlobClient = getMetaBlobClient(name); + ParallelTransferOptions transferOptions = new ParallelTransferOptions() + .setBlockSizeLong(AZURE_BLOB_PARALLEL_UPLOAD_BLOCK_SIZE) + .setMaxConcurrency(AZURE_BLOB_PARALLEL_UPLOAD_MAX_CONCURRENCY); + try (BufferedInputStream bufferedIn = new BufferedInputStream(input); + BlobOutputStream out = blockBlobClient.getBlobOutputStream( + transferOptions, null, null, null, null)) { + bufferedIn.transferTo(out); + } + updateLastModifiedMetadata(blockBlobClient); + } catch (BlobStorageException | IOException e) { + LOG.info("Error adding metadata record. metadataName={} length={}", name, recordLength, e); + throw new DataStoreException(e); + } + } + + @Override + public DataRecord getMetadataRecord(String name) { + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + BlockBlobClient blockBlobClient = getMetaBlobClient(name); + if (!blockBlobClient.exists()) { + LOG.warn("Trying to read missing metadata. metadataName={}", name); + return null; + } + BlobProperties metaProps = blockBlobClient.getProperties(); + long lastModified = getLastModified(metaProps); + long length = metaProps.getBlobSize(); + AzureBlobStoreDataRecord record = new AzureBlobStoreDataRecord(this, + azureBlobContainerProvider, + new DataIdentifier(name), + lastModified, + length, + true); + LOG.debug("Metadata record read. 
metadataName={} duration={} record={}", name, stopwatch.elapsed(TimeUnit.MILLISECONDS), record); + return record; + }); + } catch (BlobStorageException | DataStoreException e) { + LOG.info("Error reading metadata record. metadataName={}", name, e); + throw new RuntimeException(e); + } + } + + @Override + public List getAllMetadataRecords(String prefix) { + Objects.requireNonNull(prefix, "prefix must not be null"); + + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + List records = new ArrayList<>(); + ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); + listBlobsOptions.setPrefix(AzureConstantsV12.AZURE_BlOB_META_DIR_NAME + "/" + prefix); + + for (BlobItem blobItem : getAzureContainer().listBlobs(listBlobsOptions, null)) { + records.add(new AzureBlobStoreDataRecord(this, + azureBlobContainerProvider, + new DataIdentifier(stripMetaKeyPrefix(blobItem.getName())), + blobItem.getProperties().getLastModified().toInstant().toEpochMilli(), + blobItem.getProperties().getContentLength(), + true)); + } + LOG.debug("Metadata records read. recordsRead={} metadataFolder={} duration={}", records.size(), prefix, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + return records; + }); + } catch (BlobStorageException | DataStoreException e) { + // Must not return empty — callers (GC) treat empty as "no records" and may delete all live blobs. + throw new RuntimeException("Failed to list metadata records for prefix: " + prefix, e); + } + } + + @Override + public boolean deleteMetadataRecord(String name) { + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + BlobClient blob = getAzureContainer().getBlobClient(addMetaKeyPrefix(name)); + boolean result = blob.deleteIfExists(); + LOG.debug("Metadata record {}. metadataName={} duration={}", + result ? 
"deleted" : "delete requested, but it does not exist (perhaps already deleted)", + name, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + return result; + }); + } catch (BlobStorageException | DataStoreException e) { + LOG.info("Error deleting metadata record. metadataName={}", name, e); + } + return false; + } + + @Override + public void deleteAllMetadataRecords(String prefix) { + Objects.requireNonNull(prefix, "prefix must not be null"); + + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + withBundleContextClassLoaderVoid(() -> { + int total = 0; + ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); + listBlobsOptions.setPrefix(AzureConstantsV12.AZURE_BlOB_META_DIR_NAME + "/" + prefix); + + for (BlobItem blobItem : getAzureContainer().listBlobs(listBlobsOptions, null)) { + BlobClient blobClient = getAzureContainer().getBlobClient(blobItem.getName()); + if (blobClient.deleteIfExists()) { + total++; + } + } + LOG.debug("Metadata records deleted. recordsDeleted={} metadataFolder={} duration={}", + total, prefix, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }); + } catch (BlobStorageException | DataStoreException e) { + throw new RuntimeException("Failed to delete metadata records for prefix: " + prefix, e); + } + } + + @Override + public boolean metadataRecordExists(String name) { + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + return withBundleContextClassLoader(() -> { + BlobClient blob = getAzureContainer().getBlobClient(addMetaKeyPrefix(name)); + boolean exists = blob.exists(); + LOG.debug("Metadata record {} exists {}. 
duration={}", name, exists, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + return exists; + }); + } catch (DataStoreException | BlobStorageException e) { + LOG.info("Error checking existence of metadata record = {}", name, e); + } + return false; + } + + protected void setHttpDownloadURIExpirySeconds(int seconds) { + httpDownloadURIExpirySeconds = seconds; + } + + protected void setHttpDownloadURICacheSize(int maxSize) { + // max size 0 or smaller is used to turn off the cache + if (maxSize > 0) { + LOG.info("presigned GET URI cache enabled, maxSize = {} items, expiry = {} seconds", maxSize, httpDownloadURIExpirySeconds / 2); + httpDownloadURICache = CacheBuilder.newBuilder() + .maximumSize(maxSize) + .expireAfterWrite(Duration.ofSeconds(httpDownloadURIExpirySeconds / 2)) + .build(); + } else { + LOG.info("presigned GET URI cache disabled"); + httpDownloadURICache = null; + } + } + + protected URI createHttpDownloadURI(@NotNull DataIdentifier identifier, + @NotNull DataRecordDownloadOptions downloadOptions) { + URI uri = null; + + Objects.requireNonNull(identifier, "identifier must not be null"); + Objects.requireNonNull(downloadOptions, "downloadOptions must not be null"); + + if (httpDownloadURIExpirySeconds > 0) { + + String domain = getDirectDownloadBlobStorageDomain(downloadOptions.isDomainOverrideIgnored()); + Objects.requireNonNull(domain, "Could not determine domain for direct download"); + + String cacheKey = identifier + + domain + + Objects.toString(downloadOptions.getContentTypeHeader(), "") + + Objects.toString(downloadOptions.getContentDispositionHeader(), ""); + if (httpDownloadURICache != null) { + uri = httpDownloadURICache.getIfPresent(cacheKey); + } + if (uri == null) { + if (presignedDownloadURIVerifyExists) { + // Check if this identifier exists. If not, we want to return null + // even if the identifier is in the download URI cache. 
+ try { + if (!exists(identifier)) { + LOG.warn("Cannot create download URI for nonexistent blob {}; returning null", getKeyName(identifier)); + return null; + } + } catch (DataStoreException e) { + LOG.warn("Cannot create download URI for blob {} (caught DataStoreException); returning null", getKeyName(identifier), e); + return null; + } + } + + String key = getKeyName(identifier); + + // Prepare headers for the presigned URI + BlobSasHeadersV12 headers = new BlobSasHeadersV12() + .setCacheControl(String.format("private, max-age=%d, immutable", httpDownloadURIExpirySeconds)) + .setContentType(downloadOptions.getContentTypeHeader()) + .setContentDisposition(downloadOptions.getContentDispositionHeader()); + + uri = createPresignedURI(key, + new BlobSasPermission().setReadPermission(true), + httpDownloadURIExpirySeconds, + Map.of(), + domain, + headers); + if (uri != null && httpDownloadURICache != null) { + httpDownloadURICache.put(cacheKey, uri); + } + } + } + return uri; + } + + protected void setHttpUploadURIExpirySeconds(int seconds) { + httpUploadURIExpirySeconds = seconds; + } + + private DataIdentifier generateSafeRandomIdentifier() { + return new DataIdentifier( + String.format("%s-%d", + UUID.randomUUID(), + Instant.now().toEpochMilli() + ) + ); + } + + protected DataRecordUpload initiateHttpUpload(long maxUploadSizeInBytes, int maxNumberOfURIs, @NotNull final DataRecordUploadOptions options) throws DataRecordUploadException { + List uploadPartURIs = new ArrayList<>(); + long minPartSize = AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE; + long maxPartSize = AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE; + + Validate.checkArgument(maxUploadSizeInBytes > 0L, "maxUploadSizeInBytes must be > 0"); + Validate.checkArgument(maxNumberOfURIs > 0 || maxNumberOfURIs == -1, "maxNumberOfURIs must either be > 0 or -1"); + Validate.checkArgument(!(maxUploadSizeInBytes > AZURE_BLOB_MAX_SINGLE_PUT_UPLOAD_SIZE && maxNumberOfURIs == 1), "Cannot do single-put upload with file size %d - 
exceeds max single-put upload size of %d", maxUploadSizeInBytes, AZURE_BLOB_MAX_SINGLE_PUT_UPLOAD_SIZE); + Validate.checkArgument(maxUploadSizeInBytes <= AZURE_BLOB_MAX_BINARY_UPLOAD_SIZE, "Cannot do upload with file size %d - exceeds max upload size of %d", maxUploadSizeInBytes, AZURE_BLOB_MAX_BINARY_UPLOAD_SIZE); + + DataIdentifier newIdentifier = generateSafeRandomIdentifier(); + String blobId = getKeyName(newIdentifier); + String uploadId = null; + + if (httpUploadURIExpirySeconds > 0) { + // Always do multi-part uploads for Azure, even for small binaries. + // + // This is because Azure requires a unique header, "x-ms-blob-type=BlockBlob", to be + // set but only for single-put uploads, not multi-part. + // This would require clients to know not only the type of service provider being used + // but also the type of upload (single-put vs multi-part), which breaks abstraction. + // Instead we can insist that clients always do multi-part uploads to Azure, even + // if the multi-part upload consists of only one upload part. This doesn't require + // additional work on the part of the client since the "complete" request must always + // be sent regardless, but it helps us avoid the client having to know what type + // of provider is being used, or us having to instruct the client to use specific + // types of headers, etc. 
+ + // Azure doesn't use upload IDs like AWS does + // Generate a fake one for compatibility - we use them to determine whether we are + // doing multi-part or single-put upload + uploadId = Base64.encode(UUID.randomUUID().toString()); + + long numParts = 0L; + if (maxNumberOfURIs > 0) { + long requestedPartSize = (long) Math.ceil(((double) maxUploadSizeInBytes) / ((double) maxNumberOfURIs)); + if (requestedPartSize <= maxPartSize) { + numParts = Math.min( + maxNumberOfURIs, + Math.min( + (long) Math.ceil(((double) maxUploadSizeInBytes) / ((double) minPartSize)), + AZURE_BLOB_MAX_ALLOWABLE_UPLOAD_URIS + ) + ); + } else { + throw new IllegalArgumentException( + String.format("Cannot do multi-part upload with requested part size %d", requestedPartSize) + ); + } + } else { + long maximalNumParts = (long) Math.ceil(((double) maxUploadSizeInBytes) / ((double) AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE)); + numParts = Math.min(maximalNumParts, AZURE_BLOB_MAX_ALLOWABLE_UPLOAD_URIS); + } + + String key = getKeyName(newIdentifier); + String domain = getDirectUploadBlobStorageDomain(options.isDomainOverrideIgnored()); + Objects.requireNonNull(domain, "Could not determine domain for direct upload"); + + BlobSasPermission perms = new BlobSasPermission() + .setWritePermission(true); + Map presignedURIRequestParams = new HashMap<>(); + // see https://docs.microsoft.com/en-us/rest/api/storageservices/put-block#uri-parameters + presignedURIRequestParams.put("comp", "block"); + for (long blockId = 1; blockId <= numParts; ++blockId) { + presignedURIRequestParams.put("blockid", + Base64.encode(String.format("%06d", blockId))); + uploadPartURIs.add( + createPresignedURI(key, + perms, + httpUploadURIExpirySeconds, + presignedURIRequestParams, + domain) + ); + } + + try { + byte[] secret = getOrCreateReferenceKey(); + String uploadToken = new DataRecordUploadToken(blobId, uploadId).getEncodedToken(secret); + return new DataRecordUpload() { + @Override + @NotNull + public String 
getUploadToken() { + return uploadToken; + } + + @Override + public long getMinPartSize() { + return minPartSize; + } + + @Override + public long getMaxPartSize() { + return maxPartSize; + } + + @Override + @NotNull + public Collection getUploadURIs() { + return uploadPartURIs; + } + }; + } catch (DataStoreException e) { + throw new DataRecordUploadException("Unable to obtain data store key", e); + } + } + + return null; + } + + @Override + public void write(DataIdentifier identifier, File file) throws DataStoreException { + Objects.requireNonNull(identifier, "identifier must not be null"); + Objects.requireNonNull(file, "file must not be null"); + + String key = getKeyName(identifier); + Stopwatch stopwatch = Stopwatch.createStarted(); + try { + withBundleContextClassLoaderVoid(() -> { + long len = file.length(); + LOG.debug("Blob write started. identifier={} length={}", key, len); + BlockBlobClient blob = getAzureContainer().getBlobClient(key).getBlockBlobClient(); + if (!blob.exists()) { + uploadBlob(blob, file, len, stopwatch, key); + return; + } + + BlobProperties existingProps; + try { + existingProps = blob.getProperties(); + } catch (BlobStorageException e) { + if (e.getStatusCode() == 404) { + // deleted between exists() and getProperties() — re-upload + uploadBlob(blob, file, len, stopwatch, key); + return; + } + throw e; + } + + if (existingProps.getBlobSize() != len) { + throw new DataStoreException("Length Collision. identifier=" + key + + " new length=" + len + + " old length=" + existingProps.getBlobSize()); + } + + updateLastModifiedMetadata(blob); + long lm = getLastModified(blob); + + if (LOG.isTraceEnabled()) { + LOG.trace("Blob already exists. identifier={} lastModified={}", key, lm); + } + if (LOG.isDebugEnabled()) { + LOG.debug("Blob updated. identifier={} lastModified={} duration={}", key, + lm, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + } + }); + } catch (BlobStorageException e) { + LOG.info("Error writing blob. 
identifier={}", key, e); + throw new DataStoreException("Cannot write blob. identifier=" + key, e); + } catch (DataStoreException e) { + // IOException from uploadBlob() was wrapped by withBundleContextClassLoaderVoid + Throwable cause = e.getCause(); + if (cause instanceof IOException) { + LOG.debug("Error writing blob. identifier={}", key, cause); + throw new DataStoreException("Cannot write blob. identifier=" + key, cause); + } + LOG.info("Error writing blob. identifier={}", key, e); + throw e; + } + } + + private Long commitBlocksAndGetSize(BlockBlobClient client) throws DataStoreException { + List uncommittedBlocks = client.listBlocks(BlockListType.UNCOMMITTED).getUncommittedBlocks(); + if (uncommittedBlocks.isEmpty()) { + // A concurrent completeDataRecordUpload already committed these blocks. + // Calling commitBlockList([]) here would truncate the blob to 0 bytes. + List committedBlocks = client.listBlocks(BlockListType.COMMITTED).getCommittedBlocks(); + long size = committedBlocks.stream().mapToLong(Block::getSizeLong).sum(); + if (committedBlocks.isEmpty()) { + throw new DataStoreException("No committed or uncommitted blocks found — upload may not have completed"); + } + return size; + } + // Include lastModified in the same commit RPC so the blob is never committed without it. + // A separate setMetadata call after commit would leave a window where transient failure + // produces a committed blob with no lastModified key, causing premature GC. 
+ Map metadata = new HashMap<>(); + metadata.put(AZURE_BLOB_LAST_MODIFIED_KEY, String.valueOf(System.currentTimeMillis())); + BlockBlobCommitBlockListOptions options = new BlockBlobCommitBlockListOptions( + uncommittedBlocks.stream().map(Block::getName).collect(Collectors.toList())) + .setMetadata(metadata); + client.commitBlockListWithResponse(options, null, Context.NONE); + return uncommittedBlocks.stream().mapToLong(Block::getSizeLong).sum(); + } + + protected DataRecord completeHttpUpload(@NotNull String uploadTokenStr) + throws DataRecordUploadException, DataStoreException { + + Validate.checkArgument(StringUtils.isNotEmpty(uploadTokenStr), "uploadToken required"); + + DataRecordUploadToken uploadToken = DataRecordUploadToken.fromEncodedToken(uploadTokenStr, getOrCreateReferenceKey()); + String key = uploadToken.getBlobId(); + DataIdentifier blobId = new DataIdentifier(getIdentifierName(key)); + + DataRecord record = null; + try { + record = getRecord(blobId); + // If this succeeds this means either it was a "single put" upload + // (we don't need to do anything in this case - blob is already uploaded) + // or it was completed before with the same token. + } catch (DataStoreException e1) { + // Only treat as "record not found" when the cause is a 404 from Azure. + // Transient errors (auth, network, throttle) must propagate, not silently + // trigger a commit that may overwrite or corrupt an in-flight upload. 
+ Throwable cause = e1.getCause(); + if (!(cause instanceof BlobStorageException) || ((BlobStorageException) cause).getStatusCode() != 404) { + throw e1; + } + // record doesn't exist - so this means we are safe to do the complete request + try { + if (uploadToken.getUploadId().isPresent()) { + BlockBlobClient blockBlobClient = getAzureContainer().getBlobClient(key).getBlockBlobClient(); + long size = commitBlocksAndGetSize(blockBlobClient); + record = new AzureBlobStoreDataRecord( + this, + azureBlobContainerProvider, + blobId, + getLastModified(blockBlobClient), + size); + } else { + // Something is wrong - upload ID missing from upload token + // but record doesn't exist already, so this is invalid + throw new DataRecordUploadException( + String.format("Unable to finalize direct write of binary %s - upload ID missing from upload token", + blobId) + ); + } + } catch (BlobStorageException e2) { + throw new DataRecordUploadException( + String.format("Unable to finalize direct write of binary %s", blobId), + e2 + ); + } + } + + return record; + } + + String getDefaultBlobStorageDomain() { + String customEndpoint = properties.getProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT); + if (StringUtils.isNotBlank(customEndpoint)) { + try { + return new URI(customEndpoint).getHost(); + } catch (URISyntaxException e) { + LOG.warn("Invalid blobEndpoint URI: {}, falling back to default", customEndpoint, e); + } + } + String accountName = properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, ""); + if (StringUtils.isEmpty(accountName)) { + LOG.warn("Can't generate presigned URI - Azure account name not found in properties"); + return null; + } + return String.format("%s.blob.core.windows.net", accountName); + } + + private String getBlobStorageDomain(boolean ignoreDomainOverride, String domainOverride) { + String domain = ignoreDomainOverride ? 
getDefaultBlobStorageDomain() : domainOverride; + if (StringUtils.isEmpty(domain)) { + domain = getDefaultBlobStorageDomain(); + } + return domain; + } + + private String getDirectDownloadBlobStorageDomain(boolean ignoreDomainOverride) { + return getBlobStorageDomain(ignoreDomainOverride, downloadDomainOverride); + } + + private String getDirectUploadBlobStorageDomain(boolean ignoreDomainOverride) { + return getBlobStorageDomain(ignoreDomainOverride, uploadDomainOverride); + } + + private URI createPresignedURI(String key, + BlobSasPermission blobSasPermissions, + int expirySeconds, + String domain) { + return createPresignedURI(key, blobSasPermissions, expirySeconds, Map.of(), domain, null); + } + + private URI createPresignedURI(String key, + BlobSasPermission blobSasPermissions, + int expirySeconds, + Map additionalQueryParams, + String domain) { + return createPresignedURI(key, blobSasPermissions, expirySeconds, additionalQueryParams, domain, null); + } + + private URI createPresignedURI(String key, + BlobSasPermission blobSasPermissions, + int expirySeconds, + Map additionalQueryParams, + String domain, + BlobSasHeadersV12 optionalHeaders) { + if (Objects.toString(domain, "").isEmpty()) { + LOG.warn("Can't generate presigned URI - no Azure domain provided (is Azure account name configured?)"); + return null; + } + + URI presignedURI = null; + try { + String sharedAccessSignature = azureBlobContainerProvider.generateSharedAccessSignature(retryOptions, key, + blobSasPermissions, expirySeconds, properties, optionalHeaders); + + // Shared access signature is returned encoded already. 
+ String uriString = String.format("https://%s/%s/%s?%s", + domain, + getContainerName(), + key, + sharedAccessSignature); + + if (!additionalQueryParams.isEmpty()) { + StringBuilder builder = new StringBuilder(); + for (Map.Entry e : additionalQueryParams.entrySet()) { + builder.append("&"); + builder.append(URLEncoder.encode(e.getKey(), StandardCharsets.UTF_8)); + builder.append("="); + builder.append(URLEncoder.encode(e.getValue(), StandardCharsets.UTF_8)); + } + uriString += builder.toString(); + } + + presignedURI = new URI(uriString); + } catch (DataStoreException e) { + LOG.error("No connection to Azure Blob Storage", e); + } catch (URISyntaxException | InvalidKeyException e) { + LOG.error("Can't generate a presigned URI for key {}", key, e); + } catch (BlobStorageException e) { + LOG.error("Azure request to create presigned Azure Blob Storage {} URI failed. " + + "Key: {}, Error: {}, HTTP Code: {}, Azure Error Code: {}", + blobSasPermissions.hasReadPermission() ? "GET" : + ((blobSasPermissions.hasWritePermission()) ? "PUT" : ""), + key, + e.getMessage(), + e.getStatusCode(), + e.getErrorCode()); + } + + return presignedURI; + } + + // Package-private so the inner AzureBlobStoreDataRecord can call it with TCCL set correctly. + InputStream openBlobInputStream(BlobContainerClient container, String blobKey) throws DataStoreException { + return withBundleContextClassLoader(() -> container.getBlobClient(blobKey).openInputStream()); + } + + private String getContainerName() { + return Optional.ofNullable(this.azureBlobContainerProvider) + .map(AzureBlobContainerProviderV12::getContainerName) + .orElse(null); + } + + // synchronized: two concurrent cold-start calls must not each write a different key — + // the second key would invalidate all upload tokens signed with the first. 
+ @Override + public synchronized byte[] getOrCreateReferenceKey() throws DataStoreException { + try { + if (secret != null && secret.length != 0) { + return secret; + } else { + byte[] key; + // Read from Azure first: another cluster node may have already written the shared secret. + // All nodes must use the same HMAC key so that upload tokens are valid cluster-wide. + key = readMetadataBytes(AZURE_BLOB_REF_KEY); + if (key == null) { + key = super.getOrCreateReferenceKey(); + addMetadataRecord(new ByteArrayInputStream(key), AZURE_BLOB_REF_KEY); + } + secret = key; + return secret; + } + } catch (IOException e) { + throw new DataStoreException("Unable to get or create key", e); + } + } + + protected byte[] readMetadataBytes(String name) throws IOException, DataStoreException { + DataRecord rec = getMetadataRecord(name); + if (rec == null) { + return null; + } + try (InputStream stream = rec.getStream()) { + return IOUtils.toByteArray(stream); + } + } + + private String computeSecondaryLocationEndpoint() { + String accountName = properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, ""); + + boolean enableSecondaryLocation = PropertiesUtil.toBoolean(properties.getProperty(AzureConstantsV12.AZURE_BLOB_ENABLE_SECONDARY_LOCATION_NAME), + AzureConstantsV12.AZURE_BLOB_ENABLE_SECONDARY_LOCATION_DEFAULT); + + if (enableSecondaryLocation) { + return String.format("https://%s-secondary.blob.core.windows.net", accountName); + } + + return null; + } + + /** + * This interface together with {@link #withBundleContextClassLoader(AzureSDKCall)} enables calls to AzureSDK within the Class Loader of the current bundle + * @param + */ + @FunctionalInterface + private interface AzureSDKCall { + T execute() throws Exception; + } + + /** + * Same as {@link AzureSDKCall} but without return value + * @see AzureSDKCall + */ + @FunctionalInterface + private interface AzureSDKCallVoid { + void execute() throws Exception; + } + + static class AzureBlobStoreDataRecord extends
AbstractDataRecord { + final AzureBlobContainerProviderV12 azureBlobContainerProvider; + final long lastModified; + final long length; + final boolean isMeta; // true for metadata blobs (stored under AZURE_BlOB_META_DIR_NAME/); affects key construction in getStream() + + public AzureBlobStoreDataRecord(AbstractSharedBackend backend, AzureBlobContainerProviderV12 azureBlobContainerProvider, + DataIdentifier key, long lastModified, long length) { + this(backend, azureBlobContainerProvider, key, lastModified, length, false); + } + + public AzureBlobStoreDataRecord(AbstractSharedBackend backend, AzureBlobContainerProviderV12 azureBlobContainerProvider, + DataIdentifier key, long lastModified, long length, boolean isMeta) { + super(backend, key); + this.azureBlobContainerProvider = azureBlobContainerProvider; + this.lastModified = lastModified; + this.length = length; + this.isMeta = isMeta; + } + + @Override + public long getLength() throws DataStoreException { + return length; + } + + @Override + public InputStream getStream() throws DataStoreException { + String id = getKeyName(getIdentifier()); + // Use the backend's cached container so retry and proxy options are applied. 
+ BlobContainerClient container = ((AzureBlobStoreBackendV12) backend).getAzureContainer(); + if (isMeta) { + id = addMetaKeyPrefix(getIdentifier().toString()); + } else { + // Don't worry about stream logging for metadata records + if (LOG_STREAMS_DOWNLOAD.isDebugEnabled()) { + // Log message, with exception, so we can get a trace to see where the call came from + LOG_STREAMS_DOWNLOAD.debug("Binary downloaded from Azure Blob Storage - identifier={} ", id, new Exception()); + } + } + return ((AzureBlobStoreBackendV12) backend).openBlobInputStream(container, id); + } + + @Override + public long getLastModified() { + return lastModified; + } + + @Override + public String toString() { + return "AzureBlobStoreDataRecord{" + + "identifier=" + getIdentifier() + + ", length=" + length + + ", lastModified=" + lastModified + + ", containerName='" + Optional.ofNullable(azureBlobContainerProvider).map(AzureBlobContainerProviderV12::getContainerName).orElse(null) + '\'' + + '}'; + } + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureConstantsV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureConstantsV12.java new file mode 100644 index 00000000000..76130858779 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureConstantsV12.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +final class AzureConstantsV12 { + /** + * Directory name for storing metadata files in the blob storage + */ + public static final String AZURE_BlOB_META_DIR_NAME = "META"; + + /** + * Key prefix for metadata entries, includes trailing slash for directory structure + */ + public static final String AZURE_BLOB_META_KEY_PREFIX = AZURE_BlOB_META_DIR_NAME + "/"; + + /** + * Blob name (under META/) for the shared HMAC-SHA1 secret used to sign and verify upload tokens. + * All cluster nodes must read this key from storage so their tokens are mutually valid. 
+ */ + public static final String AZURE_BLOB_REF_KEY = "reference.key"; + + /** + * Key name for storing last modified timestamp metadata + */ + public static final String AZURE_BLOB_LAST_MODIFIED_KEY = "lastModified"; + + /** + * Threshold size (8 MiB) above which streams are buffered to disk during upload operations + */ + public static final long AZURE_BLOB_BUFFERED_STREAM_THRESHOLD = 8L * 1024L * 1024L; + + /** + * Minimum part size (256 KiB) required for Azure Blob Storage multipart uploads + */ + public static final long AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE = 256L * 1024L; + + /** + * Maximum part size (4000 MiB, i.e. just under 4 GiB) allowed by Azure Blob Storage for multipart uploads + */ + public static final long AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE = 4000L * 1024L * 1024L; + + /** + * Maximum size (256 MiB) for single PUT operations in Azure Blob Storage + */ + public static final long AZURE_BLOB_MAX_SINGLE_PUT_UPLOAD_SIZE = 256L * 1024L * 1024L; + + /** + * Maximum total binary size (190 TiB, slightly below the ~190.7 TiB that 50,000 blocks of 4000 MiB would allow) that can be uploaded to Azure Blob Storage + */ + public static final long AZURE_BLOB_MAX_BINARY_UPLOAD_SIZE = 190L * 1024L * 1024L * 1024L * 1024L; + + /** + * Maximum number of blocks (50,000) allowed per blob in Azure Blob Storage + */ + public static final int AZURE_BLOB_MAX_ALLOWABLE_UPLOAD_URIS = 50000; + + /** + * Default number of concurrent requests for Azure Blob Storage operations + */ + public static final int AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT = 5; + + /** + * Maximum number of concurrent requests for Azure Blob Storage operations + */ + public static final int AZURE_BLOB_MAX_CONCURRENT_REQUEST_COUNT = 10; + + /** + * Block size (4 MiB) used for parallel streaming uploads via BlobOutputStream + */ + public static final long AZURE_BLOB_PARALLEL_UPLOAD_BLOCK_SIZE = 4L * 1024L * 1024L; + + /** + * Number of concurrent block upload requests for parallel streaming uploads + */ + public static final int AZURE_BLOB_PARALLEL_UPLOAD_MAX_CONCURRENCY = 4;
+ + /** + * Default request timeout (3 minutes) for Azure Blob Storage operations + */ + public static final int AZURE_BLOB_DEFAULT_REQUEST_TIMEOUT = 3; + + // Auth / connection + static final String AZURE_STORAGE_ACCOUNT_NAME = "accessKey"; + static final String AZURE_STORAGE_ACCOUNT_KEY = "secretKey"; + static final String AZURE_CONNECTION_STRING = "azureConnectionString"; + static final String AZURE_SAS = "azureSas"; + static final String AZURE_TENANT_ID = "tenantId"; + static final String AZURE_CLIENT_ID = "clientId"; + static final String AZURE_CLIENT_SECRET = "clientSecret"; + static final String AZURE_BLOB_ENDPOINT = "azureBlobEndpoint"; + static final String AZURE_BLOB_CONTAINER_NAME = "container"; + // Behavior + static final String AZURE_CREATE_CONTAINER = "azureCreateContainer"; + static final String AZURE_BLOB_REQUEST_TIMEOUT = "socketTimeout"; + static final String AZURE_BLOB_MAX_REQUEST_RETRY = "maxErrorRetry"; + static final String AZURE_BLOB_CONCURRENT_REQUESTS_PER_OPERATION = "maxConnections"; + static final String AZURE_BLOB_ENABLE_SECONDARY_LOCATION_NAME = "enableSecondaryLocation"; + static final boolean AZURE_BLOB_ENABLE_SECONDARY_LOCATION_DEFAULT = false; + // Proxy + static final String PROXY_HOST = "proxyHost"; + static final String PROXY_PORT = "proxyPort"; + // Presigned URIs + static final String PRESIGNED_HTTP_UPLOAD_URI_EXPIRY_SECONDS = "presignedHttpUploadURIExpirySeconds"; + static final String PRESIGNED_HTTP_DOWNLOAD_URI_EXPIRY_SECONDS = "presignedHttpDownloadURIExpirySeconds"; + static final String PRESIGNED_HTTP_DOWNLOAD_URI_CACHE_MAX_SIZE = "presignedHttpDownloadURICacheMaxSize"; + static final String PRESIGNED_HTTP_DOWNLOAD_URI_VERIFY_EXISTS = "presignedHttpDownloadURIVerifyExists"; + static final String PRESIGNED_HTTP_DOWNLOAD_URI_DOMAIN_OVERRIDE = "presignedHttpDownloadURIDomainOverride"; + static final String PRESIGNED_HTTP_UPLOAD_URI_DOMAIN_OVERRIDE = "presignedHttpUploadURIDomainOverride"; + static final String 
AZURE_REF_ON_INIT = "refOnInit"; + + private AzureConstantsV12() { + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12.java new file mode 100644 index 00000000000..9e1a49778d9 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.apache.jackrabbit.oak.plugins.blob.AbstractSharedCachingDataStore; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.*; +import org.apache.jackrabbit.oak.spi.blob.AbstractSharedBackend; +import org.apache.jackrabbit.oak.spi.blob.SharedBackend; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.net.URI; +import java.util.Properties; + +public class AzureDataStoreV12 extends AbstractSharedCachingDataStore implements ConfigurableDataRecordAccessProvider { + protected Properties properties; + private int minRecordLength = 16 * 1024; + private AzureBlobStoreBackendV12 azureBlobStoreBackend; + + @Override + protected AbstractSharedBackend createBackend() { + azureBlobStoreBackend = new AzureBlobStoreBackendV12(); + if (properties != null) { + azureBlobStoreBackend.setProperties(properties); + } + return azureBlobStoreBackend; + } + + public void setProperties(final Properties properties) { + this.properties = properties; + } + + public SharedBackend getBackend() { + return backend; + } + + @Override + public int getMinRecordLength() { + return minRecordLength; + } + + public void setMinRecordLength(int minRecordLength) { + this.minRecordLength = minRecordLength; + } + + @Override + public void setDirectUploadURIExpirySeconds(int seconds) { + if (azureBlobStoreBackend != null) { + azureBlobStoreBackend.setHttpUploadURIExpirySeconds(seconds); + } + } + + @Override + public void setBinaryTransferAccelerationEnabled(boolean enabled) { + // NOOP - not a feature of Azure Blob Storage + } + + @Nullable + @Override + public DataRecordUpload initiateDataRecordUpload(long maxUploadSizeInBytes, int maxNumberOfURIs) + throws 
IllegalArgumentException, DataRecordUploadException { + return initiateDataRecordUpload(maxUploadSizeInBytes, maxNumberOfURIs, DataRecordUploadOptions.DEFAULT); + } + + @Nullable + @Override + public DataRecordUpload initiateDataRecordUpload(long maxUploadSizeInBytes, int maxNumberOfURIs, @NotNull final DataRecordUploadOptions options) + throws IllegalArgumentException, DataRecordUploadException { + if (azureBlobStoreBackend == null) { + throw new DataRecordUploadException("Backend not initialized"); + } + return azureBlobStoreBackend.initiateHttpUpload(maxUploadSizeInBytes, maxNumberOfURIs, options); + } + + @NotNull + @Override + public DataRecord completeDataRecordUpload(@NotNull String uploadToken) + throws IllegalArgumentException, DataRecordUploadException, DataStoreException { + if (azureBlobStoreBackend == null) { + throw new DataRecordUploadException("Backend not initialized"); + } + return azureBlobStoreBackend.completeHttpUpload(uploadToken); + } + + @Override + public void setDirectDownloadURIExpirySeconds(int seconds) { + if (azureBlobStoreBackend != null) { + azureBlobStoreBackend.setHttpDownloadURIExpirySeconds(seconds); + } + } + + @Override + public void setDirectDownloadURICacheSize(int maxSize) { + if (azureBlobStoreBackend != null) { + azureBlobStoreBackend.setHttpDownloadURICacheSize(maxSize); + } + } + + @Nullable + @Override + public URI getDownloadURI(@NotNull DataIdentifier identifier, + @NotNull DataRecordDownloadOptions downloadOptions) { + if (azureBlobStoreBackend != null) { + return azureBlobStoreBackend.createHttpDownloadURI(identifier, downloadOptions); + } + return null; + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureHttpRequestLoggingPolicyV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureHttpRequestLoggingPolicyV12.java new file mode 100644 index 00000000000..8d2ed5863a1 --- /dev/null +++ 
b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureHttpRequestLoggingPolicyV12.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.core.http.HttpPipelineCallContext; +import com.azure.core.http.HttpPipelineNextPolicy; +import com.azure.core.http.HttpResponse; +import com.azure.core.http.policy.HttpPipelinePolicy; + +import org.apache.jackrabbit.oak.commons.properties.SystemPropertySupplier; +import org.apache.jackrabbit.oak.commons.time.Stopwatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; + +import java.util.concurrent.TimeUnit; + +/** + * HTTP pipeline policy for logging Azure Blob Storage requests in the oak-blob-cloud-azure module. + *

+ * When verbose logging is enabled, this policy logs HTTP request details including method, URL, status code, and duration. + * Verbose logging is disabled by default and can be enabled by setting the system property: + * -Dblob.azure.v12.http.verbose.enabled=true + *

+ * This is similar to the AzureHttpRequestLoggingPolicy in oak-segment-azure but specifically + * designed for the blob storage operations in oak-blob-cloud-azure. + */ +class AzureHttpRequestLoggingPolicyV12 implements HttpPipelinePolicy { + + private static final Logger log = LoggerFactory.getLogger(AzureHttpRequestLoggingPolicyV12.class); + + private static final String AZURE_SDK_VERBOSE_LOGGING_ENABLED = "blob.azure.v12.http.verbose.enabled"; + + private final boolean verboseEnabled = SystemPropertySupplier.create(AZURE_SDK_VERBOSE_LOGGING_ENABLED, false).get(); + + @Override + public Mono process(HttpPipelineCallContext context, HttpPipelineNextPolicy next) { + Stopwatch stopwatch = Stopwatch.createStarted(); + + return next.process().flatMap(httpResponse -> { + if (verboseEnabled) { + log.info("HTTP Blob Request: {} {} {} {} ms", + context.getHttpRequest().getHttpMethod(), + context.getHttpRequest().getUrl(), + httpResponse.getStatusCode(), + stopwatch.elapsed(TimeUnit.MILLISECONDS)); + } + + return Mono.just(httpResponse); + }); + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12.java new file mode 100644 index 00000000000..a590c2929de --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.storage.blob.sas.BlobServiceSasSignatureValues; +import org.jetbrains.annotations.Nullable; + +/** + * Represents the optional headers that can be returned using SAS (Shared Access Signature). + * This class is the Azure SDK 12 equivalent of the legacy {@code com.microsoft.azure.storage.blob.SharedAccessBlobHeaders}. + * + *

These headers are set on the {@link BlobServiceSasSignatureValues} object and will be + * returned to the client when the SAS token is used to access the blob.

+ * + * @see BlobServiceSasSignatureValues + */ +class BlobSasHeadersV12 { + + private String cacheControl; + private String contentDisposition; + private String contentEncoding; + private String contentLanguage; + private String contentType; + + /** + * Creates an empty BlobSasHeaders object. + */ + public BlobSasHeadersV12() { + } + + /** + * Creates a BlobSasHeaders object with the specified values. + * + * @param cacheControl the cache-control header value + * @param contentDisposition the content-disposition header value + * @param contentEncoding the content-encoding header value + * @param contentLanguage the content-language header value + * @param contentType the content-type header value + */ + public BlobSasHeadersV12(@Nullable String cacheControl, + @Nullable String contentDisposition, + @Nullable String contentEncoding, + @Nullable String contentLanguage, + @Nullable String contentType) { + this.cacheControl = cacheControl; + this.contentDisposition = contentDisposition; + this.contentEncoding = contentEncoding; + this.contentLanguage = contentLanguage; + this.contentType = contentType; + } + + /** + * Gets the cache-control header value. + * + * @return the cache-control header value + */ + @Nullable + public String getCacheControl() { + return cacheControl; + } + + /** + * Sets the cache-control header value. + * + * @param cacheControl the cache-control header value + * @return this BlobSasHeaders object for method chaining + */ + public BlobSasHeadersV12 setCacheControl(@Nullable String cacheControl) { + this.cacheControl = cacheControl; + return this; + } + + /** + * Gets the content-disposition header value. + * + * @return the content-disposition header value + */ + @Nullable + public String getContentDisposition() { + return contentDisposition; + } + + /** + * Sets the content-disposition header value. 
+ * + * @param contentDisposition the content-disposition header value + * @return this BlobSasHeaders object for method chaining + */ + public BlobSasHeadersV12 setContentDisposition(@Nullable String contentDisposition) { + this.contentDisposition = contentDisposition; + return this; + } + + /** + * Gets the content-encoding header value. + * + * @return the content-encoding header value + */ + @Nullable + public String getContentEncoding() { + return contentEncoding; + } + + /** + * Sets the content-encoding header value. + * + * @param contentEncoding the content-encoding header value + * @return this BlobSasHeaders object for method chaining + */ + public BlobSasHeadersV12 setContentEncoding(@Nullable String contentEncoding) { + this.contentEncoding = contentEncoding; + return this; + } + + /** + * Gets the content-language header value. + * + * @return the content-language header value + */ + @Nullable + public String getContentLanguage() { + return contentLanguage; + } + + /** + * Sets the content-language header value. + * + * @param contentLanguage the content-language header value + * @return this BlobSasHeaders object for method chaining + */ + public BlobSasHeadersV12 setContentLanguage(@Nullable String contentLanguage) { + this.contentLanguage = contentLanguage; + return this; + } + + /** + * Gets the content-type header value. + * + * @return the content-type header value + */ + @Nullable + public String getContentType() { + return contentType; + } + + /** + * Sets the content-type header value. + * + * @param contentType the content-type header value + * @return this BlobSasHeaders object for method chaining + */ + public BlobSasHeadersV12 setContentType(@Nullable String contentType) { + this.contentType = contentType; + return this; + } + + /** + * Applies these headers to the given {@link BlobServiceSasSignatureValues} object. + * Only non-null headers are set. 
+ * + * @param sasSignatureValues the BlobServiceSasSignatureValues object to apply headers to + */ + public void applyTo(BlobServiceSasSignatureValues sasSignatureValues) { + if (sasSignatureValues == null) { + return; + } + + if (cacheControl != null) { + sasSignatureValues.setCacheControl(cacheControl); + } + if (contentDisposition != null) { + sasSignatureValues.setContentDisposition(contentDisposition); + } + if (contentEncoding != null) { + sasSignatureValues.setContentEncoding(contentEncoding); + } + if (contentLanguage != null) { + sasSignatureValues.setContentLanguage(contentLanguage); + } + if (contentType != null) { + sasSignatureValues.setContentType(contentType); + } + } + + /** + * Checks if any headers are set (non-null). + * + * @return true if at least one header is set, false otherwise + */ + public boolean hasHeaders() { + return cacheControl != null || contentDisposition != null || contentEncoding != null + || contentLanguage != null || contentType != null; + } +} diff --git a/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12.java b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12.java new file mode 100644 index 00000000000..d5ee944c826 --- /dev/null +++ b/oak-blob-cloud-azure/src/main/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.core.http.HttpClient; +import com.azure.core.http.ProxyOptions; +import com.azure.core.http.netty.NettyAsyncHttpClientBuilder; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.common.policy.RequestRetryOptions; +import com.azure.storage.common.policy.RetryPolicyType; +import org.apache.commons.lang3.StringUtils; +import org.apache.jackrabbit.oak.commons.PropertiesUtil; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.InetSocketAddress; +import java.util.Objects; +import java.util.Properties; + +final class UtilsV12 { + public static final String DASH = "-"; + public static final String DEFAULT_CONFIG_FILE = "azurev12.properties"; + + private UtilsV12() { + } + + public static BlobContainerClient getBlobContainer(@NotNull final String connectionString, + @NotNull final String containerName, + @Nullable final RequestRetryOptions retryOptions, + final Properties properties) throws DataStoreException { + try { + AzureHttpRequestLoggingPolicyV12 loggingPolicy = new AzureHttpRequestLoggingPolicyV12(); + + BlobServiceClientBuilder builder = new BlobServiceClientBuilder() + 
.connectionString(connectionString) + .retryOptions(retryOptions) + .addPolicy(loggingPolicy); + + HttpClient httpClient = new NettyAsyncHttpClientBuilder() + .proxy(computeProxyOptions(properties)) + .build(); + + builder.httpClient(httpClient); + + BlobServiceClient blobServiceClient = builder.buildClient(); + return blobServiceClient.getBlobContainerClient(containerName); + + } catch (Exception e) { + throw new DataStoreException(e); + } + } + + public static ProxyOptions computeProxyOptions(final Properties properties) { + String proxyHost = properties.getProperty(AzureConstantsV12.PROXY_HOST); + String proxyPort = properties.getProperty(AzureConstantsV12.PROXY_PORT); + + if (!(Objects.toString(proxyHost, "").isEmpty() || Objects.toString(proxyPort, "").isEmpty())) { + return new ProxyOptions(ProxyOptions.Type.HTTP, + new InetSocketAddress(proxyHost, Integer.parseInt(proxyPort))); + } + return null; + } + + public static RequestRetryOptions getRetryOptions(final String maxRequestRetryCount, Integer requestTimeout, String secondaryLocation) { + int retries = PropertiesUtil.toInteger(maxRequestRetryCount, -1); + if (retries < 0) { + return null; + } + + if (retries == 0) { + return new RequestRetryOptions(RetryPolicyType.FIXED, 1, + requestTimeout, null, null, + secondaryLocation); + } + return new RequestRetryOptions(RetryPolicyType.EXPONENTIAL, retries, + requestTimeout, null, null, + secondaryLocation); + } + + public static String getConnectionStringFromProperties(Properties properties) { + String sasUri = properties.getProperty(AzureConstantsV12.AZURE_SAS, ""); + String blobEndpoint = properties.getProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, ""); + String connectionString = properties.getProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, ""); + String accountName = properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, ""); + String accountKey = properties.getProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_KEY, ""); + + if 
(!connectionString.isEmpty()) { + return connectionString; + } + + if (!sasUri.isEmpty()) { + return getConnectionStringForSas(sasUri, blobEndpoint, accountName); + } + + return getConnectionString( + accountName, + accountKey, + blobEndpoint); + } + + public static String getConnectionStringForSas(String sasUri, String blobEndpoint, String accountName) { + if (StringUtils.isEmpty(blobEndpoint)) { + return String.format("AccountName=%s;SharedAccessSignature=%s", accountName, sasUri); + } else { + return String.format("BlobEndpoint=%s;SharedAccessSignature=%s", blobEndpoint, sasUri); + } + } + + public static String getConnectionString(final String accountName, final String accountKey, String blobEndpoint) { + StringBuilder connString = new StringBuilder("DefaultEndpointsProtocol=https"); + connString.append(";AccountName=").append(accountName); + connString.append(";AccountKey=").append(accountKey); + if (!Objects.toString(blobEndpoint, "").isEmpty()) { + connString.append(";BlobEndpoint=").append(blobEndpoint); + } + return connString.toString(); + } + + public static BlobContainerClient getBlobContainerFromConnectionString(final String azureConnectionString, + final String containerName, + @Nullable final RequestRetryOptions retryOptions, + final Properties properties) { + AzureHttpRequestLoggingPolicyV12 loggingPolicy = new AzureHttpRequestLoggingPolicyV12(); + + BlobServiceClientBuilder builder = new BlobServiceClientBuilder() + .connectionString(azureConnectionString) + .addPolicy(loggingPolicy); + if (retryOptions != null) { + builder.retryOptions(retryOptions); + } + HttpClient httpClient = new NettyAsyncHttpClientBuilder() + .proxy(computeProxyOptions(properties)) + .build(); + builder.httpClient(httpClient); + return builder.buildClient().getBlobContainerClient(containerName); + } + + /** + * No-arg overload for callers without retry/proxy context (e.g. DataRecord.getStream()). 
+ */ + public static BlobContainerClient getBlobContainerFromConnectionString(final String azureConnectionString, final String containerName) { + return getBlobContainerFromConnectionString(azureConnectionString, containerName, null, new Properties()); + } + + /** + * Read a configuration properties file. + * + * @param fileName the properties file name + * @return the properties + * @throws java.io.IOException if the file doesn't exist + */ + public static Properties readConfig(String fileName) throws IOException { + if (!new File(fileName).exists()) { + throw new IOException("Config file not found. fileName=" + fileName); + } + Properties prop = new Properties(); + InputStream in = null; + try { + in = new FileInputStream(fileName); + prop.load(in); + } finally { + if (in != null) { + in.close(); + } + } + return prop; + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperArchTest.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperArchTest.java new file mode 100644 index 00000000000..85e601c6fe1 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperArchTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +import com.tngtech.archunit.junit.AnalyzeClasses; +import com.tngtech.archunit.junit.ArchTest; +import com.tngtech.archunit.junit.ArchUnitRunner; +import com.tngtech.archunit.lang.ArchRule; +import org.junit.runner.RunWith; + +import static com.tngtech.archunit.lang.syntax.ArchRuleDefinition.noClasses; + +@RunWith(ArchUnitRunner.class) +@AnalyzeClasses(packages = "org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage") +public class AzureDataStoreWrapperArchTest { + + // v8 classes must not reference v12 — except AzureDataStoreWrapper (the intentional bridge). + // Test classes (ending in Test/IT) are excluded: test infrastructure routinely crosses + // package boundaries to access helpers like AzuriteDockerRule and AzureDataStoreV12. + @ArchTest + static final ArchRule v8MustNotReferenceV12 = noClasses() + .that().resideInAPackage("..azure.blobstorage") + .and().areNotAssignableTo(AzureDataStoreWrapper.class) + .and().areNotAssignableTo(AzureDataStoreWrapper.DelegatingDataStore.class) + .and().haveSimpleNameNotEndingWith("Test") + .and().haveSimpleNameNotEndingWith("IT") + .should().dependOnClassesThat() + .resideInAPackage("..azure.blobstorage.v12.."); + + // v12 classes must not reference v8 — AzureDataStoreWrapper owns the one-way bridge. + // Test classes (ending in Test/IT) are excluded for the same reason as above. 
+ @ArchTest + static final ArchRule v12MustNotReferenceV8 = noClasses() + .that().resideInAPackage("..azure.blobstorage.v12..") + .and().haveSimpleNameNotEndingWith("Test") + .and().haveSimpleNameNotEndingWith("IT") + .should().dependOnClassesThat() + .resideInAPackage("..azure.blobstorage"); +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperIT.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperIT.java new file mode 100644 index 00000000000..a3060158560 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperIT.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobContainerClientBuilder; +import com.azure.storage.blob.specialized.BlockBlobClient; +import org.apache.jackrabbit.oak.plugins.blob.AbstractSharedCachingDataStore; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.*; +import org.junit.rules.TemporaryFolder; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.*; + +import static org.junit.Assert.*; + +/** + * Integration tests for AzureDataStoreWrapper against Azurite (Azure Storage emulator). + *

+ * Runs unconditionally in CI via Docker — no external Azure credentials needed. + * Each test gets a fresh container name and home directories. + *

+ * Cross-SDK compatibility tests (v8 write → v12 read and vice versa) use two separate + * wrapper instances pointing at the same Azure container. Upload staging is disabled + * (stagingSplitPercentage=0) so addRecord() writes synchronously to Azurite. + */ +public class AzureDataStoreWrapperIT { + + @ClassRule + public static final AzuriteDockerRule AZURITE = new AzuriteDockerRule(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private AzureDataStoreWrapper.DelegatingDataStore dsV8; + private AzureDataStoreWrapper.DelegatingDataStore dsV12; + private String containerName; + + // Azure backend stores blobs under "{first4}-{rest}" keys; must match to find the right blob. + private static String blobKeyFor(DataIdentifier identifier) { + String id = identifier.toString(); + return id.substring(0, 4) + "-" + id.substring(4); + } + + @Before + public void setUp() throws Exception { + containerName = "test-" + System.nanoTime(); + Properties props = azuriteProps(containerName); + + AzureDataStore v8 = new AzureDataStore(); + v8.setProperties(props); + v8.setStagingSplitPercentage(0); + AzureDataStoreWrapper wrapperV8 = new AzureDataStoreWrapper(); + wrapperV8.activeImpl = v8; + dsV8 = wrapperV8.new DelegatingDataStore(); + dsV8.init(folder.newFolder().getAbsolutePath()); + + AbstractSharedCachingDataStore v12 = AzureDataStoreWrapper.createV12Store(props); + v12.setStagingSplitPercentage(0); + AzureDataStoreWrapper wrapperV12 = new AzureDataStoreWrapper(); + wrapperV12.activeImpl = v12; + dsV12 = wrapperV12.new DelegatingDataStore(); + dsV12.init(folder.newFolder().getAbsolutePath()); + } + + @After + public void tearDown() throws DataStoreException { + if (dsV8 != null) dsV8.close(); + if (dsV12 != null) dsV12.close(); + } + + /** + * Write a blob via the v8 SDK path, then read it back via v12. + * Verifies that the v8 blob key format is resolvable by the v12 SDK and that + * the content bytes are byte-for-byte identical. 
+ */ + @Test + public void blobWrittenOnV8IsReadableByV12() throws DataStoreException, IOException { + byte[] payload = "hello from v8 SDK".getBytes(); + + DataRecord written = dsV8.addRecord(new ByteArrayInputStream(payload)); + assertNotNull(written); + + DataRecord read = dsV12.getRecord(written.getIdentifier()); + assertNotNull(read); + assertArrayEquals("content must survive v8-write → v12-read", payload, read.getStream().readAllBytes()); + } + + /** + * Write a blob via v12, then read it back via v8. + * Mirror of the above: verifies v12 key format is resolvable by v8. + */ + @Test + public void blobWrittenOnV12IsReadableByV8() throws DataStoreException, IOException { + byte[] payload = "hello from v12 SDK".getBytes(); + + DataRecord written = dsV12.addRecord(new ByteArrayInputStream(payload)); + assertNotNull(written); + + DataRecord read = dsV8.getRecord(written.getIdentifier()); + assertNotNull(read); + assertArrayEquals("content must survive v12-write → v8-read", payload, read.getStream().readAllBytes()); + } + + /** + * Write blobs via each SDK alternately, then read all of them back through both SDKs. + * Verifies no write is silently dropped and content is intact regardless of which SDK + * path handled each operation. 
+ */ + @Test + public void blobsWrittenViaEitherSdkAreReadableByBoth() throws DataStoreException, IOException { + byte[][] payloads = { + "blob-A-v8".getBytes(), + "blob-B-v12".getBytes(), + "blob-C-v12".getBytes(), + "blob-D-v8".getBytes(), + "blob-E-v12".getBytes(), + }; + + DataRecord[] written = { + dsV8.addRecord(new ByteArrayInputStream(payloads[0])), + dsV12.addRecord(new ByteArrayInputStream(payloads[1])), + dsV12.addRecord(new ByteArrayInputStream(payloads[2])), + dsV8.addRecord(new ByteArrayInputStream(payloads[3])), + dsV12.addRecord(new ByteArrayInputStream(payloads[4])), + }; + + for (int i = 0; i < written.length; i++) { + assertNotNull("record " + i + " must not be null", written[i]); + } + + for (int i = 0; i < written.length; i++) { + byte[] actual = dsV12.getRecord(written[i].getIdentifier()).getStream().readAllBytes(); + assertArrayEquals("blob " + i + " content mismatch via v12", payloads[i], actual); + } + + for (int i = 0; i < written.length; i++) { + byte[] actual = dsV8.getRecord(written[i].getIdentifier()).getStream().readAllBytes(); + assertArrayEquals("blob " + i + " content mismatch via v8", payloads[i], actual); + } + } + + // -- Data-loss safety tests -- + + /** + * Verifies that the record length reported by each SDK matches the actual payload size + * regardless of which SDK wrote the blob. 
+ */ + @Test + public void recordLengthConsistentAcrossSDKs() throws DataStoreException { + byte[] payload = new byte[4096]; + for (int i = 0; i < payload.length; i++) payload[i] = (byte) i; + + DataRecord v8Rec = dsV8.addRecord(new ByteArrayInputStream(payload)); + assertEquals("v8 record length must match payload", payload.length, v8Rec.getLength()); + + DataRecord readByV12 = dsV12.getRecord(v8Rec.getIdentifier()); + assertEquals("v12-read record length must match payload", payload.length, readByV12.getLength()); + + byte[] v12Payload = new byte[8192]; + DataRecord v12Rec = dsV12.addRecord(new ByteArrayInputStream(v12Payload)); + assertEquals("v12 record length must match payload", v12Payload.length, v12Rec.getLength()); + + DataRecord readByV8 = dsV8.getRecord(v12Rec.getIdentifier()); + assertEquals("v8-read record length must match payload", v12Payload.length, readByV8.getLength()); + } + + /** + * A blob written via the v12 SDK must have a "lastModified" metadata key; its absence causes getLastModified() to fall back to the Azure server timestamp, which can make deleteAllOlderThan() GC the blob prematurely. 
+ */ + @Test + public void v12_writeBlob_hasLastModifiedMetadata() throws DataStoreException, IOException { + byte[] payload = new byte[32 * 1024]; + Arrays.fill(payload, (byte) 'x'); + + DataRecord record = dsV12.addRecord(new ByteArrayInputStream(payload)); + assertNotNull(record); + + String blobKey = blobKeyFor(record.getIdentifier()); + BlobContainerClient containerClient = azuriteContainerClient(); + BlockBlobClient blobClient = containerClient.getBlobClient(blobKey).getBlockBlobClient(); + + Map metadata = blobClient.getProperties().getMetadata(); + assertTrue("blob must have 'lastModified' metadata key after v12 write; " + + "absent key causes premature GC via Azure server-timestamp fallback", + metadata != null && metadata.containsKey("lastModified")); + + long lastModified = Long.parseLong(metadata.get("lastModified")); + long now = System.currentTimeMillis(); + assertTrue("lastModified must be a recent epoch-millis timestamp", + lastModified > 0 && lastModified <= now && lastModified > now - 60_000); + } + + // -- helpers -- + + /** + * Concurrent backend initialization against the same container must produce a single consistent reference key. 
+ */ + @Test + public void v12_concurrentReferenceKeyInit_allBackendsGetSameKey() throws Exception { + Properties props = azuriteProps(containerName); + + int backends = 4; + CountDownLatch ready = new CountDownLatch(backends); + CountDownLatch start = new CountDownLatch(1); + ExecutorService pool = Executors.newFixedThreadPool(backends); + List> futures = new ArrayList<>(); + + for (int i = 0; i < backends; i++) { + futures.add(pool.submit(() -> { + AbstractSharedCachingDataStore v12 = AzureDataStoreWrapper.createV12Store(props); + v12.setStagingSplitPercentage(0); + java.io.File home = folder.newFolder(); + ready.countDown(); + start.await(); + v12.init(home.getAbsolutePath()); + return new byte[0]; + })); + } + + ready.await(); + start.countDown(); + pool.shutdown(); + assertTrue("backends did not initialize in time", pool.awaitTermination(60, TimeUnit.SECONDS)); + + long refKeyCount = azuriteContainerClient() + .listBlobs(new com.azure.storage.blob.models.ListBlobsOptions() + .setPrefix("META/"), null) + .stream() + .filter(b -> b.getName().contains("oak.datastore.key") || + b.getName().contains("azure.blob.ref.key")) + .count(); + + assertTrue("concurrent backend init must produce exactly one reference key in storage; " + + "found " + refKeyCount + " — multiple keys cause upload token verification failures", + refKeyCount <= 1); + } + + private Properties azuriteProps(String containerName) { + Properties p = new Properties(); + p.setProperty("azureConnectionString", + "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + AzuriteDockerRule.ACCOUNT_KEY + + ";BlobEndpoint=" + AZURITE.getBlobEndpoint()); + p.setProperty("container", containerName); + p.setProperty("azureCreateContainer", "true"); + return p; + } + + private BlobContainerClient azuriteContainerClient() { + String connectionString = + "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + 
AzuriteDockerRule.ACCOUNT_KEY + + ";BlobEndpoint=" + AZURITE.getBlobEndpoint(); + return new BlobContainerClientBuilder() + .connectionString(connectionString) + .containerName(containerName) + .buildClient(); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperTest.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperTest.java new file mode 100644 index 00000000000..c1dfbbf7d47 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzureDataStoreWrapperTest.java @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +import org.apache.jackrabbit.oak.plugins.blob.AbstractSharedCachingDataStore; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.ConfigurableDataRecordAccessProvider; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.osgi.framework.BundleContext; +import org.osgi.framework.Constants; +import org.osgi.framework.ServiceRegistration; +import org.osgi.service.component.ComponentContext; + +import java.io.ByteArrayInputStream; +import java.util.Collections; +import java.util.Dictionary; +import java.util.Map; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.same; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.withSettings; + +/** + * Unit tests for AzureDataStoreWrapper — delegation, v12 feature-flag detection, and OSGi service registration. + *

+ * AzureDataStoreWrapper is the OSGi component that selects between the v8 and v12 blob-store backend + * at runtime based on config/JVM flags and exposes a single DataStore service to the rest of the system. + * These tests verify that delegation is transparent, flag resolution precedence is correct, and the + * OSGi service registration uses the v8 PID so existing configs keep working without migration. + */ +public class AzureDataStoreWrapperTest { + + // mockImpl implements both AbstractSharedCachingDataStore and ConfigurableDataRecordAccessProvider — + // the same intersection both AzureDataStore (v8) and AzureDataStoreV12 satisfy at runtime. + private AbstractSharedCachingDataStore mockImpl; + private AzureDataStoreWrapper wrapper; + + @After + public void tearDown() { + System.clearProperty(AzureDataStoreWrapper.ENV_VAR_V12_ENABLED); + System.clearProperty(AzureDataStoreWrapper.JVM_PROPERTY_V12_ENABLED); + } + + @Before + public void setUp() { + mockImpl = mock( + AbstractSharedCachingDataStore.class, + withSettings().extraInterfaces(ConfigurableDataRecordAccessProvider.class)); + wrapper = new AzureDataStoreWrapper(); + wrapper.activeImpl = mockImpl; + } + + @Test + public void addRecordDelegatesToActiveImpl() throws DataStoreException { + DataRecord record = mock(DataRecord.class); + when(mockImpl.addRecord(any())).thenReturn(record); + + DataRecord result = wrapper.new DelegatingDataStore() + .addRecord(new ByteArrayInputStream(new byte[]{1})); + + assertSame(record, result); + verify(mockImpl).addRecord(any()); + } + + @Test + public void getRecordDelegatesToActiveImpl() throws DataStoreException { + DataRecord record = mock(DataRecord.class); + when(mockImpl.getRecord(any())).thenReturn(record); + + DataRecord result = wrapper.new DelegatingDataStore() + .getRecord(mock(org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier.class)); + + assertSame(record, result); + verify(mockImpl).getRecord(any()); + } + + /** + * Config setters on 
DelegatingDataStore must forward to activeImpl — buffering them in the wrapper would silently have no effect on the active backend. + */ + @Test + public void configSettersAppliedToActiveImpl() { + AzureDataStoreWrapper.DelegatingDataStore ds = wrapper.new DelegatingDataStore(); + ds.setDirectUploadURIExpirySeconds(300); + ds.setDirectDownloadURIExpirySeconds(600); + ds.setDirectDownloadURICacheSize(100); + ds.setBinaryTransferAccelerationEnabled(true); + + ConfigurableDataRecordAccessProvider provider = (ConfigurableDataRecordAccessProvider) mockImpl; + verify(provider).setDirectUploadURIExpirySeconds(300); + verify(provider).setDirectDownloadURIExpirySeconds(600); + verify(provider).setDirectDownloadURICacheSize(100); + verify(provider).setBinaryTransferAccelerationEnabled(true); + } + + @Test + public void closeClosesActiveImpl() throws DataStoreException { + wrapper.new DelegatingDataStore().close(); + verify(mockImpl).close(); + } + + @Test + public void getUseV12Value_noSysProp_noConfig_returnsFalse() { + assertFalse(AzureDataStoreWrapper.getUseV12Value(Collections.emptyMap())); + } + + @Test + public void getUseV12Value_noSysProp_configTrue_returnsTrue() { + Map config = Collections.singletonMap(AzureDataStoreWrapper.ENV_VAR_V12_ENABLED, true); + assertTrue(AzureDataStoreWrapper.getUseV12Value(config)); + } + + @Test + public void getUseV12Value_noSysProp_configFalse_returnsFalse() { + Map config = Collections.singletonMap(AzureDataStoreWrapper.ENV_VAR_V12_ENABLED, false); + assertFalse(AzureDataStoreWrapper.getUseV12Value(config)); + } + + @Test + public void getUseV12Value_jvmPropTrue_overridesConfigFalse() { + System.setProperty(AzureDataStoreWrapper.JVM_PROPERTY_V12_ENABLED, "true"); + Map config = Collections.singletonMap(AzureDataStoreWrapper.ENV_VAR_V12_ENABLED, false); + assertTrue(AzureDataStoreWrapper.getUseV12Value(config)); + } + + @Test + public void getUseV12Value_jvmPropFalse_overridesConfigTrue() { + 
System.setProperty(AzureDataStoreWrapper.JVM_PROPERTY_V12_ENABLED, "false"); + Map config = Collections.singletonMap(AzureDataStoreWrapper.ENV_VAR_V12_ENABLED, true); + assertFalse(AzureDataStoreWrapper.getUseV12Value(config)); + } + + @Test + public void registerService_registersUnderAbstractSharedCachingDataStoreClass() { + ComponentContext ctx = mockComponentContext(); + + AzureDataStoreWrapper.registerService(ctx, mockImpl); + + verify(ctx.getBundleContext()).registerService( + eq(AbstractSharedCachingDataStore.class.getName()), same(mockImpl), any()); + } + + /** + * The registered service PID must match AzureDataStore (v8) — OSGi configs in existing AEM + * installations target that PID, so changing it would orphan those configs on upgrade. + */ + @Test + public void registerService_usesV8PidForCompatibility() { + ComponentContext ctx = mockComponentContext(); + + @SuppressWarnings("unchecked") + ArgumentCaptor> props = ArgumentCaptor.forClass(Dictionary.class); + AzureDataStoreWrapper.registerService(ctx, mockImpl); + + verify(ctx.getBundleContext()).registerService(anyString(), any(), props.capture()); + assertEquals(AzureDataStore.class.getName(), props.getValue().get(Constants.SERVICE_PID)); + } + + @Test + public void registerService_setsAzureBlobDescription() { + ComponentContext ctx = mockComponentContext(); + + @SuppressWarnings("unchecked") + ArgumentCaptor> props = ArgumentCaptor.forClass(Dictionary.class); + AzureDataStoreWrapper.registerService(ctx, mockImpl); + + verify(ctx.getBundleContext()).registerService(anyString(), any(), props.capture()); + assertArrayEquals(new String[]{"type=AzureBlob"}, + (String[]) props.getValue().get("oak.datastore.description")); + } + + @Test + public void registerService_returnsRegistrationFromBundleContext() { + ComponentContext ctx = mockComponentContext(); + BundleContext bundleContext = ctx.getBundleContext(); + ServiceRegistration reg = mock(ServiceRegistration.class); + 
doReturn(reg).when(bundleContext).registerService(anyString(), any(), any()); + + ServiceRegistration result = AzureDataStoreWrapper.registerService(ctx, mockImpl); + + assertSame(reg, result); + } + + @SuppressWarnings("unchecked") + private ComponentContext mockComponentContext() { + BundleContext bundleContext = mock(BundleContext.class); + ComponentContext ctx = mock(ComponentContext.class); + when(ctx.getBundleContext()).thenReturn(bundleContext); + // registerService must be pre-stubbed; without this, Mockito returns null and + // registerService() NPEs before the test can capture its arguments. + doReturn(mock(ServiceRegistration.class)).when(bundleContext).registerService(anyString(), any(), any()); + return ctx; + } + + // Guards against activeImpl accidentally becoming static; each wrapper must own its impl. + @Test + public void instancesHaveIndependentActiveImpl() throws DataStoreException { + AbstractSharedCachingDataStore mockImplB = mock( + AbstractSharedCachingDataStore.class, + withSettings().extraInterfaces(ConfigurableDataRecordAccessProvider.class)); + + AzureDataStoreWrapper wrapperB = new AzureDataStoreWrapper(); + wrapperB.activeImpl = mockImplB; + + DataRecord recA = mock(DataRecord.class, "recA"); + DataRecord recB = mock(DataRecord.class, "recB"); + when(mockImpl.addRecord(any())).thenReturn(recA); + when(mockImplB.addRecord(any())).thenReturn(recB); + + DataRecord resultA = wrapper.new DelegatingDataStore() + .addRecord(new ByteArrayInputStream(new byte[]{1})); + DataRecord resultB = wrapperB.new DelegatingDataStore() + .addRecord(new ByteArrayInputStream(new byte[]{2})); + + assertSame(recA, resultA); + assertSame(recB, resultB); + verify(mockImpl).addRecord(any()); + verify(mockImplB).addRecord(any()); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzuriteDockerRule.java 
b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzuriteDockerRule.java index cb709aca293..78cddb1e9d3 100644 --- a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzuriteDockerRule.java +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/AzuriteDockerRule.java @@ -33,7 +33,6 @@ import java.time.Duration; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.concurrent.atomic.AtomicReference; public class AzuriteDockerRule extends ExternalResource { @@ -48,8 +47,8 @@ public class AzuriteDockerRule extends ExternalResource { @Override protected void before() throws Throwable { azuriteContainer = new GenericContainer<>(DOCKER_IMAGE_NAME) + .withCommand("azurite-blob", "--blobHost", "0.0.0.0", "--skipApiVersionCheck") .withExposedPorts(10000) - .withEnv(Map.of("executable", "blob")) .withStartupTimeout(Duration.ofSeconds(30)); try { diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/RegressionCSOV8Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/RegressionCSOV8Test.java new file mode 100644 index 00000000000..5bf79a66743 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/RegressionCSOV8Test.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; + +/** + * Regression tests for CSO Release 24893 - V8 backend constant isolation. + *

+ * The CSO was caused by V8 silently adopting V12 constant values during the OAK-11267 refactoring. + * The V8 backend's MIN_MULTIPART_UPLOAD_PART_SIZE changed from 10MB to 256KB and its + * MAX_MULTIPART_UPLOAD_PART_SIZE from 100MB to 4000MB when V8 started importing + * AzureConstants (V12 values) instead of defining its own constants. + *

+ * Impact: DAM Archive Download buffers entire binary parts in memory. With the max part size + * now 4000MB (~4GB) instead of 100MB, downloading large assets triggered a Java OOM, crashing author pods. + *

+ * Fix (OAK-12164): Revert all V8 changes and enforce complete isolation from V12. V8 must + * define its own constants matching the Azure SDK V8 limits and must never import them from V12. + *

+ * Reference: CSO Release 24893 - DAM Archive Download OOM (GRANITE-66069, ASSETS-65164, OAK-12164) + */ +public class RegressionCSOV8Test { + + // V12 literal values used for isolation assertions — AzureConstantsV12 is package-private + // in the v12 subpackage and not accessible here. These must match AzureConstantsV12 values. + private static final long V12_MIN_MULTIPART_UPLOAD_PART_SIZE = 256L * 1024L; // AzureConstantsV12.AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE + private static final long V12_MAX_MULTIPART_UPLOAD_PART_SIZE = 4000L * 1024L * 1024L; // AzureConstantsV12.AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE + private static final long V12_MAX_BINARY_UPLOAD_SIZE = 190L * 1024L * 1024L * 1024L * 1024L; // AzureConstantsV12.AZURE_BLOB_MAX_BINARY_UPLOAD_SIZE + + // --- V8 contract: pin each constant at its correct V8 SDK value --- + + /** + * V8 MIN_MULTIPART_UPLOAD_PART_SIZE must be 10MB. + * The CSO regression changed this to 256KB (V12 value), generating 40x more presigned URIs: + * a 10GB download went from ~1,024 URIs to ~40,960, creating ~4MB JSON payloads with + * unknown impact on browsers, aemupload, and NUI workers. + * Ref: GRANITE-66069 (CSO 24893) + */ + @Test + public void v8_minPartSize_mustRemain10MB() { + long expected = 10L * 1024L * 1024L; // 10 MB + + assertEquals( + "V8 minPartSize must be 10MB (Azure SDK V8 limit). " + + "Regression to 256KB caused 40x URI explosion in CSO 24893. " + + "Ref: GRANITE-66069", + expected, AzureBlobStoreBackend.MIN_MULTIPART_UPLOAD_PART_SIZE); + } + + /** + * V8 MAX_MULTIPART_UPLOAD_PART_SIZE must be 100MB. + * The CSO regression changed this to 4000MB (V12 value). DAM Archive Download buffers + * entire binary parts in memory; with 4GB parts, downloading a 1+ GB JPEG triggered + * Java OOM, crashing all author pods on release groups 31 and 32. 
+ * Ref: ASSETS-65164 (CSO 24893) + */ + @Test + public void v8_maxPartSize_mustRemain100MB() { + long expected = 100L * 1024L * 1024L; // 100 MB + + assertEquals( + "V8 maxPartSize must be 100MB (Azure SDK V8 limit). " + + "Regression to 4000MB caused Java OOM on large DAM downloads in CSO 24893. " + + "Ref: ASSETS-65164", + expected, AzureBlobStoreBackend.MAX_MULTIPART_UPLOAD_PART_SIZE); + } + + /** + * V8 MAX_SINGLE_PUT_UPLOAD_SIZE must be 256MB. + * This is the Azure REST API limit for single PUT operations — shared between V8 and V12. + */ + @Test + public void v8_maxSinglePutUploadSize_mustBe256MB() { + long expected = 256L * 1024L * 1024L; // 256 MB + + assertEquals( + "V8 maxSinglePutUploadSize must be 256MB (Azure REST API Put Blob limit). " + + "Uploads <= 256MB use direct PUT; larger use block transfer. " + + "Ref: Azure Blob Storage REST API", + expected, AzureBlobStoreBackend.MAX_SINGLE_PUT_UPLOAD_SIZE); + } + + /** + * V8 MAX_BINARY_UPLOAD_SIZE must remain ~4.75TB (Azure SDK V8 limit). + * The CSO refactoring changed this to V12's ~190.7TiB by importing from AzureConstants. + * Ref: OAK-12164 + */ + @Test + public void v8_maxBinaryUploadSize_mustRemain4_75TB() { + long expected = (long) Math.floor(1024L * 1024L * 1024L * 1024L * 4.75); // ~4.75 TB + + assertEquals( + "V8 maxBinaryUploadSize must be ~4.75TB (Azure SDK V8 limit). " + + "Regression to V12's ~190.7TiB silently altered V8 upload size behavior. " + + "Ref: OAK-12164", + expected, AzureBlobStoreBackend.MAX_BINARY_UPLOAD_SIZE); + } + + // --- Isolation: V8 and V12 constants must not be equal --- + + /** + * V8 and V12 minPartSize must differ. + * Equality means V8 is importing V12 constants — the exact refactoring that caused the CSO. + * Ref: OAK-12164 (fix enforced full code path isolation) + */ + @Test + public void v8_minPartSize_mustNotEqualV12() { + assertNotEquals( + "V8 and V12 minPartSize must differ. " + + "Equality means V8 imported V12 constants — the CSO root cause. 
" + + "V8 must be 10MB; V12 is 256KB. Ref: OAK-12164", + V12_MIN_MULTIPART_UPLOAD_PART_SIZE, + AzureBlobStoreBackend.MIN_MULTIPART_UPLOAD_PART_SIZE); + } + + /** + * V8 and V12 maxPartSize must differ. + * Equality means V8 is importing V12 constants — the exact refactoring that caused the CSO. + * Ref: OAK-12164 + */ + @Test + public void v8_maxPartSize_mustNotEqualV12() { + assertNotEquals( + "V8 and V12 maxPartSize must differ. " + + "Equality means V8 imported V12 constants — the CSO root cause. " + + "V8 must be 100MB; V12 is 4000MB. Ref: OAK-12164", + V12_MAX_MULTIPART_UPLOAD_PART_SIZE, + AzureBlobStoreBackend.MAX_MULTIPART_UPLOAD_PART_SIZE); + } + + /** + * V8 and V12 maxBinaryUploadSize must differ. + * Equality means V8 is importing V12 constants — the exact refactoring that caused the CSO. + * Ref: OAK-12164 + */ + @Test + public void v8_maxBinaryUploadSize_mustNotEqualV12() { + assertNotEquals( + "V8 and V12 maxBinaryUploadSize must differ. " + + "V8 = ~4.75TB (SDK V8 limit), V12 = ~190.7TiB (SDK V12 limit). " + + "Equality means V8 imported V12 constants — the CSO root cause. " + + "Ref: OAK-12164", + V12_MAX_BINARY_UPLOAD_SIZE, + AzureBlobStoreBackend.MAX_BINARY_UPLOAD_SIZE); + } + + // --- Behavioral impact --- + + /** + * Part size ratio: V8 maxPartSize / minPartSize must be ~10x (100MB / 10MB). + * Collapse (e.g., 1:1 or 16000:1) indicates constant sharing or invalid refactoring. + */ + @Test + public void v8_partSize_ratio_isHealthy() { + long min = AzureBlobStoreBackend.MIN_MULTIPART_UPLOAD_PART_SIZE; + long max = AzureBlobStoreBackend.MAX_MULTIPART_UPLOAD_PART_SIZE; + + assertEquals( + "V8 part size ratio must be 10x (100MB max / 10MB min). " + + "Deviation indicates constant contamination from V12. " + + "Ref: CSO 24893", + 10.0, (double) max / min, 0.01); + } + + /** + * Presigned URI generation scalability: 10GB download with V8's 10MB minPartSize. + * Expected ~1,024 URIs. If V8 had adopted V12's 256KB, count would be ~40,960 (40x). 
+ */ + @Test + public void v8_presignedURI_generation_scalability_10GB_download() { + long minPartSize = AzureBlobStoreBackend.MIN_MULTIPART_UPLOAD_PART_SIZE; + long downloadSize = 10L * 1024L * 1024L * 1024L; // 10 GB + long uriCount = (downloadSize + minPartSize - 1) / minPartSize; + + assertEquals( + "V8 presigned URI count for 10GB download must be ~1,024 (with 10MB minPartSize). " + + "If V8 had adopted V12's 256KB, count would be ~40,960 — 40x explosion. " + + "Ref: GRANITE-66069 (CSO 24893)", + 1024L, uriCount); + } + + /** + * Memory buffering per part: V8's 100MB maxPartSize bounds per-part memory to 100MB. + * Consumers (e.g., DAM Archive Download) that buffer entire parts in memory are safe + * up to 100MB per part. If V8 had adopted V12's 4000MB, a 1+ GB file would OOM. + */ + @Test + public void v8_memory_buffering_per_part_bounded_at_100MB() { + long maxPartSize = AzureBlobStoreBackend.MAX_MULTIPART_UPLOAD_PART_SIZE; + + assertEquals( + "V8 maxPartSize bounds per-part memory buffering to 100MB. " + + "Safe for consumers that buffer entire parts (e.g., DAM Archive Download). " + + "If regressed to V12's 4000MB, 1+ GB downloads cause Java OOM. " + + "Ref: ASSETS-65164 (CSO 24893)", + 100L * 1024L * 1024L, maxPartSize); + } + + /** + * Heap safety: V8's 100MB maxPartSize is comfortably below typical 4-8GB heaps. + * Ensures even many concurrent part transfers won't exhaust memory. + */ + @Test + public void v8_maxPartSize_safe_relative_to_typical_heap() { + long maxPartSize = AzureBlobStoreBackend.MAX_MULTIPART_UPLOAD_PART_SIZE; + long minTypicalHeap = 4L * 1024L * 1024L * 1024L; // 4 GB + + assertTrue( + "V8 maxPartSize (100MB) must be well under typical heap (4GB). " + + "V12's 4000MB part size approaches typical heap, risking OOM under load. 
" + + "Ref: CSO 24893", + maxPartSize * 10 < minTypicalHeap); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12AuthIT.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12AuthIT.java new file mode 100644 index 00000000000..aac9354967a --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12AuthIT.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobContainerClientBuilder; +import com.azure.storage.blob.sas.BlobContainerSasPermission; +import com.azure.storage.blob.sas.BlobServiceSasSignatureValues; +import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzuriteDockerRule; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.time.OffsetDateTime; +import java.util.Properties; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Integration tests for AzureBlobStoreBackendV12 authentication paths. + * Verifies that connection string, SAS token (read-write, read-only, expired) auth + * behave correctly — mirrors AzureBlobStoreBackendTest for the v12 backend. + *

+ * Uses Azurite (Microsoft's open-source Azure Storage emulator) running in Docker + * instead of real Azure, so the tests need no external credentials and run offline. + */ +public class AzureBlobStoreBackendV12AuthIT { + + // Azurite Docker container shared across all tests in this class; starting it once + // keeps the suite fast. The container is torn down after the last test completes. + @ClassRule + public static final AzuriteDockerRule AZURITE = new AzuriteDockerRule(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private BlobContainerClient adminContainer; + private String containerName; + + private static BlobContainerSasPermission readWritePermissions() { + return new BlobContainerSasPermission() + .setReadPermission(true) + .setWritePermission(true) + .setCreatePermission(true) + .setDeletePermission(true) + .setListPermission(true) + .setAddPermission(true); + } + + private static BlobContainerSasPermission readOnlyPermissions() { + return new BlobContainerSasPermission() + .setReadPermission(true) + .setListPermission(true); + } + + @Before + public void setUp() { + // Unique per test so auth failures in one test can't bleed into another. + containerName = "auth-" + System.nanoTime(); + adminContainer = new BlobContainerClientBuilder() + .connectionString(azuriteConnectionString()) + .containerName(containerName) + .buildClient(); + adminContainer.create(); + } + + @After + public void tearDown() { + if (adminContainer != null) { + try { + adminContainer.deleteIfExists(); + } catch (Exception ignore) { + } + } + } + + /** + * Connection string is the primary auth path — backend must init, write, and persist a reference key. 
+ */ + @Test + public void initWithConnectionString_writesAndPersistsReferenceKey() throws DataStoreException { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(connectionStringProps()); + backend.init(); + + byte[] key1 = backend.getOrCreateReferenceKey(); + byte[] key2 = backend.getOrCreateReferenceKey(); + + assertNotNull("connection string auth must produce a reference key", key1); + assertTrue("reference key must be non-empty", key1.length > 0); + assertArrayEquals("reference key must be stable across calls", key1, key2); + } + + /** + * Connection string auth must allow writes — write a blob and verify it's retrievable. + */ + @Test + public void initWithConnectionString_writeAndReadBlob() throws DataStoreException, IOException { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(connectionStringProps()); + backend.init(); + + File file = writeTempFile("connection string write test"); + backend.write(new DataIdentifier("conntest1"), file); + + // getKeyName("conntest1") produces "conn-test1" (4-char prefix + dash + remainder) + assertTrue("written blob must exist in storage", + adminContainer.listBlobs().stream() + .anyMatch(b -> b.getName().contains("conn-test1"))); + } + + /** + * Read-write SAS must allow init and write — the backend needs to create the reference key on first run. 
+ */ + @Test + public void initWithSasToken_readWrite_canInitAndWrite() throws DataStoreException, IOException { + String sasToken = generateContainerSas(readWritePermissions(), OffsetDateTime.now().plusHours(1)); + + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(sasProps(sasToken)); + backend.init(); + + byte[] key = backend.getOrCreateReferenceKey(); + assertNotNull("read-write SAS must allow reference key creation", key); + assertTrue("reference key must be non-empty", key.length > 0); + } + + /** + * Read-only SAS must reject writes — a write attempt must throw DataStoreException, not silently succeed. + */ + @Test + public void initWithSasToken_readOnly_writeThrowsDataStoreException() + throws DataStoreException, IOException { + // Pre-populate the reference key so that init() can read it without needing write access. + AzureBlobStoreBackendV12 adminBackend = new AzureBlobStoreBackendV12(); + adminBackend.setProperties(connectionStringProps()); + adminBackend.init(); + + String sasToken = generateContainerSas(readOnlyPermissions(), OffsetDateTime.now().plusHours(1)); + + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(sasProps(sasToken)); + backend.init(); + + File file = writeTempFile("read only test"); + try { + backend.write(new DataIdentifier("readonly1"), file); + fail("write with read-only SAS must throw DataStoreException"); + } catch (DataStoreException expected) { + // correct: SAS does not grant write permission + } + } + + /** + * Expired SAS must reject all access — init must throw rather than silently succeeding with a stale token. + * BlobStorageException (RuntimeException) propagates unwrapped through withAzureSDKContext. 
+ */ + @Test + public void initWithExpiredSasToken_initThrowsDataStoreException() throws DataStoreException { + String sasToken = generateContainerSas(readWritePermissions(), OffsetDateTime.now().minusSeconds(60)); + + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(sasProps(sasToken)); + try { + backend.init(); + fail("init with expired SAS must throw"); + } catch (DataStoreException | RuntimeException ignored) { + // expected: expired SAS causes auth failure + } + } + + /** + * Reference key must be the same on repeated init — a diverged key would corrupt upload-token signing. + */ + @Test + public void initSecret_referenceKeyStableAcrossReinit() throws DataStoreException { + AzureBlobStoreBackendV12 b1 = new AzureBlobStoreBackendV12(); + b1.setProperties(connectionStringProps()); + b1.init(); + byte[] key1 = b1.getOrCreateReferenceKey(); + + AzureBlobStoreBackendV12 b2 = new AzureBlobStoreBackendV12(); + b2.setProperties(connectionStringProps()); + b2.init(); + byte[] key2 = b2.getOrCreateReferenceKey(); + + assertArrayEquals("reference key must be identical across backend re-inits", key1, key2); + } + + private String azuriteConnectionString() { + return "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + AzuriteDockerRule.ACCOUNT_KEY + + ";BlobEndpoint=" + AZURITE.getBlobEndpoint(); + } + + private Properties connectionStringProps() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, azuriteConnectionString()); + p.setProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME, containerName); + return p; + } + + private Properties sasProps(String sasToken) { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_SAS, sasToken); + p.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, AZURITE.getBlobEndpoint()); + p.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, AzuriteDockerRule.ACCOUNT_NAME); + 
p.setProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME, containerName); + // container already exists from @Before; skip the exists()/create() check that Azurite + // doesn't permit under container-scoped SAS tokens + p.setProperty(AzureConstantsV12.AZURE_CREATE_CONTAINER, "false"); + return p; + } + + private String generateContainerSas(BlobContainerSasPermission permissions, OffsetDateTime expiry) { + BlobServiceSasSignatureValues values = new BlobServiceSasSignatureValues(expiry, permissions); + return adminContainer.generateSas(values); + } + + private File writeTempFile(String content) throws IOException { + File f = folder.newFile(); + Files.write(f.toPath(), content.getBytes()); + return f; + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12IT.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12IT.java new file mode 100644 index 00000000000..ed0f7fe5bfd --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12IT.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.storage.blob.specialized.BlockBlobClient; +import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzuriteDockerRule; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUploadToken; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Base64; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration tests for data-loss safety properties of AzureBlobStoreBackendV12. + *

+ * Tests are in the v12 package to access package-private/protected methods of AzureBlobStoreBackendV12. + * Uses Azurite (Microsoft's open-source Azure Storage emulator running in Docker) — no external + * credentials needed, and the emulator's real block-blob commit semantics are required to exercise + * the concurrent-write and metadata-atomicity scenarios tested here. + *

+ * These tests verify findings from the pre-merge data-loss risk assessment that require real storage + * to exercise: metadata atomicity on upload completion, concurrent completeHttpUpload, and + * concurrent reference-key initialization. + */ +public class AzureBlobStoreBackendV12IT { + + // Azurite Docker container shared across all tests in this class; starting it once + // keeps the suite fast. The container is torn down after the last test completes. + @ClassRule + public static final AzuriteDockerRule AZURITE = new AzuriteDockerRule(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private AzureBlobStoreBackendV12 backend; + private String containerName; + + /** + * Constructs a blob key using the same format as getKeyName(identifier): + * first 4 hex chars, then "-", then the remainder. + */ + private static String newBlobId() { + String id = UUID.randomUUID().toString().replace("-", ""); // 32 hex chars + return id.substring(0, 4) + "-" + id.substring(4); + } + + @Before + public void setUp() throws DataStoreException, IOException { + containerName = "v12it-" + System.nanoTime(); + Properties props = azuriteProps(containerName); + + AzureDataStoreV12 store = new AzureDataStoreV12(); + store.setProperties(props); + store.setStagingSplitPercentage(0); // disable local staging cache; all writes go directly to Azurite + store.init(folder.newFolder().getAbsolutePath()); + + backend = (AzureBlobStoreBackendV12) store.getBackend(); + } + + @After + public void tearDown() { + // Nothing to close for the backend directly; the store lifecycle is managed by the test. + } + + /** + * A blob committed via completeHttpUpload must have a "lastModified" metadata key; its absence makes getLastModified() fall back to the Azure server timestamp and can cause deleteAllOlderThan() to GC the blob prematurely. 
+ */ + @Test + public void directUploadCompletion_hasLastModifiedMetadata() + throws Exception { + byte[] payload = new byte[4096]; + Arrays.fill(payload, (byte) 0x42); + + // Build a valid upload token manually — no SAS URL needed; completeHttpUpload + // only uses the blobId from the token to find and commit blocks. + String blobId = newBlobId(); + String uploadId = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes()); + DataRecordUploadToken token = new DataRecordUploadToken(blobId, uploadId); + byte[] refKey = backend.getOrCreateReferenceKey(); + String encodedToken = token.getEncodedToken(refKey); + + // Stage a block directly in Azurite using the container client (not via SAS URL). + String blockId = Base64.getEncoder().encodeToString("blk001".getBytes()); + backend.getAzureContainer() + .getBlobClient(blobId) + .getBlockBlobClient() + .stageBlock(blockId, new ByteArrayInputStream(payload), payload.length); + + // Complete the upload — this invokes commitBlocksAndGetSize, which now atomically + // includes lastModified metadata via BlockBlobCommitBlockListOptions. + DataRecord record = backend.completeHttpUpload(encodedToken); + assertNotNull("completeHttpUpload must return a DataRecord", record); + assertEquals("DataRecord length must equal payload size", payload.length, record.getLength()); + + // Verify the committed blob has lastModified metadata in Azurite. 
+ BlockBlobClient blobClient = backend.getAzureContainer() + .getBlobClient(blobId).getBlockBlobClient(); + Map metadata = blobClient.getProperties().getMetadata(); + assertTrue( + "committed blob must have 'lastModified' metadata; " + + "absent key causes getLastModified() to fall back to Azure server timestamp, " + + "which can cause deleteAllOlderThan() to prematurely GC the blob", + metadata != null && metadata.containsKey(AzureConstantsV12.AZURE_BLOB_LAST_MODIFIED_KEY)); + + long lastModified = Long.parseLong(metadata.get(AzureConstantsV12.AZURE_BLOB_LAST_MODIFIED_KEY)); + long now = System.currentTimeMillis(); + assertTrue("lastModified must be a recent epoch-millis value", + lastModified > 0 && lastModified <= now && lastModified > now - 60_000); + } + + /** + * Concurrent completeHttpUpload calls on the same token must not produce a zero-length DataRecord — a zero length means the committed-block fallback read an empty list before the first commit was durable. + */ + @Test + public void concurrentCompleteUpload_neitherResultZeroLength() throws Exception { + byte[] payload = new byte[8192]; + Arrays.fill(payload, (byte) 0x55); + + String blobId = newBlobId(); + String uploadId = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes()); + DataRecordUploadToken token = new DataRecordUploadToken(blobId, uploadId); + byte[] refKey = backend.getOrCreateReferenceKey(); + String encodedToken = token.getEncodedToken(refKey); + + // Stage blocks in Azurite directly. + String blockId = Base64.getEncoder().encodeToString("blk001".getBytes()); + backend.getAzureContainer() + .getBlobClient(blobId) + .getBlockBlobClient() + .stageBlock(blockId, new ByteArrayInputStream(payload), payload.length); + + // Two threads concurrently complete the same upload. 
+ CountDownLatch ready = new CountDownLatch(2); + CountDownLatch start = new CountDownLatch(1); + ExecutorService pool = Executors.newFixedThreadPool(2); + List> futures = new ArrayList<>(); + + for (int i = 0; i < 2; i++) { + futures.add(pool.submit(() -> { + ready.countDown(); + start.await(); + return backend.completeHttpUpload(encodedToken); + })); + } + + ready.await(); + start.countDown(); + pool.shutdown(); + boolean finished30 = pool.awaitTermination(30, TimeUnit.SECONDS); + if (!finished30) { + pool.shutdownNow(); + } + assertTrue("threads must finish within 30s", finished30); + + // At least one thread must succeed; neither must return a zero-length record. + int successes = 0; + for (Future f : futures) { + DataRecord result = null; + try { + result = f.get(); + successes++; + } catch (Exception e) { + // One thread may throw (e.g. DataStoreException) if the other already committed. + // That is acceptable — data safety means the successful commit has the right size. + continue; + } + assertNotNull(result); + assertNotEquals("concurrent completeHttpUpload must never return a zero-length DataRecord — " + + "a zero-length result means the committed-block fallback read a stale empty list", 0, result.getLength()); + assertEquals("DataRecord length must equal payload size", + payload.length, result.getLength()); + } + assertTrue("at least one completeHttpUpload call must succeed", successes >= 1); + } + + /** + * Concurrent backend initialization against the same container must produce a single consistent reference key; diverged keys cause upload-token verification failures that orphan staged blocks. + */ + @Test + public void concurrentReferenceKeyInit_allBackendsGetSameKey() throws Exception { + // Use the container already created in setUp, so all backends share the same storage. + Properties props = azuriteProps(containerName); + + int n = 4; + // ready: each thread signals when it has constructed its store (but not called init yet). 
+ // start: released once all threads are ready, so they race into init() simultaneously. + CountDownLatch ready = new CountDownLatch(n); + CountDownLatch start = new CountDownLatch(1); + ExecutorService pool = Executors.newFixedThreadPool(n); + List> futures = new ArrayList<>(); + + for (int i = 0; i < n; i++) { + futures.add(pool.submit(() -> { + AzureDataStoreV12 store = new AzureDataStoreV12(); + store.setProperties(props); + store.setStagingSplitPercentage(0); + ready.countDown(); + start.await(); + store.init(folder.newFolder().getAbsolutePath()); + AzureBlobStoreBackendV12 b = (AzureBlobStoreBackendV12) store.getBackend(); + return b.getOrCreateReferenceKey(); + })); + } + + ready.await(); + start.countDown(); + pool.shutdown(); + boolean finished60 = pool.awaitTermination(60, TimeUnit.SECONDS); + if (!finished60) { + pool.shutdownNow(); + } + assertTrue("backends must initialize within 60s", finished60); + + List keys = new ArrayList<>(); + for (Future f : futures) { + keys.add(f.get()); // propagates any exception from the init thread + } + + // All returned keys must be identical — a diverged key would cause token-verification failure. + byte[] first = keys.get(0); + assertNotNull("reference key must not be null", first); + for (int i = 1; i < keys.size(); i++) { + assertArrayEquals("all concurrently-initialized backends must hold the same reference key; " + + "diverged keys cause upload-token verification failures and orphaned blocks", first, keys.get(i)); + } + + // Count reference key blobs in Azurite — must be exactly one. 
+ long refKeyCount = backend.getAzureContainer() + .listBlobs(new com.azure.storage.blob.models.ListBlobsOptions() + .setPrefix(AzureConstantsV12.AZURE_BlOB_META_DIR_NAME + "/"), null) + .stream() + .filter(b -> b.getName().contains(AzureConstantsV12.AZURE_BLOB_REF_KEY) + || b.getName().contains("oak.datastore.key")) + .count(); + + assertTrue( + "concurrent init must result in at most one reference key in storage; " + + "found " + refKeyCount + " — multiple keys indicate a write race that corrupts token signing", + refKeyCount <= 1); + } + + private Properties azuriteProps(String containerName) { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, + "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + AzuriteDockerRule.ACCOUNT_KEY + + ";BlobEndpoint=" + AZURITE.getBlobEndpoint()); + p.setProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME, containerName); + p.setProperty(AzureConstantsV12.AZURE_CREATE_CONTAINER, "true"); + p.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, AZURITE.getBlobEndpoint()); + return p; + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12Test.java new file mode 100644 index 00000000000..7e0b16c92b3 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureBlobStoreBackendV12Test.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.storage.blob.BlobContainerClient; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.fail; + +/** + * Unit tests for AzureBlobStoreBackendV12. + */ +public class AzureBlobStoreBackendV12Test { + + /** + * getAllMetadataRecords must throw on storage error, not return empty — an empty result tells GC no records exist and causes it to delete all blobs. 
+ */ + @Test + public void getAllMetadataRecords_storageException_propagatesInsteadOfReturningEmpty() { + try { + new FailingContainerBackend().getAllMetadataRecords("prefix"); + fail("Storage failure must propagate; silent empty return causes GC to delete all blobs"); + } catch (RuntimeException expected) { + // correct: GC receives the error and aborts rather than sweeping against empty live-refs + } + } + + /** + * deleteAllMetadataRecords must throw on storage error, not silently succeed — a silent no-op leaves stale metadata that misleads the next GC mark phase. + */ + @Test + public void deleteAllMetadataRecords_storageException_propagatesInsteadOfSilentReturn() { + try { + new FailingContainerBackend().deleteAllMetadataRecords("prefix"); + fail("Storage failure must propagate; silent no-op on delete leaves GC in inconsistent state"); + } catch (RuntimeException expected) { + // correct: caller learns the delete failed and can retry or abort the GC phase + } + } + + /** + * IllegalArgumentException from Azure SDK validation inside uploadBlob must be caught and surfaced as DataStoreException, not escape unchecked (which would silently leave the blob unwritten). + */ + @Test + public void uploadBlob_illegalArgumentFromSdk_wrappedAsDataStoreException() throws Exception { + // Simulate what the Azure SDK does when block size > 4000 MiB. + // The FailingUploadBackend overrides uploadBlob to throw IllegalArgumentException, + // exactly what ParallelTransferOptions.setBlockSizeLong(oversized) does. 
+ FailingUploadBackend backend = new FailingUploadBackend(); + + java.io.File tempFile = java.io.File.createTempFile("safety-test", ".bin"); + tempFile.deleteOnExit(); + + try { + backend.write(new org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier("test1234567890abcdef"), tempFile); + fail("IllegalArgumentException from Azure SDK must be wrapped as DataStoreException, not escape unchecked"); + } catch (DataStoreException expected) { + // correct: caller sees a typed exception and can handle/retry appropriately + } catch (IllegalArgumentException leaked) { + fail("IllegalArgumentException leaked unchecked from write() — blob silently not stored: " + leaked); + } + } + + /** + * Concurrent cold-start must write exactly one key — a second write invalidates upload tokens signed against the first, orphaning staged blocks. + */ + @Test + public void getOrCreateReferenceKey_concurrentColdStart_writesOnce() throws Exception { + CountDownLatch writeStarted = new CountDownLatch(1); + CountDownLatch letWriteProceed = new CountDownLatch(1); + AtomicInteger writeCount = new AtomicInteger(0); + + // Backend that: returns null until a key is stored, and blocks in addMetadataRecord + // so Thread 2 can observe null before Thread 1 finishes writing. 
+ AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12() { + volatile byte[] stored = null; + + @Override + protected byte[] readMetadataBytes(String name) { + return stored; + } + + @Override + public void addMetadataRecord(InputStream input, String name) throws DataStoreException { + try { + writeCount.incrementAndGet(); + writeStarted.countDown(); + letWriteProceed.await(); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + byte[] chunk = new byte[256]; + int n; + while ((n = input.read(chunk)) != -1) buf.write(chunk, 0, n); + stored = buf.toByteArray(); + } catch (IOException e) { + throw new DataStoreException(e.getMessage()); + } catch (InterruptedException e) { + throw new DataStoreException(e.getMessage()); + } + } + }; + + ExecutorService exec = Executors.newFixedThreadPool(2); + // Thread 1: starts initialization, enters addMetadataRecord and blocks there + Future f1 = exec.submit(backend::getOrCreateReferenceKey); + writeStarted.await(5, TimeUnit.SECONDS); + + // Thread 2: starts while Thread 1 is blocked mid-write + Future f2 = exec.submit(backend::getOrCreateReferenceKey); + Thread.sleep(50); // give Thread 2 time to reach readMetadataBytes (no sync) or block on lock (sync) + + letWriteProceed.countDown(); // let Thread 1 finish writing + + byte[] key1 = f1.get(5, TimeUnit.SECONDS); + byte[] key2 = f2.get(5, TimeUnit.SECONDS); + exec.shutdown(); + + assertEquals("Concurrent cold-start must write exactly one key; a second write invalidates upload tokens from the first", 1, writeCount.get()); + assertArrayEquals("Both concurrent callers must return the same reference key", key1, key2); + } + + /** + * getDefaultBlobStorageDomain() provides the host embedded in SAS presigned download URIs. + * Wrong value makes client fetches fail against non-standard (Azurite, private cloud) endpoints. 
+ */ + @Test + public void getDefaultBlobStorageDomain_customEndpoint_returnsHostFromEndpoint() { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + Properties props = new Properties(); + props.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, "https://myaccount.blob.core.some.custom.endpoint.com"); + props.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "myaccount"); + backend.setProperties(props); + + assertEquals("myaccount.blob.core.some.custom.endpoint.com", backend.getDefaultBlobStorageDomain()); + } + + @Test + public void getDefaultBlobStorageDomain_noCustomEndpoint_returnsDefaultWindowsNet() { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + Properties props = new Properties(); + props.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "myaccount"); + backend.setProperties(props); + + assertEquals("myaccount.blob.core.windows.net", backend.getDefaultBlobStorageDomain()); + } + + @Test + public void getDefaultBlobStorageDomain_malformedEndpoint_fallsBackToAccountName() { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + Properties props = new Properties(); + props.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, "not a valid uri ://@@"); + props.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "myaccount"); + backend.setProperties(props); + + assertEquals("myaccount.blob.core.windows.net", backend.getDefaultBlobStorageDomain()); + } + + @Test + public void getDefaultBlobStorageDomain_noAccountAndNoEndpoint_returnsNull() { + AzureBlobStoreBackendV12 backend = new AzureBlobStoreBackendV12(); + backend.setProperties(new Properties()); + + assertNull(backend.getDefaultBlobStorageDomain()); + } + + static class FailingContainerBackend extends AzureBlobStoreBackendV12 { + @Override + protected BlobContainerClient getAzureContainer() throws DataStoreException { + throw new DataStoreException("simulated Azure connectivity failure"); + } + } + + static class 
FailingUploadBackend extends AzureBlobStoreBackendV12 { + @Override + protected BlobContainerClient getAzureContainer() throws DataStoreException { + com.azure.storage.blob.BlobContainerClient mock = + org.mockito.Mockito.mock(com.azure.storage.blob.BlobContainerClient.class); + com.azure.storage.blob.BlobClient blobClient = + org.mockito.Mockito.mock(com.azure.storage.blob.BlobClient.class); + com.azure.storage.blob.specialized.BlockBlobClient blockBlobClient = + org.mockito.Mockito.mock(com.azure.storage.blob.specialized.BlockBlobClient.class); + org.mockito.Mockito.when(mock.getBlobClient(org.mockito.ArgumentMatchers.anyString())) + .thenReturn(blobClient); + org.mockito.Mockito.when(blobClient.getBlockBlobClient()).thenReturn(blockBlobClient); + org.mockito.Mockito.when(blockBlobClient.exists()).thenReturn(false); + org.mockito.Mockito.when(blobClient.uploadFromFileWithResponse( + org.mockito.ArgumentMatchers.any(), + org.mockito.ArgumentMatchers.any(), + org.mockito.ArgumentMatchers.any())) + .thenThrow(new IllegalArgumentException("blockSize must be <= 4000 MiB")); + org.mockito.Mockito.when(blockBlobClient.getContainerClient()).thenReturn(mock); + return mock; + } + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataRecordAccessProviderV12IT.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataRecordAccessProviderV12IT.java new file mode 100644 index 00000000000..511a025fb25 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataRecordAccessProviderV12IT.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.apache.jackrabbit.oak.api.blob.BlobDownloadOptions; +import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzuriteDockerRule; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordDownloadOptions; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUpload; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUploadException; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUploadToken; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.Base64; +import java.util.Properties; +import java.util.UUID; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration 
tests for AzureDataStoreV12 direct upload/download URI generation via Azurite. + * Mirrors AzureDataRecordAccessProviderTest for the v12 backend. + */ +public class AzureDataRecordAccessProviderV12IT { + + @ClassRule + public static final AzuriteDockerRule AZURITE = new AzuriteDockerRule(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private AzureDataStoreV12 store; + private AzureBlobStoreBackendV12 backend; + + private static String newBlobId() { + String id = UUID.randomUUID().toString().replace("-", ""); + return id.substring(0, 4) + "-" + id.substring(4); + } + + @Before + public void setUp() throws DataStoreException, IOException { + String containerName = "v12access-" + System.nanoTime(); + + store = new AzureDataStoreV12(); + store.setProperties(azuriteProps(containerName)); + // 0% staging so all writes go directly to Azure — avoids local-staging code paths masking backend failures. + store.setStagingSplitPercentage(0); + store.init(folder.newFolder().getAbsolutePath()); + // setters only work after init() creates the backend + store.setDirectUploadURIExpirySeconds(3600); + store.setDirectDownloadURIExpirySeconds(3600); + + backend = (AzureBlobStoreBackendV12) store.getBackend(); + } + + @After + public void tearDown() { + try { + store.close(); + } catch (Exception ignore) { + } + } + + /** + * Upload initiation must return a token and at least one URI — without them the client cannot stage any blocks. + */ + @Test + public void initiateDirectUpload_returnsTokenAndURIs() throws DataRecordUploadException { + DataRecordUpload upload = store.initiateDataRecordUpload(1024 * 1024, 10); + + assertNotNull("upload object must be returned", upload); + assertNotNull("upload token must be present", upload.getUploadToken()); + assertFalse("at least one upload URI must be returned", upload.getUploadURIs().isEmpty()); + } + + /** + * Small files must use single-part upload — multipart for small data wastes block staging overhead. 
+ */ + @Test + public void initiateDirectUpload_smallFile_returnsSingleURI() throws DataRecordUploadException { + DataRecordUpload upload = store.initiateDataRecordUpload(1024, 1); + + assertNotNull(upload); + assertEquals("small file must get exactly one upload URI", 1, upload.getUploadURIs().size()); + } + + /** + * Large files must use multi-part upload — a single PUT is capped at 256 MiB by Azure. + */ + @Test + public void initiateDirectUpload_largeFile_returnsMultipleURIs() throws DataRecordUploadException { + long tenGB = 10L * 1024 * 1024 * 1024; + DataRecordUpload upload = store.initiateDataRecordUpload(tenGB, 50); + + assertNotNull(upload); + assertTrue("large file must require more than one URI", upload.getUploadURIs().size() > 1); + } + + /** + * Zero upload size is invalid — must throw rather than returning a URI that would create a zero-byte blob. + */ + @Test(expected = IllegalArgumentException.class) + public void initiateDirectUpload_zeroSize_throwsIllegalArgument() throws DataRecordUploadException { + store.initiateDataRecordUpload(0, 1); + } + + /** + * Negative upload size is always invalid — must be rejected before any Azure call is made. + */ + @Test(expected = IllegalArgumentException.class) + public void initiateDirectUpload_negativeSize_throwsIllegalArgument() throws DataRecordUploadException { + store.initiateDataRecordUpload(-1, 1); + } + + /** + * Completing a staged upload must return a DataRecord with the correct byte length. + *

+ * Direct binary upload is a three-phase protocol: initiate (get URIs + token) → + * client PUTs one or more blocks to Azure → complete (commit blocks, get DataRecord). + * This test short-circuits the client PUT by staging the block directly via the SDK, + * then verifies that complete() commits and returns a correct DataRecord. + */ + @Test + public void completeDirectUpload_stagedBlocks_returnsRecordWithCorrectLength() throws Exception { + byte[] payload = new byte[4096]; + Arrays.fill(payload, (byte) 0x77); + + String blobId = newBlobId(); + String uploadId = Base64.getEncoder().encodeToString(UUID.randomUUID().toString().getBytes()); + DataRecordUploadToken token = new DataRecordUploadToken(blobId, uploadId); + byte[] refKey = backend.getOrCreateReferenceKey(); + String encodedToken = token.getEncodedToken(refKey); + + String blockId = Base64.getEncoder().encodeToString("blk001".getBytes()); + backend.getAzureContainer() + .getBlobClient(blobId) + .getBlockBlobClient() + .stageBlock(blockId, new ByteArrayInputStream(payload), payload.length); + + DataRecord record = store.completeDataRecordUpload(encodedToken); + + assertNotNull("completed upload must return a DataRecord", record); + assertEquals("DataRecord length must equal staged payload size", payload.length, record.getLength()); + assertNotNull("DataRecord must have an identifier", record.getIdentifier()); + } + + /** + * Download URI must be returned for a blob that exists — clients cannot download without it. 
+ */ + @Test + public void getDownloadURI_existingBlob_returnsNonNullURI() throws DataStoreException, IOException { + DataRecord record = store.addRecord(new ByteArrayInputStream("download test".getBytes())); + + URI uri = store.getDownloadURI(record.getIdentifier(), DataRecordDownloadOptions.DEFAULT); + + assertNotNull("download URI must be returned for an existing blob", uri); + } + + /** + * Download URI for a non-existent blob must return null, not throw — callers handle null as "not available". + */ + @Test + public void getDownloadURI_nonExistentBlob_returnsNull() { + URI uri = store.getDownloadURI( + new DataIdentifier("nonexistentblob12345"), + DataRecordDownloadOptions.DEFAULT); + + assertNull("download URI for a non-existent blob must be null", uri); + } + + /** + * Download URI with a content-type hint must embed response-header override params (rsct) in the SAS query. + */ + @Test + public void getDownloadURI_withContentType_uriContainsContentTypeParam() + throws DataStoreException, IOException { + DataRecord record = store.addRecord(new ByteArrayInputStream("pdf content".getBytes())); + + DataRecordDownloadOptions options = DataRecordDownloadOptions.fromBlobDownloadOptions( + new BlobDownloadOptions("application/pdf", null, null, "inline")); + URI uri = store.getDownloadURI(record.getIdentifier(), options); + + assertNotNull("download URI with content-type options must not be null", uri); + String query = uri.toString(); + assertTrue("SAS must carry response content-type override (rsct)", + query.contains("rsct") || query.contains("application%2Fpdf")); + } + + private Properties azuriteProps(String containerName) { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, + "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + AzuriteDockerRule.ACCOUNT_KEY + + ";BlobEndpoint=" + AZURITE.getBlobEndpoint()); + 
p.setProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME, containerName); + p.setProperty(AzureConstantsV12.AZURE_CREATE_CONTAINER, "true"); + // required so getDefaultBlobStorageDomain() can resolve a non-null domain for SAS URI generation + p.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, AZURITE.getBlobEndpoint()); + return p; + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12IT.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12IT.java new file mode 100644 index 00000000000..fe07c0a2d62 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12IT.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.AzuriteDockerRule; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataRecord; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +/** + * Integration tests for AzureDataStoreV12 CRUD, deduplication, and GC via Azurite. + * Mirrors TestAzureDS / AzureDataStoreTest for the v12 backend. + */ +public class AzureDataStoreV12IT { + + @ClassRule + public static final AzuriteDockerRule AZURITE = new AzuriteDockerRule(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + private AzureDataStoreV12 store; + + @Before + public void setUp() throws DataStoreException, IOException { + store = new AzureDataStoreV12(); + store.setProperties(azuriteProps("v12ds-" + System.nanoTime())); + // 0% staging so all writes go directly to Azure — avoids local-staging code paths masking backend failures. + store.setStagingSplitPercentage(0); + store.init(folder.newFolder().getAbsolutePath()); + } + + @After + public void tearDown() { + try { + store.close(); + } catch (Exception ignore) { + } + } + + /** + * addRecord must return a record with correct length and a non-empty identifier. 
+ */ + @Test + public void testAddRecord() throws DataStoreException, IOException { + byte[] data = "hello world".getBytes(); + DataRecord record = store.addRecord(new ByteArrayInputStream(data)); + + assertNotNull("record must be returned", record); + assertEquals("record length must match input", data.length, record.getLength()); + assertFalse("record ID must be non-empty", record.getIdentifier().toString().isEmpty()); + } + + /** + * getRecord must return the same content and length as what was written. + */ + @Test + public void testGetRecord() throws DataStoreException, IOException { + byte[] data = "test data for get".getBytes(); + DataRecord added = store.addRecord(new ByteArrayInputStream(data)); + + DataRecord fetched = store.getRecord(added.getIdentifier()); + + assertNotNull("getRecord must not return null for an existing record", fetched); + assertEquals("fetched record length must match original", data.length, fetched.getLength()); + assertEquals("fetched record ID must match", added.getIdentifier(), fetched.getIdentifier()); + } + + /** + * getRecordIfStored for a non-existent ID must return null, not throw — callers treat null as "not found". + */ + @Test + public void testGetRecord_notFound_returnsNull() throws DataStoreException { + assertNull("getRecordIfStored on unknown ID must return null", + store.getRecordIfStored(new DataIdentifier("nonexistent1234567890abcdef"))); + } + + /** + * Same content must produce the same record ID — deduplication is the core space-saving contract. 
+ */ + @Test + public void testAddDuplicateRecord() throws DataStoreException, IOException { + byte[] data = "identical content".getBytes(); + DataRecord r1 = store.addRecord(new ByteArrayInputStream(data)); + DataRecord r2 = store.addRecord(new ByteArrayInputStream(data)); + + assertEquals("duplicate content must yield the same record ID", r1.getIdentifier(), r2.getIdentifier()); + } + + /** + * deleteRecord must remove the blob so that subsequent getRecord returns null. + */ + @Test + public void testDeleteRecord() throws DataStoreException, IOException { + DataRecord record = store.addRecord(new ByteArrayInputStream("to be deleted".getBytes())); + DataIdentifier id = record.getIdentifier(); + + store.deleteRecord(id); + + assertNull("deleted record must not be retrievable", store.getRecordIfStored(id)); + } + + /** + * Records of different sizes must all round-trip correctly — exercises small, medium, and large code paths. + */ + @Test + public void testRecordsOfVaryingSizes() throws DataStoreException, IOException { + int[] sizes = {100, 10 * 1024, 1024 * 1024}; + List ids = new ArrayList<>(); + + for (int size : sizes) { + byte[] data = new byte[size]; + Arrays.fill(data, (byte) 0x42); + DataRecord record = store.addRecord(new ByteArrayInputStream(data)); + assertEquals("stored record length must match for size=" + size, size, record.getLength()); + ids.add(record.getIdentifier()); + } + + for (int i = 0; i < sizes.length; i++) { + DataRecord fetched = store.getRecord(ids.get(i)); + assertNotNull("record must be retrievable for size=" + sizes[i], fetched); + assertEquals("fetched record length must match for size=" + sizes[i], sizes[i], fetched.getLength()); + } + } + + private Properties azuriteProps(String containerName) { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, + "DefaultEndpointsProtocol=http" + + ";AccountName=" + AzuriteDockerRule.ACCOUNT_NAME + + ";AccountKey=" + AzuriteDockerRule.ACCOUNT_KEY + + 
";BlobEndpoint=" + AZURITE.getBlobEndpoint()); + p.setProperty(AzureConstantsV12.AZURE_BLOB_CONTAINER_NAME, containerName); + p.setProperty(AzureConstantsV12.AZURE_CREATE_CONTAINER, "true"); + p.setProperty(AzureConstantsV12.AZURE_BLOB_ENDPOINT, AZURITE.getBlobEndpoint()); + return p; + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12Test.java new file mode 100644 index 00000000000..05f08c7b690 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/AzureDataStoreV12Test.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUploadException; +import org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordUploadOptions; +import org.apache.jackrabbit.oak.spi.blob.data.DataIdentifier; +import org.apache.jackrabbit.oak.spi.blob.data.DataStoreException; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * Unit tests for AzureDataStoreV12 on a freshly constructed instance on which init() is never called: record-length accessors, direct-access guards, and setters that must not throw. + */ +public class AzureDataStoreV12Test { + + @Test + public void getMinRecordLength_default_returns16k() { + assertEquals(16 * 1024, new AzureDataStoreV12().getMinRecordLength()); + } + + @Test + public void setMinRecordLength_updatesValue() { + AzureDataStoreV12 store = new AzureDataStoreV12(); + store.setMinRecordLength(32 * 1024); + assertEquals(32 * 1024, store.getMinRecordLength()); + } + + /** + * initiateDataRecordUpload must throw DataRecordUploadException, not NPE, when the store was never initialized. + */ + @Test(expected = DataRecordUploadException.class) + public void initiateDataRecordUpload_beforeInit_throwsDataRecordUploadException() + throws DataRecordUploadException { + new AzureDataStoreV12().initiateDataRecordUpload(1024, 1); + } + + /** + * The overload taking DataRecordUploadOptions has the same contract — it must throw DataRecordUploadException, not NPE, before init(). + */ + @Test(expected = DataRecordUploadException.class) + public void initiateDataRecordUpload_withOptions_beforeInit_throwsDataRecordUploadException() + throws DataRecordUploadException { + new AzureDataStoreV12().initiateDataRecordUpload(1024, 1, DataRecordUploadOptions.DEFAULT); + } + + /** + * completeDataRecordUpload must throw, not NPE, when the backend was never initialized.
+ */ + @Test(expected = DataRecordUploadException.class) + public void completeDataRecordUpload_beforeInit_throwsDataRecordUploadException() + throws DataRecordUploadException, DataStoreException { + new AzureDataStoreV12().completeDataRecordUpload("some-token"); + } + + /** + * getDownloadURI must return null, not NPE, when the backend was never initialized. + */ + @Test + public void getDownloadURI_beforeInit_returnsNull() { + assertNull(new AzureDataStoreV12().getDownloadURI( + new DataIdentifier("abc123"), + org.apache.jackrabbit.oak.plugins.blob.datastore.directaccess.DataRecordDownloadOptions.DEFAULT)); + } + + /** + * setDirectUploadURIExpirySeconds must not throw (e.g. NPE) when called before init(). + */ + @Test + public void setDirectUploadURIExpirySeconds_beforeInit_doesNotThrow() { + new AzureDataStoreV12().setDirectUploadURIExpirySeconds(300); + } + + /** + * setDirectDownloadURIExpirySeconds must not throw (e.g. NPE) when called before init(). + */ + @Test + public void setDirectDownloadURIExpirySeconds_beforeInit_doesNotThrow() { + new AzureDataStoreV12().setDirectDownloadURIExpirySeconds(300); + } + + /** + * setDirectDownloadURICacheSize must not throw (e.g. NPE) when called before init(). + */ + @Test + public void setDirectDownloadURICacheSize_beforeInit_doesNotThrow() { + new AzureDataStoreV12().setDirectDownloadURICacheSize(100); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12Test.java new file mode 100644 index 00000000000..f9365b973c3 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/BlobSasHeadersV12Test.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import com.azure.storage.blob.sas.BlobSasPermission; +import com.azure.storage.blob.sas.BlobServiceSasSignatureValues; +import org.junit.Test; + +import java.time.OffsetDateTime; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for BlobSasHeadersV12 — hasHeaders detection, applyTo null-safety and field mapping, fluent setters, and getters. + */ +public class BlobSasHeadersV12Test { + + @Test + public void hasHeaders_noFieldsSet_returnsFalse() { + assertFalse(new BlobSasHeadersV12().hasHeaders()); + } + + @Test + public void hasHeaders_oneFieldSet_returnsTrue() { + assertTrue(new BlobSasHeadersV12().setContentType("application/octet-stream").hasHeaders()); + } + + @Test + public void hasHeaders_allFieldsSet_returnsTrue() { + assertTrue(new BlobSasHeadersV12("no-cache", "inline", "gzip", "en", "text/plain").hasHeaders()); + } + + /** + * A null sasValues argument must not cause a NullPointerException.
+ */ + @Test + public void applyTo_nullSasValues_doesNotThrow() { + new BlobSasHeadersV12("cc", "cd", "ce", "cl", "ct").applyTo(null); + } + + /** + * All five response-header override fields (rscc, rscd, rsce, rscl, rsct) must be wired through to the SAS. + * If one is missing, the service returns the blob's stored property for that header instead of the override. + */ + @Test + public void applyTo_allFieldsSet_appliesAllToSasValues() { + BlobSasHeadersV12 headers = new BlobSasHeadersV12("no-cache", "inline", "gzip", "en", "application/json"); + BlobServiceSasSignatureValues sas = new BlobServiceSasSignatureValues( + OffsetDateTime.now().plusHours(1), BlobSasPermission.parse("r")); + + headers.applyTo(sas); + + assertEquals("no-cache", sas.getCacheControl()); + assertEquals("inline", sas.getContentDisposition()); + assertEquals("gzip", sas.getContentEncoding()); + assertEquals("en", sas.getContentLanguage()); + assertEquals("application/json", sas.getContentType()); + } + + /** + * Null fields in BlobSasHeadersV12 must not overwrite non-null values already set on the sas object.
+ */ + @Test + public void applyTo_nullFields_doesNotOverrideExistingValues() { + BlobServiceSasSignatureValues sas = new BlobServiceSasSignatureValues( + OffsetDateTime.now().plusHours(1), BlobSasPermission.parse("r")); + sas.setCacheControl("no-store"); + + new BlobSasHeadersV12().applyTo(sas); + + assertEquals("no-store", sas.getCacheControl()); + } + + @Test + public void setters_returnThis_allowsChaining() { + BlobSasHeadersV12 h = new BlobSasHeadersV12(); + assertSame(h, h.setCacheControl("cc")); + assertSame(h, h.setContentDisposition("cd")); + assertSame(h, h.setContentEncoding("ce")); + assertSame(h, h.setContentLanguage("cl")); + assertSame(h, h.setContentType("ct")); + } + + @Test + public void getters_returnSetValues() { + BlobSasHeadersV12 h = new BlobSasHeadersV12("cc", "cd", "ce", "cl", "ct"); + assertEquals("cc", h.getCacheControl()); + assertEquals("cd", h.getContentDisposition()); + assertEquals("ce", h.getContentEncoding()); + assertEquals("cl", h.getContentLanguage()); + assertEquals("ct", h.getContentType()); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/RegressionCSOV12Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/RegressionCSOV12Test.java new file mode 100644 index 00000000000..89b64dfd638 --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/RegressionCSOV12Test.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Regression tests for CSO Release 24893 - V12 backend constant and behavior validation. + *

+ * These tests ensure that V12's intentional design choices (256KB minimum and 4000MB maximum part sizes, higher URI counts) + * remain stable and do not regress. The CSO incident highlighted risks when constants change silently; + * these tests document V12's contract and prevent future refactoring from introducing unexpected changes. + *

+ * Context: V12 uses 256KB minPartSize and 4000MB maxPartSize, intentionally different from V8's + * 10MB/100MB. This is by design for the V12 SDK. These tests protect that design. + *

+ * Reference: CSO Release 24893 - DAM Archive Download OOM (GRANITE-66069, ASSETS-65164) + */ +public class RegressionCSOV12Test { + + /** + * V12 MIN_MULTIPART_UPLOAD_PART_SIZE must be 256KB. + * This is intentional for the V12 backend; it is far smaller than V8's 10MB minimum part size. + * Changing this affects URI generation and downstream consumer systems. + */ + @Test + public void v12_minPartSize_mustBe256KB() { + long expected = 256L * 1024L; // 256 KB + long actual = AzureConstantsV12.AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE; + + assertEquals( + "V12 minPartSize must be 256KB. Changes here affect presigned URI generation " + + "and downstream systems (browsers, aemupload, NUI workers). " + + "Ref: CSO 24893 - 256KB generates ~40x more URIs than V8's 10MB", + expected, actual); + } + + /** + * V12 MAX_MULTIPART_UPLOAD_PART_SIZE must be 4000MB (4GB). + * This is the Azure SDK V12 limit for single block uploads. + * Reducing this would degrade throughput; increasing beyond Azure's limit is invalid. + */ + @Test + public void v12_maxPartSize_mustBe4000MB() { + long expected = 4000L * 1024L * 1024L; // 4000 MB + long actual = AzureConstantsV12.AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE; + + assertEquals( + "V12 maxPartSize must be 4000MB (Azure SDK V12 block upload limit). " + + "This allows efficient large file uploads via parallel transfer options. " + + "Ref: Azure SDK v12 BlockBlobClient limits", + expected, actual); + } + + /** + * V12 MAX_SINGLE_PUT_UPLOAD_SIZE must be 256MB. + * This is the Azure REST API limit for single PUT operations (non-block uploads). + * Uploads smaller than this use direct PUT; larger use block commits. + */ + @Test + public void v12_maxSinglePutUploadSize_mustBe256MB() { + long expected = 256L * 1024L * 1024L; // 256 MB + long actual = AzureConstantsV12.AZURE_BLOB_MAX_SINGLE_PUT_UPLOAD_SIZE; + + assertEquals( + "V12 maxSinglePutUploadSize must be 256MB (Azure REST API limit). " + + "Uploads <= 256MB use direct PUT; larger use block transfer.
" + + "Ref: Azure Blob Storage REST API Put Blob operation", + expected, actual); + } + + /** + * V12 MAX_BINARY_UPLOAD_SIZE must be ~190.7TiB. + * This is derived from Azure's 50,000 block limit and 4GB max block size. + * 50,000 blocks * 4GB/block = 200,000GB ≈ 190.7TiB + */ + @Test + public void v12_maxBinaryUploadSize_mustBe190_7TiB() { + long expected = 190L * 1024L * 1024L * 1024L * 1024L; // exactly 190 TiB; NOTE(review): Javadoc and message say ~190.7 TiB - confirm against AzureConstantsV12 + long actual = AzureConstantsV12.AZURE_BLOB_MAX_BINARY_UPLOAD_SIZE; + + assertEquals( + "V12 maxBinaryUploadSize must be ~190.7TiB (50k blocks * 4GB max block). " + + "Derived from Azure Blob Storage limits. " + + "Ref: Azure Blob Storage block limits (50,000 blocks max)", + expected, actual); + } + + /** + * V12 BUFFERED_STREAM_THRESHOLD must be 8MiB. + * Streams smaller than 8MiB are buffered to memory; larger are buffered to disk. + * This threshold prevents excessive memory use during large uploads. + */ + @Test + public void v12_bufferedStreamThreshold_mustBe8MiB() { + long expected = 8L * 1024L * 1024L; // 8 MiB + long actual = AzureConstantsV12.AZURE_BLOB_BUFFERED_STREAM_THRESHOLD; + + assertEquals( + "V12 bufferedStreamThreshold must be 8MiB. Larger streams use disk buffering. " + + "This guards against memory exhaustion during large concurrent uploads. " + + "Ref: AzureConstantsV12", + expected, actual); + } + + /** + * V12 MAX_ALLOWABLE_UPLOAD_URIS must be 50,000. + * This is the Azure Blob Storage hard limit on blocks per blob. + * Exceeding this causes upload failures. + */ + @Test + public void v12_maxAllowableUploadURIs_mustBe50000() { + int expected = 50000; + int actual = AzureConstantsV12.AZURE_BLOB_MAX_ALLOWABLE_UPLOAD_URIS; + + assertEquals( + "V12 maxAllowableUploadURIs must be 50,000 (Azure hard limit on blocks/blob). " + + "Presigned URI generation must respect this to prevent upload failures. 
" + + "Ref: Azure Blob Storage limits", + expected, actual); + } + + /** + * V12 DEFAULT_CONCURRENT_REQUEST_COUNT must be 5.
+ * This is the default parallelism for multi-part uploads. + * Tuning this affects throughput vs. memory consumption. + */ + @Test + public void v12_defaultConcurrentRequestCount_mustBe5() { + int expected = 5; + int actual = AzureConstantsV12.AZURE_BLOB_DEFAULT_CONCURRENT_REQUEST_COUNT; + + assertEquals( + "V12 defaultConcurrentRequestCount must be 5 (default parallelism). " + + "Affects upload throughput. Changing this impacts performance tuning. " + + "Ref: AzureConstantsV12", + expected, actual); + } + + /** + * V12 MAX_CONCURRENT_REQUEST_COUNT must be 10. + * This is the upper cap on parallelism to prevent overwhelming Azure. + * Exceeding this can cause throttling or transient failures. + */ + @Test + public void v12_maxConcurrentRequestCount_mustBe10() { + int expected = 10; + int actual = AzureConstantsV12.AZURE_BLOB_MAX_CONCURRENT_REQUEST_COUNT; + + assertEquals( + "V12 maxConcurrentRequestCount must be 10 (concurrency cap). " + + "Higher values risk Azure throttling. " + + "Ref: AzureConstantsV12, Azure rate limiting", + expected, actual); + } + + /** + * V12 PARALLEL_UPLOAD_BLOCK_SIZE must be 4MiB. + * This is the per-block size used in parallel upload streaming (BlobOutputStream). + * Larger blocks reduce roundtrips; smaller blocks reduce memory per concurrent block. + */ + @Test + public void v12_parallelUploadBlockSize_mustBe4MiB() { + long expected = 4L * 1024L * 1024L; // 4 MiB + long actual = AzureConstantsV12.AZURE_BLOB_PARALLEL_UPLOAD_BLOCK_SIZE; + + assertEquals( + "V12 parallelUploadBlockSize must be 4MiB (per-block size for BlobOutputStream). " + + "Tuning this affects upload concurrency and memory footprint. " + + "Ref: AzureConstantsV12, Azure SDK v12 BlobOutputStream", + expected, actual); + } + + /** + * V12 PARALLEL_UPLOAD_MAX_CONCURRENCY must be 4. + * This is the default number of concurrent block uploads for streaming. + * Higher values increase throughput at cost of memory (4 blocks * 4MiB = 16MiB overhead).
+ */ + @Test + public void v12_parallelUploadMaxConcurrency_mustBe4() { + int expected = 4; + int actual = AzureConstantsV12.AZURE_BLOB_PARALLEL_UPLOAD_MAX_CONCURRENCY; + + assertEquals( + "V12 parallelUploadMaxConcurrency must be 4 (concurrent streaming blocks). " + + "Memory overhead: 4 blocks * 4MiB = 16MiB. " + + "Ref: AzureConstantsV12, Azure SDK v12 ParallelTransferOptions", + expected, actual); + } + + /** + * Part size ratio test: ensures V12 minPartSize << maxPartSize. + * Ratio ~16000x (4000MB / 256KB) is healthy. Collapse indicates misconfiguration. + */ + @Test + public void v12_partSize_ratio_isHealthy() { + long minSize = AzureConstantsV12.AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE; + long maxSize = AzureConstantsV12.AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE; + + double ratio = (double) maxSize / minSize; + double expectedRatio = 16000.0; // 4000MB / 256KB + + assertEquals( + "V12 part size ratio must be ~16000x (4000MB max / 256KB min). " + + "Deviation indicates misconfiguration or refactoring error. " + + "Ref: CSO 24893", + expectedRatio, ratio, 1.0); + } + + /** + * Presigned URI generation scalability: 10GB download with V12's 256KB minPartSize. + * Expected: ~40,960 URIs (10GB / 256KB). + * This documents the URI explosion that motivated the CSO investigation. + */ + @Test + public void v12_presignedURI_generation_scalability_10GB_download() { + long minPartSize = AzureConstantsV12.AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE; + long downloadSize = 10L * 1024L * 1024L * 1024L; // 10 GB + long uriCount = (downloadSize + minPartSize - 1) / minPartSize; // ceiling division + + long expectedURICount = 40960; // exactly 10 GiB / 256 KiB + long actualURICount = uriCount; + + assertEquals( + "V12 presigned URI count for 10GB download is ~40,960 (with 256KB minPartSize). " + + "This is 40x more than V8's ~1024 URIs, creating ~4MB JSON payloads. " + + "Downstream systems (browsers, aemupload, NUI) must handle this.
" + + "Ref: GRANITE-66069 (CSO 24893)", + expectedURICount, actualURICount); + } + + /** + * Azure's 50,000 block limit caps the maximum uploadable blob size at current minPartSize. + * Max size = 50,000 blocks * 256KB = 12,500 MiB (≈12.2 GiB). + * Files larger than this at 256KB min part size cannot be uploaded without increasing the block size. + * The CSO incident tested a ~12.8GB download which generated ~48,805 URIs — near but under the limit. + */ + @Test + public void v12_maxUploadableSize_at_minPartSize_is_12_5GiB() { + long minPartSize = AzureConstantsV12.AZURE_BLOB_MIN_MULTIPART_UPLOAD_PART_SIZE; // 256KB + long maxBlocks = AzureConstantsV12.AZURE_BLOB_MAX_ALLOWABLE_UPLOAD_URIS; // 50,000 + long maxSize = minPartSize * maxBlocks; // 12,500 MiB (≈12.2 GiB) + + long expected = 256L * 1024L * 50_000L; + + assertEquals( + "Max uploadable size at V12 minPartSize (256KB) is 50,000 * 256KB = 12,500 MiB (~12.2 GiB). " + + "Files larger than this require the SDK to negotiate a larger block size. " + + "The CSO tested a ~12.8GB download (~48,805 URIs) approaching this boundary. " + + "Ref: CSO 24893 incident report, Azure block limit", + expected, maxSize); + } + + /** + * Memory buffering per part: V12's maxPartSize allows up to 4GB buffered per part. + * This is intentional for V12's higher-throughput design. + * Consumers (DAM, Archive Download) must stream, not buffer entire parts in memory. + */ + @Test + public void v12_memory_buffering_per_part_bounded_at_4GB() { + long maxPartSize = AzureConstantsV12.AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE; + long expectedMax = 4000L * 1024L * 1024L; // 4000 MB = 4 GB + + assertEquals( + "V12 max part size bounds potential memory buffering per part to 4GB. " + + "This is acceptable IF downstream consumers stream, not buffer entirely. " + + "If a consumer buffers entire parts in memory (like DAM did in CSO), " + + "downloading large assets will trigger OOM.
" + + "Ref: ASSETS-65164 (CSO 24893) - DAM buffered entire parts, causing OOM", + expectedMax, maxPartSize); + } + + /** + * Streaming requirement documentation: V12's large part sizes require streaming consumers. + * A 4GB part cannot be buffered on typical 4-8GB heaps. + * This test documents the architectural constraint; the assertion only checks that the max part size is below an assumed 8GB heap. + */ + @Test + public void v12_requires_streaming_consumers_for_large_parts() { + long maxPartSize = AzureConstantsV12.AZURE_BLOB_MAX_MULTIPART_UPLOAD_PART_SIZE; // 4GB + long typicalHeap = 8L * 1024L * 1024L * 1024L; // 8GB + + assertTrue( + "V12 part size (4GB) approaches typical heap size (8GB). " + + "Buffering entire parts would leave no room for other objects. " + + "Downstream consumers MUST stream data, not buffer. " + + "This was the root cause of CSO 24893: DAM buffered entire parts. " + + "Ref: ASSETS-65164 (CSO 24893)", + maxPartSize < typicalHeap); + } +} diff --git a/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12Test.java b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12Test.java new file mode 100644 index 00000000000..1662bd8c1fa --- /dev/null +++ b/oak-blob-cloud-azure/src/test/java/org/apache/jackrabbit/oak/blob/cloud/azure/blobstorage/v12/UtilsV12Test.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.blob.cloud.azure.blobstorage.v12; + +import org.junit.Test; + +import java.io.IOException; +import java.util.Properties; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Unit tests for UtilsV12 — connection-string construction, auth priority, proxy options, retry config, and readConfig failure on a missing file. + */ +public class UtilsV12Test { + + @Test + public void getConnectionStringForSas_withBlobEndpoint_usesBlobEndpointFormat() { + String result = UtilsV12.getConnectionStringForSas("mySas", "https://myaccount.blob.core.windows.net", "myaccount"); + assertTrue(result.startsWith("BlobEndpoint=https://myaccount.blob.core.windows.net")); + assertTrue(result.contains("SharedAccessSignature=mySas")); + } + + @Test + public void getConnectionStringForSas_noBlobEndpoint_usesAccountNameFormat() { + String result = UtilsV12.getConnectionStringForSas("mySas", "", "myaccount"); + assertTrue(result.startsWith("AccountName=myaccount")); + assertTrue(result.contains("SharedAccessSignature=mySas")); + } + + @Test + public void getConnectionString_withBlobEndpoint_includesEndpointInString() { + String result = UtilsV12.getConnectionString("acc", "key123", "https://custom.endpoint.net"); + assertTrue(result.contains("AccountName=acc")); + assertTrue(result.contains("AccountKey=key123")); + assertTrue(result.contains("BlobEndpoint=https://custom.endpoint.net")); + } + +
@Test + public void getConnectionString_noBlobEndpoint_omitsBlobEndpointField() { + String result = UtilsV12.getConnectionString("acc", "key123", null); + assertTrue(result.contains("AccountName=acc")); + assertTrue(result.contains("AccountKey=key123")); + assertFalse(result.contains("BlobEndpoint")); + } + + @Test + public void getConnectionString_emptyEndpoint_omitsBlobEndpointField() { + String result = UtilsV12.getConnectionString("acc", "key123", ""); + assertFalse(result.contains("BlobEndpoint")); + } + + /** + * An explicit connection string takes priority over a configured SAS (the account-key case is not exercised here). + */ + @Test + public void getConnectionStringFromProperties_explicitConnectionString_takesPriority() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_CONNECTION_STRING, "explicit-connection-string"); + p.setProperty(AzureConstantsV12.AZURE_SAS, "should-not-be-used"); + assertEquals("explicit-connection-string", UtilsV12.getConnectionStringFromProperties(p)); + } + + /** + * The configured SAS value is used when no explicit connection string is present. + */ + @Test + public void getConnectionStringFromProperties_sasUri_usedWhenNoConnectionString() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_SAS, "mySas"); + p.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "acc"); + String result = UtilsV12.getConnectionStringFromProperties(p); + assertTrue(result.contains("mySas")); + } + + /** + * Falls back to account name + key when neither connection string nor SAS is set.
+ */ + @Test + public void getConnectionStringFromProperties_accountKey_fallbackWhenNoSas() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_NAME, "acc"); + p.setProperty(AzureConstantsV12.AZURE_STORAGE_ACCOUNT_KEY, "key123"); + String result = UtilsV12.getConnectionStringFromProperties(p); + assertTrue(result.contains("AccountName=acc")); + assertTrue(result.contains("AccountKey=key123")); + } + + @Test + public void computeProxyOptions_hostAndPortSet_returnsProxyOptions() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.PROXY_HOST, "proxy.example.com"); + p.setProperty(AzureConstantsV12.PROXY_PORT, "8080"); + assertNotNull(UtilsV12.computeProxyOptions(p)); + } + + @Test + public void computeProxyOptions_noHostOrPort_returnsNull() { + assertNull(UtilsV12.computeProxyOptions(new Properties())); + } + + @Test + public void computeProxyOptions_hostWithoutPort_returnsNull() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.PROXY_HOST, "proxy.example.com"); + assertNull(UtilsV12.computeProxyOptions(p)); + } + + @Test + public void computeProxyOptions_portWithoutHost_returnsNull() { + Properties p = new Properties(); + p.setProperty(AzureConstantsV12.PROXY_PORT, "8080"); + assertNull(UtilsV12.computeProxyOptions(p)); + } + + /** + * A negative retry count means "use SDK defaults" — return null so the SDK applies its own policy. + */ + @Test + public void getRetryOptions_negativeCount_returnsNull() { + assertNull(UtilsV12.getRetryOptions("-1", null, null)); + } + + /** + * Zero retries must yield non-null options; presumably a fixed policy with maxTries=1 (no retry) — the policy type is not asserted here. + */ + @Test + public void getRetryOptions_zeroRetries_returnsNonNull() { + assertNotNull(UtilsV12.getRetryOptions("0", null, null)); + } + + /** + * Positive retry count must yield non-null options; presumably an exponential policy — the policy type is not asserted here.
+ */ + @Test + public void getRetryOptions_positiveCount_returnsNonNull() { + assertNotNull(UtilsV12.getRetryOptions("3", null, null)); + } + + @Test(expected = IOException.class) + public void readConfig_nonExistentFile_throwsIOException() throws IOException { + UtilsV12.readConfig("/tmp/does-not-exist-" + System.nanoTime() + ".properties"); + } +} diff --git a/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/IndexVersionSelectionIT.java b/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/IndexVersionSelectionIT.java index a7a2a82ff36..3ff1af8dc7a 100644 --- a/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/IndexVersionSelectionIT.java +++ b/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/IndexVersionSelectionIT.java @@ -290,12 +290,14 @@ public Option[] configuration() throws IOException, URISyntaxException { mavenBundle("org.apache.felix", "org.apache.felix.configadmin", "1.9.20"), mavenBundle("org.apache.felix", "org.apache.felix.fileinstall", "3.2.6"), mavenBundle("org.ops4j.pax.logging", "pax-logging-api", "1.7.2"), + // Jackson dependency for object serialisation.
// (these only need to be defined here when the versions are different from the ones // defined in the project -- otherwise -> "bundle symbolic name and version are not unique") - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-core").version("2.19.1"), - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-annotations").version("2.19.1"), - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-databind").version("2.19.1"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-core").version("2.20.2"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-annotations").version("2.20"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-databind").version("2.20.2"), + mavenBundle().groupId("com.github.ben-manes.caffeine").artifactId("caffeine").version("3.1.8"), frameworkProperty("repository.home").value("target"), diff --git a/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/OSGiIT.java b/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/OSGiIT.java index 8a33d934cb1..75a0c717029 100644 --- a/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/OSGiIT.java +++ b/oak-it-osgi/src/test/java/org/apache/jackrabbit/oak/osgi/OSGiIT.java @@ -74,9 +74,9 @@ public Option[] configuration() throws IOException, URISyntaxException { // Jackson dependency for object serialisation. 
// (these only need to be defined here when the versions are different from the ones // defined in the project -- otherwise -> "bundle symbolic name and version are not unique") - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-core").version("2.19.1"), - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-annotations").version("2.19.1"), - mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-databind").version("2.19.1"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-core").version("2.20.2"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-annotations").version("2.20"), + // mavenBundle().groupId("com.fasterxml.jackson.core").artifactId("jackson-databind").version("2.20.2"), mavenBundle().groupId("com.github.ben-manes.caffeine").artifactId("caffeine").version("3.1.8"), diff --git a/oak-it-osgi/test-bundles.xml b/oak-it-osgi/test-bundles.xml index 96fdb9928d7..14e87fd7d3d 100644 --- a/oak-it-osgi/test-bundles.xml +++ b/oak-it-osgi/test-bundles.xml @@ -36,7 +36,9 @@ commons-codec:commons-codec commons-io:commons-io commons-logging:commons-logging + com.fasterxml.jackson.core:jackson-annotations com.fasterxml.jackson.core:jackson-core + com.fasterxml.jackson.core:jackson-databind org.apache.commons:commons-lang3 org.apache.commons:commons-math3 org.apache.commons:commons-text diff --git a/oak-parent/pom.xml b/oak-parent/pom.xml index ab277e5fd14..a3232c25a9e 100644 --- a/oak-parent/pom.xml +++ b/oak-parent/pom.xml @@ -64,7 +64,8 @@ 2.1.214 1.28.5 10.16.1.1 - 2.19.4 + 2.20.2 + 2.20 1.21.4 4.14.0 2.6.17 @@ -709,7 +710,7 @@ com.fasterxml.jackson.core jackson-annotations - ${jackson.version} + ${jackson.annotations.version} com.fasterxml.jackson.core