All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dalet.vfs2.provider.azure.AzFileObject Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.dalet.vfs2.provider.azure;

import com.azure.core.util.polling.LongRunningOperationStatus;
import com.azure.core.util.polling.PollResponse;
import com.azure.core.util.polling.SyncPoller;
import com.azure.storage.blob.BlobClient;
import com.azure.storage.blob.BlobContainerAsyncClient;
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.models.BlobCopyInfo;
import com.azure.storage.blob.models.BlobItem;
import com.azure.storage.blob.models.BlobProperties;
import com.azure.storage.blob.models.BlobRequestConditions;
import com.azure.storage.blob.models.BlobStorageException;
import com.azure.storage.blob.models.ParallelTransferOptions;
import com.azure.storage.blob.sas.BlobSasPermission;
import com.azure.storage.blob.sas.BlobServiceSasSignatureValues;
import com.azure.storage.blob.specialized.BlobOutputStream;
import com.azure.storage.common.sas.SasProtocol;
import org.apache.commons.vfs2.FileName;
import org.apache.commons.vfs2.FileNotFolderException;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelector;
import org.apache.commons.vfs2.FileSystemException;
import org.apache.commons.vfs2.FileType;
import org.apache.commons.vfs2.NameScope;
import org.apache.commons.vfs2.Selectors;
import org.apache.commons.vfs2.provider.AbstractFileName;
import org.apache.commons.vfs2.provider.AbstractFileObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.time.Duration;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
 * The main FileObject class in this provider.  It holds most of the API callbacks
 * for the provider.
 */
public class AzFileObject extends AbstractFileObject {

    private static final Logger log = LoggerFactory.getLogger(AzFileObject.class);

    protected static final long MEGABYTES_TO_BYTES_MULTIPLIER = (int) Math.pow(2.0, 20.0);

    // Increased default size : Fix for FLEX-64152
    protected static final int DEFAULT_UPLOAD_BLOCK_SIZE_MB = 8;
    private static final int TWENTY_FOUR_HOURS_IN_SEC = 24 * 60 * 60;

    private static final long STREAM_BUFFER_SIZE_MB = DEFAULT_UPLOAD_BLOCK_SIZE_MB * MEGABYTES_TO_BYTES_MULTIPLIER;
    private static final long BLOB_COPY_THRESHOLD_MB = 256 * MEGABYTES_TO_BYTES_MULTIPLIER;

    private static final int AZURE_MAX_BLOCKS = 50000;
    private static final int AZURE_MAX_BLOCK_SIZE_MB = 100;
    private static final long AZURE_MAX_BLOB_SIZE_BYTES =
            AZURE_MAX_BLOCK_SIZE_MB * MEGABYTES_TO_BYTES_MULTIPLIER * AZURE_MAX_BLOCKS;

    private static final String SLASH = "/";

    private final BlobContainerClient blobContainerClient;
    private final BlobContainerAsyncClient blobContainerAsyncClient;
    private BlobClient blobClient;
    private BlobProperties blobProperties;

    private FileType fileType = null;
    private boolean isAttached = false;


    /**
     * Creates a new FileObject for use with a remote Azure Blob Storage file or folder.
     *
     * @param fileName   - Azure file name object, which contains path and name of the file
     * @param fileSystem - Azure file system object for file operation
     */
    public AzFileObject(final AbstractFileName fileName, final AzFileSystem fileSystem) {

        super(fileName, fileSystem);

        blobContainerAsyncClient = fileSystem.getContainerAsyncClient();
        blobContainerClient = fileSystem.getContainerClient();
        blobProperties = null;
    }


    @Override
    protected void doAttach() {

        if (isAttached) {
            return;
        }

        String name = getName().getPath();

        if (name.startsWith(SLASH)) {
            name = name.substring(1);
        }

        BlobClient client = blobContainerClient.getBlobClient(name);

        if (client != null) {
            blobClient = client;
            isAttached = true;
        }
    }


    /**
     * Callback for use when detaching this File Object from Azure Blob Storage.
     * 

* The File Object should be reusable after attach() call. */ @Override protected void doDetach() { blobClient = null; blobProperties = null; isAttached = false; fileType = null; } /** * Callback for checking the type of the current FileObject. Typically can * be of type... * FILE for regular remote files * FOLDER for regular remote containers * IMAGINARY for a path that does not exist remotely. * * @return - File Type of current azure url */ @Override protected FileType doGetType() { doAttach(); AzFileName fileName = (AzFileName) getName(); //file type IMAGINARY check is required because in case of place holder type object file type would be IMAGINARY so // that needs to be corrected once it gets imported. // second reason behind this check is, this.isAttached and super.attached properties are not in sync when this // .doAttached() called directly so while closing object (fleObject.close()) internally it calls detach() method to // detach the object but it finds supper.attached to false and return from there without detaching the object. if (this.fileType != null && this.fileType != FileType.IMAGINARY) { return this.fileType; } if (fileName != null && fileName.getType() == FileType.FOLDER) { this.fileType = FileType.FOLDER; injectType(this.fileType); return this.fileType; } String name = fileName.getPath(); if (name.startsWith(SLASH)) { name = name.substring(1); } // If we are given the container root then consider this a folder. if ("".equals(name)) { this.fileType = FileType.FOLDER; injectType(this.fileType); return this.fileType; } Iterable blobs = blobContainerClient.listBlobsByHierarchy(name); BlobItem blobItem = null; Iterator iterator = blobs.iterator(); while (iterator.hasNext()) { BlobItem item = iterator.next(); if (item.getName().equals(name) || item.getName().equals(name + SLASH)) { blobItem = item; break; } } FileType res; if (blobItem == null) { res = FileType.IMAGINARY; } else if (blobItem.isPrefix() != null && blobItem.isPrefix()) { res = FileType.FOLDER; } else { res = FileType.FILE; } this.fileType = res; super.injectType(this.fileType); return this.fileType; } /** * Callback for handling "content size" requests by the provider. * * @return The number of bytes in the File Object's content */ @Override protected long doGetContentSize() { return getBlobProperties().getBlobSize(); } /** * Get an InputStream for reading the content of this File Object. * * @return The InputStream object for reading. */ @Override protected InputStream doGetInputStream() { return blobClient.getBlockBlobClient().openInputStream(); } /** * Callback for getting an OutputStream for writing into Azure Blob Storage file. * * @param overwrite true if the file should be overwritten. * @return - output stream of current blob */ @Override protected OutputStream doGetOutputStream(boolean overwrite) { return blobClient.getBlockBlobClient().getBlobOutputStream(true); } /** * Lists the children of this file. Is only called if {@link #doGetType} * returns {@link FileType#FOLDER}. The return value of this method * is cached, so the implementation can be expensive.
* * @return a possible empty String array if the file is a directory or null or an exception if the * file is not a directory or can't be read. */ @Override protected String[] doListChildren() { AzFileName fileName = (AzFileName) getName(); String path = fileName.getPath(); if (path.equals(SLASH)) { path = ""; //root path } else { if (path.startsWith(SLASH)) { path = path.substring(1); } if (!path.endsWith(SLASH)) { path = path + SLASH; } } Iterable blobs = blobContainerAsyncClient.listBlobsByHierarchy(path).toIterable(); List blobList = new ArrayList<>(); blobs.forEach(blobList::add); ArrayList resList = new ArrayList<>(); for (BlobItem blobItem : blobList) { String name = blobItem.getName(); String[] names = name.split(SLASH); String itemName = names[names.length - 1]; // Preserve folders if (name.endsWith(SLASH)) { itemName = itemName + SLASH; } resList.add(itemName); } return resList.toArray(new String[resList.size()]); } /** * Callback for handling create folder requests. Since there are no folders * in Azure Cloud Storage this call is ignored. */ @Override protected void doCreateFolder() { log.debug("doCreateFolder() called."); } /** * Callback for handling delete on this File Object */ @Override protected void doDelete() { if (FileType.FILE == doGetType()) { blobClient.delete(); } //once object gets deleted fileType must be set as FileType.IMAGINARY because it's no longer exist. this.fileType = FileType.IMAGINARY; } /** * Override delete method of VFS layer to handle folder delete scenario. In some cases we create imaginary file (file with * same name as folder and zero byte) for physical representation of folder. So while removing the imaginary file we shall * handle the case of folder delete instead of imaginary file itself. * * Example: * Folder Structure * folder-one/sub-folder-one/ * ...folder-one/sub-folder-one/file-one.jpg -> (file type) * ...folder-one/sub-folder-one/file-tow.jpg -> (file type) * ...folder-one/sub-folder-one/sub-folder-one/ -> (imaginary type) * * - Now when someone who want to delete entire folder they would make call like delete('folder-one/sub-folder-one/') * - The VFS layer get children and make delete call one by one for each file. * - So when a call made to delete 'folder-one/sub-folder-one/sub-folder-one/' imaginary file it does nothing since exists() * check for it returns false! * - We override this method to handle that case * - delete call of Azure for 'folder-one/sub-folder-one/sub-folder-one/' fails too since that is imaginary file * - We need to make delete call by its parent path, which is 'folder-one/sub-folder-one/' -> this works * - Delete by parent path does not remove all files, it simply deletes the imaginary file exists with the same name as * folder name! */ @Override public boolean delete() throws FileSystemException { if (FileType.IMAGINARY != doGetType()) { return super.delete(); } try { //Handle removal of imaginary file from file cache this.handleDelete(); //Check the imaginary file (zero byte size file) belong to the folder not if (isZeroByteFile(getName())) { // Handle removing of an imaginary file // It is a file created to represent the folder in virtual file system // To delete an imaginary file we need to call delete call of azure blob client with parent path! String name = getName().getParent().getPath(); //Path should not start with slash if (name.startsWith(SLASH)) { name = name.substring(1); } //Folder path should always end with slash if (!name.endsWith(SLASH)) { name = name + SLASH; } BlobClient client = blobContainerClient.getBlobClient(name); client.delete(); } } catch (Exception e) { //Imaginary file removal is not a critical, it should not prevent removal of other files as part of folder //removal operations. log.warn("Could not delete {} imaginary file", getName(), e); } return true; } /** * Returns true if path of the given FileName is like `folder-one/folder-two/folder-two` */ private boolean isZeroByteFile(FileName name) { String[] names = name.getPath().split(SLASH); int len = names.length; return len >= 2 && names[len - 1].equals(names[len - 2]); } /** * Callback for handling the getLastModifiedTime() Commons VFS API call. * * @return Time since the file has last been modified */ @Override protected long doGetLastModifiedTime() { if (Boolean.FALSE.equals(blobClient.exists())) { return 0; } return getBlobProperties().getLastModified().toInstant().toEpochMilli(); } /** * We need to override this method, because the parent one throws an exception. * * @param modifiedTime the last modified time to set. * @return true if setting the last modified time was successful. */ @Override protected boolean doSetLastModifiedTime(long modifiedTime) { return true; } /** * Determines if the file exists. * * @return true if the file exists, false otherwise, * @throws FileSystemException if an error occurs. */ @Override public boolean exists() throws FileSystemException { try { FileType type = doGetType(); return FileType.IMAGINARY != type; } catch (Exception e) { throw new FileSystemException(e); } } /** * This will prepare the fileObject to get resynchronized with the underlying file system if required. */ @Override public void refresh() { // Noop } /** * Returns the list of children. * * @return The list of children * @throws FileSystemException If there was a problem listing children * @see AbstractFileObject#getChildren() */ @Override public FileObject[] getChildren() throws FileSystemException { try { // Folders which are copied from other folders, have type = IMAGINARY. We can not throw exception based on folder // type only and so we have check here for content. if (doGetType().hasContent()) { throw new FileNotFolderException(getName()); } } catch (Exception ex) { throw new FileNotFolderException(getName(), ex); } return super.getChildren(); } @Override public void copyFrom(FileObject src, FileSelector selector) throws FileSystemException { if (!src.exists()) { throw new FileSystemException("vfs.provider/copy-missing-file.error", src); } try { doAttach(); List files = new ArrayList<>(); src.findFiles(selector, false, files); for (FileObject srcFile : files) { FileType srcFileType = srcFile.getType(); if (FileType.FOLDER == srcFileType) { continue; } String relPath = src.getName().getRelativeName(srcFile.getName()); FileObject destFile = this.resolveFile(relPath, NameScope.DESCENDENT_OR_SELF); if (destFile.exists() && destFile.getType() != srcFile.getType()) { destFile.delete(Selectors.SELECT_ALL); } try { if (srcFile.getType().hasChildren()) { destFile.createFolder(); } else if (canCopyServerSide(srcFile, destFile)) { AzFileObject destAzFile = (AzFileObject) destFile; String url = ((AzFileObject) srcFile).getSignedUrl(TWENTY_FOUR_HOURS_IN_SEC).toString(); if (srcFile.getContent().getSize() > BLOB_COPY_THRESHOLD_MB) { SyncPoller poll = destAzFile.blobClient.beginCopy(url, Duration.ofSeconds(1)); PollResponse pollResponse = poll.waitForCompletion(); if (pollResponse.getStatus() != LongRunningOperationStatus.SUCCESSFULLY_COMPLETED) { Exception exception = new Exception(pollResponse.getStatus().toString()); throw new FileSystemException("vfs.provider/copy-file.error", exception, srcFile, destFile); } } else { destAzFile.blobClient.copyFromUrl(url); } destAzFile.doGetType(); // Change file to non-imaginary type. } else if (srcFile.getType().hasContent()) { doCopyFromStream(srcFile, destFile); } else { // nothing useful to do if no content and can't have children throw new FileSystemException("vfs.provider/copy-file.error", new UnsupportedOperationException(), srcFile, destFile); } } catch (IOException | BlobStorageException e) { throw new FileSystemException("vfs.provider/copy-file.error", e, srcFile, destFile); } } } catch (Exception e) { throw new FileSystemException(e); } } /** * Copy a file object via input/output streams. * * @param srcFile - source file to copy * @param destFile - destination file into which copy * @throws Exception - throw exception in case unexpected situation occurs */ private void doCopyFromStream(FileObject srcFile, FileObject destFile) throws Exception { try { String destFilename = destFile.getName().getPath(); if (destFilename.startsWith(SLASH)) { destFilename = destFilename.substring(1); } BlobClient destBlobClient = blobContainerClient.getBlobClient(destFilename); long blockSize = getBlockSize(srcFile.getContent().getSize()); ParallelTransferOptions opts = new ParallelTransferOptions() .setBlockSizeLong(blockSize) .setMaxConcurrency(5); BlobRequestConditions requestConditions = new BlobRequestConditions(); try (BlobOutputStream bos = destBlobClient.getBlockBlobClient().getBlobOutputStream( opts, null, null, null, requestConditions); InputStream is = srcFile.getContent().getInputStream()) { byte[] buffer = new byte[(int) STREAM_BUFFER_SIZE_MB]; for (int len; (len = is.read(buffer)) != -1; ) { bos.write(buffer, 0, len); } } ((AzFileObject) destFile).doGetType(); } finally { destFile.close(); srcFile.close(); } } /** * Returns the block size depending on the size of the file to be uploaded. * A default block size of 4MB is used until the file size is larger than * 4mb * 50000, after this block size is scaled so that 50000 blocks are used. * * @param fileSize - file size for which block size to be decided * @return block size based on given file size * @throws FileSystemException - will be thrown in case of unexpected situation occur. */ protected long getBlockSize(long fileSize) throws FileSystemException { if (fileSize > AZURE_MAX_BLOB_SIZE_BYTES) { throw new FileSystemException("File size exceeds Azure Blob size limit"); } long dynamicBlockSizeThreshold = (DEFAULT_UPLOAD_BLOCK_SIZE_MB * AZURE_MAX_BLOCKS) * MEGABYTES_TO_BYTES_MULTIPLIER; if (fileSize < dynamicBlockSizeThreshold) { return DEFAULT_UPLOAD_BLOCK_SIZE_MB * MEGABYTES_TO_BYTES_MULTIPLIER; } else { return (long) Math.ceil((float) fileSize / (float) AZURE_MAX_BLOCKS); } } /** * Compares credential to check possibilities of copying file at server side. * * @param sourceFileObject - source file object to copy * @param destinationFileObject - destination file object into which copy * @return - boolean flag to decide server side copy possible or not */ private boolean canCopyServerSide(FileObject sourceFileObject, FileObject destinationFileObject) { if (!(sourceFileObject instanceof AzFileObject) || !(destinationFileObject instanceof AzFileObject)) { return false; } AzFileObject azSourceFileObject = (AzFileObject) sourceFileObject; AzFileObject azDestinationFileObject = (AzFileObject) destinationFileObject; String sourceAccountName = getAccountName(azSourceFileObject); String destinationAccountName = getAccountName(azDestinationFileObject); return sourceAccountName != null && sourceAccountName.equals(destinationAccountName); } /** * Returns false to reply on copyFrom method in case moving/copying file within same azure container * * @param fileObject - file object for which renamed can be decided * @return - always return false, renamed cannot be done */ @Override public boolean canRenameTo(FileObject fileObject) { return false; } /** * Generate signed url to directly access file. * * @param durationSec - SAS validity duration in hours * @return Signed URL to process * @throws Exception - will be thrown in case of unexpected situation occur. */ public URL getSignedUrl(int durationSec) throws Exception { doAttach(); OffsetDateTime offsetDateTime = OffsetDateTime.now().plusSeconds(durationSec); BlobSasPermission sasPermission = BlobSasPermission.parse("r"); BlobServiceSasSignatureValues signatureValues = new BlobServiceSasSignatureValues(offsetDateTime, sasPermission); signatureValues.setStartTime(OffsetDateTime.now().minusMinutes(10)); signatureValues.setProtocol(SasProtocol.HTTPS_ONLY); // Sign the url for the this object String url = this.blobClient.getBlobUrl() + "?" + blobClient.generateSas(signatureValues); return new URL(url); } /** * Returns an account name from given azure file object * * @param azFileObject - Azure file object for which account name to be returned * @return - name of account for given azure file object */ private String getAccountName(AzFileObject azFileObject) { AzFileSystem azFileSystem = (AzFileSystem) azFileObject.getFileSystem(); return azFileSystem.getContainerClient().getAccountName(); } private BlobProperties getBlobProperties() { if (blobProperties == null) { doAttach(); blobProperties = blobClient.getProperties(); } return blobProperties; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy