All downloads are free. Search and download functionality uses the official Maven repository.

com.databricks.sdk.mixin.DbfsExt Maven / Gradle / Ivy

There is a newer version: 0.38.0
Show newest version
package com.databricks.sdk.mixin;

import com.databricks.sdk.core.ApiClient;
import com.databricks.sdk.service.files.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.*;

/**
 * Utility methods for the DBFS API.
 *
 * 

This class provides utility methods for the DBFS API that are not part of the official API * surface. These methods are subject to change without notice. */ public class DbfsExt extends DbfsAPI { public DbfsExt(ApiClient apiClient) { super(apiClient); } public DbfsExt(DbfsService mock) { super(mock); } /** * Construct a new {@code java.io.BufferedReader} for the given file. * *

The returned implementation of {@code InputStream} internally buffers calls to the DBFS API * to reduce the number of requests made. The buffer has a maximum size of 1 MB, corresponding to * the maximum number of bytes that can be read in a single call to the DBFS API. The buffer is * refilled when {@code read()} is called after the buffer has been exhausted. * * @param path the path to the file to read * @return an InputStream that reads from the given file in DBFS */ public InputStream open(String path) { return new InputStream() { private long offset = 0; private byte[] buffer = new byte[0]; private int bufferOffset = 0; @Override public int read() { if (bufferOffset >= buffer.length) { // Buffer is exhausted, refill it. ReadDbfsRequest request = new ReadDbfsRequest().setPath(path).setOffset(offset).setLength(1024 * 1024L); ReadResponse response = DbfsExt.this.read(request); buffer = Base64.getDecoder().decode(response.getData()); bufferOffset = 0; offset += buffer.length; } if (bufferOffset >= buffer.length) { // Buffer is still exhausted, we're at EOF. return -1; } byte b = buffer[bufferOffset++]; if (b == -1) { return 255; } return b; } }; } /** * Returns the contents of the given file as a byte array. * *

This method is analogous to {@code Files.readAllBytes(path)} in Java 8, but it reads the * file from DBFS instead of the local filesystem. * * @param path the path to the file to read * @return the contents of the file as a byte array * @throws IOException if an I/O error occurs */ public byte[] readAllBytes(Path path) throws IOException { try (InputStream in = open(path.toString())) { ByteArrayOutputStream out = new ByteArrayOutputStream(); byte[] buffer = new byte[1024 * 1024]; int result; do { result = in.read(buffer); if (result > 0) { out.write(buffer, 0, result); } } while (result >= 0); return out.toByteArray(); } } /** * Returns the contents of the given file as a list of strings, where each string corresponds to a * line in the file. * *

This method is analogous to {@code Files.readAllLines(path, cs)} in Java 8, but it reads the * file from DBFS instead of the local filesystem. * * @param path the path to the file to read * @param cs the charset to use when decoding the file * @return the contents of the file as a list of strings * @throws IOException if an I/O error occurs */ public List readAllLines(Path path, Charset cs) throws IOException { // Read all bytes using readAllBytes API, then convert to a list of string using the given // charset. byte[] bytes = readAllBytes(path); CharBuffer charBuffer = cs.decode(ByteBuffer.wrap(bytes)); // Split the CharBuffer into lines. List lines = new ArrayList<>(); int start = 0; for (int i = 0; i < charBuffer.length(); i++) { if (charBuffer.charAt(i) == '\n') { lines.add(charBuffer.subSequence(start, i).toString()); start = i + 1; } } return lines; } /** * Returns an OutputStream that writes to the given file in DBFS. * *

This method first creates the given file in DBFS if it does not exist. If the file exists, * this method overwrites the existing file. Then, this method constructs an OutputStream that * writes to the file in DBFS. The returned OutputStream buffers writes to the file to reduce the * number of requests made to the DBFS API. The buffer has a maximum size of 1 MB, corresponding * to the maximum number of bytes that can be written in a single call to the DBFS AddBlock API, * and is flushed when full. * * @param path the path to the file to read * @return an OutputStream that writes to the given file in DBFS */ public OutputStream getOutputStream(String path) { CreateResponse createResponse = this.create(new Create().setPath(path).setOverwrite(true)); long handle = createResponse.getHandle(); return new OutputStream() { private final byte[] buffer = new byte[1024 * 1024]; private int bufferOffset = 0; @Override public void write(int b) { buffer[bufferOffset++] = (byte) b; if (bufferOffset >= buffer.length) { // Buffer is full, flush it. flush(); } } @Override public void flush() { if (bufferOffset > 0) { // Flush the remaining bytes in the buffer. byte[] remainingBytes = Arrays.copyOfRange(buffer, 0, bufferOffset); DbfsExt.this.addBlock( new AddBlock() .setHandle(handle) .setData(Base64.getEncoder().encodeToString(remainingBytes))); bufferOffset = 0; } } @Override public void close() { flush(); DbfsExt.this.close(new Close().setHandle(handle)); } }; } /** * Writes the given bytes to the given file in DBFS. * *

This methods is analogous to {@code Files.write(path, bytes)} in Java 8, but it writes the * file to DBFS instead of the local filesystem. * * @param path the path to the file to write * @param bytes the bytes to write * @return the path to the file in DBFS * @throws IOException if an I/O error occurs */ public Path write(Path path, byte[] bytes) throws IOException { try (OutputStream out = getOutputStream(path.toString())) { out.write(bytes); } return path; } /** An iterator which iterates over the files in a directory lazily. */ private class LazyDirectoryIterator implements Iterator { private final Queue dirsToVisit; private Iterator currentFiles; public LazyDirectoryIterator(String path) { this.dirsToVisit = new ArrayDeque<>(); this.dirsToVisit.add(path); this.currentFiles = Collections.emptyIterator(); } @Override public boolean hasNext() { while (!currentFiles.hasNext() && !dirsToVisit.isEmpty()) { String nextDir = dirsToVisit.remove(); currentFiles = list(nextDir).iterator(); } return currentFiles.hasNext(); } @Override public FileInfo next() { if (!hasNext()) { throw new NoSuchElementException(); } FileInfo nextFile = currentFiles.next(); if (nextFile.getIsDir()) { dirsToVisit.add(nextFile.getPath()); } return nextFile; } } /** * Recursively lists files in DBFS, starting from the provided directory. * * @param path the path to the directory to list * @return an iterable of FileInfo objects, one for each file and directory listing in the * directory, recursively */ public Iterable recursiveList(String path) { return () -> new LazyDirectoryIterator(path); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy