com.simiacryptus.hadoop_jgit.GitRepoFileSystem Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hadoop-jgit-fs Show documentation
Show all versions of hadoop-jgit-fs Show documentation
A Hadoop filesystem driver for Git
/*
* Copyright (c) 2019 by Andrew Charneski.
*
* The author licenses this file to you under the
* Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance
* with the License. You may obtain a copy
* of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.simiacryptus.hadoop_jgit;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.eclipse.jgit.dircache.DirCache;
import org.eclipse.jgit.dircache.DirCacheCheckout;
import org.eclipse.jgit.lib.*;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.transport.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.concurrent.TimeUnit;
public class GitRepoFileSystem extends ReadOnlyFileSystem {
private static final Logger logger = LoggerFactory.getLogger(GitRepoFileSystem.class);
private final File gitDir;
private final Repository repository;
private final RemoteConfig remoteConfig;
private final ParsePath parsedPath;
private final RawLocalFileSystem innerFS;
private final URI localBase;
private final URI univeralBase;
private final double eagerPullPeriod;
private final double dismountPeriod;
private final boolean dismountDelete;
private final double lazyPullPeriod;
private long lastTouch = 0;
private long lastFetch = 0;
public GitRepoFileSystem(String url, final GitFileSystem parent) throws IOException, URISyntaxException {
setConf(parent.getConf());
statistics = parent.getStats();
TimeUnit timeUnit = TimeUnit.SECONDS;
this.lazyPullPeriod = Double.parseDouble(getProperty("fs.jgit.pull.lazy", Double.toString(timeUnit.toSeconds(5))).toString());
this.eagerPullPeriod = Double.parseDouble(getProperty("fs.jgit.pull.eager", Double.toString(timeUnit.toSeconds(5))).toString());
this.dismountPeriod = Double.parseDouble(getProperty("fs.jgit.dismount.seconds", Double.toString(timeUnit.toSeconds(60))).toString());
this.dismountDelete = Boolean.parseBoolean(getProperty("fs.jgit.dismount.delete", Boolean.toString(false)).toString());
File dataDirectory = new File(getProperty("fs.jgit.datadir", getProperty("java.io.tmpdir")).toString(), "git");
dataDirectory.mkdirs();
logger.debug("Git FS: " + url);
this.parsedPath = new ParsePath(url).invoke();
logger.debug("Git Repo: " + getParsedPath().getRepoPath());
logger.debug("Git Branch: " + getParsedPath().getRepoBranch());
logger.debug("Git File: " + getParsedPath().getFilePath());
URIish parsedUrl = new URIish(url);
final URIish sourceUrl = new URIish(String.format("%s://%s/%s", parsedUrl.getScheme(), parsedUrl.getHost(), getParsedPath().getRepoPath()));
logger.debug("Git Url: " + sourceUrl);
this.gitDir = new File(dataDirectory, String.format("%s/%s/%s", parsedUrl.getHost(), getParsedPath().getRepoPath(), getParsedPath().getRepoBranch()));
logger.debug("Temp Git Dir: " + getGitDir().getAbsolutePath());
this.repository = new RepositoryBuilder().setWorkTree(getGitDir()).build();
if (!getGitDir().exists()) {
if (!getGitDir().mkdirs()) {
throw new RuntimeException(getGitDir().getAbsolutePath());
}
getRepository().create(false);
}
this.remoteConfig = getRemoteConfig(sourceUrl, getRepository().getConfig());
pull();
this.localBase = this.getGitDir().toPath().toUri();
this.univeralBase = new URI(sourceUrl.toString()).resolve(getParsedPath().getRepoBranch() + "/");
logger.debug("Local Base: " + localBase());
logger.debug("Universal Base: " + gitBase());
this.innerFS = new LocalRepoFileSystem();
getInnerFS().setWorkingDirectory(new Path(getGitDir().getAbsolutePath()));
getInnerFS().setConf(parent.getConf());
}
@Nonnull
public Path toLocalPath(final Path path) {
if (null == path) return null;
return new Path(toLocalUrl(path.toUri()));
}
@Nonnull
public Path toGitPath(final Path path) {
if (null == path) return null;
URI uri = toGitUrl(path.toUri());
try {
return new Path(new URI(uri.toString().replaceAll("^https?", "git")));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}
@Nonnull
public URI toLocalUrl(final URI path) {
if (null == path) return null;
URI relativized = localBase().resolve(gitBase().relativize(path));
logger.debug(String.format("Converted %s to %s", path, relativized));
return relativized;
}
@Nonnull
public URI toGitUrl(final URI path) {
if (null == path) return null;
URI relativized = gitBase().resolve(localBase().relativize(path));
logger.debug(String.format("Converted %s to %s", path, relativized));
return relativized;
}
public void pull() throws IOException {
this.lastFetch = System.currentTimeMillis();
CharSequence branch = getParsedPath().getRepoBranch();
Collection fetch = fetch(getRepository(), getRemoteConfig(), branch);
checkout(getRepository(), fetch.stream().filter(x -> x.getName().equals("refs/heads/" + branch)).findAny()
.orElseGet(() -> fetch.stream().filter(x -> x.getName().equals("HEAD")).findAny().get()));
}
private boolean checkout(final Repository repository, final Ref tagName) throws IOException {
if (tagName == null) return false;
final RevCommit commit;
try (RevWalk revWalk = new RevWalk(repository)) {
assert revWalk != null;
ObjectId resolve = tagName.getObjectId();
if (resolve == null) return false;
commit = revWalk.parseCommit(resolve);
assert commit != null;
}
DirCache dirCache = repository.lockDirCache();
RevTree tree = commit.getTree();
DirCacheCheckout dirCacheCheckout = new DirCacheCheckout(repository, dirCache, tree);
dirCacheCheckout.setFailOnConflict(true);
boolean checkout = dirCacheCheckout.checkout();
logger.debug(String.format("Checked out %s: %s", tagName.getObjectId(), checkout));
return checkout;
}
private Collection fetch(final Repository repository, final RemoteConfig remoteConfig, final CharSequence repoBranch) {
try (Transport transport = Transport.open(repository, remoteConfig)) {
configure(transport);
transport.setCheckFetchedObjects(false);
transport.setRemoveDeletedRefs(false);
transport.setDryRun(false);
transport.setTagOpt(TagOpt.FETCH_TAGS);
transport.setFetchThin(false);
final ProgressMonitor monitor = new EmptyProgressMonitor() {
};
FetchResult result = transport.fetch(monitor, Arrays.asList(
new RefSpec("refs/heads/" + repoBranch)
));
logger.debug(String.format("Fetched %s: %s", result.getURI(), result.getMessages()));
result.getAdvertisedRefs().stream().forEach(ref -> {
logger.debug(String.format("Remote Ref: %s = %s", ref.getName(), ref.getObjectId()));
});
return result.getAdvertisedRefs();
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
private void configure(final Transport transport) {
String username = getProperty("fs.jgit.auth.user", "").toString();
if (!username.isEmpty()) {
String password = getProperty("fs.jgit.auth.pass").toString();
logger.debug(String.format("Login: %s %s", username, password.replaceAll(".", "*")));
transport.setCredentialsProvider(new UsernamePasswordCredentialsProvider(username, password));
}
}
@Nonnull
private RemoteConfig getRemoteConfig(final URIish uri, final StoredConfig config) throws URISyntaxException, IOException {
RemoteConfig remote = new RemoteConfig(config, "origin");
remote.addFetchRefSpec(new RefSpec().setForceUpdate(true).setSourceDestination("refs/heads/*", "refs/remotes/origin/*"));
remote.addURI(uri);
remote.update(config);
config.save();
return remote;
}
@Override
public URI getUri() {
return gitBase();
}
@Override
public FSDataInputStream open(final Path f, final int bufferSize) throws IOException {
return getInnerFS().open(toLocalPath(f), bufferSize);
}
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
return Arrays.stream(getInnerFS().listStatus(toLocalPath(f))).map(this::filter).toArray(i -> new FileStatus[i]);
}
protected FileStatus filter(final FileStatus fileStatus) {
FileStatus newObj = new FileStatus();
newObj.setPath(toGitPath(fileStatus.getPath()));
try {
newObj.setSymlink(toGitPath(fileStatus.getSymlink()));
} catch (IOException e) {
}
return newObj;
}
@Override
public Path getWorkingDirectory() {
return new Path(gitBase());
}
@Override
public void setWorkingDirectory(final Path new_dir) {
throw new RuntimeException("Static Filesystem");
}
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
return getInnerFS().getFileStatus(toLocalPath(f));
}
public void touch() {
this.lastTouch = System.currentTimeMillis();
if (secondsSinceFetch() > getLazyPullPeriod()) try {
pull();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public double secondsSinceFetch() {
final long now = System.currentTimeMillis();
return (now - this.getLastFetch()) / 1e3;
}
public double secondsSinceTouch() {
final long now = System.currentTimeMillis();
return (now - this.getLastTouch()) / 1e3;
}
public File getGitDir() {
return gitDir;
}
public Repository getRepository() {
return repository;
}
public RemoteConfig getRemoteConfig() {
return remoteConfig;
}
public ParsePath getParsedPath() {
return parsedPath;
}
public RawLocalFileSystem getInnerFS() {
return innerFS;
}
public URI localBase() {
return localBase;
}
public URI gitBase() {
return univeralBase;
}
public double getEagerPullPeriod() {
return eagerPullPeriod;
}
public long getLastTouch() {
return lastTouch;
}
public long getLastFetch() {
return lastFetch;
}
public double getLazyPullPeriod() {
return lazyPullPeriod;
}
public double getDismountPeriod() {
return dismountPeriod;
}
public boolean isDismountDelete() {
return dismountDelete;
}
private class LocalRepoFileSystem extends RawLocalFileSystem {
public LocalRepoFileSystem() {
statistics = GitRepoFileSystem.this.statistics;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy