org.apache.mahout.clustering.topdown.PathDirectory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-mr Show documentation
Show all versions of mahout-mr Show documentation
Scalable machine learning libraries
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.clustering.topdown;
import java.io.File;
import org.apache.hadoop.fs.Path;
/**
 * Contains list of all internal paths used in top down clustering.
 *
 * <p>Every path is assembled with {@link Path#SEPARATOR} rather than the local operating system's
 * file separator: Hadoop paths are URI-style and always use a forward slash, whereas the OS
 * separator is a backslash on Windows.
 */
public final class PathDirectory {

  public static final String TOP_LEVEL_CLUSTER_DIRECTORY = "topLevelCluster";
  public static final String POST_PROCESS_DIRECTORY = "clusterPostProcessed";
  public static final String CLUSTERED_POINTS_DIRECTORY = "clusteredPoints";
  public static final String BOTTOM_LEVEL_CLUSTER_DIRECTORY = "bottomLevelCluster";

  /** Utility class holding only constants and static helpers; never instantiated. */
  private PathDirectory() {
  }

  /**
   * All output of top level clustering is stored in output directory/topLevelCluster.
   *
   * @param output
   *          the output path of clustering.
   * @return The top level Cluster Directory.
   */
  public static Path getTopLevelClusterPath(Path output) {
    return childOf(output, TOP_LEVEL_CLUSTER_DIRECTORY);
  }

  /**
   * The output of top level clusters is post processed and kept in this path.
   *
   * @param outputPathProvidedByUser
   *          the output path of clustering.
   * @return the path where the output of top level cluster post processor is kept.
   */
  public static Path getClusterPostProcessorOutputDirectory(Path outputPathProvidedByUser) {
    return childOf(outputPathProvidedByUser, POST_PROCESS_DIRECTORY);
  }

  /**
   * The top level clustered points before post processing are generated here.
   *
   * @param output
   *          the output path of clustering.
   * @return a path of the form output/clusteredPoints/*, i.e. the clustered points directory
   *         followed by a {@code *} component.
   */
  public static Path getClusterOutputClusteredPoints(Path output) {
    return new Path(childOf(output, CLUSTERED_POINTS_DIRECTORY), "*");
  }

  /**
   * Each cluster produced by top level clustering is processed in
   * output/"bottomLevelCluster"/clusterId.
   *
   * @param output
   *          the output path of clustering.
   * @param clusterId
   *          the id of the cluster, used as the last path component.
   * @return the bottom level clustering path.
   */
  public static Path getBottomLevelClusterPath(Path output, String clusterId) {
    return childOf(childOf(output, BOTTOM_LEVEL_CLUSTER_DIRECTORY), clusterId);
  }

  /**
   * Each cluster's path name is its clusterId. The vectors reside in separate files inside it.
   *
   * @param clusterPostProcessorOutput
   *          the path of cluster post processor output.
   * @param clusterId
   *          the id of the cluster.
   * @return the cluster path for cluster id.
   */
  public static Path getClusterPathForClusterId(Path clusterPostProcessorOutput, String clusterId) {
    return childOf(clusterPostProcessorOutput, clusterId);
  }

  /**
   * Appends a single component to a parent path.
   *
   * <p>The parent is rendered to its string form and re-parsed together with the new component,
   * so the result is built the same way as plain string concatenation would build it, only with
   * the Hadoop separator instead of the platform one.
   *
   * @param parent
   *          the path to extend.
   * @param name
   *          the component to append.
   * @return the path parent/name.
   */
  private static Path childOf(Path parent, String name) {
    return new Path(parent + Path.SEPARATOR + name);
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy