hadoop format过程

时间:2023-03-09 03:54:07
hadoop  format过程
/**
 * Excerpt of the namenode format entry point: only the guard that refuses to
 * format when formatting has been disabled in the configuration is shown here.
 *
 * @param conf configuration from which "dfs.namenode.support.allowformat" is read
 * @param isConfirmationNeeded not referenced in this excerpt
 * @throws IOException if "dfs.namenode.support.allowformat" is set to false
 */
private static boolean format(Configuration conf,
boolean isConfirmationNeeded
) throws IOException {
boolean allowFormat = conf.getBoolean("dfs.namenode.support.allowformat", // read the allow-format switch; true is used when the key is not set
true);
if (!allowFormat) {
// Formatting is disabled for this filesystem: fail before touching any directory.
throw new IOException("The option dfs.namenode.support.allowformat is "
+ "set to false for this filesystem, so it "
+ "cannot be formatted. You will need to set "
+ "dfs.namenode.support.allowformat parameter "
+ "to true in order to format this filesystem");
} }

为了防止生产集群因误操作导致 hadoop 被 format,可在 hdfs-site.xml 中加入以下配置:

<property>
  <name>dfs.namenode.support.allowformat</name>
  <value>false</value>
  <description>Does HDFS namenode allow itself to be formatted?
               You should consider setting this to false for any production
               cluster, to avoid any possibility of formatting a running DFS.
  </description>
</property>

Collection<File> dirsToFormat = FSNamesystem.getNamespaceDirs(conf); // namenode metadata directories; see FSNamesystem.java line 395
 Collection<File> editDirsToFormat =
FSNamesystem.getNamespaceEditsDirs(conf); // edit log directories; see FSNamesystem.java line 406

 /**
  * Resolves the namenode metadata directories from the configuration.
  *
  * <p>Reads the "dfs.name.dir" setting; when it yields no entries, the single
  * default path /tmp/hadoop/dfs/name is used instead.
  *
  * @param conf configuration to read "dfs.name.dir" from
  * @return one {@link File} per directory name; the File objects are only
  *         path handles, nothing is created on disk by this method
  */
 public static Collection<File> getNamespaceDirs(Configuration conf) {
   Collection<String> configuredNames = conf.getStringCollection("dfs.name.dir");
   if (configuredNames.isEmpty()) {
     // Nothing configured: fall back to the default location.
     configuredNames.add("/tmp/hadoop/dfs/name");
   }

   Collection<File> namespaceDirs = new ArrayList<File>(configuredNames.size());
   for (String dirName : configuredNames) {
     namespaceDirs.add(new File(dirName));
   }
   return namespaceDirs;
 }
Collection 是 java.util 包中的一个接口(List、Set 等集合类型都继承自它);而 implements 是类声明“实现某个接口”时使用的关键字,两者并不是一回事。这个问题,有待深入,哈哈
  for(Iterator<File> it = dirsToFormat.iterator(); it.hasNext();) {   // walk every configured metadata directory (more than one may be configured)
File curDir = it.next();
if (!curDir.exists())
continue; // directory does not exist yet, so there is nothing to confirm
if (isConfirmationNeeded) {
System.err.print("Re-format filesystem in " + curDir +" ? (Y or N) "); // the directory already exists, so ask whether to re-format it
if (!(System.in.read() == 'Y')) {
System.err.println("Format aborted in "+ curDir);
return true; // first character typed was not 'Y': stop without formatting
}
while(System.in.read() != '\n'); // discard the enter-key; a bare Enter never reaches here because it is not 'Y' and aborts above
 FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat,
editDirsToFormat), conf); // build the FSNamesystem over an FSImage created from the metadata and edit log directories
nsys.dir.fsImage.format(); // perform the actual format
return false;
}

//开始看format的过程:

  /**
   * Builds a namesystem on top of the given image.
   *
   * @param fsImage image handed to the newly created FSDirectory
   * @param conf configuration applied through setConfigurationParameters and
   *             passed on to the FSDirectory
   * @throws IOException if setConfigurationParameters fails
   */
  FSNamesystem(FSImage fsImage, Configuration conf) throws IOException {
    fsLock = new ReentrantReadWriteLock();

    // Apply the configuration before the directory is created
    // (setConfigurationParameters: FSNamesystem.java line 466).
    setConfigurationParameters(conf);

    dir = new FSDirectory(fsImage, this, conf);
  }
/**
 * Loads the namesystem settings from the configuration into this instance.
 *
 * <p>In order: shell/login setup and the filesystem owner, permission
 * settings, replication limits (validated), heartbeat and replication-check
 * intervals, block size and object limits, and finally the lease periods.
 *
 * @param conf configuration to read from; when "hadoop.disable.shell" is true
 *             its UGI property is also overwritten with "hadoop", "hadoop"
 * @throws IOException if the login fails or the replication limits are
 *                     out of range or inconsistent with each other
 */
private void setConfigurationParameters(Configuration conf)
    throws IOException {
  fsNamesystemObject = this;

  final boolean shellDisabled = conf.getBoolean("hadoop.disable.shell",false);
  if (shellDisabled) {
    conf.setStrings(UnixUserGroupInformation.UGI_PROPERTY_NAME, new String[]{"hadoop", "hadoop"});
    Shell.setDisabled(true);
  }

  try {
    fsOwner = UnixUserGroupInformation.login(conf);
  } catch (LoginException loginFailure) {
    throw new IOException(StringUtils.stringifyException(loginFailure));
  }
  LOG.info("fsOwner=" + fsOwner);

  // User and directory permission settings.
  this.hasRwLock = conf.getBoolean("dfs.rwlock", false);
  this.supergroup = conf.get("dfs.permissions.supergroup", "supergroup");
  this.isPermissionEnabled = conf.getBoolean("dfs.permissions", true);
  this.persistBlocks = conf.getBoolean("dfs.persist.blocks", false);
  LOG.info("supergroup=" + supergroup);
  LOG.info("isPermissionEnabled=" + isPermissionEnabled);

  final short upgradeMode = (short) conf.getInt("dfs.upgrade.permission", 0777);
  this.defaultPermission = PermissionStatus.createImmutable(
      fsOwner.getUserName(), supergroup, new FsPermission(upgradeMode));

  this.maxCorruptFilesReturned = conf.getInt("dfs.corruptfilesreturned.max",
      DEFAULT_MAX_CORRUPT_FILES_RETURNED);

  // Replication factors: default 3, maximum 512, minimum 1 unless configured.
  this.defaultReplication = conf.getInt("dfs.replication", 3);
  this.maxReplication = conf.getInt("dfs.replication.max", 512);
  this.minReplication = conf.getInt("dfs.replication.min", 1);

  // Reject a minimum below 1.
  if (minReplication <= 0) {
    throw new IOException(
        "Unexpected configuration parameters: dfs.replication.min = "
        + minReplication
        + " must be greater than 0");
  }
  // Reject a maximum that reaches Short.MAX_VALUE.
  if (maxReplication >= (int) Short.MAX_VALUE) {
    throw new IOException(
        "Unexpected configuration parameters: dfs.replication.max = "
        + maxReplication + " must be less than " + (Short.MAX_VALUE));
  }
  // Reject a maximum smaller than the minimum.
  if (maxReplication < minReplication) {
    throw new IOException(
        "Unexpected configuration parameters: dfs.replication.min = "
        + minReplication
        + " must be less than dfs.replication.max = "
        + maxReplication);
  }

  this.maxReplicationStreams = conf.getInt("dfs.max-repl-streams", 2);

  // Heartbeat interval is configured in seconds and kept in milliseconds.
  final long heartbeatMs = conf.getLong("dfs.heartbeat.interval", 3) * 1000;
  this.heartbeatRecheckInterval = conf.getInt(
      "heartbeat.recheck.interval", 5 * 60 * 1000); // 5 minutes
  // With the defaults this is 2 * 5 min + 10 * 3 s = 10.5 minutes.
  this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
      10 * heartbeatMs;

  this.replicationRecheckInterval =
      conf.getInt("dfs.replication.interval", 3) * 1000L;
  this.defaultBlockSize = conf.getLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
  this.maxFsObjects = conf.getLong("dfs.max.objects", 0);

  // Never lower the current limit; raise it to 20 per heartbeat second if larger.
  final int invalidateFloor = 20 * (int) (heartbeatMs / 1000);
  this.blockInvalidateLimit = Math.max(this.blockInvalidateLimit,
      invalidateFloor);

  this.accessTimePrecision = conf.getLong("dfs.access.time.precision", 0);
  this.supportAppends = conf.getBoolean("dfs.support.append", false);

  // Soft and hard lease periods; the soft period is capped at the hard one.
  final long hardLimit = conf.getLong(FSConstants.DFS_HARD_LEASE_KEY,
      FSConstants.LEASE_HARDLIMIT_PERIOD);
  final long softLimit = conf.getLong(FSConstants.DFS_SOFT_LEASE_KEY,
      FSConstants.LEASE_SOFTLIMIT_PERIOD);
  this.leaseManager.setLeasePeriod(
      Math.min(hardLimit, softLimit), hardLimit);
}

默认块大小是64M(67108864 字节)

<property>
  <name>dfs.block.size</name>
  <value>67108864</value>
  <description>The default block size for new files.</description>
</property>

 /**
  * Creates the root directory node and binds this directory to the given
  * image and namesystem, then runs initialize with the configuration.
  *
  * @param fsImage image stored on this directory
  * @param ns owning namesystem; also supplies the root's owner permissions
  * @param conf configuration passed to initialize
  */
 FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
   // The root directory is created with mode 0755.
   FsPermission rootMode = new FsPermission((short) 0755);

   // NOTE(review): the last two arguments look like quota limits; confirm
   // against the INodeDirectoryWithQuota constructor.
   rootDir = new INodeDirectoryWithQuota(
       INodeDirectory.ROOT_NAME,
       ns.createFsOwnerPermissions(rootMode),
       Integer.MAX_VALUE,
       -1);

   this.fsImage = fsImage;
   namesystem = ns;
   initialize(conf);
 }