What is a sequence
file in Hadoop?
· A file that stores key/value pairs in a binary
format
· Because it is a binary format, it can be compressed,
so it consumes less disk space, fewer I/O operations, and less bandwidth
· It also solves the small-files problem (the whole content
of each small file becomes the value of one record in the sequence file)
Now we are going to look at how to convert a large number
of small files into a sequence file.
Below is the Java code for writing a sequence file:
/**
 * Packs the regular files found directly inside a local folder into one Hadoop sequence file.
 *
 * <p>Usage: {@code SequenceFileWritter <input-folder> <output-sequence-file>}
 *
 * <p>One record is appended per regular file: the key is the file name and the value holds the
 * file's bytes. Sub-directories are reported on standard output but are not descended into.
 */
public class SequenceFileWritter {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the local input folder, {@code args[1]} is the path/URI of
     *             the sequence file to create
     * @throws IOException if the output cannot be created, or a file cannot be read or appended
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: SequenceFileWritter <input-folder> <output-sequence-file>");
            System.exit(1);
        }
        File infolder = new File(args[0]);
        String uri = args[1];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(uri);
        Text key = new Text();
        Text value = new Text();
        FSDataOutputStream stm = null;
        SequenceFile.Writer writer = null;
        try {
            stm = fs.create(path);
            writer = SequenceFile.createWriter(conf, stm, key.getClass(), value.getClass(),
                    SequenceFile.CompressionType.BLOCK, new DefaultCodec(), new Metadata());
            File[] listOfFiles = infolder.listFiles();
            System.out.printf("Folder is %s%n", infolder);
            if (null != listOfFiles) {
                System.out.printf("# of files %d%n", listOfFiles.length);
                for (File entry : listOfFiles) {
                    if (entry.isFile()) {
                        key.set(entry.getName());
                        // Store the file's data (not its path) so the sequence file really
                        // replaces the small files. The value class stays Text, which assumes
                        // the inputs are UTF-8 text -- TODO confirm; binary payloads would be
                        // better served by a byte-oriented value class.
                        value.set(readContents(entry));
                        writer.append(key, value);
                        // Log the source path rather than the (possibly large) record value.
                        System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, entry.getPath());
                    } else if (entry.isDirectory()) {
                        System.out.println("Directory " + entry.getName());
                    }
                }
            } else {
                // listFiles() returned null: nothing could be listed for this folder.
                System.out.printf("list of files is null, check %s%n", infolder);
            }
        } finally {
            // The stream was opened by this method, so it is closed here explicitly (after the
            // writer) instead of relying on the writer's close to do it.
            IOUtils.closeStream(writer);
            IOUtils.closeStream(stm);
        }
    }

    /** Reads the complete contents of a local file into a byte array. */
    private static byte[] readContents(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large for a single record: " + file);
        }
        byte[] contents = new byte[(int) size];
        java.io.InputStream in = new java.io.FileInputStream(file);
        try {
            IOUtils.readFully(in, contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        return contents;
    }
}
To read the sequence file back:
/**
 * Prints every record of a sequence file to standard output.
 *
 * <p>The file location is taken from {@code args[0]}. Key and value objects are instantiated
 * from the classes reported by the reader, and each record is printed as
 * {@code [sync-marker] <TAB> key <TAB> value}, where the marker is {@code sync} when the reader
 * reports that a sync point was seen and empty otherwise.
 */
public class SequenceFileRead {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path/URI of the sequence file to read
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String location = args[0];
        Configuration configuration = new Configuration();
        Path sequencePath = new Path(location);
        FileSystem fileSystem = FileSystem.get(configuration);
        SequenceFile.Reader in = null;
        try {
            in = new SequenceFile.Reader(fileSystem, sequencePath, configuration);
            // Reusable holders whose concrete types come from the reader itself.
            Writable recordKey =
                    (Writable) ReflectionUtils.newInstance(in.getKeyClass(), configuration);
            Writable recordValue =
                    (Writable) ReflectionUtils.newInstance(in.getValueClass(), configuration);
            boolean hasRecord = in.next(recordKey, recordValue);
            while (hasRecord) {
                String marker;
                if (in.syncSeen()) {
                    marker = "sync";
                } else {
                    marker = "";
                }
                System.out.printf("[%s]\t%s\t%s\n", marker, recordKey, recordValue);
                hasRecord = in.next(recordKey, recordValue);
            }
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
No comments:
Post a Comment