获取CentOS软件源中的updates包
可是我们也就是在本地用用,不需要同步,于是写了个Java程序,找了个速度比较快的镜像(东北大学的,写程序的时候北理工的镜像站打不开),直接很暴力地把上面的updates目录下的包全都抓下来了,然后createrepo一下就可以用了。抓软件包的代码如下,其中HttpFactory是自定义的一个封装Apache HttpClient接口的工具类:
package cn.edu.ruc.extract; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.tags.TableColumn; import org.htmlparser.tags.TableRow; import org.htmlparser.tags.TableTag; import org.htmlparser.util.NodeList; import org.htmlparser.visitors.HtmlPage; public class Main { public static void main(String[] args) { try { //要抓取的目录 String baseUrl = "http://mirror.neu.edu.cn/centos/6.4/updates/x86_64/Packages/"; String html = HttpFactory.getInstance().getPageContent(baseUrl); //存放软件包的本地目录 String rootDir = "/home/hadoop/updates/"; Parser parser = Parser.createParser(html, "utf-8"); HtmlPage page = new HtmlPage(parser); parser.visitAllNodesWith(page); NodeFilter filter = new TagNameFilter("TABLE"); NodeList nodes = page.getBody().extractAllNodesThatMatch(filter, true); int num = 0; for (int i = 0; i < nodes.size(); ++i) { TableTag tableTag = (TableTag) nodes.elementAt(i); TableRow[] rows = tableTag.getRows(); for (TableRow row : rows) { TableColumn[] columns = row.getColumns(); if (columns != null && columns.length > 0) { if (num > 0) { String fileName = columns[0].toPlainTextString().trim(); String url = baseUrl + fileName; System.out.println(url); File rpmFile = new File(rootDir + fileName); FileOutputStream outputStream = new FileOutputStream(rpmFile); InputStream inputStream = HttpFactory.getInstance().getResponseEntity(url).getContent(); byte b[] = new byte[1024*1024]; int j = 0; while ((j = inputStream.read(b)) != -1) { outputStream.write(b, 0, j); } outputStream.flush(); outputStream.close(); inputStream.close(); } num++; } } } System.out.println(num-1 + " packages downloaded."); } catch (Exception e) { e.printStackTrace(); } } }
补充:软件开发 , Java ,