百度空间 | 百度首页 
 
查看文章
 
利用HttpClient获取网页内容
2009-07-01 11:10

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
publicclassHttpClientExample{  
  //获得ConnectionManager,设置相关参数  
  privatestaticMultiThreadedHttpConnectionManagermanager=newMultiThreadedHttpConnectionManager();  
  privatestaticintconnectionTimeOut=20000;  
  privatestaticintsocketTimeOut=10000;  
  privatestaticintmaxConnectionPerHost=5;  
  privatestaticintmaxTotalConnections=40;  
  //标志初始化是否完成的flag  
  privatestaticbooleaninitialed=false;  
  //初始化ConnectionManger的方法  
  publicstaticvoidSetPara(){  
    manager.getParams().setConnectionTimeout(connectionTimeOut);  
    manager.getParams().setSoTimeout(socketTimeOut);  
    manager.getParams().setDefaultMaxConnectionsPerHost(  
        maxConnectionPerHost);  
    manager.getParams().setMaxTotalConnections(maxTotalConnections);  
    initialed=true;  
  }  
  //通过get方法获取网页内容  
  publicstaticStringgetGetResponseWithHttpClient(Stringurl,Stringencode){  
    HttpClientclient=newHttpClient(manager);  
    if(initialed){  
      HttpClientExample.SetPara();  
    }  
    GetMethodget=newGetMethod(url);  
    get.setFollowRedirects(true);  
    Stringresult=null;  
    StringBufferresultBuffer=newStringBuffer();  
    try{  
      client.executeMethod(get);  
      //在目标页面情况未知的条件下,不推荐使用getResponseBodyAsString()方法  
      //StringstrGetResponseBody=post.getResponseBodyAsString();  
      BufferedReaderin=newBufferedReader(newInputStreamReader(get  
          .getResponseBodyAsStream(),get.getResponseCharSet()));  
      StringinputLine=null;  
      while((inputLine=in.readLine())!=null){  
        resultBuffer.append(inputLine);  
        resultBuffer.append("n");  
      }  
      in.close();  
      result=resultBuffer.toString();  
      //iso-8859-1isthedefaultreadingencode  
      result=HttpClientExample.ConverterStringCode(resultBuffer  
          .toString(),get.getResponseCharSet(),encode);  
    }catch(Exceptione){  
      e.printStackTrace();  
      result="";  
    }finally{  
        
      get.releaseConnection();  
      returnresult;  
    }  
  }  
  publicstaticStringgetPostResponseWithHttpClient(Stringurl,Stringencode){  
    HttpClientclient=newHttpClient(manager);  
    if(initialed){  
      HttpClientExample.SetPara();  
    }  
    PostMethodpost=newPostMethod(url);  
    post.setFollowRedirects(false);  
    StringBufferresultBuffer=newStringBuffer();  
    Stringresult=null;  
    try{  
      client.executeMethod(post);  
      BufferedReaderin=newBufferedReader(newInputStreamReader(post  
          .getResponseBodyAsStream(),post.getResponseCharSet()));  
      StringinputLine=null;  
      while((inputLine=in.readLine())!=null){  
        resultBuffer.append(inputLine);  
        resultBuffer.append("n");  
      }  
      in.close();  
      //iso-8859-1isthedefaultreadingencode  
      result=HttpClientExample.ConverterStringCode(resultBuffer  
          .toString(),post.getResponseCharSet(),encode);  
    }catch(Exceptione){  
      e.printStackTrace();  
      result="";  
    }finally{  
        
      post.releaseConnection();  
      returnresult;  
    }  
  }  
  publicstaticStringgetPostResponseWithHttpClient(Stringurl,  
      Stringencode,NameValuePair[]nameValuePair){  
    HttpClientclient=newHttpClient(manager);  
    if(initialed){  
      HttpClientExample.SetPara();  
    }  
    PostMethodpost=newPostMethod(url);  
    post.setRequestBody(nameValuePair);  
    post.setFollowRedirects(false);  
    Stringresult=null;  
    StringBufferresultBuffer=newStringBuffer();  
    try{  
      client.executeMethod(post);  
      BufferedReaderin=newBufferedReader(newInputStreamReader(post  
          .getResponseBodyAsStream(),post.getResponseCharSet()));  
      StringinputLine=null;  
      while((inputLine=in.readLine())!=null){  
        resultBuffer.append(inputLine);  
        resultBuffer.append("n");  
      }  
      in.close();  
      //iso-8859-1isthedefaultreadingencode  
      result=HttpClientExample.ConverterStringCode(resultBuffer  
          .toString(),post.getResponseCharSet(),encode);  
    }catch(Exceptione){  
      e.printStackTrace();  
      result="";  
    }finally{  
      post.releaseConnection();  
      returnresult;  
    }  
  }  
  privatestaticStringConverterStringCode(Stringsource,StringsrcEncode,  
      StringdestEncode){  
    if(source!=null){  
      try{  
        returnnewString(source.getBytes(srcEncode),destEncode);  
      }catch(UnsupportedEncodingExceptione){  
        //TODOAuto-generatedcatchblock  
        e.printStackTrace();  
        return"";  
      }  
    }else{  
      return"";  
    }  
  }  
} 

之后,就可以通过下面的代码获得目标网页:

  String source = HttpClientExample.getGetResponseWithHttpClient("http://www.163.com", "GBK");

  注意,在默认情况下,HttpClient的Request的Header中

  User-Agent的值是Jakarta Commons-HttpClient 3.0RC1,如果需要改变它(例如,变为Mozilla/4.0),必须在调用之前运行如下语句:

  System.getProperties().setProperty("httpclient.useragent", "Mozilla/4.0");


类别:Http Html | 添加到搜藏 | 浏览() | 评论 (0)
 
最近读者:
 
网友评论:
发表评论:
姓 名:
网址或邮箱: (选填)
内 容:
验证码: 请点击后输入四位验证码,字母不区分大小写
      

     

©2009 Baidu