获取网页上数据(图片、文字、视频)-b

时间:2023-03-08 17:41:37

Demo地址:http://download.****.net/detail/u012881779/8831835

本文演示如何通过 UIWebView 执行 JavaScript 来获取网页上的所有图片、全部 HTML、网页 title 以及网页内容文字等信息。

.h 文件  代码:

// Builds the URL string that is loaded into the web view. Two alternative targets are
// kept below, commented out, for reference.

// Web page search
//NSString *strPath = [NSString stringWithFormat:@"http://www.baidu.com/s?wd=%@&cl=3",theWord];
// Video
//NSString *strPath = [NSString stringWithFormat:@"http://www.itinge.com/music/16241.mp4"];

// Image search (active).
// Percent-encode only the search word, with the query delimiters removed from the allowed
// set, so that characters such as '&', '=', '+' or '#' typed by the user cannot change the
// structure of the query string. Escaping the finished URL as a whole leaves them untouched.
NSMutableCharacterSet *wordAllowedSet = [[NSCharacterSet URLQueryAllowedCharacterSet] mutableCopy];
[wordAllowedSet removeCharactersInString:@"&=+#?"];
NSString *encodedWord = [theWord stringByAddingPercentEncodingWithAllowedCharacters:wordAllowedSet];
// Encoding yields nil for a nil word (or a failed transformation); fall back to an empty
// word rather than sending the literal text "(null)".
NSString *strPath = [NSString stringWithFormat:@"http://image.baidu.com/search/index?tn=baiduimage&istype=2&ie=utf-8&word=%@", encodedWord ?: @""];

.m 文件  代码:

// Private class extension: protocol conformances and Interface Builder outlets.
@interface ViewController () <UISearchBarDelegate, UIWebViewDelegate, UIGestureRecognizerDelegate>
@property (weak, nonatomic) IBOutlet UISearchBar *searchBar;
@property (weak, nonatomic) IBOutlet UIWebView *webview;
@end

@implementation ViewController

// Makes this controller the delegate of both outlets, then installs the tap recognizer
// on the web view.
- (void)viewDidLoad {
    [super viewDidLoad];

    self.searchBar.delegate = self;
    self.webview.delegate = self;
    [self addTapOnWebView];
}

// Adds a tap recognizer to the web view that reports taps to -handleSingleTap:.
// cancelsTouchesInView is switched off so the touches are still delivered to the web view.
- (void)addTapOnWebView {
    UITapGestureRecognizer *tapRecognizer =
        [[UITapGestureRecognizer alloc] initWithTarget:self
                                                action:@selector(handleSingleTap:)];
    tapRecognizer.cancelsTouchesInView = NO;
    tapRecognizer.delegate = self;
    [self.webview addGestureRecognizer:tapRecognizer];
}

#pragma mark- TapGestureRecognizer
// UIGestureRecognizerDelegate callback. Unconditionally allows the recognizer to run at the
// same time as any other recognizer that asks.
- (BOOL)gestureRecognizer:(UIGestureRecognizer *)gestureRecognizer
    shouldRecognizeSimultaneouslyWithGestureRecognizer:(UIGestureRecognizer *)otherGestureRecognizer
{
    return YES;
}
/**
 * Handles a tap on the web view: logs the `src` of the element under the tap and the
 * top-level page URL, and opens the full-screen viewer when that `src` points at a
 * JPG/JPEG/PNG file.
 *
 * @param sender The tap recognizer that fired; its location is read in web-view coordinates.
 */
-(void)handleSingleTap:(UITapGestureRecognizer *)sender{
    CGPoint pt = [sender locationInView:_webview];

    // Ask the page for the `src` of whatever element sits under the tapped point.
    NSString *imgURL = [NSString stringWithFormat:@"document.elementFromPoint(%f, %f).src", pt.x, pt.y];
    NSString *urlToSave = [_webview stringByEvaluatingJavaScriptFromString:imgURL];
    NSLog(@"image url=%@", urlToSave);

    NSString *JsToGetHTMLSource = @"top.location.href";
    NSString *pageSource = [_webview stringByEvaluatingJavaScriptFromString:JsToGetHTMLSource];
    NSLog(@"\n\n__url=%@", pageSource);

    if (urlToSave.length == 0) {
        return;
    }

    // Read the extension from the URL's path component so that a query string
    // ("a.jpg?w=100") does not hide it, and compare case-insensitively so ".JPG" counts.
    // Fall back to the raw string when it cannot be parsed as a URL.
    NSString *urlPath = [[NSURL URLWithString:urlToSave] path];
    NSString *extensionSource = (urlPath.length > 0) ? urlPath : urlToSave;
    NSString *extension = [[extensionSource pathExtension] lowercaseString];
    if ([extension isEqualToString:@"jpg"] ||
        [extension isEqualToString:@"jpeg"] ||
        [extension isEqualToString:@"png"]) {
        [self showImageURL:urlToSave point:pt];
    }
}

// Show the image. Whether the HTML adapts to the screen resolution affects whether the
// tapped resource matches the resource that is obtained.
/**
 * Presents the image at `url` in a screen-sized, black-backed image view centered on
 * self.view, and hides the navigation bar. Tapping the overlay calls -handleSingleViewTap:.
 *
 * @param url   Image URL string, passed to -showImageFromURL:placeHolder:CompletionBlock:.
 * @param point Tap location supplied by the caller. Not used for layout: the overlay is
 *              always centered on self.view.
 */
-(void)showImageURL:(NSString *)url point:(CGPoint)point
{
    UIImageView *showView = [[UIImageView alloc] initWithFrame:[[UIScreen mainScreen] bounds]];
    // Centering on `point` first would be overwritten immediately, so only the final
    // center is assigned.
    showView.center = self.view.center;

    showView.backgroundColor = [UIColor blackColor];
    showView.alpha = 1;
    showView.userInteractionEnabled = YES; // image views ignore touches unless enabled
    [self.view addSubview:showView];
    [showView setContentMode:UIViewContentModeScaleAspectFit];
    // NOTE(review): -showImageFromURL:... is not declared in this file; presumably an
    // image-loading category on UIImageView — confirm.
    [showView showImageFromURL:url placeHolder:nil CompletionBlock:nil];

    UITapGestureRecognizer *singleTap = [[UITapGestureRecognizer alloc] initWithTarget:self action:@selector(handleSingleViewTap:)];
    [showView addGestureRecognizer:singleTap];

    [self.navigationController setNavigationBarHidden:YES animated:YES];
}

// Remove the image viewer
/**
 * Dismisses the full-screen image viewer: removes every UIImageView that is a direct
 * subview of self.view and shows the navigation bar again.
 *
 * @param sender The tap recognizer attached to the overlay (unused).
 */
-(void)handleSingleViewTap:(UITapGestureRecognizer *)sender
{
    for (id obj in self.view.subviews) {
        if ([obj isKindOfClass:[UIImageView class]]) {
            [obj removeFromSuperview];
        }
    }
    // -showImageURL:point: hides the bar when the viewer opens, so dismissal has to bring
    // it back; hiding it again here would leave it hidden permanently.
    [self.navigationController setNavigationBarHidden:NO animated:YES];
}

- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

/*
 * Summary: reading page information through JavaScript
 *
 * Get the whole HTML:  NSString *lJs = @"document.documentElement.innerHTML";
 * Get the page title:  NSString *lJs2 = @"document.title";
 * UIWebView *lWebView = [self getCurrentWebView];
 * NSString *lHtml1 = [lWebView stringByEvaluatingJavaScriptFromString:lJs];
 * NSString *lHtml2 = [lWebView stringByEvaluatingJavaScriptFromString:lJs2];
 *
 * JavaScript expressions for the current page's URL, title, etc.:
 * thisURL = document.URL;
 * thisHREF = document.location.href;
 * thisSLoc = self.location.href;
 * thisDLoc = document.location;
 * thisTLoc = top.location.href;
 * thisPLoc = parent.document.location;
 * thisTHost = top.location.hostname;
 * thisHost = location.hostname;
 * thisTitle = document.title;
 * thisProtocol = document.location.protocol;
 * thisPort = document.location.port;
 * thisHash = document.location.hash;
 * thisSearch = document.location.search;
 * thisPathname = document.location.pathname;
 * thisHtml = document.documentElement.innerHTML;
 * thisBodyText = document.documentElement.innerText; // text content of the page
 * thisBodyText = document.body.innerText;            // text content of the page
 *
 * Open question from the original author: why do the last two return the same text?
 * Presumably because innerText only yields rendered text and nothing inside <head> is
 * rendered — unverified.
 */
/**
 * "Fetch" action: reads the HTML currently held by the web view and passes it to
 * -searchPictureFromHTML:, or shows an alert asking the user to search first when the web
 * view has no content. Alternative extraction snippets (video metadata, page text) are
 * kept inside the method, commented out.
 *
 * @param sender The control that triggered the action (unused).
 */
- (IBAction)receiveAction:(id)sender {
    /* 1. Video */
    /*
     // Read the metadata of the video in the page
     NSString *getVideoTitle = [_webview getVideoTitle];
     NSLog(@"\n\n 视频名称 : %@",getVideoTitle);
     double getVideoDuration = [_webview getVideoDuration];
     NSLog(@"\n\n 视频总时间 : %f",getVideoDuration);
     double getVideoCurrentTime = [_webview getVideoCurrentTime];
     NSLog(@"\n\n 视频当前时间 : %f",getVideoCurrentTime);
     */

    /* 2. Web page */
    /*
    // Collect every image in the page
    NSString *imageUrls = [_webview stringByEvaluatingJavaScriptFromString:@"var str=new Array();""$('img').each(function(){str.push($(this).attr('src'));});"
                           "str.join(',');"];
    NSLog(@"\n\n 所有图片 : %@",imageUrls);

    // Get the whole HTML
    NSString *lJs = @"document.documentElement.innerHTML";
    NSString *lHtml1 = [_webview stringByEvaluatingJavaScriptFromString:lJs];
    //NSLog(@"1.%@",lHtml1);

    // Get the page title
    NSString *lJs2 = @"document.title";
    NSString *lHtml2 = [_webview stringByEvaluatingJavaScriptFromString:lJs2];
    NSLog(@"2.%@",lHtml2);

    // thisURL = document.URL
    NSString *lJs3 = @"document.URL";
    NSString *lHtml3 = [_webview stringByEvaluatingJavaScriptFromString:lJs3];
    NSLog(@"3.%@",lHtml3);

    // Get the page's text content
    NSString *lJs4 = @"document.documentElement.innerText";
    NSString *lHtml4 = [_webview stringByEvaluatingJavaScriptFromString:lJs4];
    NSLog(@"4.%@",lHtml4);

    // Get the page's text content
    NSString *lJs5 = @"document.body.innerText";
    NSString *lHtml5 = [_webview stringByEvaluatingJavaScriptFromString:lJs5];
    NSLog(@"5.%@",lHtml5);
    */

    /* 3. Images */
    // Get the whole HTML
    NSString *innerHTML = @"document.documentElement.innerHTML";
    NSString *innerHTMLString = [_webview stringByEvaluatingJavaScriptFromString:innerHTML];

    // Scan for images only when there is something to scan. A nil or empty result means
    // the script produced nothing; the bare head/body skeleton is presumably what a web
    // view reports before any page has been loaded — confirm.
    BOOL hasContent = innerHTMLString.length > 0 &&
                      ![innerHTMLString isEqualToString:@"<head></head><body></body>"];
    if (hasContent) {
        [self searchPictureFromHTML:innerHTMLString];
    } else {
        UIAlertView *alert = [[UIAlertView alloc] initWithTitle:nil message:@"请先搜索关键字" delegate:nil cancelButtonTitle:@"确定" otherButtonTitles:nil];
        [alert show];
    }
}

// Scan for images
/**
 * Extracts candidate ".jpg" URLs from raw HTML, removes duplicates, and pushes a
 * PictureViewController that receives the resulting list in `valueArr`.
 *
 * The HTML is split on "http://". For every piece that follows a separator, the text up
 * to the first ".jpg" is taken as the URL body and accepted when -judgeStringIsPicture:
 * and -judgeStringIsNull: both return YES for it.
 *
 * @param theHTML HTML source to scan; nil yields an empty result list.
 */
-(void)searchPictureFromHTML:(NSString *)theHTML{
    // Example of a match: "http://img0.bdstatic.com/img/image/shouye/qwscmeb02.jpg"
    NSMutableArray *picMutableArr = [[NSMutableArray alloc] init];
    NSArray *picHttpArr = [theHTML componentsSeparatedByString:@"http://"];

    // Index 0 is the text in front of the first "http://" and therefore never a URL body.
    for (NSUInteger i = 1; i < picHttpArr.count; i++) {
        NSString *tempStr = [picHttpArr objectAtIndex:i];
        NSArray *tempArr = [tempStr componentsSeparatedByString:@".jpg"];
        // A single component means this piece contains no ".jpg" at all; appending
        // ".jpg" to it would fabricate a URL that never appeared in the page.
        if (tempArr.count < 2) {
            continue;
        }
        NSString *firstStr = [tempArr firstObject];
        // Keep the piece only if it has no markup/script characters and is not blank.
        if ([self judgeStringIsPicture:firstStr] && [self judgeStringIsNull:firstStr]) {
            NSString *picUrl = [NSString stringWithFormat:@"http://%@.jpg", firstStr];
            [picMutableArr addObject:picUrl];
        }
    }

    // Drop duplicate images
    picMutableArr = [self cleanRepeatPicture:picMutableArr];

    // Show the images that were found
    PictureViewController *picVc = [[PictureViewController alloc] initWithNibName:@"PictureViewController" bundle:nil];
    picVc.valueArr = picMutableArr;
    [self.navigationController pushViewController:picVc animated:YES];
}

// Check whether a string can be an image link
/**
 * Tells whether `string` is free of characters that rule it out as an image link.
 *
 * @param string Candidate text (the URL body without scheme and extension).
 * @return NO when the string contains any of < > { } [ ] ( ) | $ ? ; and YES otherwise,
 *         including for a nil or empty string.
 */
-(BOOL)judgeStringIsPicture:(NSString *)string{
    // Nothing to inspect. This guard is also required because messaging nil for an
    // NSRange would not yield NSNotFound.
    if (string.length == 0) {
        return YES;
    }
    // One set lookup replaces the per-character comparison against a list of markers.
    // "|" already covers "||", which a one-character comparison could never match.
    NSCharacterSet *forbidden = [NSCharacterSet characterSetWithCharactersInString:@"<>{}[]()|$?;"];
    return [string rangeOfCharacterFromSet:forbidden].location == NSNotFound;
}

// Remove duplicate images
/**
 * Returns a new array holding the URLs of `picarr` without duplicates.
 *
 * The input is walked from its last element to its first and each kept URL is inserted at
 * the front, so the result keeps the input order and, for duplicates, the occurrence
 * nearest the end. A URL counts as a duplicate of an already kept one when the two strings
 * are equal, or when the URL contains "&fm" and its text before the first "&fm" equals the
 * kept URL's text before its first "&fm".
 *
 * @param picarr URL strings to filter; not modified.
 * @return A newly created mutable array with the surviving URLs.
 */
-(NSMutableArray *)cleanRepeatPicture:(NSMutableArray *)picarr{
    NSMutableArray *uniqueUrls = [[NSMutableArray alloc] init];

    for (NSString *candidate in [picarr reverseObjectEnumerator]) {
        NSArray *candidateParts = [candidate componentsSeparatedByString:@"&fm"];
        BOOL candidateHasFm = candidateParts.count > 1;
        BOOL alreadyKept = NO;

        for (NSString *keptUrl in uniqueUrls) {
            if ([keptUrl isEqualToString:candidate]) {
                alreadyKept = YES;
                break;
            }
            if (candidateHasFm) {
                NSString *keptPrefix = [[keptUrl componentsSeparatedByString:@"&fm"] firstObject];
                if ([[candidateParts firstObject] isEqualToString:keptPrefix]) {
                    alreadyKept = YES;
                    break;
                }
            }
        }

        if (!alreadyKept) {
            [uniqueUrls insertObject:candidate atIndex:0];
        }
    }

    return uniqueUrls;
}

// HTML
/**
 * Loads `urlStr` into the web view with scale-to-fit enabled, then makes the web view
 * transparent and strips the scroll indicators, bounce and image-view decorations from its
 * internal scroll view.
 *
 * @param urlStr String form of the URL to load.
 */
-(void)detailsWithUrl:(NSString *)urlStr{
    NSURLRequest *pageRequest = [NSURLRequest requestWithURL:[NSURL URLWithString:urlStr]];
    [_webview loadRequest:pageRequest];
    _webview.scalesPageToFit = YES;

    // Hide the scroll bars
    _webview.backgroundColor = [UIColor clearColor];
    _webview.opaque = NO;
    for (UIView *child in _webview.subviews) {
        child.backgroundColor = [UIColor clearColor];
        if (![child isKindOfClass:[UIScrollView class]]) {
            continue;
        }

        UIScrollView *scroller = (UIScrollView *)child;
        scroller.tag = 1321;
        scroller.showsHorizontalScrollIndicator = NO; // horizontal indicator
        scroller.showsVerticalScrollIndicator = NO;   // indicator on the right-hand side
        scroller.bounces = NO;
        scroller.contentSize = CGSizeMake(1, scroller.contentSize.height);

        for (UIView *decoration in scroller.subviews) {
            if ([decoration isKindOfClass:[UIImageView class]]) {
                // The dark images drawn when scrolling past the edge, i.e. the top and
                // bottom shadows seen while dragging.
                decoration.hidden = YES;
            }
        }
    }
}

// Check that the string is not entirely blank
/**
 * Tells whether `string` has any content other than space characters.
 * Despite the name, YES means the string is NOT blank.
 *
 * @param string Text to inspect; may be nil.
 * @return YES when at least one character differs from the space character (U+0020);
 *         NO for nil, for an empty string, and for a string made only of spaces.
 */
-(BOOL)judgeStringIsNull:(NSString *)string{
    if (string == nil) {
        return NO;
    }

    NSUInteger unitCount = string.length;
    for (NSUInteger idx = 0; idx < unitCount; idx++) {
        // The first non-space character settles the answer.
        if ([string characterAtIndex:idx] != ' ') {
            return YES;
        }
    }
    return NO;
}

#pragma mark UISearchBarDelegate
/**
 * UISearchBarDelegate callback for the Search button. Builds the search URL for the typed
 * keyword and loads it into the web view, or shows an alert when the keyword is blank.
 *
 * @param searchBar The search bar whose Search button was tapped.
 */
- (void)searchBarSearchButtonClicked:(UISearchBar *)searchBar{
    // Validate and send the same text: both reads go through the `searchBar` argument.
    NSString *keyword = searchBar.text;
    if([self judgeStringIsNull:keyword]){
        // Search endpoint.
        // NOTE(review): NetPortShared is declared outside this file; presumably it returns
        // the URL string to load — confirm.
        NSString *urlStr = [NetPortShared baiduSearchDelegate:self andTag:33333 andWord:keyword];
        [self detailsWithUrl:urlStr];
    }else{
        UIAlertView *alert = [[UIAlertView alloc] initWithTitle:nil message:@"请输入关键字" delegate:nil cancelButtonTitle:@"确定" otherButtonTitles:nil];
        [alert show];
    }
}

@end

示意图:

<img src="http://mmbiz.qpic.cn/mmbiz_png/g4uoJOMA38Lib8VMmfWNTzVHSJ9ibvJrEEiasibKYtG1EgbIr4s5J7QzTEk7VH1vf2f0aQAB6MY5hcHnTHcGFibWnvA/0?wx_fmt=png" alt="" data-w="640" data-ratio="1.775" data-type="png" />

获取网页上数据(图片、文字、视频)-b

文/作者:枫志应明

c博客地址:http://blog.****.net/wsyx768/article/details/46618125