반응형
C# -- IE automation #7 -- iframe 접근하기
환경 : windows 7 64bit, visual studio 2013 community
참고 : http://stackoverflow.com/a/14661214 --> CrossFrameIE class
http://stackoverflow.com/a/19275241 --> setting [STAThread]
1. iframe 내부에 접근할 때 InvalidCastException 이 발생함.
Main() 함수에 [STAThread] 특성을 지정하면 InvalidCastException 문제는 해결됨.
하지만 새로운 예외인 UnauthorizedAccessException 이 발생함.
** UnauthorizedAccessException 을 해결하기 위해 stackoverflow 에서 검색한 CrossFrameIE class 를 사용함.
<< 완성 소스 >>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using SHDocVw; | |
using mshtml; | |
using System.Runtime.InteropServices; | |
namespace IE_iframe_ex | |
{ | |
/// <summary>
/// Console demo that drives Internet Explorer through the SHDocVw COM API,
/// navigates to a page and prints the inner HTML of every iframe body,
/// including frames whose document cannot be read directly.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Opens a visible IE window, loads the page, then dumps the
    /// body HTML of each iframe to the console and waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    // [STAThread] is required here: per the notes accompanying this sample,
    // accessing iframe contents without it raised InvalidCastException.
    [STAThread]
    static void Main(string[] args)
    {
        var ie = new SHDocVw.InternetExplorer();
        ie.Visible = true;

        // Navigate2 takes its URL as "ref object"; a plain object local is
        // enough and avoids a late-bound (dynamic) call.
        object url = "http://www.daum.net";
        ie.Navigate2(ref url);

        // The page must finish loading before its elements can be queried.
        ie.Wait();

        mshtml.IHTMLDocument3 doc = ie.Document as mshtml.IHTMLDocument3;
        if (doc == null)
        {
            // The "as" cast yields null when the browser is not showing an HTML document.
            Console.Error.WriteLine("The loaded document is not an HTML document.");
            Console.ReadLine();
            return;
        }

        mshtml.IHTMLElementCollection iframeColl = doc.getElementsByTagName("iframe");
        foreach (mshtml.HTMLIFrame ifrElem in iframeColl)
        {
            if (ifrElem == null)
            {
                continue;
            }

            // Reading ifrElem.contentWindow.document directly raised
            // UnauthorizedAccessException, so go through the CrossFrameIE helper.
            mshtml.IHTMLDocument2 frameDoc = CrossFrameIE.GetDocumentFromWindow(ifrElem.contentWindow);
            if (frameDoc == null)
            {
                // The helper returns null on failure; skip this frame instead of crashing.
                Console.Error.WriteLine("Could not access the document of an iframe; skipping it.");
                continue;
            }

            mshtml.IHTMLElement bodyElement = frameDoc.body;
            if (bodyElement == null)
            {
                // A frame document may not have a body; nothing to print.
                continue;
            }

            Console.WriteLine(bodyElement.innerHTML);
        }

        // Keep the console open until the user presses Enter.
        Console.ReadLine();
    }
}
/// <summary>
/// Extension methods for <c>SHDocVw.InternetExplorer</c>.
/// </summary>
public static class SHDovVwEx
{
    /// <summary>
    /// Blocks the calling thread until the browser reports that it is no longer
    /// busy and its ready state is READYSTATE_COMPLETE, polling every 100 ms,
    /// then sleeps for an additional <paramref name="millisecond"/> milliseconds.
    /// </summary>
    /// <param name="ie">The Internet Explorer instance to wait on.</param>
    /// <param name="millisecond">Extra delay, in milliseconds, applied after loading completes.</param>
    public static void Wait(this SHDocVw.InternetExplorer ie, int millisecond = 0)
    {
        const int pollIntervalMs = 100;

        while (true)
        {
            // Busy is checked first; ReadyState is only read once Busy is false.
            bool finishedLoading = !ie.Busy
                && ie.ReadyState == SHDocVw.tagREADYSTATE.READYSTATE_COMPLETE;
            if (finishedLoading)
            {
                break;
            }

            System.Threading.Thread.Sleep(pollIntervalMs);
        }

        System.Threading.Thread.Sleep(millisecond);
    }
}
/// <summary>
/// Helper for obtaining the document of a frame window even when direct
/// access is refused because the frame hosts a document from another domain.
/// </summary>
public class CrossFrameIE
{
    // HRESULT for "access denied" (0x80070005).
    private const int E_ACCESSDENIED = unchecked((int)0x80070005L);

    // Service and interface identifiers passed to IServiceProvider.QueryService.
    // Kept readonly; local copies are handed to the by-ref COM call so the
    // shared values can never be overwritten by the callee.
    private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");
    private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

    /// <summary>
    /// Gets the document hosted by <paramref name="htmlWindow"/>. First tries the
    /// window's <c>document</c> property; if that is refused with an access-denied
    /// error, falls back to querying the window for its IWebBrowser2 object and
    /// reading the document from there.
    /// </summary>
    /// <param name="htmlWindow">The frame window whose document is wanted; may be null.</param>
    /// <returns>The frame's document, or null in case of any failure.</returns>
    public static IHTMLDocument2 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
    {
        if (htmlWindow == null)
        {
            return null;
        }

        // First try the usual way to get the document.
        try
        {
            return htmlWindow.document;
        }
        catch (COMException comEx)
        {
            // Only an access-denied failure is worth retrying through the fallback below.
            if (comEx.ErrorCode != E_ACCESSDENIED)
            {
                return null;
            }
        }
        catch (System.UnauthorizedAccessException)
        {
            // Access denied surfaced as a managed exception: continue with the fallback.
        }
        catch
        {
            // Any other error.
            return null;
        }

        // At this point the error was E_ACCESSDENIED because the frame contains a
        // document from another domain. IE tries to prevent a cross frame scripting
        // security issue.
        try
        {
            // Convert IHTMLWindow2 to the COM IServiceProvider.
            IServiceProvider serviceProvider = htmlWindow as IServiceProvider;
            if (serviceProvider == null)
            {
                return null;
            }

            // Use IServiceProvider.QueryService to get the IWebBrowser2 object.
            Guid serviceId = IID_IWebBrowserApp;
            Guid interfaceId = IID_IWebBrowser2;
            object service;
            int hr = serviceProvider.QueryService(ref serviceId, ref interfaceId, out service);

            // QueryService is declared with PreserveSig, so a failure comes back as a
            // negative HRESULT rather than an exception; check it explicitly.
            if (hr < 0 || service == null)
            {
                return null;
            }

            IWebBrowser2 browser = service as IWebBrowser2;
            if (browser == null)
            {
                return null;
            }

            // Get the document from IWebBrowser2; yields null if it is not an HTML document.
            return browser.Document as IHTMLDocument2;
        }
        catch
        {
            // Deliberate best effort: the contract is "null on any failure",
            // so COM errors raised by the fallback path are not propagated.
            return null;
        }
    }
}
/// <summary>
/// Managed declaration of the COM <c>IServiceProvider</c> interface.
/// This is NOT the .NET <c>System.IServiceProvider</c> interface.
/// </summary>
[ComImport]
[ComVisible(true)]
[Guid("6D5140C1-7436-11CE-8034-00AA006009FA")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
public interface IServiceProvider
{
    /// <summary>
    /// Asks the object for the service identified by <paramref name="guidService"/>
    /// and returns the requested interface on it.
    /// </summary>
    /// <param name="guidService">Identifier of the requested service.</param>
    /// <param name="riid">Identifier of the interface wanted on that service.</param>
    /// <param name="ppvObject">Receives the service object, marshaled as a COM interface.</param>
    /// <returns>The raw HRESULT of the call (the signature is preserved, so failures are not turned into exceptions).</returns>
    [PreserveSig]
    [return: MarshalAs(UnmanagedType.I4)]
    int QueryService(
        ref Guid guidService,
        ref Guid riid,
        [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
}
} | |
반응형
'C# Web Scraping' 카테고리의 다른 글
C# -- HTML Agility Pack 사용한 html 파싱 (1) | 2016.03.24 |
---|---|
C# -- IE automation #6 -- 구글 검색창 제어하기 (0) | 2016.02.09 |
C# -- IE automation #5 -- Attribute 제어하기 (0) | 2016.02.09 |
C# -- IE automation #4 -- html 구조 파헤치기 (0) | 2016.02.09 |
C# -- IE automation #3 -- internet explorer 제어하여 html 소스 가져오기 (4) | 2016.02.03 |