November 08, 2024, 05:48:49 PM

News:

IWBasic runs in Windows 11!


Quickest way to get the title of a webpage?

Started by WayneA, February 19, 2011, 02:29:30 AM

Previous topic - Next topic

0 Members and 1 Guest are viewing this topic.

WayneA

I'm trying to get the title of a webpage without getting the full webpage downloaded and then scanning for the appropriate tag.

I do have a working prototype that'll do the job, but it's more time-consuming than I would like.

Note: I didn't bother covering case differences. This example assumes the <title> tags will be lower case and will only get the title in those instances.

' Program entry: creates the main window, waits until it is closed, then exits.
' Bring in the Windows declarations (presumably the source of WM_GETTEXTLENGTH
' and _GetSysColor used in this file - verify against windows.inc).
$Include "windows.inc"
' Turn automatic variable definition off so every variable must be declared.
AutoDefine "Off"
' The one application window; its messages are routed to wndMainProc.
Dim wndMain As Window
OpenWindow wndMain,0,0,350,350,@MinBox,0,"Webpage Title Snagger",&wndMainProc
' 15 is COLOR_BTNFACE in the Win32 API, i.e. the standard button/dialog face colour.
SetWindowColor wndMain,_GetSysColor(15)
' Stay here until wndMain has been closed (wndMainProc closes it on @IDCloseWindow).
WaitUntil IsWindowClosed wndMain
' Counterpart of the NCSInit call made in GrabHTML.
' NOTE(review): this runs even if GrabHTML was never called - confirm NCSFree is safe without a prior NCSInit.
NCSFree()
End

' Message handler for wndMain.
'   @IDCreate      - centres the window and builds the three controls:
'                    1 = URL edit box, 2 = "Get" button, 3 = read-only rich edit
'                    that receives the raw server response.
'   @IDControl     - when button 2 reports notify code 0, downloads the URL typed
'                    into control 1 and puts the page title in the window caption.
'   @IDCloseWindow - closes the window.
Sub wndMainProc
    Dim cliLeft,cliTop,cliWidth,cliHeight,ctrlId As Int
    Select @Message
        Case @IDCreate
            CenterWindow wndMain
            GetClientSize wndMain,cliLeft,cliTop,cliWidth,cliHeight
            ' Top row: URL box fills the width except for a 50 pixel wide button
            Control wndMain,@Edit,"",0,0,cliWidth-50,25,@Border|@CTEditAutoH,1
            Control wndMain,@Button,"Get",cliWidth-50,0,50,25,0,2
            ' Everything below the top row shows the downloaded text
            Control wndMain,@RichEdit,"",0,25,cliWidth,cliHeight-25,@Border|@CTEditMulti|@HScroll|@VScroll|@CTEditAutoH|@CTEditAutoV|@CTEditRO,3
            ' Same font for controls 1 through 3
            ctrlId=1
            While ctrlId<=3
                SetFont wndMain,"MS Sans Serif",8,400,0,ctrlId
                ctrlId=ctrlId+1
            WEnd
        Case @IDControl
            If @ControlID=2 Then
                If @NotifyCode=0 Then
                    ' GrabHTML fills control 3; the title is then parsed out of that text
                    If GrabHTML(GetControlText(wndMain,1),wndMain,3) Then
                        SetCaption(wndMain,GrabTitle(GetControlText(wndMain,3)))
                    EndIf
                EndIf
            EndIf
        Case @IDCloseWindow
            CloseWindow wndMain
    EndSelect
EndSub

' Returns the text between the first <title> tag and the next </title> tag.
'   HTML - the page source (or raw server response) to scan.
' The tags are matched without regard to case (<title>, <TITLE>, <Title>, ...);
' the returned text keeps its original case.
' Returns "No title found." when either tag is missing.
Sub GrabTitle(HTML As String),String
    Dim start,ending As Int
    ' Search a lower-cased view of the text. Lower-casing does not change the
    ' length, so positions found there are valid for HTML as well.
    start=InStr(LCase$(HTML),"<title>")
    ' Only look for the closing tag once the opening tag has been located;
    ' otherwise InStr would be handed a start position of 0.
    If start>0 Then
        ending=InStr(LCase$(HTML),"</title>",start)
        If ending>0 Then
            ' 7 = Len("<title>"): skip the opening tag itself
            Return Mid$(HTML,start+7,ending-start-7)
        EndIf
    EndIf
    Return "No title found."
EndSub

' Requests a page over plain HTTP (port 80) and appends the raw response to an
' edit control.
'   url     - address to fetch, with or without a "scheme://" prefix.
'             NOTE: the string is edited in place (scheme stripped, cut at the
'             first "/"), so pass a value you do not need afterwards.
'   parent  - window that owns the output control.
'   cid     - id of the edit control that receives the response; it is cleared
'             before the request is sent.
'   timeout - how many times to poll for the first data before giving up
'             (each poll is a "Wait 1" plus an NCSWaitForData call).
' Returns True when a connection to the server was made (even if no data was
' received afterwards), False when NCSInit or the connection attempt failed.
Sub GrabHTML(url As String,parent As Window,cid As Int,Opt timeout=10000 As Int),Int
    ' status has to be signed: as a UInt every "status>=0" test below is always
    ' true, so a failed init/connect/send/read could never be detected.
    Dim status As Int
    Dim sock As UInt
    Dim msg=False,reading=True,i,getpos As Int
    Dim buf[4024] As IString
    Dim getfile="/" As String
    Dim crlf As String
    ' HTTP requires CR LF as the line terminator, not a bare line feed
    crlf=Chr$(13)+Chr$(10)
    SetControlText(parent,cid,"")
    status=NCSInit(1,0)
    If status>=0 Then
        ' Drop a leading "http://" style prefix
        getpos=InStr(url,"://")
        If getpos>0 Then url=Mid$(url,getpos+3,Len(url))
        ' Split "host/path" into the host (left in url) and the path (getfile)
        getpos=InStr(url,"/")
        If getpos>0 Then
            getfile=Mid$(url,getpos,Len(url)-getpos+1)
            ' Overwrite the "/" with a terminator so url holds only the host name
            url[getpos-1]=0
            DebugPrint(url)
        EndIf
        status=NCSConnectToServer(url,80)
        If status>=0 Then
            sock=status
            buf="GET "+getfile+" HTTP/1.1"+crlf+"Host: "+url+crlf+crlf
            status=NCSSendData(sock,buf,Len(buf))
            If status>=0 Then
                ' Poll until the first data shows up or the poll budget is used up
                For i=0 to timeout
                    Wait 1
                    If NCSWaitForData(sock,1) Then
                        msg=True
                        BreakFor
                    EndIf
                Next i
                If msg Then
                    ' Keep reading until no more data arrives or a read fails
                    While reading
                        If NCSWaitForData(sock,1000) Then
                            ' NOTE(review): assumes NCSReadData leaves buf null-terminated - confirm
                            status=NCSReadData(sock,buf,4023,1000)
                            If status>=0 Then
                                EditAppend(parent,cid,buf)
                            Else
                                reading=False
                            EndIf
                        Else
                            reading=False
                        EndIf
                    WEnd
                EndIf
            EndIf
            NCSCloseSocket(sock)
            Return True
        EndIf
    EndIf
    Return False
EndSub

' Appends text to the end of an edit control.
'   win  - window that owns the control
'   cid  - id of the edit control
'   text - text to add after the current contents
Sub EditAppend(win As Window,cid As Int,text As String)
    Dim endPos As Int
    ' Current length of the control's text = position just past its last character
    endPos=SendMessage(win,WM_GETTEXTLENGTH,0,0,cid)
    ' Collapse the selection to that position, then replace the (empty) selection
    ControlCMD(win,cid,@EDSetSelection,endPos,endPos)
    ControlCMD(win,cid,@EDReplaceSel,text)
EndSub


I also found a problem with this method on a phpboard forum while testing. It seems some webpages generate the title dynamically, so without actually running the relevant JavaScript (or whatever else is used) the title will be incorrect. Is there a way to get the appropriate title from more websites without having to dump all this into a browser control and then pull the title?
99 little bugs in the code,
99 bugs in the code,
Fix one bug,
Compile again,
104 little bugs in the code...

All code I post is in the public domain.