User:AmiDaniel/VP/RSS source

The following is the method being used in VandalProof version 1.3 to retrieve items from the recent changes RSS feed, and as I've had many requests for it, I decided to copy it here. This method will work for an RSS RC feed on any foundation Wiki regardless of language, etc.

To use it, you will need to copy the following code into a module. Then you will need to retrieve the innerHTML of the RSS feed you wish to scrape (on en.wikipedia, it can be found at http://en.wikipedia.org/w/index.php?title=Special:Recentchanges&feed=rss). Then pass the innerHTML to SplitItems (like so: SplitItems WB_RSS.Document.body.innerHTML). That will then populate the RSSItems variable with every RC item it finds in the feed.

Option Explicit

Public Type RSSItem BodyContent As String sUser As String sArticleName As String sPageAddress As String sSummary As String sAdded As String sRemoved As String sMatches As String sNewTime As String sOldTime As String End Type

Public RSSItems As RSSItem

Public Sub SplitItems(ByVal str$) Dim i%   On Error Resume Next i = UBound(RSSItems) If Err Then Err.Clear ReDim RSSItems(0) End If   On Error GoTo 0 Do Until InStr(1, LCase(str), " ") = 0 ReDim Preserve RSSItems(i) With RSSItems(i) .BodyContent = Left(str, InStr(1, LCase(str), " ") - 1) .BodyContent = FindAndReplace(.BodyContent, """/w", """" & GlVars.Root & "/w") .sArticleName = BetwixtStr(.BodyContent, " ", " ") .sPageAddress = BetwixtStr(.BodyContent, " ", " ") .sUser = BetwixtStr(.BodyContent, "", "") .sNewTime = BetwixtStr(.BodyContent, "", " ") .sSummary = BetwixtStr(.BodyContent, " ", " ") .sSummary = FindAndReplace(.sSummary, " ", "/*") .sSummary = FindAndReplace(.sSummary, " ", "*/") .sAdded = GetAdded(.BodyContent) .sRemoved = GetRemoved(.BodyContent) .BodyContent = FindAndReplace(.BodyContent, " " & .sPageAddress & " ", "" & .sArticleName & " (last diff) (hist)") If .sArticleName = "" & GlVars.SpecialText & "Log/newusers" Then .BodyContent = .BodyContent & Chr(13) & Chr(10) & Chr(13) & Chr(10) & "Block Username" End If       End With str = Right(str, Len(str) - InStr(1, LCase(str), " ") - Len(" ") + 1) 'If InStr(1, lastinstr, LCase(str), " ") > 0 Then lastinstr = InStr(1, lastinstr, LCase(str), " ") i = i + 1 Loop End Sub Public Function BetwixtStr$(ByVal sIn$, ByVal sFirst$, ByVal sLast$) If InStr(sIn, sLast) Then BetwixtStr = Left(sIn, InStrRev(sIn, sLast) - 1) If InStr(BetwixtStr, sFirst) Then BetwixtStr = Right(BetwixtStr, Len(BetwixtStr) - InStr(BetwixtStr, sFirst) - Len(sFirst) + 1) End If   End If End Function

Public Function GetAdded$(ByVal sIn$) Dim fields Debug.Print Debug.Print sIn If InStr(1, sIn, " New page ") Then GetAdded = "##NEWPAGE##" Do Until InStr(1, UCase(sIn), "") = 0 sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") - Len("<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") + 1) GetAdded = GetAdded & sIn GetAdded = Left(GetAdded, InStr(1, LCase(GetAdded), " ") - 1) Loop GetAdded = FindAndReplace(GetAdded, " ", "") GetAdded = FindAndReplace(GetAdded, " ", "") GetAdded = FindAndReplace(GetAdded, "&lt;/sup&gt;", "") End Function

Public Function GetRemoved$(ByVal sIn$) Dim fields 'If InStr(1, sIn, " New page ") Then GetRemoved = "##NEWPAGE##" Do Until InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) = 0 sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) - Len("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">") + 1) GetRemoved = GetRemoved & sIn GetRemoved = Left(GetRemoved, InStr(1, LCase(GetRemoved), " ") - 1) Loop GetRemoved = FindAndReplace(GetRemoved, " ", "") GetRemoved = FindAndReplace(GetRemoved, " ", "") GetRemoved = FindAndReplace(GetRemoved, "&lt;/sup&gt;", "") End Function

Function FindAndReplace(ByVal strIn$, ByVal strFind$, ByVal strReplace$) Dim lastInstr%, lastInstr_New% lastInstr = 1 Do Until InStr(lastInstr, strIn, strFind) = 0 lastInstr_New = InStr(lastInstr, strIn, strFind) strIn = Left(strIn, InStr(lastInstr, strIn, strFind) - 1) & strReplace & Right(strIn, Len(strIn) - InStr(lastInstr, strIn, strFind) - Len(strFind) + 1) lastInstr = lastInstr_New Loop FindAndReplace = strIn End Function