macro for finding duplicate text in word that excluded in-text citations

silk186

[H]ard|Gawd
Joined
Feb 26, 2008
Messages
1,628
I'm looking for a macro for finding duplicate text in my thesis which is several hundred pages long. I have found a few that have helped. I'm wondering if anyone knows of one (as many have been written) that would exclude intext citations. Something to ignore text in brackets (name, year). It would make scanning the document much easier. The one I'm using also highlights 'd.','s.' 'y.' and 'r.' which is annoying as well.

I've attached 3 sets of code that I've found but am unable to compare which is better.

Sub DemoA()
Application.ScreenUpdating = False
Options.DefaultHighlightColorIndex = wdYellow
Dim RngA As Range, RngB As Range, i As Long, j As Long, strFnd As String, strTmp As String
With ActiveDocument
For i = 2 To .Paragraphs.Count - 1
Set RngA = .Paragraphs(i - 1).Range
Set RngB = .Range(.Paragraphs(i).Range.Start, .Range.End)
With RngA
strFnd = Trim(Split(.Text, vbCr)(0))
If Len(strFnd) > 0 Then
If .HighlightColorIndex <> wdYellow Then
If Len(strFnd) > 255 Then
strTmp = Left(strFnd, 255)
With RngB
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = strTmp
.Format = True
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.Execute
If .Found = True Then RngA.HighlightColorIndex = wdBrightGreen
End With
Do While .Find.Found = True
If Trim(Split(.Paragraphs.First.Range.Text, vbCr)(0)) = strFnd Then
.Paragraphs.First.Range.HighlightColorIndex = wdYellow
End If
.Collapse wdCollapseEnd
.Find.Execute
Loop
End With
Else
With RngB.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = strFnd
.Replacement.Text = "^&"
.Replacement.Highlight = True
.Format = True
.Forward = True
.Wrap = wdFindStop
.MatchWildcards = True
.Execute Replace:=wdReplaceAll
If .Found = True Then RngA.HighlightColorIndex = wdBrightGreen
End With
End If
End If
End If
End With
If i Mod 100 = 0 Then DoEvents
Next
End With
Application.ScreenUpdating = True
End Sub

Sub FindDuplicateSentences()
Application.ScreenUpdating = False
Dim i As Long, RngSrc As Range, RngFnd As Range
Const Clr As Long = wdBrightGreen
Dim eTime As Single
eTime = Timer
Options.DefaultHighlightColorIndex = Clr
With ActiveDocument
With .Range.Find
.ClearFormatting
.Replacement.ClearFormatting
.Forward = True
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Execute
End With
For i = 1 To .Sentences.Count
If i Mod 100 = 0 Then DoEvents
On Error Resume Next
Set RngSrc = .Sentences(i)
If RngSrc.HighlightColorIndex <> Clr Then
Set RngFnd = .Range(.Sentences(i).End, .Range.End)
If Len(RngSrc.Text) < 256 Then
With RngFnd.Find
.Text = RngSrc.Text
.Replacement.Text = "^&"
.Replacement.Highlight = True
.Wrap = wdFindStop
.Execute Replace:=wdReplaceAll
End With
Else
With RngFnd
With .Find
.Text = Left(RngSrc.Text, 255)
.Wrap = wdFindStop
.Execute
End With
Do While .Find.Found
If RngSrc.Text = .Duplicate.Text Then
RngSrc.HighlightColorIndex = Clr
.Duplicate.HighlightColorIndex = Clr
End If
.Collapse wdCollapseEnd
.Find.Execute
Loop
End With
End If
End If
Next
End With
' Report time taken. Elapsed time calculation allows for execution to extend past midnight.
MsgBox "Finished. Elapsed time: " & (Timer - eTime + 86400) Mod 86400 & " seconds."
Application.ScreenUpdating = True
End Sub

Option Explicit

Sub Sample()
Dim MyArray() As String
Dim n As Long, i As Long
Dim Col As New Collection
Dim itm

n = 0
'~~> Get all the sentences from the word document in an array
For i = 1 To ActiveDocument.Sentences.Count
n = n + 1
ReDim Preserve MyArray(n)
MyArray(n) = Trim(ActiveDocument.Sentences(i).Text)
Next

'~~> Sort the array
SortArray MyArray, 0, UBound(MyArray)

'~~> Extract Duplicates
For i = 1 To UBound(MyArray)
If i = UBound(MyArray) Then Exit For
If InStr(1, MyArray(i + 1), MyArray(i), vbTextCompare) Then
On Error Resume Next
Col.Add MyArray(i), """" & MyArray(i) & """"
On Error GoTo 0
End If
Next i

'~~> Highlight duplicates
For Each itm In Col
Selection.Find.ClearFormatting
Selection.HomeKey wdStory, wdMove
Selection.Find.Execute itm
Do Until Selection.Find.Found = False
Selection.Range.HighlightColorIndex = wdPink
Selection.Find.Execute
Loop
Next
End Sub

'~~> Sort the array
Public Sub SortArray(vArray As Variant, i As Long, j As Long)
Dim tmp As Variant, tmpSwap As Variant
Dim ii As Long, jj As Long

ii = i: jj = j: tmp = vArray((i + j) \ 2)

While (ii <= jj)
While (vArray(ii) < tmp And ii < j)
ii = ii + 1
Wend
While (tmp < vArray(jj) And jj > i)
jj = jj - 1
Wend
If (ii <= jj) Then
tmpSwap = vArray(ii)
vArray(ii) = vArray(jj): vArray(jj) = tmpSwap
ii = ii + 1: jj = jj - 1
End If
Wend
If (i < jj) Then SortArray vArray, i, jj
If (ii < j) Then SortArray vArray, ii, j
End Sub
 
Back
Top