Files Used by the Dedup Algorithm
CDLMatchingProfile.xml
<?xml version="1.0" encoding="UTF-8"?>
<MatchingProfile
xmlns="http://www.exlibrisgroup.com/xsd/primo/platform"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.exlibrisgroup.com/xsd/primo/platform ../../../../../../primo_publishing/publish/src/main/com/exlibris/publish/xsd/matchingProfile_v1.0.xsd ">
<handlers>
<!-- SINGLE_MATCH: pairs field f20 with DedupStringComparator (match=800, mismatch=0). -->
<handler id="SINGLE_MATCH">
  <fieldID>f20</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">800</argument>
    <argument name="mismatch">0</argument>
  </arguments>
</handler>
<!-- CDLDate: pairs field f6 with DedupNumericComparator.
     The "within" argument carries param="2"; presumably the allowed numeric
     distance between the two values. Confirm against the comparator. -->
<handler id="CDLDate">
  <fieldID>f6</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupNumericComparator</name>
  <arguments>
    <argument name="match">+200</argument>
    <argument name="within" param="2">-25</argument>
    <argument name="mismatch">-250</argument>
  </arguments>
</handler>
<!-- CDLID: pairs fields f1, f2, f3 and f4 with CDLIDComparator.
     Arguments fall into two groups by name prefix: recID* and ISBN/ISSN*. -->
<handler id="CDLID">
  <fieldID>f1,f2,f3,f4</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLIDComparator</name>
  <arguments>
    <!-- Record-ID outcomes -->
    <argument name="recID_match">+200</argument>
    <argument name="recID_recIDInvalid_match">+100</argument>
    <argument name="recIDInvalid_match">+50</argument>
    <argument name="recID_mismatch">-470</argument>
    <argument name="recID_recIDInvalid_mismatch">-50</argument>
    <!-- ISBN / ISSN outcomes -->
    <argument name="ISBN_match">+85</argument>
    <argument name="ISBN_ISSN_match">+30</argument>
    <argument name="ISSN_ISSN_match">+10</argument>
    <argument name="ISSN_ISBN_mismatch">-225</argument>
  </arguments>
</handler>
<!-- CDLShortTitle: pairs field f5 with DedupStringComparator; only a "match" value (+450) is configured. -->
<handler id="CDLShortTitle">
  <fieldID>f5</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">+450</argument>
  </arguments>
</handler>
<!-- CDLSubShortTitle: same field (f5) and comparator as CDLShortTitle, but with the
     opposite-signed "match" value (-450). In <steps> it is listed right after threshold tr1.
     NOTE(review): presumably this backs the short-title score out again before the
     long-title comparison. Confirm. -->
<handler id="CDLSubShortTitle">
  <fieldID>f5</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">-450</argument>
  </arguments>
</handler>
<!-- CDLLongTitle: pairs field f7 with CDLTitleComparator.
     Besides match/within/mismatch it takes two keyword-related arguments. -->
<handler id="CDLLongTitle">
  <fieldID>f7</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLTitleComparator</name>
  <arguments>
    <argument name="match">600</argument>
    <argument name="within">350</argument>
    <argument name="keywords_weight_factor">450</argument>
    <argument name="keywords_order_base_weight">50</argument>
    <argument name="mismatch">-600</argument>
  </arguments>
</handler>
<!-- CDLCountryOfPub: pairs field f8 with DedupStringComparator (match=40, mismatch=-205). -->
<handler id="CDLCountryOfPub">
  <fieldID>f8</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">40</argument>
    <argument name="mismatch">-205</argument>
  </arguments>
</handler>
<!-- CDLPagination: pairs field f9 with CDLPageHandlerComparator.
     "match" and "within" each come in a gt and an lt variant; the cut-off that
     separates the two variants is not visible in this profile. -->
<handler id="CDLPagination">
  <fieldID>f9</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLPageHandlerComparator</name>
  <arguments>
    <argument name="matchgt">100</argument>
    <argument name="matchlt">50</argument>
    <argument name="withingt">50</argument>
    <argument name="withinlt">20</argument>
    <argument name="mismatch">-225</argument>
  </arguments>
</handler>
<!-- CDLPublisher: pairs field f10 with DedupStringComparator.
     "match" and "within" are configured with the same value (+100). -->
<handler id="CDLPublisher">
  <fieldID>f10</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">+100</argument>
    <argument name="within">+100</argument>
    <argument name="mismatch">-25</argument>
  </arguments>
</handler>
<!-- CDLMainEntry: pairs field f11 with CDLMainEntryComparator.
     Distinct values exist for both_missing and one_missing; both keyword
     arguments carry param="49" (meaning of the param is not shown here). -->
<handler id="CDLMainEntry">
  <fieldID>f11</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLMainEntryComparator</name>
  <arguments>
    <argument name="match">+125</argument>
    <argument name="both_missing">+75</argument>
    <argument name="one_missing">+25</argument>
    <argument name="keywords_weight_factor" param="49">80</argument>
    <argument name="keywords_order_base_weight" param="49">10</argument>
    <argument name="mismatch">-200</argument>
  </arguments>
</handler>
</handlers>
<!-- Thresholds referenced from <steps>. Only tr1 defines a lower bound;
     tr2 and tr3 define an upper bound only. -->
<thresholds>
  <threshold id="tr1">
    <upper_threshold>+850</upper_threshold>
    <lower_threshold>0</lower_threshold>
  </threshold>
  <threshold id="tr2">
    <upper_threshold>+875</upper_threshold>
  </threshold>
  <threshold id="tr3">
    <upper_threshold>+800</upper_threshold>
  </threshold>
</thresholds>
<!-- Step list: each entry names a handler id or a threshold id defined above.
     NOTE(review): presumably the steps run top to bottom and each threshold step
     tests the score accumulated so far. Confirm against the dedup engine. -->
<steps>
  <step type="handler">SINGLE_MATCH</step>
  <step type="threshold">tr3</step>

  <step type="handler">CDLID</step>
  <step type="handler">CDLShortTitle</step>
  <step type="handler">CDLDate</step>
  <step type="threshold">tr1</step>

  <step type="handler">CDLSubShortTitle</step>
  <step type="handler">CDLLongTitle</step>
  <step type="handler">CDLCountryOfPub</step>
  <step type="handler">CDLPagination</step>
  <step type="handler">CDLPublisher</step>
  <step type="handler">CDLMainEntry</step>
  <step type="threshold">tr2</step>
</steps>
</MatchingProfile>
CDLSeMatchingProfile.xml
<?xml version="1.0" encoding="UTF-8"?>
<MatchingProfile
xmlns="http://www.exlibrisgroup.com/xsd/primo/platform"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.exlibrisgroup.com/xsd/primo/platform ../../../../../../primo_publishing/publish/src/main/com/exlibris/publish/xsd/matchingProfile_v1.0.xsd ">
<!-- Profile layout: <handlers>, then <thresholds>, <steps> and <common_title_list>. -->
<handlers>
<!-- SINGLE_MATCH: pairs field f20 with DedupStringComparator (match=800, mismatch=0). -->
<handler id="SINGLE_MATCH">
  <fieldID>f20</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">800</argument>
    <argument name="mismatch">0</argument>
  </arguments>
</handler>
<!-- CDLID: pairs fields f1 through f5 with CDLIDSerialComparator.
     Arguments fall into two groups by name prefix: recID* and ISSN*
     (plain, Invalid and Canceled variants). -->
<handler id="CDLID">
  <fieldID>f1,f2,f3,f4,f5</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLIDSerialComparator</name>
  <arguments>
    <!-- Record-ID outcomes -->
    <argument name="recID_match">+200</argument>
    <argument name="recID_recIDInvalid_match">+100</argument>
    <argument name="recIDInvalid_match">+50</argument>
    <argument name="recID_mismatch">-470</argument>
    <argument name="recID_recIDInvalid_mismatch">-50</argument>
    <!-- ISSN outcomes -->
    <argument name="ISSN_match">+200</argument>
    <argument name="ISSNInvalid_match">+50</argument>
    <argument name="ISSNCanceled_match">+10</argument>
    <argument name="ISSN_ISSNInvalid_match">+100</argument>
    <argument name="ISSN_ISSNCanceled_match">+50</argument>
    <argument name="ISSNInvalid_ISSNCanceled_match">+30</argument>
    <argument name="ISSN_ISSN_mismatch">-250</argument>
  </arguments>
</handler>
<!-- CDLLongTitle: pairs fields f7 and f8 with a title comparator.
     The "common" arguments sit alongside the <common_title_list> element
     declared at the end of this profile.
     NOTE(review): the <name> element was absent from this handler; the class below
     is inferred from the sibling CDLIDSerialComparator / CDLDateSerialComparator /
     CDLMainEntrySerialComparator naming. Confirm against the deployed profile. -->
<handler id="CDLLongTitle">
  <fieldID>f7,f8</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLTitleSerialComparator</name>
  <arguments>
    <argument name="full_common_match">135</argument>
    <argument name="full_match">600</argument>
    <argument name="full_truncated_common_match">135</argument>
    <argument name="full_truncated_match">175</argument>
    <argument name="keywords_weight_factor">75</argument>
    <argument name="keywords_order_base_weight">50</argument>
    <argument name="mismatch">-600</argument>
  </arguments>
</handler>
<!-- CDLDate: pairs field f6 with CDLDateSerialComparator.
     Two "within" tiers (within1, within2) and a last_digit_zero case are
     configured in addition to match and mismatch. -->
<handler id="CDLDate">
  <fieldID>f6</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLDateSerialComparator</name>
  <arguments>
    <argument name="match">225</argument>
    <argument name="within1">50</argument>
    <argument name="within2">25</argument>
    <argument name="last_digit_zero">20</argument>
    <argument name="mismatch">-150</argument>
  </arguments>
</handler>
<!-- CDLCountryOfPub: pairs field f9 with DedupStringComparator (match=40, mismatch=-20). -->
<handler id="CDLCountryOfPub">
  <fieldID>f9</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">40</argument>
    <argument name="mismatch">-20</argument>
  </arguments>
</handler>
<!-- CDLPlaceOfPub: pairs field f10 with DedupStringComparator (match=200, mismatch=-100). -->
<handler id="CDLPlaceOfPub">
  <fieldID>f10</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">200</argument>
    <argument name="mismatch">-100</argument>
  </arguments>
</handler>
<!-- CDLMainEntry_se: pairs field f11 with CDLMainEntrySerialComparator.
     NOTE(review): keywords_order_base_weight (param="59") is present but no
     keywords_weight_factor is given, unlike the CDLMainEntry handler in
     CDLMatchingProfile.xml. Confirm that the omission is intentional. -->
<handler id="CDLMainEntry_se">
  <fieldID>f11</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.cdlimpl.CDLMainEntrySerialComparator</name>
  <arguments>
    <argument name="match">+200</argument>
    <argument name="keywords_order_base_weight" param="59">25</argument>
    <argument name="mismatch">-250</argument>
  </arguments>
</handler>
</handlers>
<!-- Thresholds referenced from <steps>. All three share an upper bound of +800;
     only tr1 also defines a lower bound. -->
<thresholds>
  <threshold id="tr1">
    <upper_threshold>+800</upper_threshold>
    <lower_threshold>0</lower_threshold>
  </threshold>
  <threshold id="tr2">
    <upper_threshold>+800</upper_threshold>
  </threshold>
  <threshold id="tr3">
    <upper_threshold>+800</upper_threshold>
  </threshold>
</thresholds>
<!-- Step list: each entry names a handler id or a threshold id defined above.
     NOTE(review): presumably the steps run top to bottom and each threshold step
     tests the score accumulated so far. Confirm against the dedup engine. -->
<steps>
  <step type="handler">SINGLE_MATCH</step>
  <step type="threshold">tr3</step>

  <step type="handler">CDLID</step>
  <step type="handler">CDLLongTitle</step>
  <step type="threshold">tr1</step>

  <step type="handler">CDLDate</step>
  <step type="handler">CDLCountryOfPub</step>
  <step type="handler">CDLPlaceOfPub</step>
  <step type="handler">CDLMainEntry_se</step>
  <step type="threshold">tr2</step>
</steps>
<!-- Names the external file that holds the common-title list for this profile. -->
<common_title_list>
  <file_name>CDLSeCommonTitleList.txt</file_name>
</common_title_list>
</MatchingProfile>
CDLArticlesMatchingProfile.xml
<?xml version="1.0" encoding="UTF-8"?>
<MatchingProfile
xmlns="http://www.exlibrisgroup.com/xsd/primo/platform"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.exlibrisgroup.com/xsd/primo/platform ../../../../../../primo_publishing/publish/src/main/com/exlibris/publish/xsd/matchingProfile_v1.0.xsd ">
<handlers>
<!-- CDLArtTitle: the only handler in this profile; pairs field f1 with
     DedupStringComparator (match=100, mismatch=-100). -->
<handler id="CDLArtTitle">
  <fieldID>f1</fieldID>
  <name>com.exlibris.primo.publish.platform.dedup.comparator.DedupStringComparator</name>
  <arguments>
    <argument name="match">100</argument>
    <argument name="mismatch">-100</argument>
  </arguments>
</handler>
</handlers>
<!-- Single threshold tr1 whose upper and lower bounds are both +100,
     the same value as the CDLArtTitle "match" argument. -->
<thresholds>
  <threshold id="tr1">
    <upper_threshold>+100</upper_threshold>
    <lower_threshold>+100</lower_threshold>
  </threshold>
</thresholds>
<!-- Step list: the CDLArtTitle handler followed by threshold tr1. -->
<steps>
  <step type="handler">CDLArtTitle</step>
  <step type="threshold">tr1</step>
</steps>
</MatchingProfile>
CDLSeCommonTitleList.txt
ANALES
ANNUAL BUDGET
ANNUAL FINANCIAL REPORT
ANNUAL REPORT
ANNUAL REPORTS
ANNUAL REPORT FOR
ANNUAL REPORT FOR THE FISCAL YEAR ENDED MARCH µµµµµµ·
BIENNIAL REPORT
BOLETIN
BUDGET
BULLETIN
CALENDAR
CATALOGUE
CIRCULAR
COMPREHENSIVE ANNUAL FINANCIAL REPORT
!COMPILATION OF SELECTED ACTS
COMPILATION OF SELECTED ACTS WITHIN THE JURISDICTION OF THE COMMITTEE ON COMMERCE
CONFERENCE PROCEEDINGS
CONFERENCE RECORD
DIRECTORY
FACT SHEET
FINAL BUDGET
FINANCIAL REPORT
GENERAL CATALOG
JAHRESBERICHT
JOURNAL
LANGUAGE SCIENCES
LEGISLATIVE CALENDAR
LEGISLATIVE SUMMARY
MEMBERSHIP DIRECTORY
MEMOIRES
MEMORIA
MINUTES
MITTEILUNGEN
MONOGRAPH
MONTHLY BULLETIN
NEWS RELEASE
NEWSLETTER
OCCASIONAL PAPER
OCCASIONAL PAPERS
PROCEEDINGS
PROCEEDINGS OF THE ANNUAL MEETING
PROGRESS REPORT
PROPOSED BUDGET
PUBLICACIONES
PUBLICATION
PUBLICATIONS
RAPPORT
RAPPORT ANNUEL
REPORT
REPORT AND ACCOUNTS
RESEARCH REPORT
REVISTA
SEMI ANNUAL REPORT TO THE CONGRESS
SEMIANNUAL REPORT TO THE CONGRESS
SESSION LAWS
STATISTICAL REPORT
TECHNICAL BULLETIN
TECHNICAL REPORT
TRANSACTIONS
TRAVAUX
TRUDY
UPDATE
VEROEFFENTLICHUNGEN
VEROFFENTLICHUNGEN
WORKS
YEAR BOOK
YEARBOOK
TELEPHONE DIRECTORY