Hey Guys,
The following code uses cfcollection and cfindex to create and populate a SOLR collection of data. 10,000 products are added to the collection every 90 seconds. The script is called from a scheduled task setup using the coldfusion administrator.
Code:
<cfsilent>
<cfset myOutput = '<p>START: #NOW()#</p>'>
<cfsetting requestTimeOut = "6000">
<cfset collectionName = 'products_id'>
<cfset collectionPath = 'F:\collections\'>
<cfcollection action="list" name="COLLECTION_LIST" engine="solr">
<cfquery name="COLLECTION_CHECK" dbtype="query">
SELECT * FROM COLLECTION_LIST
WHERE COLLECTION_LIST.NAME = <cfqueryparam cfsqltype="CF_SQL_VARCHAR" value="#collectionName#">
</cfquery>
<cfif #COLLECTION_CHECK.RecordCount# NEQ 0>
<cfset myOutput = '#myOutput#<p>Collection Exists</p>'>
<cfquery datasource="#dsnName#" name="PRODUCT_LIST">
SELECT TOP 10000 PRODUCT.PRODUCT_ID, PRODUCT.PRODUCT_PRIORITY_ID, PRODUCT.BUSINESS_ID, PRODUCT.PRODUCT_NAME,
PRODUCT.PRODUCT_PRICE, PRODUCT.PRODUCT_DESCRIPTION, PRODUCT.PRODUCT_PHOTO, PRODUCT.PRODUCT_KEYWORD1,
PRODUCT.PRODUCT_KEYWORD2, PRODUCT.PRODUCT_KEYWORD3, PRODUCT.PRODUCT_KEYWORD4, PRODUCT.PRODUCT_GOOGLE_IMAGE_URL,
PRODUCT.PRODUCT_VALIDATED
FROM PRODUCT
WHERE PRODUCT.PRODUCT_INDEXED = <cfqueryparam cfsqltype="CF_SQL_NUMERIC" value="0">
</cfquery>
<cfset myOutput = '#myOutput#<p>#PRODUCT_LIST.RecordCount# Product(s) Need Updated</p>'>
<cfset priceRangeArray = ArrayNew(1)>
<cfset productBoolArray = ArrayNew(1)>
<cfset productSubcatIDArray = ArrayNew(1)>
<cfif #PRODUCT_LIST.RecordCount# NEQ 0>
<cfset priceList = '0,10,20,30,40,50,75,100,150,200,250,300,350,400,450,500,600,700,800,900,1000'>
<cfset priceList = '#priceList#,1250,1500,1750,2000,3000,4000,5000,1000000000'>
<cfset priceArray = '#ListToArray(priceList)#'>
<cfset loopLength = #ArrayLen(priceArray)# - 1>
<cfloop index="i" from="1" to="#PRODUCT_LIST.RecordCount#">
<cfset productGooglePhoto = '#PRODUCT_LIST.PRODUCT_GOOGLE_IMAGE_URL[i]#'>
<cfset productValidated = '#PRODUCT_LIST.PRODUCT_VALIDATED[i]#'>
<cfset productPhoto = '#PRODUCT_LIST.PRODUCT_PHOTO[i]#'>
<cfset productPrice = '#PRODUCT_LIST.PRODUCT_PRICE[i]#'>
<cfset productID = '#PRODUCT_LIST.PRODUCT_ID[i]#'>
<cfset priceRangeArray[i] = ''>
<cfset productBoolArray[i] = ''>
<cfset productSubcatIDArray[i] = ''>
<cfquery datasource="#dsnName#" name="SUBCAT_LIST">
SELECT ASSIGN_SUBCAT_PRODUCT.PRODUCT_SUBCATEGORY_ID
FROM ASSIGN_SUBCAT_PRODUCT
WHERE ASSIGN_SUBCAT_PRODUCT.PRODUCT_ID = <cfqueryparam cfsqltype="CF_SQL_NUMERIC" value="#productID#">
</cfquery>
<cfif #SUBCAT_LIST.RecordCount# NEQ 0>
<cfloop index="j" from="1" to="#SUBCAT_LIST.RecordCount#">
<cfset subcatID = '#SUBCAT_LIST.PRODUCT_SUBCATEGORY_ID[j]#'>
<cfif #ListLen(productSubcatIDArray[i])# NEQ 0>
<cfset productSubcatIDArray[i] = '#productSubcatIDArray[i]#,#subcatID#'>
<cfelse>
<cfset productSubcatIDArray[i] = '#subcatID#'>
</cfif>
</cfloop>
</cfif>
<cfif #productValidated# EQ 1>
<cfset productBoolArray[i] = '122'>
<cfelse>
<cfset productBoolArray[i] = '022'>
</cfif>
<cfif #productGooglePhoto# NEQ "">
<cfset productBoolArray[i] = '#productBoolArray[i]# 212'>
<cfelse>
<cfset productBoolArray[i] = '#productBoolArray[i]# 202'>
</cfif>
<cfif #productPhoto# NEQ "">
<cfset productBoolArray[i] = '#productBoolArray[i]# 221'>
<cfelse>
<cfset productBoolArray[i] = '#productBoolArray[i]# 220'>
</cfif>
<cfloop index="j" from="1" to="#loopLength#">
<cfset myPrice = '#priceArray[j]#'>
<cfset loopFrom = #i#+j>
<cfif #productPrice# GTE #myPrice#>
<cfif #ListLen(priceRangeArray[i])# NEQ 0>
<cfset priceRangeArray[i] = '#priceRangeArray[i]# #myPrice#2p'>
<cfelse>
<cfset priceRangeArray[i] = ' #myPrice#2p'>
</cfif>
</cfif>
<cfif #productPrice# LTE #myPrice#>
<cfif #ListLen(priceRangeArray[i])# NEQ 0>
<cfset priceRangeArray[i] = '#priceRangeArray[i]# p2#myPrice#'>
<cfelse>
<cfset priceRangeArray[i] = ' p2#myPrice#'>
</cfif>
</cfif>
</cfloop>
</cfloop>
</cfif>
<cfset nColumnNumber = QueryAddColumn(PRODUCT_LIST,"PRICE_RANGE","VarChar",priceRangeArray)>
<cfset nColumnNumber2 = QueryAddColumn(PRODUCT_LIST,"BOOL_VALUES","VarChar",productBoolArray)>
<cfset nColumnNumber3 = QueryAddColumn(PRODUCT_LIST,"PRODUCT_SUBCATEGORIES","VarChar",productSubcatIDArray)>
<cfset myColumns = 'PRODUCT_NAME,PRODUCT_DESCRIPTION,PRODUCT_KEYWORD1,PRODUCT_KEYWORD2'>
<cfset myColumns = '#myColumns#,PRODUCT_KEYWORD3,PRODUCT_KEYWORD4'>
<cfindex action="update" collection="#collectionName#" query="PRODUCT_LIST" type="custom" key="PRODUCT_ID"
body="#myColumns#" title="PRODUCT_NAME" custom1="BUSINESS_ID" custom2="PRICE_RANGE" custom3="PRODUCT_PRIORITY_ID"
custom4="BOOL_VALUES" category="PRODUCT_SUBCATEGORIES">
<cfif #PRODUCT_LIST.RecordCount# NEQ 0>
<cfloop index="i" from="1" to="#PRODUCT_LIST.RecordCount#">
<cfset productID = '#PRODUCT_LIST.PRODUCT_ID[i]#'>
<cfquery datasource="#dsnName#" maxrows="1">
UPDATE PRODUCT
SET PRODUCT.PRODUCT_INDEXED = <cfqueryparam cfsqltype="CF_SQL_NUMERIC" value="1">
WHERE PRODUCT.PRODUCT_ID = <cfqueryparam cfsqltype="CF_SQL_NUMERIC" value="#productID#">
</cfquery>
</cfloop>
</cfif>
<cfelse>
<cfset myOutput = '#myOutput#<p>Collection Does Not Exist</p>'>
<cfcollection action="create" categories="yes" collection="#collectionName#" engine="solr" path="#collectionPath#">
<cfset myOutput = '#myOutput#<p>Collection Created</p>'>
</cfif>
<cfset myOutput = '#myOutput#<p>END: #NOW()#</p>'>
</cfsilent>
<cfoutput>#myOutput#</cfoutput>
I plan to add some code so that the script can not be called while one is already executing. Besides that, I think the script should be pretty solid. This should take about 19 hours to execute for about 5.6 million products. After that, I should be able to tell you how well the cfsearch performs in comparison with the cfquery search.
Sincerely,
Travis Walters