kafka2ES_Task

master
zhangjun 5 years ago
parent 8d27613bcb
commit 1fbb99aa36

@@ -2,6 +2,7 @@ package Kafka2ESService
import (
"dsDataex/MyTask/Kafka2ES/Kafka2ESDAO"
"dsDataex/Utils/ES7Util"
"dsDataex/Utils/KafkaUtil"
"fmt"
"github.com/go-co-op/gocron"
@@ -11,18 +12,30 @@ import (
)
var ChanTopic chan []string
var LstTopic []string
//var LstTopic []string
var GROUP_NO = 0
var loc sync.Mutex
/**
* @Author zhangjun
* @Description Kafka 2 ES
* @Date 2020-08-04 09:59
* @Param
* @return
**/
func ServiceStart() {
cronMan := gocron.NewScheduler(time.UTC)
cronMan.Every(5).Seconds().StartImmediately().Do(DBWatch)
cronMan.Every(5).Seconds().StartImmediately().Do(DBWatchProcess)
cronMan.Every(10).Seconds().Do(LogProcess)
cronMan.Every(60).Seconds().Do(ESRefreshProcess)
cronMan.StartAsync()
defer func() {
@@ -35,9 +48,9 @@ func ServiceStart() {
//var procNo = int(ConfigUtil.KafkaProcNo)
KafkaUtil.ChanTopicProc = make(map[string] chan bool)
KafkaUtil.StateTopicProc = make(map[string] bool)
KafkaUtil.CountTopicProc = make(map[string] int)
//KafkaUtil.ChanTopicProc = make(map[string]chan bool)
//KafkaUtil.StateTopicProc = make(map[string]bool)
KafkaUtil.CountTopicProc = make(map[string]int)
ChanTopic = make(chan []string, 100)
@@ -45,110 +58,137 @@ func ServiceStart() {
for no := 0; no < len(topics); no++ {
topic := topics[no]
_, f := KafkaUtil.CountTopicProc[topic]
if Contains(LstTopic, topic) == -1 {
LstTopic = append(LstTopic, topic)
if f == false {
//change by zhangjun 2020-08-02
cronMan.Every(60).Seconds().SetTag([]string{"kafka_" + topic}).StartImmediately().Do(KafkaProcess, topic)
//go KafkaProcess(topic, procNo)
//cronMan.Every(60).Seconds().SetTag([]string{"kafka_" + topic}).StartImmediately().Do(KafkaProcess, topic)
KafkaProcess(topic)
//time.Sleep(time.Second * 1)
}
}
if len(LstTopic) > len(topics) {
for no := 0; no < len(LstTopic); no++ {
if Contains(topics, LstTopic[no]) == -1 {
if len(KafkaUtil.CountTopicProc) > len(topics) {
for k, _ := range KafkaUtil.CountTopicProc {
if Contains(topics, k) == -1 {
//remove the scheduled job
cronMan.RemoveJobByTag("kafka_" + LstTopic[no])
//cronMan.RemoveJobByTag("kafka_" + LstTopic[no])
//stop the consumer goroutine
//for no2 := 0; no2 < len(KafkaUtil.ChanTopicProc[LstTopic[no]]); no2++ {
KafkaUtil.ChanTopicProc[LstTopic[no]] <- true
//}
//KafkaUtil.ChanTopicProc[LstTopic[no]] <- true
delete(KafkaUtil.ChanTopicProc, LstTopic[no])
delete(KafkaUtil.StateTopicProc, LstTopic[no])
}
}
loc.Lock()
LstTopic = []string{}
//delete(KafkaUtil.ChanTopicProc, LstTopic[no])
//delete(KafkaUtil.StateTopicProc, k)
delete(KafkaUtil.CountTopicProc, k)
LstTopic = append(LstTopic, topics...)
loc.Unlock()
}
}
//LstTopic = []string{}
//
//LstTopic = append(LstTopic, topics...)
}
}
}
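
For context, the jobs above are scheduled with go-co-op/gocron. Below is a minimal standalone sketch of the same pattern, assuming the gocron version used in this commit (chainable SetTag/StartImmediately); the job bodies and the tag name are placeholders, not the service's real tasks:

package main

import (
	"fmt"
	"time"

	"github.com/go-co-op/gocron"
)

func main() {
	cronMan := gocron.NewScheduler(time.UTC)

	// every 5 seconds, first run fired immediately (the way DBWatchProcess is scheduled above)
	cronMan.Every(5).Seconds().StartImmediately().Do(func() { fmt.Println("watch topics") })

	// a tagged job can later be removed by its tag
	cronMan.Every(60).Seconds().SetTag([]string{"kafka_demo"}).Do(func() { fmt.Println("demo job") })

	cronMan.StartAsync() // non-blocking: jobs run on background goroutines

	time.Sleep(3 * time.Minute)
	cronMan.RemoveJobByTag("kafka_demo")
	time.Sleep(time.Minute)
}
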
func DBWatch() {
/**
* @Author zhangjun
* @Description poll MySQL for the topic list
* @Date 2020-08-04 09:59
* @Param
* @return
**/
func DBWatchProcess() {
var _, topics = Kafka2ESDAO.GetTopics()
ChanTopic <- topics
}
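
DBWatchProcess feeds the topic list into the buffered ChanTopic channel, and the ServiceStart loop above drains it to start or stop consumers. A minimal sketch of that producer/consumer handoff (topic names and intervals are placeholders):

package main

import (
	"fmt"
	"time"
)

var chanTopic = make(chan []string, 100) // buffered, like ChanTopic above

// watcher periodically pushes the current topic list (placeholder data here).
func watcher() {
	for {
		chanTopic <- []string{"topic_a", "topic_b"}
		time.Sleep(5 * time.Second)
	}
}

func main() {
	go watcher()
	for topics := range chanTopic {
		fmt.Println("current topics:", topics) // ServiceStart starts/stops consumers here
	}
}
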
/**
* @Author zhangjun
* @Description Consume Kafka
* @Date 2020-08-04 09:59
* @Param
* @return
**/
func KafkaProcess(topic string) {
_, f := KafkaUtil.ChanTopicProc[topic]
_, f := KafkaUtil.CountTopicProc[topic]
if f == false {
//var lst []chan bool
//var lst2 []bool
//var lst3 []int
//
//for no := 0; no < procNo; no++ {
//
// var chanProc = make(chan bool, 100)
//
// lst = append(lst, chanProc)
// lst2 = append(lst2, true)
// lst3 = append(lst3, 0)
//}
//add by zhangjun 2020-07-30
loc.Lock()
KafkaUtil.ChanTopicProc[topic] = nil
KafkaUtil.StateTopicProc[topic] = true
//loc.Lock()
//KafkaUtil.ChanTopicProc[topic] = nil
//KafkaUtil.StateTopicProc[topic] = true
KafkaUtil.CountTopicProc[topic] = 0
//loc.Unlock()
loc.Unlock()
//for no := 0; no < procNo; no++ {
if GROUP_NO == 0 {
fmt.Printf("Dataex Kafka2ES Process Start,Topic:%s,ConsumerGroup:%s.\n", topic, "group_"+topic)
//start the consumer goroutine
go KafkaUtil.Consume(topic, "group_"+topic)
//time.Sleep(time.Second * 10)
//}
} else { //TODO: handle failed consumer goroutines
//for no := 0; no < len(KafkaUtil.StateTopicProc[topic]); no++ {
if KafkaUtil.StateTopicProc[topic] == false {
fmt.Printf("Dataex Kafka2ES Process Start,Topic:%s,ConsumerGroup:%s.\n", topic, "group_"+topic)
KafkaUtil.StateTopicProc[topic] = true
KafkaUtil.CountTopicProc[topic] = 0
go KafkaUtil.Consume(topic, "group_"+topic)
} else {
//add by zhangjun 2020-08-04
//start dual Consume Group goroutines to ensure data accuracy!!!
//time.Sleep(time.Second * 5)
fmt.Printf("Dataex Kafka2ES Process Start,Topic:%s,ConsumerGroup:%s.\n", topic, "group2_"+topic)
go KafkaUtil.Consume(topic, "group2_"+topic)
}
}// else { //TODO: handle failed consumer goroutines
//
// if KafkaUtil.CountTopicProc[topic] == false {
//
// loc.Lock()
//
// //KafkaUtil.ChanTopicProc[topic] = nil
// //KafkaUtil.StateTopicProc[topic] = true
// KafkaUtil.CountTopicProc[topic] = 0
//
// loc.Unlock()
//
// if GROUP_NO == 0 {
// fmt.Printf("Dataex Kafka2ES Process Start,Topic:%s,ConsumerGroup:%s.\n", topic, "group_"+topic)
// go KafkaUtil.Consume(topic, "group_"+topic)
// } else {
// fmt.Printf("Dataex Kafka2ES Process Start,Topic:%s,ConsumerGroup:%s.\n", topic, "group2_"+topic)
// go KafkaUtil.Consume(topic, "group2_"+topic)
// }
// //time.Sleep(time.Second * 10)
// }
//}
}
time.Sleep(time.Second * 10)
}
//}
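
In KafkaProcess above, the second task (GROUP_NO set to 2 in its main) consumes the same topics under a "group2_" prefix, so two independent consumer groups each read the full topic. A minimal kafka-go sketch of that dual-group idea; the broker address and topic name are placeholders:

package main

import (
	"context"
	"fmt"

	kafka "github.com/segmentio/kafka-go"
)

func consume(group string) {
	r := kafka.NewReader(kafka.ReaderConfig{
		Brokers:  []string{"localhost:9092"}, // placeholder broker
		Topic:    "demo_topic",               // placeholder topic
		GroupID:  group,                      // each group gets its own copy of the stream
		MinBytes: 10e3,
		MaxBytes: 10e6,
	})
	defer r.Close()

	for {
		msg, err := r.ReadMessage(context.Background())
		if err != nil {
			fmt.Println("read error:", err)
			return
		}
		fmt.Printf("[%s] offset %d: %s\n", group, msg.Offset, msg.Value)
	}
}

func main() {
	go consume("group_demo_topic")
	consume("group2_demo_topic") // the second group re-reads the same messages
}
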
/**
* @Author zhangjun
* @Description log per-topic Kafka consume counts
* @Date 2020-08-04 09:59
* @Param
* @return
**/
func LogProcess() {
for k, v := range KafkaUtil.CountTopicProc {
fmt.Println("[Kafka] ["+k+"] "+time.Now().Format("2006/01/02 15:04:05")+" Process message total:", v)
}
}
func LogProcess() {
/**
* @Author zhangjun
* @Description refresh the ES indices
* @Date 2020-08-04 09:59
* @Param
* @return
**/
func ESRefreshProcess() {
for k,v:= range KafkaUtil.CountTopicProc{
for k,_:=range KafkaUtil.CountTopicProc {
//if len(KafkaUtil.CountTopicProc[k])>0{
//for no:=0;no< len(v);no++{
fmt.Println("[Kafka] ["+k+"] "+time.Now().Format("2006/01/02 15:04:05")+" Process message total:",v)
//}
//}
ES7Util.IndexRefresh( k )
}
}

@@ -12,7 +12,7 @@ var Count =0
func main() {
fmt.Println("Kafka2ES Service & Task Start !!!")
fmt.Println("Kafka2ES Task One Start !!!")
//s1 := gocron.NewScheduler(time.UTC)
//

@@ -0,0 +1,17 @@
package main
import (
"dsDataex/MyTask/Kafka2ES/Kafka2ESService"
"dsDataex/Utils/CacheUtil"
"fmt"
)
func main() {
fmt.Println("Kafka2ES Task Two Start !!!")
CacheUtil.OrgtreeCacheInit()
Kafka2ESService.GROUP_NO=2
Kafka2ESService.ServiceStart()
}

@@ -8,6 +8,7 @@ import (
"dsDataex/Utils/ConfigUtil"
"dsDataex/Utils/RedisUtil"
"encoding/json"
"fmt"
"github.com/olivere/elastic/v7"
"reflect"
"strconv"
@@ -235,16 +236,29 @@ func IndexDocAdd(indexName string,indexData *DataEX.ESData) (bool,string,error){
* @Param
* @return
**/
func IndexDocAdd2(indexName string,indexData *DataEX.ESData) (bool,string,error){
func IndexDocAdd2(indexName string,indexData *DataEX.ESData){
//defer func() {
// if err := recover(); err != nil {
// fmt.Println("IndexDocAdd2 Panic Recover :", err)
// }
//}()
indexData.BeginTime = DataEX.JsonDate(time.Now());
indexData.EndTime =DataEX.JsonDate(time.Date(9999,9,9,9,9,9,0,time.Now().Location()))
result, err := ES7Client.Index().Index(indexName).Id(indexData.DataId).BodyJson(indexData).Do(CTX)
res, err := ES7Client.Index().Index(indexName).Id(indexData.DataId).BodyJson(indexData).Do(CTX)
if result.Result=="created" {
return true,"文档操作成功",nil
}else{
return false,"文档操作失败",err
if err != nil {
fmt.Println("IndexDocAdd2 Error :" +err.Error())
}else {
if res.Result == "created" {
//fmt.Println("IndexDocAdd2 Result :" ,res)
}
if res.Result == "updated"{
//fmt.Println("IndexDocAdd2 Result :" ,res)
}
}
}
@@ -570,6 +584,11 @@ func remove(array []string,del string)[]string {
return result
}
func IndexRefresh(indexName string){
ES7Client.Refresh(indexName).Do(CTX)
}
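
IndexDocAdd2 and IndexRefresh wrap the olivere/elastic v7 client. In isolation, the index-then-refresh calls look roughly like this; the client URL, index name, and document are placeholders:

package main

import (
	"context"
	"fmt"

	"github.com/olivere/elastic/v7"
)

func main() {
	ctx := context.Background()
	client, err := elastic.NewClient(elastic.SetURL("http://localhost:9200")) // placeholder URL
	if err != nil {
		panic(err)
	}

	doc := map[string]interface{}{"title": "demo", "ts": "2020-08-04"}

	res, err := client.Index().Index("demo_index").Id("doc-1").BodyJson(doc).Do(ctx)
	if err != nil {
		fmt.Println("index error:", err)
		return
	}
	fmt.Println("result:", res.Result) // "created" on first write, "updated" afterwards

	// force a refresh so the document is searchable immediately
	if _, err := client.Refresh("demo_index").Do(ctx); err != nil {
		fmt.Println("refresh error:", err)
	}
}
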
/**
* @Author zhangjun
* @Description terms query

@@ -2,7 +2,9 @@ package GeoIPUtil
import (
"dsDataex/Utils/CacheUtil"
"dsDataex/Utils/RedisUtil"
"github.com/yinheli/qqwry"
"time"
)
var DB *qqwry.QQwry
@@ -10,7 +12,7 @@ var DB *qqwry.QQwry
var Province2260 map[string]string
var City2260 map[string][]string
func init() {
func init() {
//open the Chunzhen (qqwry) offline IP database
DB = qqwry.NewQQwry("GeoLite2/qqwry.dat")
@@ -18,34 +20,44 @@ func init() {
//administrative division cache
var sql = "SELECT area_code from t_dataex_gbt2260 where area_type=2 "
var list,_,_=CacheUtil.Page(sql, 5000,0)
var list, _, _ = CacheUtil.Page(sql, 5000, 0)
Province2260=make( map[string]string)
Province2260 = make(map[string]string)
for no:=0;no< len(list);no++{
Province2260[list[no]["area_code"].(string)]=list[no]["area_name"].(string)
for no := 0; no < len(list); no++ {
Province2260[list[no]["area_code"].(string)] = list[no]["area_name"].(string)
}
City2260=make( map[string][]string)
City2260 = make(map[string][]string)
for no:=0;no< len(list);no++{
City2260[list[no]["area_name"].(string)]=[]string{list[no]["area_code"].(string),list[no]["area_name"].(string),"",""}
for no := 0; no < len(list); no++ {
City2260[list[no]["area_name"].(string)] = []string{list[no]["area_code"].(string), list[no]["area_name"].(string), "", ""}
}
sql = "SELECT area_code from t_dataex_gbt2260 where area_type=3 "
list,_,_=CacheUtil.Page(sql, 5000,0)
for no:=0;no< len(list);no++{
var provinceCode=list[no]["area_code"].(string)[0:2]+"0000"
var provinceName=Province2260[provinceCode]
list, _, _ = CacheUtil.Page(sql, 5000, 0)
for no := 0; no < len(list); no++ {
var provinceCode = list[no]["area_code"].(string)[0:2] + "0000"
var provinceName = Province2260[provinceCode]
City2260[provinceName+list[no]["area_name"].(string)]=[]string{provinceCode,provinceName,list[no]["area_code"].(string),list[no]["area_name"].(string)}
City2260[provinceName+list[no]["area_name"].(string)] = []string{provinceCode, provinceName, list[no]["area_code"].(string), list[no]["area_name"].(string)}
}
}
func GetGeo4IP(temp string) []string {
DB.Find(temp)
//fmt.Println("qqwry :",DB.Country,DB.City)
return City2260[DB.Country]
//add by zhangjun 2020-08-04: use a Redis cache to ease concurrent reads of the IP file
var city,err =RedisUtil.RedisClient.Get("Dataex_IP:"+temp).Result()
if err==nil && city != "" {
return City2260[ city ]
} else {
DB.Find(temp)
RedisUtil.RedisClient.Set("Dataex_IP:"+temp, DB.Country, 10*time.Hour)
//fmt.Println("qqwry :",DB.Country,DB.City)
return City2260[DB.Country]
}
}
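
The change to GetGeo4IP is a read-through cache: check Redis first, fall back to the qqwry file lookup, then cache the result for 10 hours. Below is a self-contained sketch of that pattern, assuming the context-free go-redis client style used above; slowLookup is a hypothetical stand-in for the qqwry lookup:

package main

import (
	"fmt"
	"time"

	"github.com/go-redis/redis"
)

// slowLookup stands in for the qqwry offline-file lookup the cache protects.
func slowLookup(ip string) string { return "Beijing" }

func main() {
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"}) // placeholder address

	ip := "1.2.3.4"
	city, err := client.Get("Dataex_IP:" + ip).Result()
	if err == nil && city != "" {
		fmt.Println("cache hit:", city)
		return
	}

	// cache miss: do the expensive lookup once, then keep it for 10 hours
	city = slowLookup(ip)
	client.Set("Dataex_IP:"+ip, city, 10*time.Hour)
	fmt.Println("cache miss, stored:", city)
}
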

@@ -12,6 +12,7 @@ import (
"github.com/segmentio/kafka-go/snappy"
"math/rand"
"strconv"
"sync"
"time"
)
@@ -22,12 +23,12 @@ var kafkaPool map[string]map[int]*kafka.Conn
var kafkaParts map[string]int
//controls shutdown of the consume goroutines
var ChanTopicProc map[string] chan bool
//var ChanTopicProc map[string]chan bool
var loc sync.Mutex
//tracks consume goroutine state (whether ReadMessage succeeded within the last 10 minutes; if not, the goroutine shuts itself down)
var StateTopicProc map[string] bool
var CountTopicProc map[string] int
//var StateTopicProc map[string]bool
var CountTopicProc map[string]int
func init() {
@@ -94,8 +95,8 @@ func ProvideLow(topic string, datas []DataEX.KafkaData) (bool, string) {
client = c
//add by zhangjun 2020-08-02: detect connection timeout and abnormal close
_, err:= client.ReadLastOffset()
if err!=nil{
_, err := client.ReadLastOffset()
if err != nil {
client, _ = kafka.DialLeader(context.Background(), "tcp", ConfigUtil.KafkaBrokers[0], topic, num)
kafkaPool[topic][num] = client
@@ -132,7 +133,7 @@ func ProvideLow(topic string, datas []DataEX.KafkaData) (bool, string) {
return true, ""
} else {
fmt.Println("Kafka数据存储失败 :",err.Error())
fmt.Println("Kafka数据存储失败 :", err.Error())
return false, "Kafka数据存储失败"
}
}
@@ -182,9 +183,9 @@ func Consume(topic string, group string) {
defer func() {
if err := recover(); err != nil {
fmt.Println("KafkaUtil Consume Panic Recover :", err)
StateTopicProc[topic] = false
}
delete(CountTopicProc, topic)
}()
//add by zhangjun 2020-08-03
@@ -198,29 +199,32 @@ func Consume(topic string, group string) {
GroupID: group, //a Group must be specified; otherwise a Partition is required
MinBytes: 10e3, // 10KB
MaxBytes: 10e6, // 10MB
//CommitInterval: time.Second,// flushes commits to Kafka every second
})
//ticker :=time.NewTicker( 10 * time.Second)
//count:=0
myLoop:
//myLoop:
for {
//if ReadMessage makes no progress for 10 minutes, close the Consume goroutine
ctx, cancle := context.WithTimeout(context.Background(), time.Minute*10)
ctx, cancle := context.WithTimeout(context.Background(), time.Second * 600)
defer cancle()
select {
case f := <-ChanTopicProc[topic]:
if f == true {
r.Close()
StateTopicProc[topic] = false
fmt.Printf("Dataex Kafka2ES Process Close ,Topic:%s,ConsumerGroup:%s.\n", topic, group)
return
}
//select {
//case f := <-ChanTopicProc[topic]:
// if f == true {
//
// loc.Lock()
// StateTopicProc[topic] = false
// loc.Unlock()
//
// r.Close()
// fmt.Printf("Dataex Kafka2ES Process Close ,Topic:%s,ConsumerGroup:%s.\n", topic, group)
//
// return
// }
//case <- ticker.C:
// if count==0{
// r.Close()
@@ -230,27 +234,34 @@
// }else {
// count=0
// }
default:
//default:
msg, err := r.ReadMessage( ctx )
//blocking read from Kafka
msg, err := r.ReadMessage(ctx)
if err != nil {
fmt.Println("KafkaUtil ReadMessage Error :",err.Error())
break myLoop
}
//fmt.Println("KafkaUtil ReadMessage :", topic , msg.Offset)
if err != nil {
fmt.Println("KafkaUtil ReadMessage Error :", err.Error())
break
}
//TODO: do not use a goroutine here, otherwise messages are consumed too fast and data is lost; consider optimizing the ES cluster later
Kafka2ESTask.Process(topic, msg)
//TODO: do not use a goroutine here, otherwise messages are consumed too fast and data is lost; consider optimizing the ES cluster later
Kafka2ESTask.Process(topic, msg)
_, f := CountTopicProc[topic]
if f == false {
break
} else {
loc.Lock()
//count++
CountTopicProc[topic]++
loc.Unlock()
//fmt.Printf("message at partiton %d offset %d: %s ==> %s\n",msg.Partition, msg.Offset, string(msg.Key), string(msg.Value))
}
}
StateTopicProc[topic]= false
r.Close()
fmt.Printf("Dataex Kafka2ES Process Stop ,Topic:%s,ConsumerGroup:%s.\n", topic, group)
}
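
The rewritten Consume loop wraps each ReadMessage in a context.WithTimeout so an idle consumer eventually shuts itself down. Stripped of the bookkeeping, the pattern looks like the sketch below; broker, topic, and group are placeholders, and cancel is called every iteration rather than deferred inside the loop:

package main

import (
	"context"
	"fmt"
	"time"

	kafka "github.com/segmentio/kafka-go"
)

// readUntilIdle consumes until one read makes no progress within idle, then
// closes the reader: the same idle-shutdown idea as Consume above.
func readUntilIdle(r *kafka.Reader, idle time.Duration) {
	defer r.Close()
	for {
		ctx, cancel := context.WithTimeout(context.Background(), idle)
		msg, err := r.ReadMessage(ctx)
		cancel() // release the timer each pass instead of deferring in a loop
		if err != nil {
			fmt.Println("ReadMessage stopped:", err) // timeout or reader error
			return
		}
		fmt.Printf("offset %d: %s\n", msg.Offset, msg.Value)
	}
}

func main() {
	r := kafka.NewReader(kafka.ReaderConfig{
		Brokers: []string{"localhost:9092"}, // placeholder broker
		Topic:   "demo_topic",               // placeholder topic
		GroupID: "group_demo_topic",
	})
	readUntilIdle(r, 10*time.Minute)
}
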

File diff suppressed because it is too large.