消息存储
消息存储核心类
private final MessageStoreConfig messageStoreConfig;//消息配置属性private final CommitLog commitLog;//CommitLog文件存储的实现类private final ConcurrentMap<String, ConcurrentMap<Integer, ConsumeQueue>> consumeQueueTable;//消息队列存储缓存表,按照消息主题分组private final FlushConsumeQueueService flushConsumeQueueService;//消息队列文件刷盘线程private final CleanCommitLogService cleanCommitLogService;//清除CommitLog文件服务private final CleanConsumeQueueService cleanConsumeQueueService;//清除ConsumerQueue队列文件服务private final IndexService indexService;//索引实现类private final AllocateMappedFileService allocateMappedFileService;//MappedFile分配服务private final ReputMessageService reputMessageService;//CommitLog消息分发,根据CommitLog文件构建ConsumerQueue、IndexFile文件private final HAService haService;//存储HA机制private final ScheduleMessageService scheduleMessageService;//消息服务调度线程private final StoreStatsService storeStatsService;//消息存储服务private final TransientStorePool transientStorePool;//消息堆外内存缓存private final BrokerStatsManager brokerStatsManager;//Broker状态管理器private final MessageArrivingListener messageArrivingListener;//消息拉取长轮询模式消息达到监听器private final BrokerConfig brokerConfig;//Broker配置类private StoreCheckpoint storeCheckpoint;//文件刷盘监测点private final LinkedList<CommitLogDispatcher> dispatcherList;//CommitLog文件转发请求
消息存储流程
消息存储入口:DefaultMessageStore#putMessage
//判断Broker角色如果是从节点,则无需写入if (BrokerRole.SLAVE == this.messageStoreConfig.getBrokerRole()) { long value = this.printTimes.getAndIncrement(); if ((value % 50000) == 0) { log.warn("message store is slave mode, so putMessage is forbidden"); } return new PutMessageResult(PutMessageStatus.SERVICE_NOT_AVAILABLE, null);}//判断当前写入状态如果是正在写入,则不能继续if (!this.runningFlags.isWriteable()) { long value = this.printTimes.getAndIncrement(); return new PutMessageResult(PutMessageStatus.SERVICE_NOT_AVAILABLE, null);} else { this.printTimes.set(0);}//判断消息主题长度是否超过最大限制if (msg.getTopic().length() > Byte.MAX_VALUE) { log.warn("putMessage message topic length too long" + msg.getTopic().length()); return new PutMessageResult(PutMessageStatus.MESSAGE_ILLEGAL, null);}//判断消息属性长度是否超过限制if (msg.getPropertiesString() != null && msg.getPropertiesString().length() > Short.MAX_VALUE) { log.warn("putMessage message properties length too long" + msg.getPropertiesString().length()); return new PutMessageResult(PutMessageStatus.PROPERTIES_SIZE_EXCEEDED, null);}//判断系统PageCache缓存去是否占用if (this.isOSPageCacheBusy()) { return new PutMessageResult(PutMessageStatus.OS_PAGECACHE_BUSY, null);}//将消息写入CommitLog文件PutMessageResult result = this.commitLog.putMessage(msg);
代码:CommitLog#putMessage
//记录消息存储时间msg.setStoreTimestamp(beginLockTimestamp);//判断如果mappedFile如果为空或者已满,创建新的mappedFile文件if (null == mappedFile || mappedFile.isFull()) { mappedFile = this.mappedFileQueue.getLastMappedFile(0); }//如果创建失败,直接返回if (null == mappedFile) { log.error("create mapped file1 error, topic:" + msg.getTopic() +" clientAddr:" + msg.getBornHostString()); beginTimeInLock = 0; return new PutMessageResult(PutMessageStatus.CREATE_MAPEDFILE_FAILED, null);}//写入消息到mappedFile中result = mappedFile.appendMessage(msg, this.appendMessageCallback);
代码:MappedFile#appendMessagesInner
//获得文件的写入指针int currentPos = this.wrotePosition.get();//如果指针大于文件大小则直接返回if (currentPos < this.fileSize) { //通过writeBuffer.slice()创建一个与MappedFile共享的内存区,并设置position为当前指针 ByteBuffer byteBuffer = writeBuffer != null ? writeBuffer.slice() : this.mappedByteBuffer.slice(); byteBuffer.position(currentPos); AppendMessageResult result = null; if (messageExt instanceof MessageExtBrokerInner) { //通过回调方法写入 result = cb.doAppend(this.getFileFromOffset(), byteBuffer, this.fileSize - currentPos, (MessageExtBrokerInner) messageExt); } else if (messageExt instanceof MessageExtBatch) { result = cb.doAppend(this.getFileFromOffset(), byteBuffer, this.fileSize - currentPos, (MessageExtBatch) messageExt); } else { return new AppendMessageResult(AppendMessageStatus.UNKNOWN_ERROR); } this.wrotePosition.addAndGet(result.getWroteBytes()); this.storeTimestamp = result.getStoreTimestamp(); return result;}
代码:CommitLog#doAppend
//文件写入位置long wroteOffset = fileFromOffset + byteBuffer.position();//设置消息IDthis.resetByteBuffer(hostHolder, 8);String msgId = MessageDecoder.createMessageId(this.msgIdMemory, msgInner.getStoreHostBytes(hostHolder), wroteOffset);//获得该消息在消息队列中的偏移量keyBuilder.setLength(0);keyBuilder.append(msgInner.getTopic());keyBuilder.append('-');keyBuilder.append(msgInner.getQueueId());String key = keyBuilder.toString();Long queueOffset = CommitLog.this.topicQueueTable.get(key);if (null == queueOffset) { queueOffset = 0L; CommitLog.this.topicQueueTable.put(key, queueOffset);}//获得消息属性长度final byte[] propertiesData =msgInner.getPropertiesString() == null ? null : msgInner.getPropertiesString().getBytes(MessageDecoder.CHARSET_UTF8);final int propertiesLength = propertiesData == null ? 0 : propertiesData.length;if (propertiesLength > Short.MAX_VALUE) { log.warn("putMessage message properties length too long. length={}", propertiesData.length); return new AppendMessageResult(AppendMessageStatus.PROPERTIES_SIZE_EXCEEDED);}//获得消息主题大小final byte[] topicData = msgInner.getTopic().getBytes(MessageDecoder.CHARSET_UTF8);final int topicLength = topicData.length;//获得消息体大小final int bodyLength = msgInner.getBody() == null ? 0 : msgInner.getBody().length;//计算消息总长度final int msgLen = calMsgLength(bodyLength, topicLength, propertiesLength);
代码:CommitLog#calMsgLength
protected static int calMsgLength(int bodyLength, int topicLength, int propertiesLength) { final int msgLen = 4 //TOTALSIZE + 4 //MAGICCODE + 4 //BODYCRC + 4 //QUEUEID + 4 //FLAG + 8 //QUEUEOFFSET + 8 //PHYSICALOFFSET + 4 //SYSFLAG + 8 //BORNTIMESTAMP + 8 //BORNHOST + 8 //STORETIMESTAMP + 8 //STOREHOSTADDRESS + 4 //RECONSUMETIMES + 8 //Prepared Transaction Offset + 4 + (bodyLength > 0 ? bodyLength : 0) //BODY + 1 + topicLength //TOPIC + 2 + (propertiesLength > 0 ? propertiesLength : 0) //propertiesLength + 0; return msgLen;}
代码:CommitLog#doAppend
//消息长度不能超过4Mif (msgLen > this.maxMessageSize) { CommitLog.log.warn("message size exceeded, msg total size:" + msgLen +", msg body size:" + bodyLength +", maxMessageSize:" + this.maxMessageSize); return new AppendMessageResult(AppendMessageStatus.MESSAGE_SIZE_EXCEEDED);}//消息是如果没有足够的存储空间则新创建CommitLog文件if ((msgLen + END_FILE_MIN_BLANK_LENGTH) > maxBlank) { this.resetByteBuffer(this.msgStoreItemMemory, maxBlank); // 1 TOTALSIZE this.msgStoreItemMemory.putInt(maxBlank); // 2 MAGICCODE this.msgStoreItemMemory.putInt(CommitLog.BLANK_MAGIC_CODE); // 3 The remaining space may be any value // Here the length of the specially set maxBlank final long beginTimeMills = CommitLog.this.defaultMessageStore.now(); byteBuffer.put(this.msgStoreItemMemory.array(), 0, maxBlank); return new AppendMessageResult(AppendMessageStatus.END_OF_FILE, wroteOffset, maxBlank, msgId, msgInner.getStoreTimestamp(), queueOffset, CommitLog.this.defaultMessageStore.now() - beginTimeMills);}//将消息存储到ByteBuffer中,返回AppendMessageResultfinal long beginTimeMills = CommitLog.this.defaultMessageStore.now();// Write messages to the queue bufferbyteBuffer.put(this.msgStoreItemMemory.array(), 0, msgLen);AppendMessageResult result = new AppendMessageResult(AppendMessageStatus.PUT_OK, wroteOffset, msgLen, msgId,msgInner.getStoreTimestamp(), queueOffset, CommitLog.this.defaultMessageStore.now() -beginTimeMills);switch (tranType) { case MessageSysFlag.TRANSACTION_PREPARED_TYPE: case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE: break; case MessageSysFlag.TRANSACTION_NOT_TYPE: case MessageSysFlag.TRANSACTION_COMMIT_TYPE: //更新消息队列偏移量 CommitLog.this.topicQueueTable.put(key, ++queueOffset); break; default: break;}
代码:CommitLog#putMessage
//释放锁putMessageLock.unlock();//刷盘handleDiskFlush(result, putMessageResult, msg);//执行HA主从同步handleHA(result, putMessageResult, msg);
存储文件
- commitLog:消息存储目录
- config:运行期间一些配置信息
- consumerqueue:消息消费队列存储目录
- index:消息索引文件存储目录
- abort:如果存在改文件寿命Broker非正常关闭
- checkpoint:文件检查点,存储CommitLog文件最后一次刷盘时间戳、consumerquueue最后一次刷盘时间,index索引文件最后一次刷盘时间戳。
存储文件内存映射
RocketMQ通过使用内存映射文件提高IO访问性能,无论是CommitLog、ConsumerQueue还是IndexFile,单个文件都被设计为固定长度,如果一个文件写满以后再创建一个新文件,文件名就为该文件第一条消息对应的全局物理偏移量。
1)MappedFileQueue
String storePath;//存储目录int mappedFileSize;// 单个文件大小CopyOnWriteArrayList<MappedFile> mappedFiles;//MappedFile文件集合AllocateMappedFileService allocateMappedFileService;//创建MapFile服务类long flushedWhere = 0;//当前刷盘指针long committedWhere = 0;//当前数据提交指针,内存中ByteBuffer当前的写指针,该值大于等于flushWhere
- 根据存储时间查询MappedFile
public MappedFile getMappedFileByTime(final long timestamp) { Object[] mfs = this.copyMappedFiles(0); if (null == mfs) return null;//遍历MappedFile文件数组 for (int i = 0; i < mfs.length; i++) { MappedFile mappedFile = (MappedFile) mfs[i]; //MappedFile文件的最后修改时间大于指定时间戳则返回该文件 if (mappedFile.getLastModifiedTimestamp() >= timestamp) { return mappedFile; } } return (MappedFile) mfs[mfs.length - 1];}
- 根据消息偏移量offset查找MappedFile
public MappedFile findMappedFileByOffset(final long offset, final boolean returnFirstOnNotFound) { try { //获得第一个MappedFile文件 MappedFile firstMappedFile = this.getFirstMappedFile(); //获得最后一个MappedFile文件 MappedFile lastMappedFile = this.getLastMappedFile(); //第一个文件和最后一个文件均不为空,则进行处理 if (firstMappedFile != null && lastMappedFile != null) { if (offset < firstMappedFile.getFileFromOffset() || offset >= lastMappedFile.getFileFromOffset() + this.mappedFileSize) { } else { //获得文件索引 int index = (int) ((offset / this.mappedFileSize) - (firstMappedFile.getFileFromOffset() / this.mappedFileSize)); MappedFile targetFile = null; try { //根据索引返回目标文件 targetFile = this.mappedFiles.get(index); } catch (Exception ignored) { } if (targetFile != null && offset >= targetFile.getFileFromOffset() && offset < targetFile.getFileFromOffset() + this.mappedFileSize) { return targetFile; } for (MappedFile tmpMappedFile : this.mappedFiles) { if (offset >= tmpMappedFile.getFileFromOffset() && offset < tmpMappedFile.getFileFromOffset() + this.mappedFileSize) { return tmpMappedFile; } } } if (returnFirstOnNotFound) { return firstMappedFile; } } } catch (Exception e) { log.error("findMappedFileByOffset Exception", e); } return null;}
- 获取存储文件最小偏移量
public long getMinOffset() { if (!this.mappedFiles.isEmpty()) { try { return this.mappedFiles.get(0).getFileFromOffset(); } catch (IndexOutOfBoundsException e) { //continue; } catch (Exception e) { log.error("getMinOffset has exception.", e); } } return -1;}
- 获取存储文件最大偏移量
public long getMaxOffset() { MappedFile mappedFile = getLastMappedFile(); if (mappedFile != null) { return mappedFile.getFileFromOffset() + mappedFile.getReadPosition(); } return 0;}
- 返回存储文件当前写指针
public long getMaxWrotePosition() { MappedFile mappedFile = getLastMappedFile(); if (mappedFile != null) { return mappedFile.getFileFromOffset() + mappedFile.getWrotePosition(); } return 0;}
2)MappedFile
int OS_PAGE_SIZE = 1024 * 4;//操作系统每页大小,默认4KAtomicLong TOTAL_MAPPED_VIRTUAL_MEMORY = new AtomicLong(0);//当前JVM实例中MappedFile虚拟内存AtomicInteger TOTAL_MAPPED_FILES = new AtomicInteger(0);//当前JVM实例中MappedFile对象个数AtomicInteger wrotePosition = new AtomicInteger(0);//当前文件的写指针AtomicInteger committedPosition = new AtomicInteger(0);//当前文件的提交指针AtomicInteger flushedPosition = new AtomicInteger(0);//刷写到磁盘指针int fileSize;//文件大小FileChannel fileChannel;//文件通道ByteBuffer writeBuffer = null;//堆外内存ByteBufferTransientStorePool transientStorePool = null;//堆外内存池String fileName;//文件名称long fileFromOffset;//该文件的处理偏移量File file;//物理文件MappedByteBuffer mappedByteBuffer;//物理文件对应的内存映射Buffervolatile long storeTimestamp = 0;//文件最后一次内容写入时间boolean firstCreateInQueue = false;//是否是MappedFileQueue队列中第一个文件
MappedFile初始化
- 未开启
transientStorePoolEnable
。transientStorePoolEnable=true
为true
表示数据先存储到堆外内存,然后通过Commit
线程将数据提交到内存映射Buffer中,再通过Flush
线程将内存映射Buffer
中数据持久化磁盘。
private void init(final String fileName, final int fileSize) throws IOException { this.fileName = fileName; this.fileSize = fileSize; this.file = new File(fileName); this.fileFromOffset = Long.parseLong(this.file.getName()); boolean ok = false; ensureDirOK(this.file.getParent()); try { this.fileChannel = new RandomAccessFile(this.file,"rw").getChannel(); this.mappedByteBuffer = this.fileChannel.map(MapMode.READ_WRITE, 0, fileSize); TOTAL_MAPPED_VIRTUAL_MEMORY.addAndGet(fileSize); TOTAL_MAPPED_FILES.incrementAndGet(); ok = true; } catch (FileNotFoundException e) { log.error("create file channel" + this.fileName +" Failed.", e); throw e; } catch (IOException e) { log.error("map file" + this.fileName +" Failed.", e); throw e; } finally { if (!ok && this.fileChannel != null) { this.fileChannel.close(); } }}
开启transientStorePoolEnable
public void init(final String fileName, final int fileSize, final TransientStorePool transientStorePool) throws IOException { init(fileName, fileSize); this.writeBuffer = transientStorePool.borrowBuffer();//初始化writeBuffer this.transientStorePool = transientStorePool;}
MappedFile提交
提交数据到FileChannel,commitLeastPages为本次提交最小的页数,如果待提交数据不满commitLeastPages,则不执行本次提交操作。如果writeBuffer如果为空,直接返回writePosition指针,无需执行commit操作,表名commit操作主体是writeBuffer。
public int commit(final int commitLeastPages) { if (writeBuffer == null) { //no need to commit data to file channel, so just regard wrotePosition as committedPosition. return this.wrotePosition.get(); } //判断是否满足提交条件 if (this.isAbleToCommit(commitLeastPages)) { if (this.hold()) { commit0(commitLeastPages); this.release(); } else { log.warn("in commit, hold failed, commit offset =" + this.committedPosition.get()); } } // 所有数据提交后,清空缓冲区 if (writeBuffer != null && this.transientStorePool != null && this.fileSize == this.committedPosition.get()) { this.transientStorePool.returnBuffer(writeBuffer); this.writeBuffer = null; } return this.committedPosition.get();}
MappedFile#isAbleToCommit
判断是否执行commit操作,如果文件已满返回true;如果commitLeastpages大于0,则比较writePosition与上一次提交的指针commitPosition的差值,除以OS_PAGE_SIZE得到当前脏页的数量,如果大于commitLeastPages则返回true,如果commitLeastpages小于0表示只要存在脏页就提交。
protected boolean isAbleToCommit(final int commitLeastPages) { //已经刷盘指针 int flush = this.committedPosition.get(); //文件写指针 int write = this.wrotePosition.get();//写满刷盘 if (this.isFull()) { return true; } if (commitLeastPages > 0) { //文件内容达到commitLeastPages页数,则刷盘 return ((write / OS_PAGE_SIZE) - (flush / OS_PAGE_SIZE)) >= commitLeastPages; } return write > flush;}
MappedFile#commit0
具体提交的实现,首先创建WriteBuffer区共享缓存区,然后将新创建的position回退到上一次提交的位置(commitPosition),设置limit为wrotePosition(当前最大有效数据指针),然后把commitPosition到wrotePosition的数据写入到FileChannel中,然后更新committedPosition指针为wrotePosition。commit的作用就是将MappedFile的writeBuffer中数据提交到文件通道FileChannel中。
protected void commit0(final int commitLeastPages) { //写指针 int writePos = this.wrotePosition.get(); //上次提交指针 int lastCommittedPosition = this.committedPosition.get(); if (writePos - this.committedPosition.get() > 0) { try { //复制共享内存区域 ByteBuffer byteBuffer = writeBuffer.slice(); //设置提交位置是上次提交位置 byteBuffer.position(lastCommittedPosition); //最大提交数量 byteBuffer.limit(writePos); //设置fileChannel位置为上次提交位置 this.fileChannel.position(lastCommittedPosition); //将lastCommittedPosition到writePos的数据复制到FileChannel中 this.fileChannel.write(byteBuffer); //重置提交位置 this.committedPosition.set(writePos); } catch (Throwable e) { log.error("Error occurred when commit data to FileChannel.", e); } }}
MappedFile#flush
刷写磁盘,直接调用MappedByteBuffer或fileChannel的force方法将内存中的数据持久化到磁盘,那么flushedPosition应该等于MappedByteBuffer中的写指针;如果writeBuffer不为空,则flushPosition应该等于上一次的commit指针;因为上一次提交的数据就是进入到MappedByteBuffer中的数据;如果writeBuffer为空,数据时直接进入到MappedByteBuffer,wrotePosition代表的是MappedByteBuffer中的指针,故设置flushPosition为wrotePosition。
public int flush(final int flushLeastPages) { //数据达到刷盘条件 if (this.isAbleToFlush(flushLeastPages)) { //加锁,同步刷盘 if (this.hold()) { //获得读指针 int value = getReadPosition(); try { //数据从writeBuffer提交数据到fileChannel再刷新到磁盘 if (writeBuffer != null || this.fileChannel.position() != 0) { this.fileChannel.force(false); } else { //从mmap刷新数据到磁盘 this.mappedByteBuffer.force(); } } catch (Throwable e) { log.error("Error occurred when force data to disk.", e); }//更新刷盘位置 this.flushedPosition.set(value); this.release(); } else { log.warn("in flush, hold failed, flush offset =" + this.flushedPosition.get()); this.flushedPosition.set(getReadPosition()); } } return this.getFlushedPosition();}
MappedFile#getReadPosition
获取当前文件最大可读指针。如果writeBuffer为空,则直接返回当前的写指针;如果writeBuffer不为空,则返回上一次提交的指针。在MappedFile设置中,只有提交了的数据(写入到MappedByteBuffer或FileChannel中的数据)才是安全的数据
public int getReadPosition() { //如果writeBuffer为空,刷盘的位置就是应该等于上次commit的位置,如果为空则为mmap的写指针 return this.writeBuffer == null ? this.wrotePosition.get() : this.committedPosition.get();}
MappedFile#selectMappedBuffer
查找pos到当前最大可读之间的数据,由于在整个写入期间都未曾改MappedByteBuffer的指针,如果mappedByteBuffer.slice()方法返回的共享缓存区空间为整个MappedFile,然后通过设置ByteBuffer的position为待查找的值,读取字节长度当前可读最大长度,最终返回的ByteBuffer的limit为size。整个共享缓存区的容量为(MappedFile#fileSize-pos)。故在操作SelectMappedBufferResult不能对包含在里面的ByteBuffer调用filp方法。
public SelectMappedBufferResult selectMappedBuffer(int pos) { //获得最大可读指针 int readPosition = getReadPosition(); //pos小于最大可读指针,并且大于0 if (pos < readPosition && pos >= 0) { if (this.hold()) { //复制mappedByteBuffer读共享区 ByteBuffer byteBuffer = this.mappedByteBuffer.slice(); //设置读指针位置 byteBuffer.position(pos); //获得可读范围 int size = readPosition - pos; //设置最大刻度范围 ByteBuffer byteBufferNew = byteBuffer.slice(); byteBufferNew.limit(size); return new SelectMappedBufferResult(this.fileFromOffset + pos, byteBufferNew, size, this); } } return null;}
MappedFile#shutdown
MappedFile文件销毁的实现方法为public boolean destory(long intervalForcibly),intervalForcibly表示拒绝被销毁的最大存活时间。
public void shutdown(final long intervalForcibly) { if (this.available) { //关闭MapedFile this.available = false; //设置当前关闭时间戳 this.firstShutdownTimestamp = System.currentTimeMillis(); //释放资源 this.release(); } else if (this.getRefCount() > 0) { if ((System.currentTimeMillis() - this.firstShutdownTimestamp) >= intervalForcibly) { this.refCount.set(-1000 - this.getRefCount()); this.release(); } }}
3)TransientStorePool
短暂的存储池。RocketMQ单独创建一个MappedByteBuffer内存缓存池,用来临时存储数据,数据先写入该内存映射中,然后由commit线程定时将数据从该内存复制到与目标物理文件对应的内存映射中。RocketMQ引入该机制主要的原因是提供一种内存锁定,将当前堆外内存一直锁定在内存中,避免被进程将内存交换到磁盘。
private final int poolSize;//availableBuffers个数private final int fileSize;//每隔ByteBuffer大小private final Deque<ByteBuffer> availableBuffers;//ByteBuffer容器。双端队列
初始化
public void init() { //创建poolSize个堆外内存 for (int i = 0; i < poolSize; i++) { ByteBuffer byteBuffer = ByteBuffer.allocateDirect(fileSize); final long address = ((DirectBuffer) byteBuffer).address(); Pointer pointer = new Pointer(address); //使用com.sun.jna.Library类库将该批内存锁定,避免被置换到交换区,提高存储性能 LibC.INSTANCE.mlock(pointer, new NativeLong(fileSize)); availableBuffers.offer(byteBuffer); }}
实时更新消息消费队列与索引文件
消息消费队文件、消息属性索引文件都是基于CommitLog文件构建的,当消息生产者提交的消息存储在CommitLog文件中,ConsumerQueue、IndexFile需要及时更新,否则消息无法及时被消费,根据消息属性查找消息也会出现较大延迟。RocketMQ通过开启一个线程ReputMessageService来准实时转发CommitLog文件更新事件,相应的任务处理器根据转发的消息及时更新ConsumerQueue、IndexFile文件。
代码:DefaultMessageStore:start
//设置CommitLog内存中最大偏移量this.reputMessageService.setReputFromOffset(maxPhysicalPosInLogicQueue);//启动this.reputMessageService.start();
代码:DefaultMessageStore:run
public void run() { DefaultMessageStore.log.info(this.getServiceName() +" service started");//每隔1毫秒就继续尝试推送消息到消息消费队列和索引文件 while (!this.isStopped()) { try { Thread.sleep(1); this.doReput(); } catch (Exception e) { DefaultMessageStore.log.warn(this.getServiceName() +" service has exception.", e); } } DefaultMessageStore.log.info(this.getServiceName() +" service end");}
代码:DefaultMessageStore:deReput
//从result中循环遍历消息,一次读一条,创建DispatherRequest对象。for (int readSize = 0; readSize < result.getSize() && doNext; ) {DispatchRequest dispatchRequest = DefaultMessageStore.this.commitLog.checkMessageAndReturnSize(result.getByteBuffer(), false, false);int size = dispatchRequest.getBufferSize() == -1 ? dispatchRequest.getMsgSize() : dispatchRequest.getBufferSize();if (dispatchRequest.isSuccess()) { if (size > 0) { DefaultMessageStore.this.doDispatch(dispatchRequest); } }}
DispatchRequest
String topic; //消息主题名称int queueId; //消息队列IDlong commitLogOffset;//消息物理偏移量int msgSize;//消息长度long tagsCode;//消息过滤tag hashCodelong storeTimestamp;//消息存储时间戳long consumeQueueOffset;//消息队列偏移量String keys;//消息索引keyboolean success;//是否成功解析到完整的消息String uniqKey;//消息唯一键int sysFlag;//消息系统标记long preparedTransactionOffset;//消息预处理事务偏移量Map<String, String> propertiesMap;//消息属性byte[] bitMap;//位图
1)转发到ConsumerQueue
class CommitLogDispatcherBuildConsumeQueue implements CommitLogDispatcher { @Override public void dispatch(DispatchRequest request) { final int tranType = MessageSysFlag.getTransactionValue(request.getSysFlag()); switch (tranType) { case MessageSysFlag.TRANSACTION_NOT_TYPE: case MessageSysFlag.TRANSACTION_COMMIT_TYPE: //消息分发 DefaultMessageStore.this.putMessagePositionInfo(request); break; case MessageSysFlag.TRANSACTION_PREPARED_TYPE: case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE: break; } }}
代码:DefaultMessageStore#putMessagePositionInfo
public void putMessagePositionInfo(DispatchRequest dispatchRequest) { //获得消费队列 ConsumeQueue cq = this.findConsumeQueue(dispatchRequest.getTopic(), dispatchRequest.getQueueId()); //消费队列分发消息 cq.putMessagePositionInfoWrapper(dispatchRequest);}
代码:DefaultMessageStore#putMessagePositionInfo
//依次将消息偏移量、消息长度、tag写入到ByteBuffer中this.byteBufferIndex.flip();this.byteBufferIndex.limit(CQ_STORE_UNIT_SIZE);this.byteBufferIndex.putLong(offset);this.byteBufferIndex.putInt(size);this.byteBufferIndex.putLong(tagsCode);//获得内存映射文件MappedFile mappedFile = this.mappedFileQueue.getLastMappedFile(expectLogicOffset);if (mappedFile != null) { //将消息追加到内存映射文件,异步输盘 return mappedFile.appendMessage(this.byteBufferIndex.array());}
2)转发到Index
class CommitLogDispatcherBuildIndex implements CommitLogDispatcher { @Override public void dispatch(DispatchRequest request) { if (DefaultMessageStore.this.messageStoreConfig.isMessageIndexEnable()) { DefaultMessageStore.this.indexService.buildIndex(request); } }}
代码:DefaultMessageStore#buildIndex
public void buildIndex(DispatchRequest req) { //获得索引文件 IndexFile indexFile = retryGetAndCreateIndexFile(); if (indexFile != null) { //获得文件最大物理偏移量 long endPhyOffset = indexFile.getEndPhyOffset(); DispatchRequest msg = req; String topic = msg.getTopic(); String keys = msg.getKeys(); //如果该消息的物理偏移量小于索引文件中的最大物理偏移量,则说明是重复数据,忽略本次索引构建 if (msg.getCommitLogOffset() < endPhyOffset) { return; } final int tranType = MessageSysFlag.getTransactionValue(msg.getSysFlag()); switch (tranType) { case MessageSysFlag.TRANSACTION_NOT_TYPE: case MessageSysFlag.TRANSACTION_PREPARED_TYPE: case MessageSysFlag.TRANSACTION_COMMIT_TYPE: break; case MessageSysFlag.TRANSACTION_ROLLBACK_TYPE: return; } //如果消息ID不为空,则添加到Hash索引中 if (req.getUniqKey() != null) { indexFile = putKey(indexFile, msg, buildKey(topic, req.getUniqKey())); if (indexFile == null) { return; } }//构建索引key,RocketMQ支持为同一个消息建立多个索引,多个索引键空格隔开. if (keys != null && keys.length() > 0) { String[] keyset = keys.split(MessageConst.KEY_SEPARATOR); for (int i = 0; i < keyset.length; i++) { String key = keyset[i]; if (key.length() > 0) { indexFile = putKey(indexFile, msg, buildKey(topic, key)); if (indexFile == null) { return; } } } } } else { log.error("build index error, stop building index"); }}
消息队列和索引文件恢复
由于RocketMQ存储首先将消息全量存储在CommitLog文件中,然后异步生成转发任务更新ConsumerQueue和Index文件。如果消息成功存储到CommitLog文件中,转发任务未成功执行,此时消息服务器Broker由于某个愿意宕机,导致CommitLog、ConsumerQueue、IndexFile文件数据不一致。如果不加以人工修复的话,会有一部分消息即便在CommitLog中文件中存在,但由于没有转发到ConsumerQueue,这部分消息将永远复发被消费者消费。
1)存储文件加载
代码:DefaultMessageStore#load
判断上一次是否异常退出。实现机制是Broker在启动时创建abort文件,在退出时通过JVM钩子函数删除abort文件。如果下次启动时存在abort文件。说明Broker时异常退出的,CommitLog与ConsumerQueue数据有可能不一致,需要进行修复。
//判断临时文件是否存在boolean lastExitOK = !this.isTempFileExist();//根据临时文件判断当前Broker是否异常退出private boolean isTempFileExist() { String fileName = StorePathConfigHelper .getAbortFile(this.messageStoreConfig.getStorePathRootDir()); File file = new File(fileName); return file.exists();}
代码:DefaultMessageStore#load
//加载延时队列if (null != scheduleMessageService) { result = result && this.scheduleMessageService.load();}// 加载CommitLog文件result = result && this.commitLog.load();// 加载消费队列文件result = result && this.loadConsumeQueue();if (result) {//加载存储监测点,监测点主要记录CommitLog文件、ConsumerQueue文件、Index索引文件的刷盘点 this.storeCheckpoint =new StoreCheckpoint(StorePathConfigHelper.getStoreCheckpoint(this.messageStoreConfig.getStorePathRootDir()));//加载index文件 this.indexService.load(lastExitOK);//根据Broker是否异常退出,执行不同的恢复策略 this.recover(lastExitOK);}
代码:MappedFileQueue#load
加载CommitLog到映射文件
//指向CommitLog文件目录File dir = new File(this.storePath);//获得文件数组File[] files = dir.listFiles();if (files != null) { // 文件排序 Arrays.sort(files); //遍历文件 for (File file : files) {//如果文件大小和配置文件不一致,退出 if (file.length() != this.mappedFileSize) { return false; } try { //创建映射文件 MappedFile mappedFile = new MappedFile(file.getPath(), mappedFileSize); mappedFile.setWrotePosition(this.mappedFileSize); mappedFile.setFlushedPosition(this.mappedFileSize); mappedFile.setCommittedPosition(this.mappedFileSize); //将映射文件添加到队列 this.mappedFiles.add(mappedFile); log.info("load" + file.getPath() +" OK"); } catch (IOException e) { log.error("load file" + file +" error", e); return false; } }}return true;
代码:DefaultMessageStore#loadConsumeQueue
加载消息消费队列
//执行消费队列目录File dirLogic = new File(StorePathConfigHelper.getStorePathConsumeQueue(this.messageStoreConfig.getStorePathRootDir()));//遍历消费队列目录File[] fileTopicList = dirLogic.listFiles();if (fileTopicList != null) { for (File fileTopic : fileTopicList) { //获得子目录名称,即topic名称 String topic = fileTopic.getName();//遍历子目录下的消费队列文件 File[] fileQueueIdList = fileTopic.listFiles(); if (fileQueueIdList != null) { //遍历文件 for (File fileQueueId : fileQueueIdList) { //文件名称即队列ID int queueId; try { queueId = Integer.parseInt(fileQueueId.getName()); } catch (NumberFormatException e) { continue; } //创建消费队列并加载到内存 ConsumeQueue logic = new ConsumeQueue( topic, queueId, StorePathConfigHelper.getStorePathConsumeQueue(this.messageStoreConfig.getStorePathRootDir()), this.getMessageStoreConfig().getMapedFileSizeConsumeQueue(), this); this.putConsumeQueue(topic, queueId, logic); if (!logic.load()) { return false; } } } }}log.info("load logics queue all over, OK");return true;
代码:IndexService#load
加载索引文件
public boolean load(final boolean lastExitOK) { //索引文件目录 File dir = new File(this.storePath); //遍历索引文件 File[] files = dir.listFiles(); if (files != null) { //文件排序 Arrays.sort(files); //遍历文件 for (File file : files) { try { //加载索引文件 IndexFile f = new IndexFile(file.getPath(), this.hashSlotNum, this.indexNum, 0, 0); f.load(); if (!lastExitOK) { //索引文件上次的刷盘时间小于该索引文件的消息时间戳,该文件将立即删除 if (f.getEndTimestamp() > this.defaultMessageStore.getStoreCheckpoint() .getIndexMsgTimestamp()) { f.destroy(0); continue; } }//将索引文件添加到队列 log.info("load index file OK," + f.getFileName()); this.indexFileList.add(f); } catch (IOException e) { log.error("load file {} error", file, e); return false; } catch (NumberFormatException e) { log.error("load file {} error", file, e); } } } return true;}
代码:DefaultMessageStore#recover
文件恢复,根据Broker是否正常退出执行不同的恢复策略
private void recover(final boolean lastExitOK) { //获得最大的物理便宜消费队列 long maxPhyOffsetOfConsumeQueue = this.recoverConsumeQueue(); if (lastExitOK) { //正常恢复 this.commitLog.recoverNormally(maxPhyOffsetOfConsumeQueue); } else { //异常恢复 this.commitLog.recoverAbnormally(maxPhyOffsetOfConsumeQueue); }//在CommitLog中保存每个消息消费队列当前的存储逻辑偏移量 this.recoverTopicQueueTable();}
代码:DefaultMessageStore#recoverTopicQueueTable
恢复ConsumerQueue后,将在CommitLog实例中保存每隔消息队列当前的存储逻辑偏移量,这也是消息中不仅存储主题、消息队列ID、还存储了消息队列的关键所在。
public void recoverTopicQueueTable() { HashMap<String, Long> table = new HashMap<String, Long>(1024); //CommitLog最小偏移量 long minPhyOffset = this.commitLog.getMinOffset(); //遍历消费队列,将消费队列保存在CommitLog中 for (ConcurrentMap<Integer, ConsumeQueue> maps : this.consumeQueueTable.values()) { for (ConsumeQueue logic : maps.values()) { String key = logic.getTopic() +"-" + logic.getQueueId(); table.put(key, logic.getMaxOffsetInQueue()); logic.correctMinOffset(minPhyOffset); } } this.commitLog.setTopicQueueTable(table);}
2)正常恢复
代码:CommitLog#recoverNormally
public void recoverNormally(long maxPhyOffsetOfConsumeQueue) { final List<MappedFile> mappedFiles = this.mappedFileQueue.getMappedFiles(); if (!mappedFiles.isEmpty()) { //Broker正常停止再重启时,从倒数第三个开始恢复,如果不足3个文件,则从第一个文件开始恢复。 int index = mappedFiles.size() - 3; if (index < 0) index = 0; MappedFile mappedFile = mappedFiles.get(index); ByteBuffer byteBuffer = mappedFile.sliceByteBuffer(); long processOffset = mappedFile.getFileFromOffset(); //代表当前已校验通过的offset long mappedFileOffset = 0; while (true) { //查找消息 DispatchRequest dispatchRequest = this.checkMessageAndReturnSize(byteBuffer, checkCRCOnRecover); //消息长度 int size = dispatchRequest.getMsgSize(); //查找结果为true,并且消息长度大于0,表示消息正确.mappedFileOffset向前移动本消息长度 if (dispatchRequest.isSuccess() && size > 0) { mappedFileOffset += size; }//如果查找结果为true且消息长度等于0,表示已到该文件末尾,如果还有下一个文件,则重置processOffset和MappedFileOffset重复查找下一个文件,否则跳出循环。 else if (dispatchRequest.isSuccess() && size == 0) { index++; if (index >= mappedFiles.size()) { // Current branch can not happen break; } else { //取出每个文件 mappedFile = mappedFiles.get(index); byteBuffer = mappedFile.sliceByteBuffer(); processOffset = mappedFile.getFileFromOffset(); mappedFileOffset = 0; } } // 查找结果为false,表明该文件未填满所有消息,跳出循环,结束循环 else if (!dispatchRequest.isSuccess()) { log.info("recover physics file end," + mappedFile.getFileName()); break; } }//更新MappedFileQueue的flushedWhere和committedWhere指针 processOffset += mappedFileOffset; this.mappedFileQueue.setFlushedWhere(processOffset); this.mappedFileQueue.setCommittedWhere(processOffset); //删除offset之后的所有文件 this.mappedFileQueue.truncateDirtyFiles(processOffset); if (maxPhyOffsetOfConsumeQueue >= processOffset) { this.defaultMessageStore.truncateDirtyLogicFiles(processOffset); } } else { this.mappedFileQueue.setFlushedWhere(0); this.mappedFileQueue.setCommittedWhere(0); this.defaultMessageStore.destroyLogics(); }}
代码:MappedFileQueue#truncateDirtyFiles
public void truncateDirtyFiles(long offset) { List<MappedFile> willRemoveFiles = new ArrayList<MappedFile>();//遍历目录下文件 for (MappedFile file : this.mappedFiles) { //文件尾部的偏移量 long fileTailOffset = file.getFileFromOffset() + this.mappedFileSize; //文件尾部的偏移量大于offset if (fileTailOffset > offset) { //offset大于文件的起始偏移量 if (offset >= file.getFileFromOffset()) { //更新wrotePosition、committedPosition、flushedPosistion file.setWrotePosition((int) (offset % this.mappedFileSize)); file.setCommittedPosition((int) (offset % this.mappedFileSize)); file.setFlushedPosition((int) (offset % this.mappedFileSize)); } else { //offset小于文件的起始偏移量,说明该文件是有效文件后面创建的,释放mappedFile占用内存,删除文件 file.destroy(1000); willRemoveFiles.add(file); } } } this.deleteExpiredFile(willRemoveFiles);}
3)异常恢复
Broker异常停止文件恢复的实现为CommitLog#recoverAbnormally。异常文件恢复步骤与正常停止文件恢复流程基本相同,其主要差别有两个。首先,正常停止默认从倒数第三个文件开始进行恢复,而异常停止则需要从最后一个文件往前走,找到第一个消息存储正常的文件。其次,如果CommitLog目录没有消息文件,如果消息消费队列目录下存在文件,则需要销毁。
代码:CommitLog#recoverAbnormally
if (!mappedFiles.isEmpty()) { // Looking beginning to recover from which file int index = mappedFiles.size() - 1; MappedFile mappedFile = null; for (; index >= 0; index--) { mappedFile = mappedFiles.get(index); //判断消息文件是否是一个正确的文件 if (this.isMappedFileMatchedRecover(mappedFile)) { log.info("recover from this mapped file" + mappedFile.getFileName()); break; } }//根据索引取出mappedFile文件 if (index < 0) { index = 0; mappedFile = mappedFiles.get(index); } //...验证消息的合法性,并将消息转发到消息消费队列和索引文件 }else{ //未找到mappedFile,重置flushWhere、committedWhere都为0,销毁消息队列文件 this.mappedFileQueue.setFlushedWhere(0); this.mappedFileQueue.setCommittedWhere(0); this.defaultMessageStore.destroyLogics();}
刷盘机制
RocketMQ的存储是基于JDK NIO的内存映射机制(MappedByteBuffer)的,消息存储首先将消息追加到内存,再根据配置的刷盘策略在不同时间进行刷写磁盘。
同步刷盘
消息追加到内存后,立即将数据刷写到磁盘文件
代码:CommitLog#handleDiskFlush
//刷盘服务final GroupCommitService service = (GroupCommitService) this.flushCommitLogService;if (messageExt.isWaitStoreMsgOK()) { //封装刷盘请求 GroupCommitRequest request = new GroupCommitRequest(result.getWroteOffset() + result.getWroteBytes()); //提交刷盘请求 service.putRequest(request); //线程阻塞5秒,等待刷盘结束 boolean flushOK = request.waitForFlush(this.defaultMessageStore.getMessageStoreConfig().getSyncFlushTimeout()); if (!flushOK) { putMessageResult.setPutMessageStatus(PutMessageStatus.FLUSH_DISK_TIMEOUT); }
GroupCommitRequest
long nextOffset;//刷盘点偏移量CountDownLatch countDownLatch = new CountDownLatch(1);//倒计树锁存器volatile boolean flushOK = false;//刷盘结果;默认为false
代码:GroupCommitService#run
public void run() { CommitLog.log.info(this.getServiceName() +" service started"); while (!this.isStopped()) { try { //线程等待10ms this.waitForRunning(10); //执行提交 this.doCommit(); } catch (Exception e) { CommitLog.log.warn(this.getServiceName() +" service has exception.", e); } }...}
代码:GroupCommitService#doCommit
private void doCommit() { //加锁 synchronized (this.requestsRead) { if (!this.requestsRead.isEmpty()) { //遍历requestsRead for (GroupCommitRequest req : this.requestsRead) { // There may be a message in the next file, so a maximum of // two times the flush boolean flushOK = false; for (int i = 0; i < 2 && !flushOK; i++) { flushOK = CommitLog.this.mappedFileQueue.getFlushedWhere() >= req.getNextOffset();//刷盘 if (!flushOK) { CommitLog.this.mappedFileQueue.flush(0); } }//唤醒发送消息客户端 req.wakeupCustomer(flushOK); } //更新刷盘监测点 long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp(); if (storeTimestamp > 0) { CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp); } this.requestsRead.clear(); } else { // Because of individual messages is set to not sync flush, it // will come to this process CommitLog.this.mappedFileQueue.flush(0); } }}
异步刷盘
在消息追加到内存后,立即返回给消息发送端。如果开启transientStorePoolEnable,RocketMQ会单独申请一个与目标物理文件(commitLog)同样大小的堆外内存,该堆外内存将使用内存锁定,确保不会被置换到虚拟内存中去,消息首先追加到堆外内存,然后提交到物理文件的内存映射中,然后刷写到磁盘。如果未开启transientStorePoolEnable,消息直接追加到物理文件直接映射文件中,然后刷写到磁盘中。
开启transientStorePoolEnable后异步刷盘步骤:
- 将消息直接追加到ByteBuffer(堆外内存)
- CommitRealTimeService线程每隔200ms将ByteBuffer新追加内容提交到MappedByteBuffer中
- MappedByteBuffer在内存中追加提交的内容,wrotePosition指针向后移动
- commit操作成功返回,将committedPosition位置恢复
- FlushRealTimeService线程默认每500ms将MappedByteBuffer中新追加的内存刷写到磁盘
代码:CommitLog$CommitRealTimeService#run
提交线程工作机制
//间隔时间,默认200msint interval = CommitLog.this.defaultMessageStore.getMessageStoreConfig().getCommitIntervalCommitLog();//一次提交的至少页数int commitDataLeastPages = CommitLog.this.defaultMessageStore.getMessageStoreConfig().getCommitCommitLogLeastPages();//两次真实提交的最大间隔,默认200msint commitDataThoroughInterval =CommitLog.this.defaultMessageStore.getMessageStoreConfig().getCommitCommitLogThoroughInterval();//上次提交间隔超过commitDataThoroughInterval,则忽略提交commitDataThoroughInterval参数,直接提交long begin = System.currentTimeMillis();if (begin >= (this.lastCommitTimestamp + commitDataThoroughInterval)) { this.lastCommitTimestamp = begin; commitDataLeastPages = 0;}//执行提交操作,将待提交数据提交到物理文件的内存映射区boolean result = CommitLog.this.mappedFileQueue.commit(commitDataLeastPages);long end = System.currentTimeMillis();if (!result) { this.lastCommitTimestamp = end; // result = false means some data committed. //now wake up flush thread. //唤醒刷盘线程 flushCommitLogService.wakeup();}if (end - begin > 500) { log.info("Commit data to file costs {} ms", end - begin);}this.waitForRunning(interval);
代码:CommitLog$FlushRealTimeService#run
刷盘线程工作机制
//表示await方法等待,默认falseboolean flushCommitLogTimed = CommitLog.this.defaultMessageStore.getMessageStoreConfig().isFlushCommitLogTimed();//线程执行时间间隔int interval = CommitLog.this.defaultMessageStore.getMessageStoreConfig().getFlushIntervalCommitLog();//一次刷写任务至少包含页数int flushPhysicQueueLeastPages = CommitLog.this.defaultMessageStore.getMessageStoreConfig().getFlushCommitLogLeastPages();//两次真实刷写任务最大间隔int flushPhysicQueueThoroughInterval =CommitLog.this.defaultMessageStore.getMessageStoreConfig().getFlushCommitLogThoroughInterval();...//距离上次提交间隔超过flushPhysicQueueThoroughInterval,则本次刷盘任务将忽略flushPhysicQueueLeastPages,直接提交long currentTimeMillis = System.currentTimeMillis();if (currentTimeMillis >= (this.lastFlushTimestamp + flushPhysicQueueThoroughInterval)) { this.lastFlushTimestamp = currentTimeMillis; flushPhysicQueueLeastPages = 0; printFlushProgress = (printTimes++ % 10) == 0;}...//执行一次刷盘前,先等待指定时间间隔if (flushCommitLogTimed) { Thread.sleep(interval);} else { this.waitForRunning(interval);}...long begin = System.currentTimeMillis();//刷写磁盘CommitLog.this.mappedFileQueue.flush(flushPhysicQueueLeastPages);long storeTimestamp = CommitLog.this.mappedFileQueue.getStoreTimestamp();if (storeTimestamp > 0) {//更新存储监测点文件的时间戳CommitLog.this.defaultMessageStore.getStoreCheckpoint().setPhysicMsgTimestamp(storeTimestamp);
过期文件删除机制
由于RocketMQ操作CommitLog、ConsumerQueue文件是基于内存映射机制并在启动的时候回加载CommitLog、ConsumerQueue目录下的所有文件,为了避免内存与磁盘的浪费,不可能将消息永久存储在消息服务器上,所以要引入一种机制来删除已过期的文件。RocketMQ顺序写CommitLog、ConsumerQueue文件,所有写操作全部落在最后一个CommitLog或者ConsumerQueue文件上,之前的文件在下一个文件创建后将不会再被更新。RocketMQ清除过期文件的方法时:如果当前文件在在一定时间间隔内没有再次被消费,则认为是过期文件,可以被删除,RocketMQ不会关注这个文件上的消息是否全部被消费。默认每个文件的过期时间为72小时,通过在Broker配置文件中设置fileReservedTime来改变过期时间,单位为小时。
代码:DefaultMessageStore#addScheduleTask
private void addScheduleTask() {//每隔10s调度一次清除文件 this.scheduledExecutorService.scheduleAtFixedRate(new Runnable() { @Override public void run() { DefaultMessageStore.this.cleanFilesPeriodically(); } }, 1000 * 60, this.messageStoreConfig.getCleanResourceInterval(), TimeUnit.MILLISECONDS);...}
代码:DefaultMessageStore#cleanFilesPeriodically
private void cleanFilesPeriodically() { //清除存储文件 this.cleanCommitLogService.run(); //清除消息消费队列文件 this.cleanConsumeQueueService.run();}
代码:DefaultMessageStore#deleteExpiredFiles
private void deleteExpiredFiles() { //删除的数量 int deleteCount = 0; //文件保留的时间 long fileReservedTime = DefaultMessageStore.this.getMessageStoreConfig().getFileReservedTime(); //删除物理文件的间隔 int deletePhysicFilesInterval = DefaultMessageStore.this.getMessageStoreConfig().getDeleteCommitLogFilesInterval(); //线程被占用,第一次拒绝删除后能保留的最大时间,超过该时间,文件将被强制删除 int destroyMapedFileIntervalForcibly = DefaultMessageStore.this.getMessageStoreConfig().getDestroyMapedFileIntervalForcibly();boolean timeup = this.isTimeToDelete();boolean spacefull = this.isSpaceToDelete();boolean manualDelete = this.manualDeleteFileSeveralTimes > 0;if (timeup || spacefull || manualDelete) {...执行删除逻辑}else{ ...无作为}
删除文件操作的条件
- 指定删除文件的时间点,RocketMQ通过deleteWhen设置一天的固定时间执行一次删除过期文件操作,默认4点
- 磁盘空间如果不充足,删除过期文件
- 预留,手工触发。
代码:CleanCommitLogService#isSpaceToDelete
当磁盘空间不足时执行删除过期文件
private boolean isSpaceToDelete() { //磁盘分区的最大使用量 double ratio = DefaultMessageStore.this.getMessageStoreConfig().getDiskMaxUsedSpaceRatio() / 100.0;//是否需要立即执行删除过期文件操作 cleanImmediately = false; { String storePathPhysic = DefaultMessageStore.this.getMessageStoreConfig().getStorePathCommitLog(); //当前CommitLog目录所在的磁盘分区的磁盘使用率 double physicRatio = UtilAll.getDiskPartitionSpaceUsedPercent(storePathPhysic); //diskSpaceWarningLevelRatio:磁盘使用率警告阈值,默认0.90 if (physicRatio > diskSpaceWarningLevelRatio) { boolean diskok = DefaultMessageStore.this.runningFlags.getAndMakeDiskFull(); if (diskok) { DefaultMessageStore.log.error("physic disk maybe full soon" + physicRatio +", so mark disk full"); }//diskSpaceCleanForciblyRatio:强制清除阈值,默认0.85 cleanImmediately = true; } else if (physicRatio > diskSpaceCleanForciblyRatio) { cleanImmediately = true; } else { boolean diskok = DefaultMessageStore.this.runningFlags.getAndMakeDiskOK(); if (!diskok) { DefaultMessageStore.log.info("physic disk space OK" + physicRatio +", so mark disk ok"); } } if (physicRatio < 0 || physicRatio > ratio) { DefaultMessageStore.log.info("physic disk maybe full soon, so reclaim space," + physicRatio); return true; }}
代码:MappedFileQueue#deleteExpiredFileByTime
执行文件销毁和删除
for (int i = 0; i < mfsLength; i++) { //遍历每隔文件 MappedFile mappedFile = (MappedFile) mfs[i]; //计算文件存活时间 long liveMaxTimestamp = mappedFile.getLastModifiedTimestamp() + expiredTime; //如果超过72小时,执行文件删除 if (System.currentTimeMillis() >= liveMaxTimestamp || cleanImmediately) { if (mappedFile.destroy(intervalForcibly)) { files.add(mappedFile); deleteCount++; if (files.size() >= DELETE_FILES_BATCH_MAX) { break; } if (deleteFilesInterval > 0 && (i + 1) < mfsLength) { try { Thread.sleep(deleteFilesInterval); } catch (InterruptedException e) { } } } else { break; } } else { //avoid deleting files in the middle break; }}
小结
RocketMQ的存储文件包括消息文件(Commitlog)、消息消费队列文件(ConsumerQueue)、Hash索引文件(IndexFile)、监测点文件(checkPoint)、abort(关闭异常文件)。单个消息存储文件、消息消费队列文件、Hash索引文件长度固定以便使用内存映射机制进行文件的读写操作。RocketMQ组织文件以文件的起始偏移量来命令文件,这样根据偏移量能快速定位到真实的物理文件。RocketMQ基于内存映射文件机制提供了同步刷盘和异步刷盘两种机制,异步刷盘是指在消息存储时先追加到内存映射文件,然后启动专门的刷盘线程定时将内存中的文件数据刷写到磁盘。
CommitLog,消息存储文件,RocketMQ为了保证消息发送的高吞吐量,采用单一文件存储所有主题消息,保证消息存储是完全的顺序写,但这样给文件读取带来了不便,为此RocketMQ为了方便消息消费构建了消息消费队列文件,基于主题与队列进行组织,同时RocketMQ为消息实现了Hash索引,可以为消息设置索引键,根据所以能够快速从CommitLog文件中检索消息。
当消息达到CommitLog后,会通过ReputMessageService线程接近实时地将消息转发给消息消费队列文件与索引文件。为了安全起见,RocketMQ引入abort文件,记录Broker的停机是否是正常关闭还是异常关闭,在重启Broker时为了保证CommitLog文件,消息消费队列文件与Hash索引文件的正确性,分别采用不同策略来恢复文件。
RocketMQ不会永久存储消息文件、消息消费队列文件,而是启动文件过期机制并在磁盘空间不足或者默认凌晨4点删除过期文件,文件保存72小时并且在删除文件时并不会判断该消息文件上的消息是否被消费。
Consumer
消息消费概述
消息消费以组的模式开展,一个消费组内可以包含多个消费者,每一个消费者组可订阅多个主题,消费组之间有ff式和广播模式两种消费模式。集群模式,主题下的同一条消息只允许被其中一个消费者消费。广播模式,主题下的同一条消息,将被集群内的所有消费者消费一次。消息服务器与消费者之间的消息传递也有两种模式:推模式、拉模式。所谓的拉模式,是消费端主动拉起拉消息请求,而推模式是消息达到消息服务器后,推送给消息消费者。RocketMQ消息推模式的实现基于拉模式,在拉模式上包装一层,一个拉取任务完成后开始下一个拉取任务。
集群模式下,多个消费者如何对消息队列进行负载呢?消息队列负载机制遵循一个通用思想:一个消息队列同一个时间只允许被一个消费者消费,一个消费者可以消费多个消息队列。
RocketMQ支持局部顺序消息消费,也就是保证同一个消息队列上的消息顺序消费。不支持消息全局顺序消费,如果要实现某一个主题的全局顺序消费,可以将该主题的队列数设置为1,牺牲高可用性。
消息消费初探
消息推送模式
消息消费重要方法
void sendMessageBack(final MessageExt msg, final int delayLevel, final String brokerName):发送消息确认Set<MessageQueue> fetchSubscribeMessageQueues(final String topic) :获取消费者对主题分配了那些消息队列void registerMessageListener(final MessageListenerConcurrently messageListener):注册并发事件监听器void registerMessageListener(final MessageListenerOrderly messageListener):注册顺序消息事件监听器void subscribe(final String topic, final String subExpression):基于主题订阅消息,消息过滤使用表达式void subscribe(final String topic, final String fullClassName,final String filterClassSource):基于主题订阅消息,消息过滤使用类模式void subscribe(final String topic, final MessageSelector selector) :订阅消息,并指定队列选择器void unsubscribe(final String topic):取消消息订阅
DefaultMQPushConsumer
//消费者组private String consumerGroup;//消息消费模式private MessageModel messageModel = MessageModel.CLUSTERING;//指定消费开始偏移量(最大偏移量、最小偏移量、启动时间戳)开始消费private ConsumeFromWhere consumeFromWhere = ConsumeFromWhere.CONSUME_FROM_LAST_OFFSET;//集群模式下的消息队列负载策略private AllocateMessageQueueStrategy allocateMessageQueueStrategy;//订阅信息private Map<String , String > subscription = new HashMap<String, String>();//消息业务监听器private MessageListener messageListener;//消息消费进度存储器private OffsetStore offsetStore;//消费者最小线程数量private int consumeThreadMin = 20;//消费者最大线程数量private int consumeThreadMax = 20;//并发消息消费时处理队列最大跨度private int consumeConcurrentlyMaxSpan = 2000;//每1000次流控后打印流控日志private int pullThresholdForQueue = 1000;//推模式下任务间隔时间private long pullInterval = 0;//推模式下任务拉取的条数,默认32条private int pullBatchSize = 32;//每次传入MessageListener#consumerMessage中消息的数量private int consumeMessageBatchMaxSize = 1;//是否每次拉取消息都订阅消息private boolean postSubscriptionWhenPull = false;//消息重试次数,-1代表16次private int maxReconsumeTimes = -1;//消息消费超时时间private long consumeTimeout = 15;
消费者启动流程
代码:DefaultMQPushConsumerImpl#start
public synchronized void start() throws MQClientException { switch (this.serviceState) { case CREATE_JUST: this.defaultMQPushConsumer.getMessageModel(), this.defaultMQPushConsumer.isUnitMode()); this.serviceState = ServiceState.START_FAILED;//检查消息者是否合法 this.checkConfig();//构建主题订阅信息 this.copySubscription();//设置消费者客户端实例名称为进程ID if (this.defaultMQPushConsumer.getMessageModel() == MessageModel.CLUSTERING) { this.defaultMQPushConsumer.changeInstanceNameToPID(); }//创建MQClient实例 this.mQClientFactory = MQClientManager.getInstance().getAndCreateMQClientInstance(this.defaultMQPushConsumer, this.rpcHook);//构建rebalanceImpl this.rebalanceImpl.setConsumerGroup(this.defaultMQPushConsumer.getConsumerGroup()); this.rebalanceImpl.setMessageModel(this.defaultMQPushConsumer.getMessageModel()); this.rebalanceImpl.setAllocateMessageQueueStrategy(this.defaultMQPushConsumer.getAllocateMessageQueueStrategy()); this.rebalanceImpl.setmQClientFactory(this.mQClientFactor this.pullAPIWrapper = new PullAPIWrapper( mQClientFactory, this.defaultMQPushConsumer.getConsumerGroup(), isUnitMode()); this.pullAPIWrapper.registerFilterMessageHook(filterMessageHookLis if (this.defaultMQPushConsumer.getOffsetStore() != null) { this.offsetStore = this.defaultMQPushConsumer.getOffsetStore(); } else { switch (this.defaultMQPushConsumer.getMessageModel()) { case BROADCASTING: //消息消费广播模式,将消费进度保存在本地 this.offsetStore = new LocalFileOffsetStore(this.mQClientFactory, this.defaultMQPushConsumer.getConsumerGroup()); break; case CLUSTERING://消息消费集群模式,将消费进度保存在远端Broker this.offsetStore = new RemoteBrokerOffsetStore(this.mQClientFactory, this.defaultMQPushConsumer.getConsumerGroup()); break; default: break; } this.defaultMQPushConsumer.setOffsetStore(this.offsetStore); } this.offsetStore.load //创建顺序消息消费服务 if (this.getMessageListenerInner() instanceof MessageListenerOrderly) { this.consumeOrderly = true; this.consumeMessageService = new ConsumeMessageOrderlyService(this, (MessageListenerOrderly) this.getMessageListenerInner()); //创建并发消息消费服务 } else if (this.getMessageListenerInner() instanceof MessageListenerConcurrently) { this.consumeOrderly = false; this.consumeMessageService = new ConsumeMessageConcurrentlyService(this, (MessageListenerConcurrently) this.getMessageListenerInner()); } //消息消费服务启动 this.consumeMessageService.start(); //注册消费者实例 boolean registerOK = mQClientFactory.registerConsumer(this.defaultMQPushConsumer.getConsumerGroup(), this); if (!registerOK) { this.serviceState = ServiceState.CREATE_JUST; this.consumeMessageService.shutdown(); throw new MQClientException("The consumer group[" + this.defaultMQPushConsumer.getConsumerGroup() +"] has been created before, specify another name please." + FAQUrl.suggestTodo(FAQUrl.GROUP_NAME_DUPLICATE_URL), null); //启动消费者客户端 mQClientFactory.start(); log.info("the consumer [{}] start OK.", this.defaultMQPushConsumer.getConsumerGroup()); this.serviceState = ServiceState.RUNNING; break; case RUNNING: case START_FAILED: case SHUTDOWN_ALREADY: throw new MQClientException("The PushConsumer service state not OK, maybe started once," + this.serviceState + FAQUrl.suggestTodo(FAQUrl.CLIENT_SERVICE_NOT_OK), null); default: break; } this.updateTopicSubscribeInfoWhenSubscriptionChanged(); this.mQClientFactory.checkClientInBroker(); this.mQClientFactory.sendHeartbeatToAllBrokerWithLock(); this.mQClientFactory.rebalanceImmediately();}
消息拉取
消息消费模式有两种模式:广播模式与集群模式。广播模式比较简单,每一个消费者需要拉取订阅主题下所有队列的消息。本文重点讲解集群模式。在集群模式下,同一个消费者组内有多个消息消费者,同一个主题存在多个消费队列,消费者通过负载均衡的方式消费消息。
消息队列负载均衡,通常的作法是一个消息队列在同一个时间只允许被一个消费消费者消费,一个消息消费者可以同时消费多个消息队列。
1)PullMessageService实现机制
从MQClientInstance的启动流程中可以看出,RocketMQ使用一个单独的线程PullMessageService来负责消息的拉取。
代码:PullMessageService#run
public void run() { log.info(this.getServiceName() +" service started");//循环拉取消息 while (!this.isStopped()) { try { //从请求队列中获取拉取消息请求 PullRequest pullRequest = this.pullRequestQueue.take(); //拉取消息 this.pullMessage(pullRequest); } catch (InterruptedException ignored) { } catch (Exception e) { log.error("Pull Message Service Run Method exception", e); } } log.info(this.getServiceName() +" service end");}
PullRequest
private String consumerGroup;//消费者组private MessageQueue messageQueue;//待拉取消息队列private ProcessQueue processQueue;//消息处理队列private long nextOffset;//待拉取的MessageQueue偏移量private boolean lockedFirst = false;//是否被锁定
代码:PullMessageService#pullMessage
private void pullMessage(final PullRequest pullRequest) { //获得消费者实例 final MQConsumerInner consumer = this.mQClientFactory.selectConsumer(pullRequest.getConsumerGroup()); if (consumer != null) { //强转为推送模式消费者 DefaultMQPushConsumerImpl impl = (DefaultMQPushConsumerImpl) consumer; //推送消息 impl.pullMessage(pullRequest); } else { log.warn("No matched consumer for the PullRequest {}, drop it", pullRequest); }}
2)ProcessQueue实现机制
ProcessQueue是MessageQueue在消费端的重现、快照。PullMessageService从消息服务器默认每次拉取32条消息,按照消息的队列偏移量顺序存放在ProcessQueue中,PullMessageService然后将消息提交到消费者消费线程池,消息成功消费后从ProcessQueue中移除。
属性
//消息容器private final TreeMap<Long, MessageExt> msgTreeMap = new TreeMap<Long, MessageExt>();//读写锁private final ReadWriteLock lockTreeMap = new ReentrantReadWriteLock();//ProcessQueue总消息树private final AtomicLong msgCount = new AtomicLong();//ProcessQueue队列最大偏移量private volatile long queueOffsetMax = 0L;//当前ProcessQueue是否被丢弃private volatile boolean dropped = false;//上一次拉取时间戳private volatile long lastPullTimestamp = System.currentTimeMillis();//上一次消费时间戳private volatile long lastConsumeTimestamp = System.currentTimeMillis();
方法
//移除消费超时消息public void cleanExpiredMsg(DefaultMQPushConsumer pushConsumer)//添加消息public boolean putMessage(final List<MessageExt> msgs)//获取消息最大间隔public long getMaxSpan()//移除消息public long removeMessage(final List<MessageExt> msgs)//将consumingMsgOrderlyTreeMap中消息重新放在msgTreeMap,并清空consumingMsgOrderlyTreeMap public void rollback() //将consumingMsgOrderlyTreeMap消息清除,表示成功处理该批消息public long commit()//重新处理该批消息public void makeMessageToCosumeAgain(List<MessageExt> msgs) //从processQueue中取出batchSize条消息public List<MessageExt> takeMessags(final int batchSize)
3)消息拉取基本流程
客户端发起拉取请求
代码:DefaultMQPushConsumerImpl#pullMessage
public void pullMessage(final PullRequest pullRequest) { //从pullRequest获得ProcessQueue final ProcessQueue processQueue = pullRequest.getProcessQueue(); //如果处理队列被丢弃,直接返回 if (processQueue.isDropped()) { log.info("the pull request[{}] is dropped.", pullRequest.toString()); return; }//如果处理队列未被丢弃,更新时间戳 pullRequest.getProcessQueue().setLastPullTimestamp(System.currentTimeMillis()); try { this.makeSureStateOK(); } catch (MQClientException e) { log.warn("pullMessage exception, consumer state not ok", e); this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION); return; }//如果处理队列被挂起,延迟1s后再执行 if (this.isPause()) { log.warn("consumer was paused, execute pull request later. instanceName={}, group={}", this.defaultMQPushConsumer.getInstanceName(), this.defaultMQPushConsumer.getConsumerGroup()); this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_SUSPEND); return; }//获得最大待处理消息数量long cachedMessageCount = processQueue.getMsgCount().get(); //获得最大待处理消息大小long cachedMessageSizeInMiB = processQueue.getMsgSize().get() / (1024 * 1024);//从数量进行流控if (cachedMessageCount > this.defaultMQPushConsumer.getPullThresholdForQueue()) { this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL); if ((queueFlowControlTimes++ % 1000) == 0) { log.warn("the cached message count exceeds the threshold {}, so do flow control, minOffset={}, maxOffset={}, count={}, size={} MiB, pullRequest={}, flowControlTimes={}", this.defaultMQPushConsumer.getPullThresholdForQueue(), processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), cachedMessageCount, cachedMessageSizeInMiB, pullRequest, queueFlowControlTimes); } return;}//从消息大小进行流控if (cachedMessageSizeInMiB > this.defaultMQPushConsumer.getPullThresholdSizeForQueue()) { this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_FLOW_CONTROL); if ((queueFlowControlTimes++ % 1000) == 0) { log.warn("the cached message size exceeds the threshold {} MiB, so do flow control, minOffset={}, maxOffset={}, count={}, size={} MiB, pullRequest={}, flowControlTimes={}", this.defaultMQPushConsumer.getPullThresholdSizeForQueue(), processQueue.getMsgTreeMap().firstKey(), processQueue.getMsgTreeMap().lastKey(), cachedMessageCount, cachedMessageSizeInMiB, pullRequest, queueFlowControlTimes); } return; } //获得订阅信息 final SubscriptionData subscriptionData = this.rebalanceImpl.getSubscriptionInner().get(pullRequest.getMessageQueue().getTopic()); if (null == subscriptionData) { this.executePullRequestLater(pullRequest, PULL_TIME_DELAY_MILLS_WHEN_EXCEPTION); log.warn("find the consumer's subscription failed, {}", pullRequest); return;//与服务端交互,获取消息 this.pullAPIWrapper.pullKernelImpl( pullRequest.getMessageQueue(), subExpression, subscriptionData.getExpressionType(), subscriptionData.getSubVersion(), pullRequest.getNextOffset(), this.defaultMQPushConsumer.getPullBatchSize(), sysFlag, commitOffsetValue, BROKER_SUSPEND_MAX_TIME_MILLIS, CONSUMER_TIMEOUT_MILLIS_WHEN_SUSPEND, CommunicationMode.ASYNC, pullCallback); }
消息服务端Broker组装消息
代码:PullMessageProcessor#processRequest
//构建消息过滤器MessageFilter messageFilter;if (this.brokerController.getBrokerConfig().isFilterSupportRetry()) { messageFilter = new ExpressionForRetryMessageFilter(subscriptionData, consumerFilterData, this.brokerController.getConsumerFilterManager());} else { messageFilter = new ExpressionMessageFilter(subscriptionData, consumerFilterData, this.brokerController.getConsumerFilterManager());}//调用MessageStore.getMessage查找消息final GetMessageResult getMessageResult = this.brokerController.getMessageStore().getMessage( requestHeader.getConsumerGroup(), //消费组名称 requestHeader.getTopic(),//主题名称 requestHeader.getQueueId(), //队列ID requestHeader.getQueueOffset(), //待拉取偏移量 requestHeader.getMaxMsgNums(), //最大拉取消息条数 messageFilter//消息过滤器 );
代码:DefaultMessageStore#getMessage
GetMessageStatus status = GetMessageStatus.NO_MESSAGE_IN_QUEUE;long nextBeginOffset = offset;//查找下一次队列偏移量long minOffset = 0;//当前消息队列最小偏移量long maxOffset = 0;//当前消息队列最大偏移量GetMessageResult getResult = new GetMessageResult();final long maxOffsetPy = this.commitLog.getMaxOffset();//当前commitLog最大偏移量//根据主题名称和队列编号获取消息消费队列ConsumeQueue consumeQueue = findConsumeQueue(topic, queueId);...minOffset = consumeQueue.getMinOffsetInQueue();maxOffset = consumeQueue.getMaxOffsetInQueue();//消息偏移量异常情况校对下一次拉取偏移量if (maxOffset == 0) {//表示当前消息队列中没有消息 status = GetMessageStatus.NO_MESSAGE_IN_QUEUE; nextBeginOffset = nextOffsetCorrection(offset, 0);} else if (offset < minOffset) {//待拉取消息的偏移量小于队列的其实偏移量 status = GetMessageStatus.OFFSET_TOO_SMALL; nextBeginOffset = nextOffsetCorrection(offset, minOffset);} else if (offset == maxOffset) {//待拉取偏移量为队列最大偏移量 status = GetMessageStatus.OFFSET_OVERFLOW_ONE; nextBeginOffset = nextOffsetCorrection(offset, offset);} else if (offset > maxOffset) {//偏移量越界 status = GetMessageStatus.OFFSET_OVERFLOW_BADLY; if (0 == minOffset) { nextBeginOffset = nextOffsetCorrection(offset, minOffset); } else { nextBeginOffset = nextOffsetCorrection(offset, maxOffset); }}...//根据偏移量从CommitLog中拉取32条消息SelectMappedBufferResult selectResult = this.commitLog.getMessage(offsetPy, sizePy);
代码:PullMessageProcessor#processRequest
//根据拉取结果填充responseHeaderresponse.setRemark(getMessageResult.getStatus().name());responseHeader.setNextBeginOffset(getMessageResult.getNextBeginOffset());responseHeader.setMinOffset(getMessageResult.getMinOffset());responseHeader.setMaxOffset(getMessageResult.getMaxOffset());//判断如果存在主从同步慢,设置下一次拉取任务的ID为主节点switch (this.brokerController.getMessageStoreConfig().getBrokerRole()) { case ASYNC_MASTER: case SYNC_MASTER: break; case SLAVE: if (!this.brokerController.getBrokerConfig().isSlaveReadEnable()) { response.setCode(ResponseCode.PULL_RETRY_IMMEDIATELY); responseHeader.setSuggestWhichBrokerId(MixAll.MASTER_ID); } break;}...//GetMessageResult与Response的Code转换switch (getMessageResult.getStatus()) { case FOUND://成功 response.setCode(ResponseCode.SUCCESS); break; case MESSAGE_WAS_REMOVING://消息存放在下一个commitLog中 response.setCode(ResponseCode.PULL_RETRY_IMMEDIATELY);//消息重试 break; case NO_MATCHED_LOGIC_QUEUE://未找到队列 case NO_MESSAGE_IN_QUEUE://队列中未包含消息 if (0 != requestHeader.getQueueOffset()) { response.setCode(ResponseCode.PULL_OFFSET_MOVED); requestHeader.getQueueOffset(), getMessageResult.getNextBeginOffset(), requestHeader.getTopic(), requestHeader.getQueueId(), requestHeader.getConsumerGroup() ); } else { response.setCode(ResponseCode.PULL_NOT_FOUND); } break; case NO_MATCHED_MESSAGE://未找到消息 response.setCode(ResponseCode.PULL_RETRY_IMMEDIATELY); break; case OFFSET_FOUND_NULL://消息物理偏移量为空 response.setCode(ResponseCode.PULL_NOT_FOUND); break; case OFFSET_OVERFLOW_BADLY://offset越界 response.setCode(ResponseCode.PULL_OFFSET_MOVED); // XXX: warn and notify me log.info("the request offset: {} over flow badly, broker max offset: {}, consumer: {}", requestHeader.getQueueOffset(), getMessageResult.getMaxOffset(), channel.remoteAddress()); break; case OFFSET_OVERFLOW_ONE://offset在队列中未找到 response.setCode(ResponseCode.PULL_NOT_FOUND); break; case OFFSET_TOO_SMALL://offset未在队列中 response.setCode(ResponseCode.PULL_OFFSET_MOVED); requestHeader.getConsumerGroup(), requestHeader.getTopic(), requestHeader.getQueueOffset(), getMessageResult.getMinOffset(), channel.remoteAddress()); break; default: assert false; break;}...//如果CommitLog标记可用,并且当前Broker为主节点,则更新消息消费进度boolean storeOffsetEnable = brokerAllowSuspend;storeOffsetEnable = storeOffsetEnable && hasCommitOffsetFlag;storeOffsetEnable = storeOffsetEnable && this.brokerController.getMessageStoreConfig().getBrokerRole() != BrokerRole.SLAVE;if (storeOffsetEnable) { this.brokerController.getConsumerOffsetManager().commitOffset(RemotingHelper.parseChannelRemoteAddr(channel), requestHeader.getConsumerGroup(), requestHeader.getTopic(), requestHeader.getQueueId(), requestHeader.getCommitOffset());}
消息拉取客户端处理消息
代码:MQClientAPIImpl#processPullResponse
private PullResult processPullResponse( final RemotingCommand response) throws MQBrokerException, RemotingCommandException { PullStatus pullStatus = PullStatus.NO_NEW_MSG; //判断响应结果 switch (response.getCode()) { case ResponseCode.SUCCESS: pullStatus = PullStatus.FOUND; break; case ResponseCode.PULL_NOT_FOUND: pullStatus = PullStatus.NO_NEW_MSG; break; case ResponseCode.PULL_RETRY_IMMEDIATELY: pullStatus = PullStatus.NO_MATCHED_MSG; break; case ResponseCode.PULL_OFFSET_MOVED: pullStatus = PullStatus.OFFSET_ILLEGAL; break; default: throw new MQBrokerException(response.getCode(), response.getRemark()); }//解码响应头 PullMessageResponseHeader responseHeader = (PullMessageResponseHeader) response.decodeCommandCustomHeader(PullMessageResponseHeader.class);//封装PullResultExt返回 return new PullResultExt(pullStatus, responseHeader.getNextBeginOffset(), responseHeader.getMinOffset(), responseHeader.getMaxOffset(), null, responseHeader.getSuggestWhichBrokerId(), response.getBody());}
PullResult类
private final PullStatus pullStatus;//拉取结果private final long nextBeginOffset;//下次拉取偏移量private final long minOffset;//消息队列最小偏移量private final long maxOffset;//消息队列最大偏移量private List<MessageExt> msgFoundList;//拉取的消息列表
代码:DefaultMQPushConsumerImpl$PullCallback#OnSuccess
//将拉取到的消息存入processQueueboolean dispatchToConsume = processQueue.putMessage(pullResult.getMsgFoundList());//将processQueue提交到consumeMessageService中供消费者消费DefaultMQPushConsumerImpl.this.consumeMessageService.submitConsumeRequest( pullResult.getMsgFoundList(), processQueue, pullRequest.getMessageQueue(), dispatchToConsume);//如果pullInterval大于0,则等待pullInterval毫秒后将pullRequest对象放入到PullMessageService中的pullRequestQueue队列中if (DefaultMQPushConsumerImpl.this.defaultMQPushConsumer.getPullInterval() > 0) { DefaultMQPushConsumerImpl.this.executePullRequestLater(pullRequest, DefaultMQPushConsumerImpl.this.defaultMQPushConsumer.getPullInterval());} else { DefaultMQPushConsumerImpl.this.executePullRequestImmediately(pullRequest);}
消息拉取总结
4)消息拉取长轮询机制分析
RocketMQ未真正实现消息推模式,而是消费者主动向消息服务器拉取消息,RocketMQ推模式是循环向消息服务端发起消息拉取请求,如果消息消费者向RocketMQ拉取消息时,消息未到达消费队列时,如果不启用长轮询机制,则会在服务端等待shortPollingTimeMills时间后(挂起)再去判断消息是否已经到达指定消息队列,如果消息仍未到达则提示拉取消息客户端PULL—NOT—FOUND(消息不存在);如果开启长轮询模式,RocketMQ一方面会每隔5s轮询检查一次消息是否可达,同时一有消息达到后立马通知挂起线程再次验证消息是否是自己感兴趣的消息,如果是则从CommitLog文件中提取消息返回给消息拉取客户端,否则直到挂起超时,超时时间由消息拉取方在消息拉取是封装在请求参数中,PUSH模式为15s,PULL模式通过DefaultMQPullConsumer#setBrokerSuspendMaxTimeMillis设置。RocketMQ通过在Broker客户端配置longPollingEnable为true来开启长轮询模式。
代码:PullMessageProcessor#processRequest
//当没有拉取到消息时,通过长轮询方式继续拉取消息case ResponseCode.PULL_NOT_FOUND: if (brokerAllowSuspend && hasSuspendFlag) { long pollingTimeMills = suspendTimeoutMillisLong; if (!this.brokerController.getBrokerConfig().isLongPollingEnable()) { pollingTimeMills = this.brokerController.getBrokerConfig().getShortPollingTimeMills(); } String topic = requestHeader.getTopic(); long offset = requestHeader.getQueueOffset(); int queueId = requestHeader.getQueueId(); //构建拉取请求对象 PullRequest pullRequest = new PullRequest(request, channel, pollingTimeMills, this.brokerController.getMessageStore().now(), offset, subscriptionData, messageFilter); //处理拉取请求 this.brokerController.getPullRequestHoldService().suspendPullRequest(topic, queueId, pullRequest); response = null; break; }
PullRequestHoldService方式实现长轮询
代码:PullRequestHoldService#suspendPullRequest
//将拉取消息请求,放置在ManyPullRequest集合中public void suspendPullRequest(final String topic, final int queueId, final PullRequest pullRequest) { String key = this.buildKey(topic, queueId); ManyPullRequest mpr = this.pullRequestTable.get(key); if (null == mpr) { mpr = new ManyPullRequest(); ManyPullRequest prev = this.pullRequestTable.putIfAbsent(key, mpr); if (prev != null) { mpr = prev; } } mpr.addPullRequest(pullRequest);}
代码:PullRequestHoldService#run
public void run() { log.info("{} service started", this.getServiceName()); while (!this.isStopped()) { try { //如果开启长轮询每隔5秒判断消息是否到达 if (this.brokerController.getBrokerConfig().isLongPollingEnable()) { this.waitForRunning(5 * 1000); } else { //没有开启长轮询,每隔1s再次尝试 this.waitForRunning(this.brokerController.getBrokerConfig().getShortPollingTimeMills()); } long beginLockTimestamp = this.systemClock.now(); this.checkHoldRequest(); long costTime = this.systemClock.now() - beginLockTimestamp; if (costTime > 5 * 1000) { log.info("[NOTIFYME] check hold request cost {} ms.", costTime); } } catch (Throwable e) { log.warn(this.getServiceName() +" service has exception.", e); } } log.info("{} service end", this.getServiceName());}
代码:PullRequestHoldService#checkHoldRequest
//遍历拉取任务private void checkHoldRequest() { for (String key : this.pullRequestTable.keySet()) { String[] kArray = key.split(TOPIC_QUEUEID_SEPARATOR); if (2 == kArray.length) { String topic = kArray[0]; int queueId = Integer.parseInt(kArray[1]); //获得消息偏移量 final long offset = this.brokerController.getMessageStore().getMaxOffsetInQueue(topic, queueId); try { //通知有消息达到 this.notifyMessageArriving(topic, queueId, offset); } catch (Throwable e) { log.error("check hold request failed. topic={}, queueId={}", topic, queueId, e); } } }}
代码:PullRequestHoldService#notifyMessageArriving
//如果拉取消息偏移大于请求偏移量,如果消息匹配调用executeRequestWhenWakeup处理消息if (newestOffset > request.getPullFromThisOffset()) { boolean match = request.getMessageFilter().isMatchedByConsumeQueue(tagsCode, new ConsumeQueueExt.CqExtUnit(tagsCode, msgStoreTime, filterBitMap)); // match by bit map, need eval again when properties is not null. if (match && properties != null) { match = request.getMessageFilter().isMatchedByCommitLog(null, properties); } if (match) { try { this.brokerController.getPullMessageProcessor().executeRequestWhenWakeup(request.getClientChannel(), request.getRequestCommand()); } catch (Throwable e) { log.error("execute request when wakeup failed.", e); } continue; }}//如果过期时间超时,则不继续等待将直接返回给客户端消息未找到if (System.currentTimeMillis() >= (request.getSuspendTimestamp() + request.getTimeoutMillis())) { try { this.brokerController.getPullMessageProcessor().executeRequestWhenWakeup(request.getClientChannel(), request.getRequestCommand()); } catch (Throwable e) { log.error("execute request when wakeup failed.", e); } continue;}
如果开启了长轮询机制,PullRequestHoldService会每隔5s被唤醒去尝试检测是否有新的消息的到来才给客户端响应,或者直到超时才给客户端进行响应,消息实时性比较差,为了避免这种情况,RocketMQ引入另外一种机制:当消息到达时唤醒挂起线程触发一次检查。
DefaultMessageStore$ReputMessageService机制
代码:DefaultMessageStore#start
//长轮询入口this.reputMessageService.setReputFromOffset(maxPhysicalPosInLogicQueue);this.reputMessageService.start();
代码:DefaultMessageStore$ReputMessageService#run
public void run() { DefaultMessageStore.log.info(this.getServiceName() +" service started"); while (!this.isStopped()) { try { Thread.sleep(1); //长轮询核心逻辑代码入口 this.doReput(); } catch (Exception e) { DefaultMessageStore.log.warn(this.getServiceName() +" service has exception.", e); } } DefaultMessageStore.log.info(this.getServiceName() +" service end");}
代码:DefaultMessageStore$ReputMessageService#deReput
//当新消息达到是,进行通知监听器进行处理if (BrokerRole.SLAVE != DefaultMessageStore.this.getMessageStoreConfig().getBrokerRole() && DefaultMessageStore.this.brokerConfig.isLongPollingEnable()) { DefaultMessageStore.this.messageArrivingListener.arriving(dispatchRequest.getTopic(), dispatchRequest.getQueueId(), dispatchRequest.getConsumeQueueOffset() + 1, dispatchRequest.getTagsCode(), dispatchRequest.getStoreTimestamp(), dispatchRequest.getBitMap(), dispatchRequest.getPropertiesMap());}
代码:NotifyMessageArrivingListener#arriving
public void arriving(String topic, int queueId, long logicOffset, long tagsCode, long msgStoreTime, byte[] filterBitMap, Map<String, String> properties) { this.pullRequestHoldService.notifyMessageArriving(topic, queueId, logicOffset, tagsCode, msgStoreTime, filterBitMap, properties);}
消息队列负载与重新分布机制
RocketMQ消息队列重新分配是由RebalanceService线程来实现。一个MQClientInstance持有一个RebalanceService实现,并随着MQClientInstance的启动而启动。
代码:RebalanceService#run
public void run() { log.info(this.getServiceName() +" service started");//RebalanceService线程默认每隔20s执行一次mqClientFactory.doRebalance方法 while (!this.isStopped()) { this.waitForRunning(waitInterval); this.mqClientFactory.doRebalance(); } log.info(this.getServiceName() +" service end");}
代码:MQClientInstance#doRebalance
public void doRebalance() { //MQClientInstance遍历以注册的消费者,对消费者执行doRebalance()方法 for (Map.Entry<String, MQConsumerInner> entry : this.consumerTable.entrySet()) { MQConsumerInner impl = entry.getValue(); if (impl != null) { try { impl.doRebalance(); } catch (Throwable e) { log.error("doRebalance exception", e); } } }}
代码:RebalanceImpl#doRebalance
//遍历订阅消息对每个主题的订阅的队列进行重新负载public void doRebalance(final boolean isOrder) { Map<String, SubscriptionData> subTable = this.getSubscriptionInner(); if (subTable != null) { for (final Map.Entry<String, SubscriptionData> entry : subTable.entrySet()) { final String topic = entry.getKey(); try { this.rebalanceByTopic(topic, isOrder); } catch (Throwable e) { if (!topic.startsWith(MixAll.RETRY_GROUP_TOPIC_PREFIX)) { log.warn("rebalanceByTopic Exception", e); } } } } this.truncateMessageQueueNotMyTopic();}
代码:RebalanceImpl#rebalanceByTopic
//从主题订阅消息缓存表中获取主题的队列信息Set<MessageQueue> mqSet = this.topicSubscribeInfoTable.get(topic);//查找该主题订阅组所有的消费者IDList<String> cidAll = this.mQClientFactory.findConsumerIdList(topic, consumerGroup);//给消费者重新分配队列if (mqSet != null && cidAll != null) { List<MessageQueue> mqAll = new ArrayList<MessageQueue>(); mqAll.addAll(mqSet); Collections.sort(mqAll); Collections.sort(cidAll); AllocateMessageQueueStrategy strategy = this.allocateMessageQueueStrategy; List<MessageQueue> allocateResult = null; try { allocateResult = strategy.allocate( this.consumerGroup, this.mQClientFactory.getClientId(), mqAll, cidAll); } catch (Throwable e) { log.error("AllocateMessageQueueStrategy.allocate Exception. allocateMessageQueueStrategyName={}", strategy.getName(), e); return; }
RocketMQ默认提供5中负载均衡分配算法
AllocateMessageQueueAveragely:平均分配举例:8个队列q1,q2,q3,q4,q5,a6,q7,q8,消费者3个:c1,c2,c3分配如下:c1:q1,q2,q3c2:q4,q5,a6c3:q7,q8AllocateMessageQueueAveragelyByCircle:平均轮询分配举例:8个队列q1,q2,q3,q4,q5,a6,q7,q8,消费者3个:c1,c2,c3分配如下:c1:q1,q4,q7c2:q2,q5,a8c3:q3,q6
注意:消息队列的分配遵循一个消费者可以分配到多个队列,但同一个消息队列只会分配给一个消费者,故如果出现消费者个数大于消息队列数量,则有些消费者无法消费消息。
消息消费过程
PullMessageService负责对消息队列进行消息拉取,从远端服务器拉取消息后将消息存储ProcessQueue消息队列处理队列中,然后调用ConsumeMessageService#submitConsumeRequest方法进行消息消费,使用线程池来消费消息,确保了消息拉取与消息消费的解耦。ConsumeMessageService支持顺序消息和并发消息,核心类图如下:
并发消息消费
代码:ConsumeMessageConcurrentlyService#submitConsumeRequest
//消息批次单次final int consumeBatchSize = this.defaultMQPushConsumer.getConsumeMessageBatchMaxSize();//msgs.size()默认最多为32条。//如果msgs.size()小于consumeBatchSize,则直接将拉取到的消息放入到consumeRequest,然后将consumeRequest提交到消费者线程池中if (msgs.size() <= consumeBatchSize) { ConsumeRequest consumeRequest = new ConsumeRequest(msgs, processQueue, messageQueue); try { this.consumeExecutor.submit(consumeRequest); } catch (RejectedExecutionException e) { this.submitConsumeRequestLater(consumeRequest); }}else{//如果拉取的消息条数大于consumeBatchSize,则对拉取消息进行分页 for (int total = 0; total < msgs.size(); ) { List<MessageExt> msgThis = new ArrayList<MessageExt>(consumeBatchSize); for (int i = 0; i < consumeBatchSize; i++, total++) { if (total < msgs.size()) { msgThis.add(msgs.get(total)); } else { break; } ConsumeRequest consumeRequest = new ConsumeRequest(msgThis, processQueue, messageQueue); try { this.consumeExecutor.submit(consumeRequest); } catch (RejectedExecutionException e) { for (; total < msgs.size(); total++) { msgThis.add(msgs.get(total)); this.submitConsumeRequestLater(consumeRequest); } }}
代码:ConsumeMessageConcurrentlyService$ConsumeRequest#run
//检查processQueue的dropped,如果为true,则停止该队列消费。if (this.processQueue.isDropped()) { log.info("the message queue not be able to consume, because it's dropped. group={} {}", ConsumeMessageConcurrentlyService.this.consumerGroup, this.messageQueue); return;}...//执行消息处理的钩子函数if (ConsumeMessageConcurrentlyService.this.defaultMQPushConsumerImpl.hasHook()) { consumeMessageContext = new ConsumeMessageContext(); consumeMessageContext.setNamespace(defaultMQPushConsumer.getNamespace()); consumeMessageContext.setConsumerGroup(defaultMQPushConsumer.getConsumerGroup()); consumeMessageContext.setProps(new HashMap<String, String>()); consumeMessageContext.setMq(messageQueue); consumeMessageContext.setMsgList(msgs); consumeMessageContext.setSuccess(false); ConsumeMessageConcurrentlyService.this.defaultMQPushConsumerImpl.executeHookBefore(consumeMessageContext);}...//调用应用程序消息监听器的consumeMessage方法,进入到具体的消息消费业务处理逻辑status = listener.consumeMessage(Collections.unmodifiableList(msgs), context);//执行消息处理后的钩子函数if (ConsumeMessageConcurrentlyService.this.defaultMQPushConsumerImpl.hasHook()) { consumeMessageContext.setStatus(status.toString()); consumeMessageContext.setSuccess(ConsumeConcurrentlyStatus.CONSUME_SUCCESS == status); ConsumeMessageConcurrentlyService.this.defaultMQPushConsumerImpl.executeHookAfter(consumeMessageContext);}
定时消息机制
定时消息是消息发送到Broker后,并不立即被消费者消费而是要等到特定的时间后才能被消费,RocketMQ并不支持任意的时间精度,如果要支持任意时间精度定时调度,不可避免地需要在Broker层做消息排序,再加上持久化方面的考量,将不可避免的带来巨大的性能消耗,所以RocketMQ只支持特定级别的延迟消息。消息延迟级别在Broker端通过messageDelayLevel配置,默认为“1s 5s 10s 30s 1m 2m 3m 4m 5m 6m 7m 8m 9m 10m 20m 30m 1h 2h”,delayLevel=1表示延迟消息1s,delayLevel=2表示延迟5s,依次类推。
RocketMQ定时消息实现类为ScheduleMessageService,该类在DefaultMessageStore中创建。通过在DefaultMessageStore中调用load方法加载该类并调用start方法启动。
代码:ScheduleMessageService#load
//加载延迟消息消费进度的加载与delayLevelTable的构造。延迟消息的进度默认存储路径为/store/config/delayOffset.jsonpublic boolean load() { boolean result = super.load(); result = result && this.parseDelayLevel(); return result;}
代码:ScheduleMessageService#start
//遍历延迟队列创建定时任务,遍历延迟级别,根据延迟级别level从offsetTable中获取消费队列的消费进度。如果不存在,则使用0for (Map.Entry<Integer, Long> entry : this.delayLevelTable.entrySet()) { Integer level = entry.getKey(); Long timeDelay = entry.getValue(); Long offset = this.offsetTable.get(level); if (null == offset) { offset = 0L; } if (timeDelay != null) { this.timer.schedule(new DeliverDelayedMessageTimerTask(level, offset), FIRST_DELAY_TIME); }}//每隔10s持久化一次延迟队列的消息消费进度this.timer.scheduleAtFixedRate(new TimerTask() { @Override public void run() { try { if (started.get()) ScheduleMessageService.this.persist(); } catch (Throwable e) { log.error("scheduleAtFixedRate flush exception", e); } }}, 10000, this.defaultMessageStore.getMessageStoreConfig().getFlushDelayOffsetInterval());
调度机制
ScheduleMessageService的start方法启动后,会为每一个延迟级别创建一个调度任务,每一个延迟级别对应SCHEDULE_TOPIC_XXXX主题下的一个消息消费队列。定时调度任务的实现类为DeliverDelayedMessageTimerTask,核心实现方法为executeOnTimeup
代码:ScheduleMessageService$DeliverDelayedMessageTimerTask#executeOnTimeup
//根据队列ID与延迟主题查找消息消费队列ConsumeQueue cq = ScheduleMessageService.this.defaultMessageStore.findConsumeQueue(SCHEDULE_TOPIC, delayLevel2QueueId(delayLevel));...//根据偏移量从消息消费队列中获取当前队列中所有有效的消息SelectMappedBufferResult bufferCQ = cq.getIndexBuffer(this.offset);...//遍历ConsumeQueue,解析消息队列中消息for (; i < bufferCQ.getSize(); i += ConsumeQueue.CQ_STORE_UNIT_SIZE) { long offsetPy = bufferCQ.getByteBuffer().getLong(); int sizePy = bufferCQ.getByteBuffer().getInt(); long tagsCode = bufferCQ.getByteBuffer().getLong(); if (cq.isExtAddr(tagsCode)) { if (cq.getExt(tagsCode, cqExtUnit)) { tagsCode = cqExtUnit.getTagsCode(); } else { //can't find ext content.So re compute tags code. log.error("[BUG] can't find consume queue extend file content!addr={}, offsetPy={}, sizePy={}", tagsCode, offsetPy, sizePy); long msgStoreTime = defaultMessageStore.getCommitLog().pickupStoreTimestamp(offsetPy, sizePy); tagsCode = computeDeliverTimestamp(delayLevel, msgStoreTime); } } long now = System.currentTimeMillis(); long deliverTimestamp = this.correctDeliverTimestamp(now, tagsCode); ... //根据消息偏移量与消息大小,从CommitLog中查找消息. MessageExt msgExt = ScheduleMessageService.this.defaultMessageStore.lookMessageByOffset( offsetPy, sizePy);}
顺序消息
顺序消息实现类是org.apache.rocketmq.client.impl.consumer.ConsumeMessageOrderlyService
代码:ConsumeMessageOrderlyService#start
public void start() { //如果消息模式为集群模式,启动定时任务,默认每隔20s执行一次锁定分配给自己的消息消费队列 if (MessageModel.CLUSTERING.equals(ConsumeMessageOrderlyService.this.defaultMQPushConsumerImpl.messageModel())) { this.scheduledExecutorService.scheduleAtFixedRate(new Runnable() { @Override public void run() { ConsumeMessageOrderlyService.this.lockMQPeriodically(); } }, 1000 * 1, ProcessQueue.REBALANCE_LOCK_INTERVAL, TimeUnit.MILLISECONDS); }}
代码:ConsumeMessageOrderlyService#submitConsumeRequest
//构建消息任务,并提交消费线程池中public void submitConsumeRequest( final List<MessageExt> msgs, final ProcessQueue processQueue, final MessageQueue messageQueue, final boolean dispathToConsume) { if (dispathToConsume) { ConsumeRequest consumeRequest = new ConsumeRequest(processQueue, messageQueue); this.consumeExecutor.submit(consumeRequest); }}
代码:ConsumeMessageOrderlyService$ConsumeRequest#run
//如果消息队列为丢弃,则停止本次消费任务if (this.processQueue.isDropped()) { log.warn("run, the message queue not be able to consume, because it's dropped. {}", this.messageQueue); return;}//从消息队列中获取一个对象。然后消费消息时先申请独占objLock锁。顺序消息一个消息消费队列同一时刻只会被一个消费线程池处理final Object objLock = messageQueueLock.fetchLockObject(this.messageQueue);synchronized (objLock) {...}
小结
RocketMQ消息消费方式分别为集群模式、广播模式。
消息队列负载由RebalanceService线程默认每隔20s进行一次消息队列负载,根据当前消费者组内消费者个数与主题队列数量按照某一种负载算法进行队列分配,分配原则为同一个消费者可以分配多个消息消费队列,同一个消息消费队列同一个时间只会分配给一个消费者。
消息拉取由PullMessageService线程根据RebalanceService线程创建的拉取任务进行拉取,默认每次拉取32条消息,提交给消费者消费线程后继续下一次消息拉取。如果消息消费过慢产生消息堆积会触发消息消费拉取流控。
并发消息消费指消费线程池中的线程可以并发对同一个消息队列的消息进行消费,消费成功后,取出消息队列中最小的消息偏移量作为消息消费进度偏移量存储在于消息消费进度存储文件中,集群模式消息消费进度存储在Broker(消息服务器),广播模式消息消费进度存储在消费者端。
RocketMQ不支持任意精度的定时调度消息,只支持自定义的消息延迟级别,例如1s、2s、5s等,可通过在broker配置文件中设置messageDelayLevel。
顺序消息一般使用集群模式,是指对消息消费者内的线程池中的线程对消息消费队列只能串行消费。并并发消息消费最本质的区别是消息消费时必须成功锁定消息消费队列,在Broker端会存储消息消费队列的锁占用情况。