/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.util.Date;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser;

/**
 * A {@link TrecDocParser} that populates a {@link DocData} from a raw document
 * buffer using the {@code <DATE>}, {@code <SUBJECT>} and {@code <HEADLINE>}
 * elements found in it.
 *
 * <p>NOTE(review): judging by the class name this targets the LA Times part of
 * the TREC collection - confirm against the benchmark documentation.
 */
public class TrecLATimesParser
extends TrecDocParser {
    /** Opening marker of the date element. */
    private static final String DATE = "<DATE>";
    /** Closing marker of the date element. */
    private static final String DATE_END = "</DATE>";
    /** When found inside the date text, everything from its comma onwards is dropped. */
    private static final String DATE_NOISE = "day,";
    /** Opening marker of the preferred title element. */
    private static final String SUBJECT = "<SUBJECT>";
    /** Closing marker of the preferred title element. */
    private static final String SUBJECT_END = "</SUBJECT>";
    /** Opening marker of the fallback title element. */
    private static final String HEADLINE = "<HEADLINE>";
    /** Closing marker of the fallback title element. */
    private static final String HEADLINE_END = "</HEADLINE>";

    /**
     * Fills {@code docData} with the name, date, title and body of one document.
     *
     * <p>The date and title are computed first; only then is {@code docData}
     * cleared and repopulated. The body is the whole of {@code docBuf} passed
     * through {@code stripTags} starting at offset 0.
     *
     * @param docData  the instance to clear, populate and return
     * @param name     the value stored as the document name
     * @param trecSrc  supplies {@code parseDate} for converting the date text
     * @param docBuf   the raw document text
     * @param pathType not used by this parser
     * @return the same {@code docData} instance that was passed in
     * @throws IOException declared by the overridden method
     */
    @Override
    public DocData parse(DocData docData, String name, TrecContentSource trecSrc, StringBuilder docBuf, TrecDocParser.ParsePathType pathType) throws IOException {
        // Same order as before: date first, then title, then mutate docData.
        Date parsedDate = readDate(trecSrc, docBuf);
        String cleanTitle = readTitle(docBuf);

        docData.clear();
        docData.setName(name);
        docData.setDate(parsedDate);
        docData.setTitle(cleanTitle);
        docData.setBody(stripTags(docBuf, 0));
        return docData;
    }

    /**
     * Extracts the {@code <DATE>} element and hands its cleaned text to
     * {@code trecSrc.parseDate}; returns {@code null} when no such element is found.
     */
    private static Date readDate(TrecContentSource trecSrc, StringBuilder docBuf) throws IOException {
        String rawDate = extract(docBuf, DATE, DATE_END, -1, null);
        if (rawDate == null) {
            return null;
        }
        int noiseAt = rawDate.indexOf(DATE_NOISE);
        // Keep the three letters "day" of the marker but drop the comma and the rest.
        // A marker at index 0 is deliberately left alone (strictly-positive check).
        String kept = noiseAt > 0 ? rawDate.substring(0, noiseAt + 3) : rawDate;
        String withoutTags = stripTags(kept, 0);
        return trecSrc.parseDate(withoutTags.trim());
    }

    /**
     * Returns the tag-stripped, trimmed text of {@code <SUBJECT>}, falling back to
     * {@code <HEADLINE>}; returns {@code null} when neither element is found.
     */
    private static String readTitle(StringBuilder docBuf) throws IOException {
        String heading = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
        if (heading == null) {
            heading = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
        }
        return heading == null ? null : stripTags(heading, 0).trim();
    }
}

